// . dumps a diagnostic HTML view of "server"'s active UdpSlots into "p"
// . emits two tables: a per-msgType summary, then one row per active slot
//   sorted oldest-first
// . "isDns" switches the per-slot columns: dns slots show the hostname being
//   resolved plus a tagdb "BAN" link; normal slots show msgType/desc/hostId
// . "fromIp" is the requester's ip, used to pick the most-similar of a
//   host's two ip addresses for the clickable hostId link
// . "pwd" is currently unused (its check is commented out below)
void printUdpTable ( SafeBuf *p, char *title, UdpServer *server ,
		     char *coll, char *pwd , long fromIp , bool isDns ) {
	if ( ! coll ) coll = "main";
	//if ( ! pwd ) pwd = "";
	// time now
	long long now = gettimeofdayInMilliseconds();
	// get # of used nodes
	//long n = server->getTopUsedSlot();
	// store in buffer for sorting
	// NOTE(review): 50000 slots * (8+8) bytes = ~800k of stack here —
	// presumably fine on the main thread's stack, but verify
	long times[50000];//MAX_UDP_SLOTS];
	UdpSlot *slots[50000];//MAX_UDP_SLOTS];
	long nn = 0;
	for ( UdpSlot *s = server->getActiveHead() ; s ; s = s->m_next2 ) {
		if ( nn >= 50000 ) {
			log("admin: Too many udp sockets.");
			break;
		}
		// if empty skip it
		//if ( server->isEmpty ( i ) ) continue;
		// get the UdpSlot
		//UdpSlot *s = server->getUdpSlotNum(i);
		// if data is NULL that's an error
		//if ( ! s ) continue;
		// store it (age in ms of this transaction)
		times[nn] = now - s->m_startTime;
		slots[nn] = s;
		nn++;
	}
	// bubble sort, descending on age: swaps whenever the previous entry
	// is younger than the current one, so the oldest slots print first
 keepSorting:
	// assume no swap will happen
	bool didSwap = false;
	for ( long i = 1 ; i < nn ; i++ ) {
		if ( times[i-1] >= times[i] ) continue;
		long     tmpTime = times[i-1];
		UdpSlot *tmpSlot = slots[i-1];
		times[i-1] = times[i];
		slots[i-1] = slots[i];
		times[i  ] = tmpTime;
		slots[i  ] = tmpSlot;
		didSwap = true;
	}
	if ( didSwap ) goto keepSorting;

	// count how many of each msg we have, split by niceness 0 vs non-0.
	// msg types >= 96 (e.g. 0x8b, 0xfd) are excluded from the summary
	// since the count arrays only cover [0,96)
	long msgCount0[96];
	long msgCount1[96];
	for ( long i = 0; i < 96; i++ ) {
		msgCount0[i] = 0;
		msgCount1[i] = 0;
	}
	for ( long i = 0; i < nn; i++ ) {
		UdpSlot *s = slots[i];
		if ( s->m_msgType >= 96 ) continue;
		if ( s->m_niceness == 0 ) msgCount0[s->m_msgType]++;
		else                      msgCount1[s->m_msgType]++;
	}
	// print the counts
	p->safePrintf ( "<table bgcolor=#d0d0f0 border=1>"
			"<tr><td bgcolor=#c0c0f0 colspan=19>"
			"<center>"
			"<b>%s Summary</b> (%li transactions)"
			"</td></tr>"
			"<tr>"
			"<td><b>niceness</td>"
			"<td><b>msg type</td>"
			"<td><b>total</td>"
			"</tr>",
			title ,
			server->getNumUsedSlots() );
	for ( long i = 0; i < 96; i++ ) {
		if ( msgCount0[i] <= 0 ) continue;
		p->safePrintf("<tr><td>0</td><td>0x%lx</td><td>%li</td></tr>",
			      i, msgCount0[i]);
	}
	for ( long i = 0; i < 96; i++ ) {
		if ( msgCount1[i] <= 0 ) continue;
		p->safePrintf("<tr><td>1</td><td>0x%lx</td><td>%li</td></tr>",
			      i, msgCount1[i]);
	}
	p->safePrintf ( "</table><br>" );

	// extra column headers that differ between the dns and normal tables
	char *dd = "";
	if ( ! isDns )
		dd =	"<td><b>msgType</td>"
			"<td><b>desc</td>"
			"<td><b>hostId</td>";
	else {
		dd =	//"<td><b>dns ip</b></td>"
			"<td><b>hostname</b></td>";
	}
	// table headers for urls current being spiderd
	p->safePrintf ( "<table width=100%% bgcolor=#d0d0f0 border=1>"
			"<tr><td bgcolor=#c0c0f0 colspan=19>"
			"<center>"
			//"<font size=+1>"
			"<b>%s</b> (%li transactions)"
			//"</font>"
			"</td></tr>"
			"<tr>"
			"<td><b>age</td>"
			"<td><b>last read</td>"
			"<td><b>last send</td>"
			"<td><b>timeout</td>"
			"<td><b>ip</td>"
			//"<td><b>port</td>"
			//"<td><b>desc</td>"
			//"<td><b>hostId</td>"
			//"<td><b>nice</td>";
			"%s"
			"<td><b>nice</td>"
			"<td><b>transId</td>"
			"<td><b>called</td>"
			"<td><b>dgrams read</td>"
			"<td><b>dgrams to read</td>"
			"<td><b>acks sent</td>"
			"<td><b>dgrams sent</td>"
			"<td><b>dgrams to send</td>"
			"<td><b>acks read</td>"
			"<td><b>resends</td>"
			"</tr>\n" ,
			title ,
			server->getNumUsedSlots() ,
			dd );
	// now fill in the columns
	for ( long i = 0 ; i < nn ; i++ ) {
		// get from sorted list
		UdpSlot *s = slots[i];
		// set socket state
		//char *st = "ERROR";
		//if ( ! s->isDoneReading() ) st = "reading";
		//if ( ! s->isDoneSending() ) st = "reading";
		// times (ms since start / last read / last send)
		long long elapsed0 = (now - s->m_startTime    ) ;
		long long elapsed1 = (now - s->m_lastReadTime ) ;
		long long elapsed2 = (now - s->m_lastSendTime ) ;
		char e0[32],e1[32], e2[32];
		sprintf ( e0 , "%llims" , elapsed0 );
		sprintf ( e1 , "%llims" , elapsed1 );
		sprintf ( e2 , "%llims" , elapsed2 );
		// a zero timestamp means "never happened", show "--"
		if ( s->m_startTime    == 0LL ) strcpy ( e0 , "--" );
		if ( s->m_lastReadTime == 0LL ) strcpy ( e1 , "--" );
		if ( s->m_lastSendTime == 0LL ) strcpy ( e2 , "--" );
		// bgcolor is lighter for incoming requests
		char *bg = "#c0c0f0";
		// is it incoming? (no callback means we did not initiate it)
		if ( ! s->m_callback ) bg = "#e8e8ff";
		Host *h = g_hostdb.getHost ( s->m_ip , s->m_port );
		char *eip     = "??";
		unsigned short eport = 0 ;
		//long ehostId = -1 ;
		char *ehostId = "-1";
		//char tmpIp [64];
		// print the ip
		char tmpHostId[64];
		if ( h ) {
			// host can have 2 ip addresses, get the one most
			// similar to that of the requester
			eip   = iptoa(g_hostdb.getBestIp ( h , fromIp ));
			//eip   = iptoa(h->m_externalIp) ;
			//eip   = iptoa(h->m_ip) ;
			eport = h->m_externalHttpPort ;
			//ehostId = h->m_hostId ;
			if ( h->m_isProxy )
				sprintf(tmpHostId,"proxy%li",h->m_hostId);
			else
				sprintf(tmpHostId,"%li",h->m_hostId);
			ehostId = tmpHostId;
		}
		// if no corresponding host, it could be a request from an
		// external cluster, so just show the ip
		else {
			sprintf ( tmpHostId , "%s" , iptoa(s->m_ip) );
			ehostId = tmpHostId;
			eip     = tmpHostId;
		}
		// set description of the msg
		long msgType = s->m_msgType;
		char *desc = "";
		char *rbuf = s->m_readBuf;
		char *sbuf = s->m_sendBuf;
		long rbufSize = s->m_readBufSize;
		long sbufSize = s->m_sendBufSize;
		// weInit: we initiated this transaction (we have a callback)
		bool weInit = s->m_callback;
		// "called" column shows whether the handler (incoming) or the
		// callback (outgoing) has been invoked yet
		char calledHandler = s->m_calledHandler;
		if ( weInit ) calledHandler = s->m_calledCallback;
		// pick the buffer holding the request so we can peek at it
		char *buf = NULL;
		long bufSize = 0;
		char tt [ 64 ];
		if ( msgType == 0x00 &&   weInit ) buf = sbuf;
		if ( msgType == 0x00 && ! weInit ) buf = rbuf;
		if ( msgType == 0x01 &&   weInit ) buf = sbuf;
		if ( msgType == 0x01 && ! weInit ) buf = rbuf;
		// . if callback was called this slot's sendbuf can be bogus
		// . i put this here to try to avoid a core dump
		if ( msgType == 0x13 && weInit && ! s->m_calledCallback ) {
			buf = sbuf; bufSize = sbufSize; }
		if ( msgType == 0x13 && ! weInit ) {
			buf = rbuf; bufSize = rbufSize; }
		if ( buf ) {
			// peek the rdbId out of the raw request.
			// NOTE(review): offset 24 assumes the Msg0 request
			// layout (8+4*4+6 header then keys) and buf being at
			// least 25 bytes — confirm against Msg0::getList
			long rdbId = -1;
			if (msgType == 0x01) rdbId = buf[0];
			//else rdbId = buf[8+sizeof(key_t)*2+16];
			else rdbId = buf[24];
			Rdb *rdb = NULL;
			if ( rdbId >= 0 && ! isDns )
				rdb = getRdbFromId ((uint8_t)rdbId );
			char *cmd;
			if ( msgType == 0x01 ) cmd = "add to";
			else                   cmd = "get from";
			tt[0] = ' '; tt[1]='\0';
			if ( rdb ) sprintf ( tt , "%s %s" , cmd,rdb->m_dbname );
			desc = tt;
		}
		// human-readable names for the remaining msg types
		if ( msgType == 0x10 ) desc = "add links";
		if ( msgType == 0x0c ) desc = "getting ip";
		if ( msgType == 0x0d ) desc = "get outlink ips/qualities";
		if ( msgType == 0x11 ) desc = "ping";
		if ( msgType == 0x12 ) desc = "get lock";
		if ( msgType == 0x06 ) desc = "spider lock";
		if ( msgType == 0x04 ) desc = "meta add";
		if ( msgType == 0x13 ) {
			// distinguish robots.txt fetches from page fetches by
			// peeking at the Msg13Request, when it is big enough
			char isRobotsTxt = 1;
			if ( buf &&
			     bufSize >= (long)sizeof(Msg13Request)-(long)MAX_URL_LEN ) {
				Msg13Request *r = (Msg13Request *)buf;
				isRobotsTxt = r->m_isRobotsTxt;
			}
			if ( isRobotsTxt ) desc = "get robots.txt";
			else               desc = "get web page";
		}
		if ( msgType == 0x09 ) desc = "add site";
		if ( msgType == 0x08 ) desc = "get site";
		if ( msgType == 0x8b ) desc = "get catid";
		if ( msgType == 0x34 ) desc = "get load";
		if ( msgType == 0x02 ) desc = "get lists";
		if ( msgType == 0x22 ) desc = "get titlerec";
		if ( msgType == 0x36 ) desc = "get termFreq";
		if ( msgType == 0x20 ) desc = "get summary";
		if ( msgType == 0x2c ) desc = "get address";
		if ( msgType == 0x24 ) desc = "get gigabits";
		if ( msgType == 0x39 ) desc = "get docids";
		if ( msgType == 0x17 ) desc = "cache access";
		if ( msgType == 0x23 ) desc = "get linktext";
		if ( msgType == 0x07 ) desc = "inject";
		if ( msgType == 0x35 ) desc = "merge token";
		if ( msgType == 0x3b ) desc = "get docid score";
		if ( msgType == 0x50 ) desc = "get root quality";
		if ( msgType == 0x25 ) desc = "get link info";
		if ( msgType == 0xfd ) desc = "proxy forward";
		p->safePrintf ( "<tr bgcolor=%s>"
				"<td>%s</td>"   // age
				"<td>%s</td>"   // last read
				"<td>%s</td>"   // last send
				"<td>%li</td>", // timeout
				bg ,
				e0 ,
				e1 ,
				e2 ,
				s->m_timeout );
		// now use the ip for dns and hosts
		p->safePrintf("<td>%s:%lu</td>",
			      iptoa(s->m_ip),(long)s->m_port);
		// red niceness when it was converted (demoted/promoted)
		char *cf1 = "";
		char *cf2 = "";
		if ( s->m_convertedNiceness ) {
			cf1 = "<font color=red>";
			cf2 = "</font>";
		}
		if ( isDns ) {
			//p->safePrintf("<td>%s</td>",iptoa(s->m_ip));
			// for dns slots m_tmpVar holds the hostname being
			// resolved (cast from void*)
			char *hostname = (char *)s->m_tmpVar;
			p->safePrintf("<td><nobr>%s" ,hostname);
			// get the domain from the hostname
			long dlen;
			char *dbuf = ::getDomFast ( hostname,&dlen,false);
			// link to tagdb to manually ban this domain
			p->safePrintf( " <a href=\"/master/tagdb?"
				       "user=admin&"
				       "tagtype0=manualban&"
				       "tagdata0=1&"
				       "u=%s&c=%s\">"
				       "[<font color=red><b>BAN %s</b></font>]"
				       "</nobr></a> " ,
				       dbuf ,
				       coll ,
				       dbuf );
			p->safePrintf("</td>"
				      "<td>%s%li%s</td>",
				      cf1, (long)s->m_niceness, cf2);
		}
		if ( ! isDns )
			//"<td>%s</td>"  // ip
			//"<td>%hu</td>" // port
			// clickable hostId
			//"<td><a href=http://%s:%hu/cgi/15.cgi>%li</a></td>"
			p->safePrintf ( "<td>0x%hhx</td>"          // msgtype
					"<td><nobr>%s</nobr></td>" // desc
					"<td><a href=http://%s:%hu/"
					"master/sockets?"
					"c=%s>%s</a></td>"
					"<td>%s%li%s</td>" ,       // niceness
					s->m_msgType ,
					desc,
					//iptoa(s->m_ip) ,
					//s->m_port ,
					// begin clickable hostId
					eip ,
					eport ,
					coll ,
					ehostId ,
					cf1, (long)s->m_niceness, cf2
					// end clickable hostId
					);
		p->safePrintf ( "<td>%lu</td>"  // transId
				"<td>%i</td>"   // called handler
				"<td>%li</td>"  // dgrams read
				"<td>%li</td>"  // dgrams to read
				"<td>%li</td>"  // acks sent
				"<td>%li</td>"  // dgrams sent
				"<td>%li</td>"  // dgrams to send
				"<td>%li</td>"  // acks read
				"<td>%hhu</td>" // resend count
				"</tr>\n" ,
				s->m_transId,
				calledHandler,
				s->getNumDgramsRead() ,
				s->m_dgramsToRead ,
				s->getNumAcksSent() ,
				s->getNumDgramsSent() ,
				s->m_dgramsToSend ,
				s->getNumAcksRead() ,
				s->m_resendCount );
	}
	// end the table
	p->safePrintf ("</table><br>\n" );
}
void Syncdb::syncStart_r ( bool amThread ) { // turn this off g_process.m_suspendAutoSave = true; char cmd[1024]; // get synchost best ip char *ips = iptoa ( g_hostdb.getAliveIp ( g_hostdb.m_syncHost ) ); // his dir char *dir = g_hostdb.m_syncHost->m_dir; // use Host *me = g_hostdb.m_myHost; // ours char *mydir = me->m_dir; // generic long err; // loop over every rdb and every data and map file in each rdb for ( long i = 0 ; i < RDB_END ; i++ ) { // skip SYNCDB if ( i == RDB_SYNCDB ) continue; // get that rdb Rdb *rdb = getRdbFromId ( i ); // skip if none if ( ! rdb ) continue; // get coll for ( long j = 0 ; j < rdb->getNumBases() ; j++ ) { // get that base RdbBase *base = rdb->getBase(j);//m_bases[j]; if ( ! base ) continue; // get coll char *coll = base->m_coll; // and num long collnum = base->m_collnum; // make the dir sprintf ( cmd , "ssh %s 'mkdir %scoll.%s.%li'", ips,dir,coll,collnum); // excecute log ( LOG_INFO, "sync: %s", cmd ); //int err = my_system_r ( cmd, 3600*24 ); //if ( err != 0 ) goto hadError; // copy the files for ( long k = 0 ; k < base->m_numFiles ; k++ ) { // sleep while dumping. we are in a thread. 
if ( base->isDumping() ) sleep ( 1 ); // get map RdbMap *map = base->m_maps[k]; // copy the map file sprintf ( cmd , "rcp %s %s:%scoll.%s.%li/'", map->getFilename(),ips,dir,coll,collnum); log ( LOG_INFO, "sync: %s", cmd ); // MDW: take out for now //if ( ( err = my_system_r ( cmd, 3600*24 ) ) ) goto hadError; // get the file BigFile *f = base->m_files[k]; // loop over each little part file for ( long m = 0 ; m < f->m_numParts ; m++ ) { // get part file File *p = f->m_files[m]; // copy that sprintf ( cmd , "rcp %s %s:%scoll.%s.%li/'", p->m_filename,ips,dir,coll,collnum); // excecute log ( LOG_INFO, "sync: %s", cmd ); // MDW: take out for now //if ( ( err = my_system_r ( cmd, 3600*24 ) ) ) goto hadError; } } } } // make the dirs sprintf ( cmd , "ssh %s '" "mkdir %s/dict/ ;" "mkdir %s/dict/en/ ;" "mkdir %s/ucdata/ ;" "mkdir %s/.antiword/ ;" "'" , ips, dir, dir, dir, dir ); // excecute log ( LOG_INFO, "sync: %s", cmd ); // MDW: take out for now //if ( ( err = my_system_r ( cmd, 3600*24 ) ) ) goto hadError; // loop over the files in Process.cpp for ( long i = 0 ; i < 99999 ; i++ ) { // null means end if ( ! g_files[i] ) break; sprintf ( cmd , "rcp %s%s %s:%s", mydir,g_files[i],ips,dir); // excecute log ( LOG_INFO, "sync: %s", cmd ); // MDW: take out for now //if ( ( err = my_system_r ( cmd, 3600*24 ) ) ) goto hadError; } // new guy is NOT in sync sprintf ( cmd , "ssh %s 'echo 0 > %sinsync.dat", ips,dir); // excecute log ( LOG_INFO, "sync: %s", cmd ); // MDW: take out for now //if ( ( err = my_system_r ( cmd, 3600*24 ) ) ) goto hadError; // saved files sprintf ( cmd , "rcp %s*-saved.dat %s:%sinsync.dat", mydir,ips,dir); // excecute log ( LOG_INFO, "sync: %s", cmd ); // MDW: take out for now //if ( ( err = my_system_r ( cmd, 3600*24 ) ) ) goto hadError; // completed! return; // hadError: log ( "sync: Call to system(\"%s\") had error %s.",cmd,strerror(err)); g_hostdb.m_syncHost->m_doingSync = 0; g_hostdb.m_syncHost = NULL; return; }
// . THIS Msg0 class must be alloc'd, i.e. not on the stack, etc.
// . if list is stored locally this tries to get it locally
// . otherwise tries to get the list from the network
// . returns false if blocked, true otherwise
// . sets g_errno on error
// . NOTE: i was having problems with queries being cached too long, you
//   see the cache here is a NETWORK cache, so when the machines that owns
//   the list updates it on disk it can't flush our cache... so use a small
//   maxCacheAge of like , 30 seconds or so...
// . NOTE(review): several branches use "char *xx=NULL;*xx=0;" — that is
//   this codebase's deliberate-crash idiom (null deref to force a core
//   dump on a "can't happen" condition), not an accidental bug
bool Msg0::getList ( long long hostId      , // host to ask (-1 if none)
		     long      ip          , // info on hostId
		     short     port        ,
		     long      maxCacheAge , // max cached age in seconds
		     bool      addToCache  , // add net recv'd list to cache?
		     char      rdbId       , // specifies the rdb
		     char     *coll        ,
		     RdbList  *list        ,
		     //key_t   startKey    ,
		     //key_t   endKey      ,
		     char     *startKey    ,
		     char     *endKey      ,
		     long      minRecSizes , // use -1 for no max
		     void     *state       ,
		     void    (* callback)(void *state ),//, RdbList *list ) ,
		     long      niceness    ,
		     bool      doErrorCorrection ,
		     bool      includeTree ,
		     bool      doMerge     ,
		     long      firstHostId ,
		     long      startFileNum ,
		     long      numFiles    ,
		     long      timeout     ,
		     long long syncPoint   ,
		     long      preferLocalReads ,
		     Msg5     *msg5        ,
		     Msg5     *msg5b       ,
		     bool      isRealMerge ,
		     //#ifdef SPLIT_INDEXDB
		     bool      allowPageCache ,
		     bool      forceLocalIndexdb ,
		     bool      noSplit , // doIndexdbSplit ,
		     long      forceParitySplit ) {
	//#else
	//	     bool      allowPageCache ) {
	//#endif
	// this is obsolete! mostly, but we need it for PageIndexdb.cpp to
	// show a "termlist" for a given query term in its entirety so you
	// don't have to check each machine in the network. if this is true it
	// means to query each split and merge the results together into a
	// single unified termlist. only applies to indexdb/datedb.
	//if ( doIndexdbSplit ) { char *xx = NULL; *xx = 0; }
	// note this because if caller is wrong it hurts performance major!!
	//if ( doIndexdbSplit )
	//	logf(LOG_DEBUG,"net: doing msg0 with indexdb split true");
	// warning
	if ( ! coll ) log(LOG_LOGIC,"net: NULL collection. msg0.");
	//if ( doIndexdbSplit ) { char *xx=NULL;*xx=0; }
	// reset the list they passed us
	list->reset();
	// get keySize of rdb
	m_ks = getKeySizeFromRdbId ( rdbId );
	// if startKey > endKey, don't read anything
	//if ( startKey > endKey ) return true;
	// crash intentionally on an inverted key range (see note above)
	if ( KEYCMP(startKey,endKey,m_ks)>0 ) { char *xx=NULL;*xx=0; }//rettrue
	// . reset hostid if it is dead
	// . this is causing UOR queries to take forever when we have a dead
	if ( hostId >= 0 && g_hostdb.isDead ( hostId ) ) hostId = -1;
	// no longer accept negative minrecsize
	if ( minRecSizes < 0 ) {
		g_errno = EBADENGINEER;
		log(LOG_LOGIC,
		    "net: msg0: Negative minRecSizes no longer supported.");
		// intentional crash; the return below is unreachable
		char *xx=NULL;*xx=0;
		return true;
	}
	// debug msg
	//if ( niceness != 0 ) log("HEY start");
	// ensure startKey last bit clear, endKey last bit set
	//if ( (startKey.n0 & 0x01) == 0x01 )
	//	log("Msg0::getList: warning startKey lastbit set");
	//if ( (endKey.n0 & 0x01) == 0x00 )
	//	log("Msg0::getList: warning endKey lastbit clear");
	// remember these (request parameters stashed on the Msg0 object so
	// the async callbacks can see them)
	m_state         = state;
	m_callback      = callback;
	m_list          = list;
	m_hostId        = hostId;
	m_niceness      = niceness;
	//m_ip          = ip;
	//m_port        = port;
	m_addToCache    = addToCache;
	// . these define our request 100%
	//m_startKey    = startKey;
	//m_endKey      = endKey;
	KEYSET(m_startKey,startKey,m_ks);
	KEYSET(m_endKey,endKey,m_ks);
	m_minRecSizes   = minRecSizes;
	m_rdbId         = rdbId;
	m_coll          = coll;
	m_isRealMerge   = isRealMerge;
	m_allowPageCache = allowPageCache;
	// . group to ask is based on the first key
	// . we only do 1 group per call right now
	// . groupMask must turn on higher bits first (count downwards kinda)
	// . titledb and spiderdb use special masks to get groupId
	// did they force it? core until i figure out what this is
	if ( forceParitySplit >= 0 )
		m_groupId = g_hostdb.getGroupId ( forceParitySplit );
	else
		m_groupId = getGroupId ( m_rdbId , startKey , ! noSplit );
	// how is this used?
	if ( forceLocalIndexdb ) m_groupId = g_hostdb.m_groupId;
	// . store these parameters
	// . get a handle to the rdb in case we can satisfy locally
	// . returns NULL and sets g_errno on error
	QUICKPOLL((m_niceness));
	Rdb *rdb = getRdbFromId ( m_rdbId );
	if ( ! rdb ) return true;
	// we need the fixedDataSize
	m_fixedDataSize = rdb->getFixedDataSize();
	m_useHalfKeys   = rdb->useHalfKeys();
	// . debug msg
	// . Msg2 does this when checking for a cached compound list.
	//   compound lists do not actually exist, they are merges of smaller
	//   UOR'd lists.
	if ( maxCacheAge != 0 && ! addToCache && (numFiles > 0 || includeTree))
		log(LOG_LOGIC,"net: msg0: "
		    "Weird. check but don't add... rdbid=%li.",(long)m_rdbId);
	// set this here since we may not call msg5 if list not local
	//m_list->setFixedDataSize ( m_fixedDataSize );
	// . now that we do load balancing we don't want to do a disk lookup
	//   even if local if we are merging or dumping
	// . UNLESS g_conf.m_preferLocalReads is true
	if ( preferLocalReads == -1 )
		preferLocalReads = g_conf.m_preferLocalReads;
	// . always prefer local for full split clusterdb
	// . and keep the tfndb/titledb lookups in the same stripe
	// . so basically we can't do biased caches if fully split
	//if ( g_conf.m_fullSplit ) preferLocalReads = true;
	// NOTE(review): this unconditionally overrides both the parameter
	// and g_conf.m_preferLocalReads just computed above — looks
	// deliberate ("always prefer local") but confirm
	preferLocalReads = true;
	// it it stored locally?
	bool isLocal = ( m_hostId == -1 && g_hostdb.m_groupId == m_groupId );
	// only do local lookups if this is true
	if ( ! preferLocalReads ) isLocal = false;
	/*
	m_numSplit = 1;
	if ( g_hostdb.m_indexSplits > 1 &&
	     ( rdbId == RDB_POSDB || rdbId==RDB_DATEDB)&&
	     ! forceLocalIndexdb && doIndexdbSplit ) {
		isLocal  = false;
		//m_numSplit = INDEXDB_SPLIT;
		m_numSplit = g_hostdb.m_indexSplits;
		char *xx=NULL;*xx=0;
	}
	*/
	/*
	long long singleDocIdQuery = 0LL;
	if ( rdbId == RDB_POSDB ) {
		long long d1 = g_posdb.getDocId(m_startKey);
		long long d2 = g_posdb.getDocId(m_endKey);
		if ( d1+1 == d2 ) singleDocIdQuery = d1;
	}
	// . try the LOCAL termlist cache
	// . so when msg2 is evaluating a gbdocid:| query and it has to
	//   use msg0 to go across the network to get the same damn termlist
	//   over and over again for the same docid, this will help alot.
	// . ideally it'd be nice if the seo pipe in xmldoc.cpp can try to
	//   send the same gbdocid:xxxx docids to the same hosts. maybe hash
	//   based on docid into the list of hosts and if that host is busy
	//   just chain until we find someone not busy.
	if ( singleDocIdQuery &&
	     getListFromTermListCache ( coll,
					m_startKey,
					m_endKey,
					maxCacheAge,
					list ) )
		// found!
		return true;
	*/
	// but always local if only one host
	if ( g_hostdb.getNumHosts() == 1 ) isLocal = true;
	// force a msg0 if doing a docid restrictive query like
	// gbdocid:xxxx|<query> so we call cacheTermLists()
	//if ( singleDocIdQuery ) isLocal = false;
	// . if the group is local then do it locally
	// . Msg5::getList() returns false if blocked, true otherwise
	// . Msg5::getList() sets g_errno on error
	// . don't do this if m_hostId was specified
	if ( isLocal ) { // && !g_conf.m_interfaceMachine ) {
		// use the caller-supplied Msg5 if given, else allocate one
		if ( msg5 ) {
			m_msg5 = msg5;
			m_deleteMsg5 = false;
		}
		else {
			try { m_msg5 = new ( Msg5 ); }
			catch ( ... ) {
				g_errno = ENOMEM;
				log("net: Local alloc for disk read failed "
				    "while tring to read data for %s. "
				    "Trying remote request.",
				    getDbnameFromId(m_rdbId));
				// fall back to the network path below
				goto skip;
			}
			mnew ( m_msg5 , sizeof(Msg5) , "Msg0" );
			m_deleteMsg5 = true;
		}
		QUICKPOLL(m_niceness);
		// same for msg5b (only needed for titledb reads)
		if ( msg5b ) {
			m_msg5b = msg5b;
			m_deleteMsg5b = false;
		}
		else if ( m_rdbId == RDB_TITLEDB ) {
			try { m_msg5b = new ( Msg5 ); }
			catch ( ... ) {
				g_errno = ENOMEM;
				log("net: Local alloc for disk read failed "
				    "while tring to read data for %s. "
				    "Trying remote request. 2.",
				    getDbnameFromId(m_rdbId));
				goto skip;
			}
			mnew ( m_msg5b , sizeof(Msg5) , "Msg0b" );
			m_deleteMsg5b = true;
		}
		QUICKPOLL(m_niceness);
		// false return = blocked; we return false to our caller too
		if ( ! m_msg5->getList ( rdbId,
					 coll            ,
					 m_list          ,
					 m_startKey      ,
					 m_endKey        ,
					 m_minRecSizes   ,
					 includeTree     , // include Tree?
					 addToCache      , // addToCache?
					 maxCacheAge     ,
					 startFileNum    ,
					 numFiles        ,
					 this            ,
					 gotListWrapper2 ,
					 niceness        ,
					 doErrorCorrection ,
					 NULL            , // cacheKeyPtr
					 0               , // retryNum
					 -1              , // maxRetries
					 true            , // compensateForMerge
					 syncPoint       ,
					 m_msg5b         ,
					 m_isRealMerge   ,
					 m_allowPageCache ) )
			return false;
		// nuke it (completed synchronously; free our Msg5s)
		reset();
		return true;
	}
skip:
	// debug msg
	if ( g_conf.m_logDebugQuery )
		log(LOG_DEBUG,"net: msg0: Sending request for data to "
		    "group=%li listPtr=%li minRecSizes=%li termId=%llu "
		    //"startKey.n1=%lx,n0=%llx (niceness=%li)",
		    "startKey.n1=%llx,n0=%llx (niceness=%li)",
		    g_hostdb.makeHostId ( m_groupId ) ,(long)m_list,
		    m_minRecSizes, g_posdb.getTermId(m_startKey) ,
		    //m_startKey.n1,m_startKey.n0 , (long)m_niceness);
		    KEY1(m_startKey,m_ks),KEY0(m_startKey),
		    (long)m_niceness);
	char *replyBuf        = NULL;
	long  replyBufMaxSize = 0;
	bool  freeReply       = true;
	// adjust niceness for net transmission
	// NOTE(review): "realtime" is currently always false, so the
	// pre-allocation block below is dead until the commented-out
	// condition is restored
	bool realtime = false;
	//if ( minRecSizes + 32 < TMPBUFSIZE ) realtime = true;
	// if we're niceness 0 we need to pre-allocate for reply since it
	// might be received within the asynchronous signal handler which
	// cannot call mmalloc()
	if ( realtime ) { // niceness <= 0 || netnice == 0 ) {
		// . we should not get back more than minRecSizes bytes since
		//   we are now performing merges
		// . it should not slow things down too much since the hashing
		//   is 10 times slower than merging anyhow...
		// . CAUTION: if rdb is not fixed-datasize then this will
		//   not work for us! it can exceed m_minRecSizes.
		replyBufMaxSize = m_minRecSizes ;
		// . get a little extra to fix the error where we ask for 64
		//   but get 72
		// . where is that coming from?
		// . when getting titleRecs we often exceed the minRecSizes
		// . ?Msg8? was having trouble. was short 32 bytes sometimes.
		replyBufMaxSize += 36;
		// why add ten percent?
		//replyBufMaxSize *= 110 ;
		//replyBufMaxSize /= 100 ;
		// make a buffer to hold the reply
//#ifdef SPLIT_INDEXDB
		/*
		if ( m_numSplit > 1 ) {
			m_replyBufSize = replyBufMaxSize * m_numSplit;
			replyBuf = (char *) mmalloc(m_replyBufSize, "Msg0");
			m_replyBuf  = replyBuf;
			freeReply = false;
		}
		else
		*/
//#endif
		replyBuf = (char *) mmalloc(replyBufMaxSize , "Msg0");
		// g_errno is set and we return true if it failed
		if ( ! replyBuf ) {
			log("net: Failed to pre-allocate %li bytes to hold "
			    "data read remotely from %s: %s.",
			    replyBufMaxSize,getDbnameFromId(m_rdbId),
			    mstrerror(g_errno));
			return true;
		}
	}
	// . make a request with the info above (note: not in network order)
	// . IMPORTANT!!!!! if you change this change
	//   Multicast.cpp::sleepWrapper1 too!!!!!!!!!!!!
	//   no, not anymore, we commented out that request peeking code
	// layout: 8-byte syncPoint, 4x4-byte longs, 6 single-byte flags,
	// two m_ks-byte keys, then the NUL-terminated collection name.
	// NOTE(review): the unaligned *(long long*)/*(long*) stores are
	// technically UB under strict aliasing — works on x86 with this
	// codebase's build flags, but do not "fix" casually
	char *p = m_request;
	*(long long *) p = syncPoint        ; p += 8;
	//*(key_t    *) p = m_startKey      ; p += sizeof(key_t);
	//*(key_t    *) p = m_endKey        ; p += sizeof(key_t);
	*(long      *) p = m_minRecSizes    ; p += 4;
	*(long      *) p = startFileNum     ; p += 4;
	*(long      *) p = numFiles         ; p += 4;
	*(long      *) p = maxCacheAge      ; p += 4;
	*p = m_rdbId             ; p++;
	*p = addToCache          ; p++;
	*p = doErrorCorrection   ; p++;
	*p = includeTree         ; p++;
	*p = (char)niceness      ; p++;
	*p = (char)m_allowPageCache; p++;
	KEYSET(p,m_startKey,m_ks); ; p+=m_ks;
	KEYSET(p,m_endKey,m_ks); ; p+=m_ks;
	// NULL terminated collection name
	strcpy ( p , coll );
	p += gbstrlen ( coll );
	*p++ = '\0';
	m_requestSize = p - m_request;
	// ask an individual host for this list if hostId is NOT -1
	if ( m_hostId != -1 ) {
		// get Host
		Host *h = g_hostdb.getHost ( m_hostId );
		if ( ! h ) {
			g_errno = EBADHOSTID;
			log(LOG_LOGIC,"net: msg0: Bad hostId of %lli.",
			    m_hostId);
			return true;
		}
		// if niceness is 0, use the higher priority udpServer
		UdpServer *us ;
		unsigned short port;
		QUICKPOLL(m_niceness);
		//if ( niceness <= 0 || netnice == 0 ) {
		//if ( realtime ) {
		//	us = &g_udpServer2; port = h->m_port2; }
		//else {
		us = &g_udpServer ; port = h->m_port ;
		// . returns false on error and sets g_errno, true otherwise
		// . calls callback when reply is received (or error)
		// . we return true if it returns false
		if ( ! us->sendRequest ( m_request ,
					 m_requestSize ,
					 0x00 , // msgType
					 h->m_ip ,
					 port ,
					 m_hostId ,
					 NULL , // the slotPtr
					 this ,
					 gotSingleReplyWrapper ,
					 timeout ,
					 -1 , // backoff
					 -1 , // maxwait
					 replyBuf ,
					 replyBufMaxSize ,
					 m_niceness ) ) // cback niceness
			return true;
		// return false cuz it blocked
		return false;
	}
	// timing debug
	if ( g_conf.m_logTimingNet )
		m_startTime = gettimeofdayInMilliseconds();
	else
		m_startTime = 0;
	//if ( m_rdbId == RDB_INDEXDB ) log("Msg0:: getting remote indexlist. "
	//			"termId=%llu, "
	//			"groupNum=%lu",
	//			g_indexdb.getTermId(m_startKey) ,
	//			g_hostdb.makeHostId ( m_groupId ) );
	/*
	// make the cache key so we can see what remote host cached it, if any
	char cacheKey[MAX_KEY_BYTES];
	//key_t cacheKey = makeCacheKey ( startKey     ,
	makeCacheKey ( startKey     ,
		       endKey       ,
		       includeTree  ,
		       minRecSizes  ,
		       startFileNum ,
		       numFiles     ,
		       cacheKey     ,
		       m_ks         );
	*/
	// . get the top long of the key
	// . i guess this will work for 128 bit keys... hmmmmm
	long keyTop = hash32 ( (char *)startKey , m_ks );
	/*
	// allocate space
	if ( m_numSplit > 1 ) {
		long  need = m_numSplit * sizeof(Multicast) ;
		char *buf  = (char *)mmalloc ( need,"msg0mcast" );
		if ( ! buf ) return true;
		m_mcasts = (Multicast *)buf;
		for ( long i = 0; i < m_numSplit ; i++ )
			m_mcasts[i].constructor();
	}
	*/
	// . otherwise, multicast to a host in group "groupId"
	// . returns false and sets g_errno on error
	// . calls callback on completion
	// . select first host to send to in group based on upper 32 bits
	//   of termId (m_startKey.n1)
//#ifdef SPLIT_INDEXDB
	// . need to send out to all the indexdb split hosts
	m_numRequests = 0;
	m_numReplies  = 0;
	//for ( long i = 0; i < m_numSplit; i++ ) {
	QUICKPOLL(m_niceness);
	long gr;
	char *buf;
	/*
	if ( m_numSplit > 1 ) {
		gr  = g_indexdb.getSplitGroupId ( baseGroupId, i );
		buf = &replyBuf[i*replyBufMaxSize];
	}
	else {
	*/
	gr  = m_groupId;
	buf = replyBuf;
	//}
	// get the multicast
	Multicast *m = &m_mcast;
	//if ( m_numSplit > 1 ) m = &m_mcasts[i];
	if ( ! m->send ( m_request ,
//#else
//	if ( ! m_mcast.send ( m_request ,
//#endif
			 m_requestSize,
			 0x00 , // msgType 0x00
			 false , // does multicast own request?
//#ifdef SPLIT_INDEXDB
			 gr , // group + offset
//#else
//			 m_groupId , // group to send to (groupKey)
//#endif
			 false , // send to whole group?
			 //m_startKey.n1, // key is passed on startKey
			 keyTop , // key is passed on startKey
			 this , // state data
			 NULL , // state data
			 gotMulticastReplyWrapper0 ,
			 timeout , // timeout in seconds (was 30)
			 niceness ,
			 realtime ,
			 firstHostId ,
//#ifdef SPLIT_INDEXDB
//			 &replyBuf[i*replyBufMaxSize] ,
//#else
//			 replyBuf ,
//#endif
			 buf ,
			 replyBufMaxSize ,
			 freeReply , // free reply buf?
			 true , // do disk load balancing?
			 maxCacheAge ,
			 //(key_t *)cacheKey ,
			 // multicast uses it for determining the best
			 // host to send the request to when doing
			 // disk load balancing. if the host has our
			 // data cached, then it will probably get to
			 // handle the request. for now let's just assume
			 // this is a 96-bit key. TODO: fix...
			 0 , // *(key_t *)cacheKey ,
			 rdbId ,
			 minRecSizes ) ) {
		log("net: Failed to send request for data from %s in group "
		    "#%li over network: %s.",
		    getDbnameFromId(m_rdbId),m_groupId,
		    mstrerror(g_errno));
		// no, multicast will free this when it is destroyed
		//if (replyBuf) mfree ( replyBuf , replyBufMaxSize , "Msg22" );
		// but speed it up
//#ifdef SPLIT_INDEXDB
		m_errno = g_errno;
		m->reset();
		if ( m_numRequests > 0 )
			return false;
//#else
//		m_mcast.reset();
//#endif
		return true;
	}
//#ifdef SPLIT_INDEXDB
	m_numRequests++;
//#endif
	// we blocked
	return false;
}
void printUdpTable ( SafeBuf *p, const char *title, UdpServer *server , const char *coll, int32_t fromIp , bool isDns ) { if ( ! coll ) coll = "main"; // time now int64_t now = gettimeofdayInMilliseconds(); // get # of used nodes //int32_t n = server->getTopUsedSlot(); // store in buffer for sorting int32_t times[50000];//MAX_UDP_SLOTS]; UdpSlot *slots[50000];//MAX_UDP_SLOTS]; int32_t nn = 0; for ( UdpSlot *s = server->getActiveHead() ; s ; s = s->m_next2 ) { if ( nn >= 50000 ) { log("admin: Too many udp sockets."); break; } // if empty skip it //if ( server->isEmpty ( i ) ) continue; // get the UdpSlot //UdpSlot *s = server->getUdpSlotNum(i); // if data is NULL that's an error //if ( ! s ) continue; // store it times[nn] = now - s->m_startTime; slots[nn] = s; nn++; } // bubble sort keepSorting: // assume no swap will happen bool didSwap = false; for ( int32_t i = 1 ; i < nn ; i++ ) { if ( times[i-1] >= times[i] ) continue; int32_t tmpTime = times[i-1]; UdpSlot *tmpSlot = slots[i-1]; times[i-1] = times[i]; slots[i-1] = slots[i]; times[i ] = tmpTime; slots[i ] = tmpSlot; didSwap = true; } if ( didSwap ) goto keepSorting; // count how many of each msg we have int32_t msgCount0[MAX_MSG_TYPES] = {}; int32_t msgCount1[MAX_MSG_TYPES] = {}; for ( int32_t i = 0; i < nn; i++ ) { UdpSlot *s = slots[i]; if ( s->m_niceness == 0 ) msgCount0[s->getMsgType()]++; else msgCount1[s->getMsgType()]++; } const char *wr = ""; if ( server->m_writeRegistered ) wr = " [write registered]"; // print the counts p->safePrintf ( "<table %s>" "<tr class=hdrow><td colspan=19>" "<center>" "<b>%s Summary</b> (%" PRId32" transactions)%s" "</td></tr>" "<tr bgcolor=#%s>" "<td><b>niceness</td>" "<td><b>msg type</td>" "<td><b>total</td>" "</tr>", TABLE_STYLE, title , server->getNumUsedSlots() , wr , DARK_BLUE ); for ( int32_t i = 0; i < 96; i++ ) { if ( msgCount0[i] <= 0 ) continue; p->safePrintf("<tr bgcolor=#%s>" "<td>0</td><td>0x%" PRIx32"</td><td>%" PRId32"</td></tr>", LIGHT_BLUE,i, msgCount0[i]); } 
for ( int32_t i = 0; i < 96; i++ ) { if ( msgCount1[i] <= 0 ) continue; p->safePrintf("<tr bgcolor=#%s>" "<td>1</td><td>0x%" PRIx32"</td><td>%" PRId32"</td></tr>", LIGHT_BLUE,i, msgCount1[i]); } p->safePrintf ( "</table><br>" ); const char *dd = ""; if ( ! isDns ) dd = "<td><b>msgType</td>" "<td><b>desc</td>" "<td><b>hostId</td>"; else { dd = //"<td><b>dns ip</b></td>" "<td><b>hostname</b></td>"; } //UdpSlot *slot = server->m_head3; //int32_t callbackReadyCount = 0; //for ( ; slot ; slot = slot->m_next3 , callbackReadyCount++ ); p->safePrintf ( "<table %s>" "<tr class=hdrow><td colspan=19>" "<center>" //"<font size=+1>" "<b>%s</b> (%" PRId32" transactions)" //"(%" PRId32" requests waiting to processed)" "(%" PRId32" incoming)" //"</font>" "</td></tr>" "<tr bgcolor=#%s>" "<td><b>age</td>" "<td><b>last read</td>" "<td><b>last send</td>" "<td><b>timeout</td>" "<td><b>ip</td>" //"<td><b>port</td>" //"<td><b>desc</td>" //"<td><b>hostId</td>" //"<td><b>nice</td>"; "%s" "<td><b>nice</td>" "<td><b>transId</td>" "<td><b>called</td>" "<td><b>dgrams read</td>" "<td><b>dgrams to read</td>" "<td><b>acks sent</td>" "<td><b>dgrams sent</td>" "<td><b>dgrams to send</td>" "<td><b>acks read</td>" "<td><b>resends</td>" "</tr>\n" , TABLE_STYLE, title , server->getNumUsedSlots() , //callbackReadyCount , server->getNumUsedSlotsIncoming() , DARK_BLUE , dd ); // now fill in the columns for ( int32_t i = 0 ; i < nn ; i++ ) { // get from sorted list UdpSlot *s = slots[i]; // set socket state //char *st = "ERROR"; //if ( ! s->isDoneReading() ) st = "reading"; //if ( ! 
s->isDoneSending() ) st = "reading"; // times int64_t elapsed0 = (now - s->m_startTime ) ; int64_t elapsed1 = (now - s->m_lastReadTime ) ; int64_t elapsed2 = (now - s->m_lastSendTime ) ; char e0[32],e1[32], e2[32]; sprintf ( e0 , "%" PRId64"ms" , elapsed0 ); sprintf ( e1 , "%" PRId64"ms" , elapsed1 ); sprintf ( e2 , "%" PRId64"ms" , elapsed2 ); if ( s->m_startTime == 0LL ) strcpy ( e0 , "--" ); if ( s->m_lastReadTime == 0LL ) strcpy ( e1 , "--" ); if ( s->m_lastSendTime == 0LL ) strcpy ( e2 , "--" ); // bgcolor is lighter for incoming requests const char *bg = LIGHT_BLUE;//"c0c0f0"; // is it incoming if ( ! s->m_callback ) bg = LIGHTER_BLUE;//"e8e8ff"; Host *h = g_hostdb.getHost ( s->m_ip , s->m_port ); const char *eip = "??"; uint16_t eport = 0 ; //int32_t ehostId = -1 ; const char *ehostId = "-1"; //char tmpIp [64]; // print the ip char tmpHostId[64]; if ( h ) { // host can have 2 ip addresses, get the one most // similar to that of the requester eip = iptoa(g_hostdb.getBestIp ( h , fromIp )); //eip = iptoa(h->m_externalIp) ; //eip = iptoa(h->m_ip) ; eport = h->m_externalHttpPort ; //ehostId = h->m_hostId ; if ( h->m_isProxy ) sprintf(tmpHostId,"proxy%" PRId32,h->m_hostId); else sprintf(tmpHostId,"%" PRId32,h->m_hostId); ehostId = tmpHostId; } // if no corresponding host, it could be a request from an external // cluster, so just show the ip else { sprintf ( tmpHostId , "%s" , iptoa(s->m_ip) ); ehostId = tmpHostId; eip = tmpHostId; } // set description of the msg msg_type_t msgType = s->getMsgType(); const char *desc = ""; char *rbuf = s->m_readBuf; char *sbuf = s->m_sendBuf; int32_t rbufSize = s->m_readBufSize; int32_t sbufSize = s->m_sendBufSize; bool weInit = s->m_callback; char calledHandler = s->m_calledHandler; if ( weInit ) calledHandler = s->m_calledCallback; char *buf = NULL; int32_t bufSize = 0; char tt [ 64 ]; if (msgType == msg_type_0) { buf = weInit ? sbuf : rbuf; } else if (msgType == msg_type_1) { buf = weInit ? 
sbuf : rbuf; } else if (msgType == msg_type_13) { // . if callback was called this slot's sendbuf can be bogus // . i put this here to try to avoid a core dump if (weInit) { if (!s->m_calledCallback) { buf = sbuf; bufSize = sbufSize; } } else { buf = rbuf; bufSize = rbufSize; } } if ( buf ) { int32_t rdbId = (msgType == msg_type_1) ? buf[0] : buf[24]; Rdb *rdb = NULL; if (rdbId >= 0 && !isDns) { rdb = getRdbFromId((uint8_t) rdbId); } tt[0] = ' '; tt[1] = '\0'; if (rdb) { const char *cmd = ( msgType == msg_type_1 ) ? "add to" : "get from"; sprintf(tt, "%s %s", cmd, rdb->m_dbname); } desc = tt; } if ( msgType == msg_type_c ) { desc = "getting ip"; } else if ( msgType == msg_type_11 ) { desc = "ping"; } else if ( msgType == msg_type_4 ) { desc = "meta add"; } else if ( msgType == msg_type_13 ) { bool isRobotsTxt = true; if ( buf && bufSize >= (int32_t)sizeof(Msg13Request)-(int32_t)MAX_URL_LEN ) { Msg13Request *r = (Msg13Request *)buf; isRobotsTxt = r->m_isRobotsTxt; } desc = isRobotsTxt ? "get robots.txt" : "get web page"; } else if ( msgType == msg_type_22 ) { desc = "get titlerec"; } else if ( msgType == msg_type_20 ) { desc = "get summary"; } else if ( msgType == msg_type_39 ) { desc = "get docids"; } else if ( msgType == msg_type_7 ) { desc = "inject"; } else if ( msgType == msg_type_25 ) { desc = "get link info"; } else if ( msgType == msg_type_fd ) { desc = "proxy forward"; } p->safePrintf ( "<tr bgcolor=#%s>" "<td>%s</td>" // age "<td>%s</td>" // last read "<td>%s</td>" // last send "<td>%" PRId64"</td>", // timeout bg , e0 , e1 , e2 , s->m_timeout ); // now use the ip for dns and hosts p->safePrintf("<td>%s:%" PRIu32"</td>", iptoa(s->m_ip),(uint32_t)s->m_port); const char *cf1 = ""; const char *cf2 = ""; if ( s->m_convertedNiceness ) { cf1 = "<font color=red>"; cf2 = "</font>"; } if ( isDns ) { p->safePrintf("<td><nobr>%s", s->m_hostname); // get the domain from the hostname int32_t dlen; char *dbuf = ::getDomFast ( s->m_hostname,&dlen,false); p->safePrintf( " 
<a href=\"/admin/tagdb?user=admin&tagtype0=manualban&tagdata0=1&u=%s&c=%s\">" "[<font color=red><b>BAN %s</b></font>]</nobr></a> " , dbuf , coll , dbuf ); p->safePrintf("</td><td>%s%" PRId32"%s</td>", cf1, (int32_t)s->m_niceness, cf2); } else { // clickable hostId const char *toFrom = "to"; if ( ! s->m_callback ) toFrom = "from"; p->safePrintf ( "<td>0x%02x</td>" // msgtype "<td><nobr>%s</nobr></td>" // desc "<td><nobr>%s <a href=http://%s:%hu/" "admin/sockets?" "c=%s>%s</a></nobr></td>" "<td>%s%" PRId32"%s</td>" , // niceness s->getMsgType() , desc, // begin clickable hostId toFrom, eip , eport , coll , ehostId , cf1, (int32_t)s->m_niceness, cf2 // end clickable hostId ); } const char *rf1 = ""; const char *rf2 = ""; if ( s->m_resendCount ) { rf1 = "<b style=color:red;>"; rf2 = "</b>"; } p->safePrintf ( "<td>%" PRIu32"</td>" // transId "<td>%i</td>" // called handler "<td>%" PRId32"</td>" // dgrams read "<td>%" PRId32"</td>" // dgrams to read "<td>%" PRId32"</td>" // acks sent "<td>%" PRId32"</td>" // dgrams sent "<td>%" PRId32"</td>" // dgrams to send "<td>%" PRId32"</td>" // acks read "<td>%s%hhu%s</td>" // resend count "</tr>\n" , (uint32_t)s->m_transId, calledHandler, s->getNumDgramsRead() , s->m_dgramsToRead , s->getNumAcksSent() , s->getNumDgramsSent() , s->m_dgramsToSend , s->getNumAcksRead() , rf1 , s->m_resendCount , rf2 ); } // end the table p->safePrintf ("</table><br>\n" ); }
// . reply to a request for an RdbList // . MUST call g_udpServer::sendReply or sendErrorReply() so slot can // be destroyed void handleRequest0 ( UdpSlot *slot , long netnice ) { // if niceness is 0, use the higher priority udpServer UdpServer *us = &g_udpServer; //if ( netnice == 0 ) us = &g_udpServer2; // get the request char *request = slot->m_readBuf; long requestSize = slot->m_readBufSize; // collection is now stored in the request, so i commented this out //if ( requestSize != MSG0_REQ_SIZE ) { // log("net: Received bad data request size of %li bytes. " // "Should be %li.", requestSize ,(long)MSG0_REQ_SIZE); // us->sendErrorReply ( slot , EBADREQUESTSIZE ); // return; //} // parse the request char *p = request; long long syncPoint = *(long long *)p ; p += 8; //key_t startKey = *(key_t *)p ; p += sizeof(key_t); //key_t endKey = *(key_t *)p ; p += sizeof(key_t); long minRecSizes = *(long *)p ; p += 4; long startFileNum = *(long *)p ; p += 4; long numFiles = *(long *)p ; p += 4; long maxCacheAge = *(long *)p ; p += 4; char rdbId = *p++; char addToCache = *p++; char doErrorCorrection = *p++; char includeTree = *p++; // this was messing up our niceness conversion logic long niceness = slot->m_niceness;//(long)(*p++); // still need to skip it though! p++; bool allowPageCache = (bool)(*p++); char ks = getKeySizeFromRdbId ( rdbId ); char *startKey = p; p+=ks; char *endKey = p; p+=ks; // then null terminated collection char *coll = p; // error set from XmlDoc::cacheTermLists()? if ( g_errno ) { us->sendErrorReply ( slot , EBADRDBID ); return;} // is this being called from callWaitingHandlers() //bool isRecall = (netnice == 99); // . get the rdb we need to get the RdbList from // . returns NULL and sets g_errno on error //Msg0 msg0; //Rdb *rdb = msg0.getRdb ( rdbId ); Rdb *rdb = getRdbFromId ( rdbId ); if ( ! rdb ) { us->sendErrorReply ( slot , EBADRDBID ); return;} // keep track of stats rdb->readRequestGet ( requestSize ); /* // keep track of stats if ( ! 
isRecall ) rdb->readRequestGet ( requestSize ); long long singleDocId2 = 0LL; if ( rdbId == RDB_POSDB && maxCacheAge ) { long long d1 = g_posdb.getDocId(startKey); long long d2 = g_posdb.getDocId(endKey); if ( d1+1 == d2 ) singleDocId2 = d1; } // have we parsed this docid and cached its termlists? bool shouldBeCached2 = false; if ( singleDocId2 && isDocIdInTermListCache ( singleDocId2 , coll ) ) shouldBeCached2 = true; // if in the termlist cache, send it back right away char *trec; long trecSize; if ( singleDocId2 && getRecFromTermListCache(coll, startKey, endKey, maxCacheAge, &trec, &trecSize) ) { // if in cache send it back! us->sendReply_ass(trec,trecSize,trec,trecSize,slot); return; } // if should be cached but was not found then it's probably a // synonym form not in the doc content. make an empty list then. if ( shouldBeCached2 ) { // send back an empty termlist us->sendReply_ass(NULL,0,NULL,0,slot); return; } // MUST be in termlist cache! if not in there it is a probably // a synonym term termlist of a word in the doc. if ( isRecall ) { // send back an empty termlist us->sendReply_ass(NULL,0,NULL,0,slot); return; } // init waiting table? static bool s_waitInit = false; if ( ! s_waitInit ) { // do not repeat s_waitInit = true; // niceness = 0 if ( ! g_waitingTable.set(8,4,2048,NULL,0,true,0,"m5wtbl")){ log("msg5: failed to init waiting table"); // error kills us! us->sendErrorReply ( slot , EBADRDBID ); return; } } // wait in waiting table? if ( singleDocId2 && g_waitingTable.isInTable ( &singleDocId2 ) ) { g_waitingTable.addKey ( &singleDocId2 , &slot ); return; } // if it's for a special gbdocid: query then cache ALL termlists // for this docid into g_termListCache right now if ( singleDocId2 ) { // have all further incoming requests for this docid // wait in the waiting table g_waitingTable.addKey ( &singleDocId2 , &slot ); // load the title rec and store its posdb termlists in cache XmlDoc *xd; try { xd = new ( XmlDoc ); } catch ( ... 
) { g_errno = ENOMEM; us->sendErrorReply ( slot , g_errno ); return; } mnew ( xd, sizeof(XmlDoc),"msg0xd"); // always use niceness 1 now even though we use niceness 0 // to make the cache hits fast //niceness = 1; // . load the old title rec first and just recycle all // . typically there might be a few hundred related docids // each with 50,000 matching queries on average to evaluate // with the gbdocid:xxxx| restriction? if ( ! xd->set3 ( singleDocId2 , coll , niceness ) ) { us->sendErrorReply ( slot , g_errno ); return;} // init the new xmldoc xd->m_callback1 = callWaitingHandlers; xd->m_state = xd; // . if this blocks then return // . should call loadOldTitleRec() and get JUST the posdb recs // by setting m_useTitledb, etc. to false. then it should // make posdb termlists with the compression using // RdbList::addRecord() and add those lists to // g_termListCache if ( ! xd->cacheTermLists ( ) ) return; // otherwise, it completed right away! callWaitingHandlers ( xd ); return; } */ /* // init special sectiondb cache? if ( rdbId == RDB_SECTIONDB && ! s_initCache ) { // try to init cache if ( ! s_sectiondbCache.init ( 20000000 , // 20MB max mem -1 , // fixed data size false , // support lists? 20000 , // 20k max recs false , // use half keys? "secdbche", // dbname false, // load from disk? sizeof(key128_t), //cachekeysize 0 , // data key size 20000 )) // numPtrs max log("msg0: failed to init sectiondb cache: %s", mstrerror(g_errno)); else s_initCache = true; } // check the sectiondb cache if ( rdbId == RDB_SECTIONDB ) { //long long sh48 = g_datedb.getTermId((key128_t *)startKey); // use the start key now!!! char *data; long dataSize; if (s_sectiondbCache.getRecord ( coll, startKey,//&sh48, &data, &dataSize, true, // docopy? 600, // maxage (10 mins) true, // inc counts? NULL, // cachedtime true // promoteRec? 
)){ // debug //log("msg0: got sectiondblist in cache datasize=%li", // dataSize); // send that back g_udpServer.sendReply_ass ( data , dataSize , data , dataSize , slot , 60 , NULL , doneSending_ass , -1 , -1 , true ); return; } } */ // . do a local get // . create a msg5 to get the list State00 *st0 ; try { st0 = new (State00); } catch ( ... ) { g_errno = ENOMEM; log("Msg0: new(%i): %s", sizeof(State00),mstrerror(g_errno)); us->sendErrorReply ( slot , g_errno ); return; } mnew ( st0 , sizeof(State00) , "State00" ); // timing debug if ( g_conf.m_logTimingNet ) st0->m_startTime = gettimeofdayInMilliseconds(); // save slot in state st0->m_slot = slot; // save udp server to send back reply on st0->m_us = us; // init this one st0->m_niceness = niceness; st0->m_rdbId = rdbId; QUICKPOLL(niceness); // debug msg if ( maxCacheAge != 0 && ! addToCache ) log(LOG_LOGIC,"net: msg0: check but don't add... rdbid=%li.", (long)rdbId); // . if this request came over on the high priority udp server // make sure the priority gets passed along // . return if this blocks // . we'll call sendReply later if ( ! st0->m_msg5.getList ( rdbId , coll , &st0->m_list , startKey , endKey , minRecSizes , includeTree , // include tree? addToCache , // addToCache? maxCacheAge , startFileNum , numFiles , st0 , gotListWrapper , niceness , doErrorCorrection , NULL , // cacheKeyPtr 0 , // retryNum 2 , // maxRetries true , // compensateForMerge syncPoint , &st0->m_msg5b , false, allowPageCache ) ) return; // call wrapper ouselves gotListWrapper ( st0 , NULL , NULL ); }
// . callback for handleRequest0(): sends the RdbList read by Msg5 back
//   to the requester over the udp slot saved in State00
// . slot should be auto-nuked upon transmission or error
// . TODO: ensure if this sendReply() fails does it really nuke the slot?
void gotListWrapper ( void *state , RdbList *listb , Msg5 *msg5xx ) {
	// recover the state allocated in handleRequest0()
	State00 *st0 = (State00 *)state;
	// extract the udp slot, list, msg5 and the server to reply on
	UdpSlot *slot = st0->m_slot;
	RdbList *list = &st0->m_list;
	Msg5 *msg5 = &st0->m_msg5;
	UdpServer *us = st0->m_us;
	// timing debug
	if ( g_conf.m_logTimingNet || g_conf.m_logDebugNet ) {
		long size = -1;
		if ( list ) size = list->getListSize();
		log(LOG_TIMING|LOG_DEBUG,
		    "net: msg0: Handled request for data. "
		    "Now sending data termId=%llu size=%li"
		    " transId=%li ip=%s port=%i took=%lli "
		    "(niceness=%li).",
		    g_posdb.getTermId(msg5->m_startKey),
		    size,slot->m_transId,
		    iptoa(slot->m_ip),slot->m_port,
		    gettimeofdayInMilliseconds() - st0->m_startTime ,
		    st0->m_niceness );
	}
	// on error free our state (which owns the list) and send the error
	if ( g_errno ) {
		mdelete ( st0 , sizeof(State00) , "Msg0" );
		delete (st0);
		// TODO: free "slot" if this send fails
		us->sendErrorReply ( slot , g_errno );
		return;
	}
	QUICKPOLL(st0->m_niceness);
	// point to the serialized list in "list"
	char *data = list->getList();
	long dataSize = list->getListSize();
	char *alloc = list->getAlloc();
	long allocSize = list->getAllocSize();
	// tell list not to free the data since it is a reply so UdpServer
	// will free it when it destroys the slot
	list->setOwnData ( false );
	// keep track of stats
	Rdb *rdb = getRdbFromId ( st0->m_rdbId );
	if ( rdb ) rdb->sentReplyGet ( dataSize );
	// keep track of how long it takes to complete the send
	st0->m_startTime = gettimeofdayInMilliseconds();
	// complain if we read noticeably more data than was requested
	long oldSize = msg5->m_minRecSizes;
	long newSize = msg5->m_minRecSizes + 20;
	// watch for wrap around
	if ( newSize < oldSize ) newSize = 0x7fffffff;
	if ( dataSize > newSize && list->getFixedDataSize() == 0 &&
	     // do not annoy me with these linkdb msgs
	     dataSize > newSize+100 )
		log(LOG_LOGIC,"net: msg0: Sending more data than what was "
		    "requested. Ineffcient. Bad engineer. dataSize=%li "
		    "minRecSizes=%li.",dataSize,oldSize);
	// NOTE: a large commented-out legacy block that compressed and
	// cached sectiondb lists was removed here
	//
	// for linkdb lists, remove (in place) all the keys that have the
	// same linker IP32 as the previous key, keeping the final record
	// so the caller can still advance m_nextKey efficiently
	//
	if ( st0->m_rdbId == RDB_LINKDB ) {
		// compact the list onto itself
		char *dst = list->m_list;
		// keep stats
		long totalOrigLinks = 0;
		long ipDups = 0;
		long lastIp32 = 0;
		char *listEnd = list->getListEnd();
		// compress the list
		for ( ; ! list->isExhausted() ; list->skipCurrentRecord() ) {
			// breathe
			QUICKPOLL ( st0->m_niceness );
			// count it
			totalOrigLinks++;
			// get rec
			char *rec = list->getCurrentRec();
			long ip32 = g_linkdb.getLinkerIp_uk((key224_t *)rec );
			// same as one before? skip it, unless it is the
			// last rec, which we keep for m_nextKey advancing
			if ( ip32 == lastIp32 &&
			     rec + LDBKS < listEnd ) {
				ipDups++;
				continue;
			}
			// store it
			memcpy (dst , rec , LDBKS );
			dst += LDBKS;
			// update it
			lastIp32 = ip32;
		}
		// storing the dup stats in the reply was never implemented
		if ( ipDups ) {
		}
		// update list parms to reflect the compacted size
		list->m_listSize = dst - list->m_list;
		list->m_listEnd = list->m_list + list->m_listSize;
		data = list->getList();
		dataSize = list->getListSize();
	}
	// . now g_udpServer is responsible for freeing data/dataSize
	// . the "true" means to call doneSending_ass() from the signal
	//   handler if need be
	st0->m_us->sendReply_ass ( data ,
				   dataSize ,
				   alloc ,     // alloc
				   allocSize , // alloc size
				   slot ,
				   60 ,
				   st0 ,
				   doneSending_ass ,
				   -1 ,
				   -1 ,
				   true );
}
// . destroys the slot if false is returned // . this is registered in Msg1::set() to handle add rdb record msgs // . seems like we should always send back a reply so we don't leave the // requester's slot hanging, unless he can kill it after transmit success??? // . TODO: need we send a reply back on success???? // . NOTE: Must always call g_udpServer::sendReply or sendErrorReply() so // read/send bufs can be freed void handleRequest1 ( UdpSlot *slot , int32_t netnice ) { // extract what we read char *readBuf = slot->m_readBuf; int32_t readBufSize = slot->m_readBufSize; int32_t niceness = slot->m_niceness; // select udp server based on niceness UdpServer *us = &g_udpServer; // must at least have an rdbId if ( readBufSize <= 4 ) { g_errno = EREQUESTTOOSHORT; log(LOG_ERROR,"%s:%s:%d: call sendErrorReply. Request too short", __FILE__, __func__, __LINE__); us->sendErrorReply ( slot , g_errno ); return; } char *p = readBuf; char *pend = readBuf + readBufSize; // extract rdbId char rdbId = *p++; // get the rdb to which it belongs, use Msg0::getRdb() Rdb *rdb = getRdbFromId ( (char) rdbId ); if ( ! rdb ) { log(LOG_ERROR,"%s:%s:%d: call sendErrorReply. Bad rdbid", __FILE__, __func__, __LINE__); us->sendErrorReply ( slot, EBADRDBID ); return; } // keep track of stats rdb->readRequestAdd ( readBufSize ); // reset g_errno g_errno = 0; // are we injecting some title recs? bool injecting; if ( *p & 0x80 ) injecting = true; else injecting = false; p++; // then collection //char *coll = p; //p += strlen (p) + 1; collnum_t collnum = *(collnum_t *)p; p += sizeof(collnum_t); // . make a list from this data // . skip over the first 4 bytes which is the rdbId // . TODO: embed the rdbId in the msgtype or something... RdbList list; // set the list list.set ( p , // readBuf + 4 , pend - p , // readBufSize - 4 , p , // readBuf + 4 , pend - p , // readBufSize - 4 , rdb->getFixedDataSize() , false , // ownData? 
rdb->useHalfKeys() , rdb->getKeySize () ); // note it //log("msg1: handlerequest1 calling addlist niceness=%" PRId32,niceness); //log("msg1: handleRequest1 niceness=%" PRId32,niceness); // this returns false and sets g_errno on error rdb->addList ( collnum , &list , niceness); // if titledb, add tfndb recs to map the title recs //if ( ! g_errno && rdb == g_titledb.getRdb() && injecting ) // updateTfndb ( coll , &list , true, 0); // but if deleting a "new" and unforced record from spiderdb // then only delete tfndb record if it was tfn=255 //if ( ! g_errno && rdb == g_spiderdb.getRdb() ) // updateTfndb2 ( coll , &list , false ); // retry on some errors addedList ( slot , rdb ); }
// . buffer is used for reading and writing // . return false if blocked, true otherwise // . sets g_errno on error // . if niceness is 0 merge will block, otherwise will not block // . we now use niceness of 1 which should spawn threads that don't allow // niceness 2 threads to launch while they're running // . spider process now uses mostly niceness 2 // . we need the merge to take priority over spider processes on disk otherwise // there's too much contention from spider lookups on disk for the merge // to finish in a decent amount of time and we end up getting too many files! bool RdbMerge::merge ( char rdbId , //char *coll , //RdbBase *base , collnum_t collnum, BigFile *target , RdbMap *targetMap , int32_t id2 , // target's secondary id int32_t startFileNum , int32_t numFiles , int32_t niceness , void *pc , int64_t maxTargetFileSize , char keySize ) { // reset ourselves reset(); // set it m_rdbId = rdbId; Rdb *rdb = getRdbFromId ( rdbId ); // get base, returns NULL and sets g_errno to ENOCOLLREC on error RdbBase *base = getRdbBase( m_rdbId, collnum ); if ( ! base ) { return true; } // don't breech the max //if ( numFiles > m_maxFilesToMerge ) numFiles = m_maxFilesToMerge; // reset this map! it's m_crcs needs to be reset //targetMap->reset(); // remember some parms //if ( ! coll && rdb->m_isCollectionLess ) // strcpy ( m_coll , rdb->m_dbname ); //else // strcpy ( m_coll , coll ); m_collnum = collnum; if ( rdb->m_isCollectionLess ) m_collnum = 0; m_target = target; m_targetMap = targetMap; m_id2 = id2; m_startFileNum = startFileNum; m_numFiles = numFiles; m_dedup = base->m_dedup; m_fixedDataSize = base->m_fixedDataSize; m_niceness = niceness; //m_pc = pc; m_maxTargetFileSize = maxTargetFileSize; m_doneMerging = false; m_ks = keySize; // . set the key range we want to retrieve from the files // . just get from the files, not tree (not cache?) 
//m_startKey.setMin(); //m_endKey.setMax(); KEYMIN(m_startKey,m_ks); KEYMAX(m_endKey,m_ks); // if we're resuming a killed merge, set m_startKey to last // key the map knows about. // the dump will start dumping at the end of the targetMap's data file. if ( m_targetMap->getNumRecs() > 0 ) { log(LOG_INIT,"db: Resuming a killed merge."); //m_startKey = m_targetMap->getLastKey(); m_targetMap->getLastKey(m_startKey); //m_startKey += (uint32_t) 1; KEYADD(m_startKey,m_ks); // if power goes out and we are not doing synchronous writes // then we could have completely lost some data and unlinked // a part file from the file being merged, so that the data is // gone. to be able to resume merging, we must increment the // startKey until it references a valid offset in all the // files being merged. invalid offsets will reference parts // that have been chopped. /* RdbMap **maps = rdb->getMaps(); BigFile **files = rdb->getFiles(); for ( int32_t i=m_startFileNum;i<m_startFileNum+m_numFiles;i++){ int64_t minOff = 0LL; int32_t k = 0; while ( k < files[i]->m_maxParts && ! files[i]->m_files[k] ) { k++; minOff += MAX_PART_SIZE; } int32_t pn0 = maps[i]->getPage ( m_startKey ); int32_t pn = pn0; while ( maps[i]->getAbsoluteOffset(pn) < minOff ) pn++; if ( pn != pn0 ) { log("db: Lost data during merge. Starting " "merge at page number %" PRId32" from %" PRId32" for " "file.",pn,pn0); m_startKey = maps[i]->getKey ( pn ); } } */ } // free our list's memory, just in case //m_list.freeList(); // . we may have multiple hosts running on the same cpu/hardDrive // . therefore, to maximize disk space, we should only have 1 merge // at a time going on between these hosts // . now tfndb has own merge class since titledb merge writes url recs /* if ( s_isMergeLocked ) { //log("RdbMerge::merge: someone else merging sleeping."); log("RdbMerge::merge: someone else merging. 
bad engineer."); return false; // if it fails then sleep until it works //returng_loop.registerSleepCallback(5000,this,getLockWrapper); } */ return gotLock(); }
// . return false if blocked, true otherwise // . sets g_errno on error bool Msg1::sendData ( unsigned long shardNum, char *listData , long listSize) { // debug msg //log("sendData: mcast=%lu listSize=%li", // (long)&m_mcast,(long)listSize); // bail if this is an interface machine, don't write to the main if ( g_conf.m_interfaceMachine ) return true; // return true if no data if ( listSize == 0 ) return true; // how many hosts in this group //long numHosts = g_hostdb.getNumHostsPerShard(); // . NOTE: for now i'm removing this until I handle ETRYAGAIN errors // properly... by waiting and retrying... // . if this is local data just for us just do an addList to OUR rdb /* if ( groupId == g_hostdb.m_groupId && numHosts == 1 ) { // this sets g_errno on error Msg0 msg0; Rdb *rdb = msg0.getRdb ( (char) m_rdbId ); if ( ! rdb ) return true; // make a list from this data RdbList list; list.set (listData,listSize,listSize,rdb->getFixedDataSize(), false) ; // ownData? // this returns false and sets g_errno on error rdb->addList ( &list ); // . if we got a ETRYAGAIN cuz the buffer we add to was full // then we should sleep and try again! // . return false cuz this blocks for a period of time // before trying again if ( g_errno == ETRYAGAIN ) { // try adding again in 1 second registerSleepCallback ( 1000, slot, tryAgainWrapper1 ); // return now return false; } // . always return true cuz we did not block // . g_errno may be set return true; } */ // if the data is being added to our group, don't send ourselves // a msg1, if we can add it right now bool sendToSelf = true; if ( shardNum == getMyShardNum() && ! g_conf.m_interfaceMachine ) { // get the rdb to which it belongs, use Msg0::getRdb() Rdb *rdb = getRdbFromId ( (char) m_rdbId ); if ( ! rdb ) goto skip; // key size long ks = getKeySizeFromRdbId ( m_rdbId ); // reset g_errno g_errno = 0; // . make a list from this data // . skip over the first 4 bytes which is the rdbId // . 
TODO: embed the rdbId in the msgtype or something... RdbList list; // set the list list.set ( listData , listSize , listData , listSize , rdb->getFixedDataSize() , false , // ownData? rdb->useHalfKeys() , ks ); // note that //log("msg1: local addlist niceness=%li",m_niceness); // this returns false and sets g_errno on error rdb->addList ( m_coll , &list , m_niceness ); // if titledb, add tfndb recs to map the title recs //if ( ! g_errno && rdb == g_titledb.getRdb() && m_injecting ) // // this returns false and sets g_errno on error // updateTfndb ( m_coll , &list , true , m_niceness); // if no error, no need to use a Msg1 UdpSlot for ourselves if ( ! g_errno ) sendToSelf = false; else { log("rdb: msg1 had error: %s",mstrerror(g_errno)); // this is messing up generate catdb's huge rdblist add // why did we put it in there??? from msg9b.cpp //return true; } QUICKPOLL(m_niceness); // if we're the only one in the group, bail, we're done if ( ! sendToSelf && g_hostdb.getNumHostsPerShard() == 1 ) return true; } skip: // . make an add record request to multicast to a bunch of machines // . this will alloc new space, returns NULL on failure //char *request = makeRequest ( listData, listSize, groupId , //m_rdbId , &requestLen ); long collLen = gbstrlen ( m_coll ); // . returns NULL and sets g_errno on error // . calculate total size of the record // . 1 byte for rdbId, 1 byte for flags, // then collection NULL terminated, then list long requestLen = 1 + 1 + collLen + 1 + listSize ; // make the request char *request = (char *) mmalloc ( requestLen ,"Msg1" ); if ( ! 
request ) return true; char *p = request; // store the rdbId at top of request *p++ = m_rdbId; // then the flags *p = 0; if ( m_injecting ) *p |= 0x80; p++; // then collection name memcpy ( p , m_coll , collLen ); p += collLen; *p++ = '\0'; // sanity check if ( collLen <= 0 ) { log(LOG_LOGIC,"net: No collection specified for list add."); //char *xx = NULL; *xx = 0; g_errno = ENOCOLLREC; return true; } //if ( m_deleteRecs ) request[1] |= 0x80; //if ( m_overwriteRecs ) request[1] |= 0x40; // store the list after coll memcpy ( p , listData , listSize ); QUICKPOLL(m_niceness); // debug msg //if ( ! m_waitForReply ) // (m_rdbId == RDB_SPIDERDB || //m_rdbId == RDB_TFNDB) ) // // if we don't get here we lose it!!!!!!!!!!!!!!!!!!!!! // log("using mcast=%lu rdbId=%li listData=%lu listSize=%lu " // "gid=%lu", // (long)&m_mcast,(long)m_rdbId,(long)listData,(long)listSize, // groupId); // for small packets //long niceness = 2; //if ( requestLen < TMPBUFSIZE - 32 ) niceness = 0; //log("msg1: sending mcast niceness=%li",m_niceness); // . multicast to all hosts in group "groupId" // . multicast::send() returns false and sets g_errno on error // . we return false if we block, true otherwise // . will loop indefinitely if a host in this group is down key_t k; k.setMin(); if ( m_mcast.send ( request , // sets mcast->m_msg to this requestLen , // sets mcast->m_msgLen to this 0x01 , // msgType for add rdb record true , // does multicast own msg? shardNum , // group to send to (groupKey) true , // send to whole group? 
0 , // key is useless for us this , // state data NULL , // state data gotReplyWrapper1 , 60 , // timeout in secs m_niceness , // niceness false , // realtime -1 , // first host to try NULL , // replyBuf = NULL , 0 , // replyBufMaxSize = 0 , true , // freeReplyBuf = true , false , // doDiskLoadBalancing = false , -1 , // no max cache age limit //(key_t)0 , // cache key k , // cache key RDB_NONE , // bogus rdbId -1 , // unknown minRecSizes read size sendToSelf )) return false; QUICKPOLL(m_niceness); // g_errno should be set log("net: Had error when sending request to add data to %s in shard " "#%lu: %s.", getDbnameFromId(m_rdbId),shardNum,mstrerror(g_errno)); return true; }