// . this is called when we got a local RdbList // . we need to call it to call the original caller callback void gotListWrapper2 ( void *state , RdbList *list , Msg5 *msg5 ) { logTrace( g_conf.m_logTraceMsg0, "BEGIN" ); Msg0 *THIS = (Msg0 *) state; THIS->reset(); // delete m_msg5 THIS->m_callback ( THIS->m_state );//, THIS->m_list ); logTrace( g_conf.m_logTraceMsg0, "END. rdbId=%d", (int)THIS->m_rdbId ); }
// . return false if you want this slot immediately nuked w/o replying to it void gotSingleReplyWrapper ( void *state , UdpSlot *slot ) { Msg0 *THIS = (Msg0 *)state; if ( ! g_errno ) { int32_t replySize = slot->m_readBufSize; int32_t replyMaxSize = slot->m_readBufMaxSize; char *reply = slot->m_readBuf; THIS->gotReply( reply , replySize , replyMaxSize ); // don't let UdpServer free this since we own it now slot->m_readBuf = NULL; } // never let m_request (sendBuf) be freed slot->m_sendBufAlloc = NULL; // do the callback now THIS->m_callback ( THIS->m_state );// THIS->m_list ); }
// scan all Rdb databases and ensure no recs (it was a clean delete) bool checkRdbLists ( long *rdbId ) { CollectionRec *cr = g_collectiondb.getRec("qatest123"); if ( ! cr ) return true; collnum_t cn = cr->m_collnum; for ( ; *rdbId < RDB_END ; ) { // pre-inc it *rdbId = *rdbId + 1; char minKey[MAX_KEY_BYTES]; char maxKey[MAX_KEY_BYTES]; KEYMIN(minKey,MAX_KEY_BYTES); KEYMAX(maxKey,MAX_KEY_BYTES); if ( ! s_msg0.getList ( 0 , // hostid 0 , // ip 0 , // port 0 , // cacheage false, // addtocache *rdbId , // rdbid cn , // collnum &s_list , minKey , maxKey , 1000 , // minrecsizes rdbId , // state gotList33, 0 // niceness ) ) return false; } return true; }
void gotMulticastReplyWrapper0 ( void *state , void *state2 ) { logTrace( g_conf.m_logTraceMsg0, "BEGIN" ); Msg0 *THIS = (Msg0 *)state; if ( ! g_errno ) { int32_t replySize; int32_t replyMaxSize; bool freeit; char *reply = THIS->m_mcast.getBestReply (&replySize, &replyMaxSize, &freeit); THIS->gotReply( reply , replySize , replyMaxSize ) ; } THIS->m_callback ( THIS->m_state ); logTrace( g_conf.m_logTraceMsg0, "END" ); }
void gotMulticastReplyWrapper0 ( void *state , void *state2 ) { Msg0 *THIS = (Msg0 *)state; //#ifdef SPLIT_INDEXDB //if ( g_hostdb.m_indexSplits > 1 ) { /* if ( THIS->m_numSplit > 1 ) { THIS->m_numReplies++; if ( ! g_errno ) { QUICKPOLL(THIS->m_niceness); // for split, wait for all replies if ( THIS->m_numReplies < THIS->m_numRequests ) return; else { // got it all, call the reply // watch out for someone having an error if ( ! THIS->m_errno ) THIS->gotSplitReply(); else g_errno = THIS->m_errno; } } else { // got an error, set an error state and wait for all // replies THIS->m_errno = g_errno; if ( THIS->m_numReplies < THIS->m_numRequests ) return; } THIS->m_callback ( THIS->m_state );//, THIS->m_list ); } //#else else { */ if ( ! g_errno ) { int32_t replySize; int32_t replyMaxSize; bool freeit; char *reply = THIS->m_mcast.getBestReply (&replySize, &replyMaxSize, &freeit); THIS->gotReply( reply , replySize , replyMaxSize ) ; } THIS->m_callback ( THIS->m_state );//, THIS->m_list ); //} //#endif }
// . this is called when we got a local RdbList // . we need to call it to call the original caller callback void gotListWrapper2 ( void *state , RdbList *list , Msg5 *msg5 ) { Msg0 *THIS = (Msg0 *) state; THIS->reset(); // delete m_msg5 THIS->m_callback ( THIS->m_state );//, THIS->m_list ); }
// . issues one Msg0::getList() per group for st->m_termId, starting at
//   group # st->m_i and continuing until g_hostdb.getNumGroups() is reached
// . reads from datedb (16-byte keys) when st->m_useDatedb is set, otherwise
//   from indexdb (12-byte keys)
// . returns true when there is nothing (more) to launch: no query, an
//   add/delete-only request, or all groups have been covered
// . returns false as soon as a getList() call returns false;
//   gotIndexListWrapper was registered as its callback with "st" as state
bool launchRequests ( State10 *st ) {
	// nothing to do if no query
	if ( ! st->m_query[0] ) return true;
	// all done if add request only
	if ( st->m_add || st->m_del ) return true;

	for ( ; ; ) {
		// the group # this request goes to
		const long split = st->m_i;
		// all done?
		if ( split >= g_hostdb.getNumGroups() ) return true;

		// build the key range of the index list for this termId, in
		// both the indexdb and the datedb flavor
		char startKey[16];
		char endKey  [16];
		key_t    s12 = g_indexdb.makeStartKey ( st->m_termId );
		key_t    e12 = g_indexdb.makeEndKey   ( st->m_termId );
		key128_t s16 = g_datedb.makeStartKey  ( st->m_termId ,0xffffffff);
		key128_t e16 = g_datedb.makeEndKey    ( st->m_termId ,0x0);

		// pick the rdb and its key size
		const bool dated   = st->m_useDatedb;
		const long keySize = dated ? 16 : 12;
		const char rdbId   = dated ? RDB_DATEDB : RDB_INDEXDB;
		if ( dated ) {
			memcpy ( startKey , &s16 , 16 );
			memcpy ( endKey   , &e16 , 16 );
		}
		else {
			memcpy ( startKey , &s12 , 12 );
			memcpy ( endKey   , &e12 , 12 );
		}

		// . -1 means read from all files, 0 if the caller doesn't want
		//   to hit the disk
		// . NOTE(review): this value is computed but never handed to
		//   getList() below, which is always given a literal -1
		long numFiles = st->m_useDisk ? -1 : 0;

		// inc to next
		st->m_i++;

		// fetch the list from that group
		if ( ! st->m_msg0.getList ( -1       , // hostId
					    -1       , // ip
					    -1       , // port
					    0        , // max cache age
					    false    , // add to cache?
					    rdbId    ,
					    st->m_coll ,
					    &st->m_list ,
					    startKey ,
					    endKey   ,
					    st->m_numRecs * keySize , // recSizes
					    st       , // state
					    gotIndexListWrapper ,
					    0        , // niceness
					    false    , // error correction?
					    true     , // include tree?
					    true     , // do merge?
					    -1       , // first hostid
					    0        , // start file num
					    -1       , // numFiles
					    99999    , // timeout
					    -1       , // sync point
					    -1       , // prefer local reads?
					    NULL     , // msg5
					    NULL     , // msg5b
					    false    , // is real merge?
					    true     , // allow page cache?
					    false    , // force local indexdb?
					    true     , // do split?
					    split    ) ) // group # to send to
			return false;

		// getList() returned true, so launch the next one
	}
}
// . returns false if blocked, true otherwise // . returns true on error and sets g_errno bool SiteGetter::getSiteList ( ) { top: // . setSite() will return TRUE and set g_errno on error, and returns // false if it blocked adding a tag, which will call callback once // tag is added // . stop at this point if ( m_pathDepth >= 3 ) return setSite(); // or if no more if ( m_pathDepth >= m_maxPathDepth ) return setSite(); // . make the termid // . but here we get are based on "m_pathDepth" which ranges // from 1 to N // . if m_pathDepth==0 use "www.xyz.com" as site // . if m_pathDepth==1 use "www.xyz.com/foo/" as site ... char *pend = getPathEnd ( m_url , m_pathDepth ); // hash up to that //char *host = m_u.getHost(); char *host = getHostFast ( m_url , NULL ); // hash the prefix first to match XmlDoc::hashNoSplit() char *prefix = "siteterm"; // hash that and we will incorporate it to match XmlDoc::hashNoSplit() int64_t ph = hash64 ( prefix , gbstrlen(prefix) ); // . this should match basically what is in XmlDoc.cpp::hash() // . and this now does not include pages that have no outlinks // "underneath" them. int64_t termId = hash64 ( host , pend - host , ph ) & TERMID_MASK; // get all pages that have this as their termid! key144_t start ; key144_t end ; g_posdb.makeStartKey ( &start, termId ); g_posdb.makeEndKey ( &end , termId ); // . now see how many urls art at this path depth from this hostname // . if it is a huge # then we know they are all subsites! // because it is too bushy to be anything else // . i'd say 100 nodes is good enough to qualify as a homestead site int32_t minRecSizes = 5000000; // get the group this list is in //uint32_t gid ; //gid = getGroupId ( RDB_POSDB , (char *)&start , false ); //split? //uint32_t shardNum ; //shardNum = getShardNum( RDB_POSDB , (char *)&start , false ); //split? // i guess this is split by termid and not docid???? int32_t shardNum = g_hostdb.getShardNumByTermId ( &start ); // we need a group #. the column #. 
//int32_t split = g_hostdb.getGroupNum ( gid ); // int16_tcut Msg0 *m = &m_msg0; // get the list. returns false if blocked. if ( ! m->getList ( -1 , // hostId 0 , // ip 0 , // port 0 , // maxCacheAge false , // addToCache RDB_POSDB , m_collnum , &m_list , (char *)&start , (char *)&end , minRecSizes , this , gotSiteListWrapper , m_niceness , // MAX_NICENESS // default parms follow true , // doErrorCorrection? true , // includeTree? true , // doMerge? -1 , // firstHostId 0 , // startFileNum -1 , // numFiles 999999, // timeout -1 , // syncPoint -1 , // preferLocalReads NULL , // msg5 NULL , // msg5b false , // isrealmerge? true , // allowpagecache? false , // forceLocalIndexdb? false , // doIndexdbSplit? nosplit shardNum ) )//split )) return false; // return false if this blocked if ( ! gotSiteList() ) return false; // error? if ( g_errno ) return true; // or all done if ( m_allDone ) return true; // otherwise, try the next path component! goto top; }