// return false if blocked, true otherwise bool Msg39::addedLists ( ) { if ( m_posdbTable.m_t1 ) { // . measure time to add the lists in bright green // . use darker green if rat is false (default OR) long color; //char *label; color = 0x0000ff00 ; //label = "termlist_intersect"; g_stats.addStat_r ( 0 , m_posdbTable.m_t1 , m_posdbTable.m_t2 , color ); } // accumulate total hits count over each docid split m_numTotalHits += m_posdbTable.m_docIdVoteBuf.length() / 6; // before wrapping up, complete our docid split loops! // so do not send the reply back yet... send reply back from // the docid loop function... doDocIdSplitLoop() if ( m_numDocIdSplits >= 2 ) return true; // . save some memory,free m_topDocIdPtrs2,m_topScores2,m_topExplicits2 // . the m_topTree should have been filled from the call to // IndexTable2::fillTopDocIds() and it no longer has ptrs to the // docIds, but has the docIds themselves //m_posdbTable.freeMem(); // error? if ( m_posdbTable.m_errno ) { // we do not need to store the intersection i guess...?? m_posdbTable.freeMem(); g_errno = m_posdbTable.m_errno; log("query: posdbtable had error = %s",mstrerror(g_errno)); sendReply ( m_slot , this , NULL , 0 , 0 ,true); return true; } // should we put cluster recs in the tree? //m_gotClusterRecs = ( g_conf.m_fullSplit && m_r->m_doSiteClustering ); m_gotClusterRecs = ( m_r->m_doSiteClustering ); // . before we send the top docids back, lookup their site hashes // in clusterdb so we can do filtering at this point. // BUT only do this if we are in a "full split" config, because that // way we can guarantee all clusterdb recs are local (on this host) // and should be in the page cache. the page cache should do ultra // quick lookups and no memcpy()'s for this operation. it should // be <<1ms to lookup thousands of docids. // . 
when doing innerLoopSiteClustering we always use top tree now // because our number of "top docids" can be somewhat unpredictably // large due to having a ton of results with the same "domain hash" // (see the "vcount" in IndexTable2.cpp) // . do NOT do if we are just "getting weights", phr and aff weights if ( m_gotClusterRecs ) { // . set the clusterdb recs in the top tree return setClusterRecs ( ) ; } // if we did not call setClusterRecs, go on to estimate the hits estimateHits(); return true; }
// . returns false if blocks true otherwise
// . phase-driven state machine so we can resume after any async step:
//   1. read all termlists for docid range
//   2. intersect termlists to get the intersecting docids
//   3. increment docid ranges and keep going
//   4. when done return the top docids
bool Msg39::controlLoop ( ) {
	// re-entry point for each successive docid range
 loop:
	// bail out and reply if any prior step left an error
	if ( g_errno ) {
	hadError:
		log(LOG_LOGIC,"query: msg39: controlLoop: %s." ,
		    mstrerror(g_errno) );
		sendReply ( m_slot , this , NULL , 0 , 0 , true );
		return true;
	}
	// phase 0: compute this iteration's docid range and load termlists
	if ( m_phase == 0 ) {
		// next phase
		m_phase++;
		// the starting docid...
		int64_t d0 = m_ddd;
		// shortcut: width of each docid range
		int64_t delta = MAX_DOCID / (int64_t)m_r->m_numDocIdSplits;
		// advance to point to the exclusive endpoint
		m_ddd += delta;
		// ensure this is exclusive of ddd since it will be
		// inclusive in the following iteration.
		int64_t d1 = m_ddd;
		// fix rounding errors: clamp the final range to MAX_DOCID
		if ( d1 + 20LL > MAX_DOCID ) {
			d1    = MAX_DOCID;
			m_ddd = MAX_DOCID;
		}
		// fix it: tell the request which docid window to use
		m_r->m_minDocId = d0;
		m_r->m_maxDocId = d1; // -1; // exclude d1
		// allow posdbtable re-initialization each time to set
		// the msg2 termlist ptrs anew, otherwise we core in
		// call to PosdbTable::init() below
		//m_posdbTable.m_initialized = false;
		// reset ourselves, partially, anyway, not tmpq etc.
		reset2();
		// debug log
		if ( ! m_r->m_forSectionStats && m_debug )
			log("msg39: docid split phase %"INT64"-%"INT64"",
			    d0,d1);
		// wtf?
		//if ( d0 >= d1 ) break;
		// load termlists for these docid ranges using msg2 from
		// posdb. returns false if it blocks.
		if ( ! getLists() ) return false;
	}
	// phase 1: intersect the loaded termlists
	if ( m_phase == 1 ) {
		m_phase++;
		// intersect the lists we loaded using a thread.
		// returns false if it blocks.
		if ( ! intersectLists() ) return false;
		// error?
		if ( g_errno ) goto hadError;
	}
	// phase 2: sum up some stats, then loop back for the next range
	if ( m_phase == 2 ) {
		m_phase++;
		if ( m_posdbTable.m_t1 ) {
			// . measure time to add the lists in bright green
			// . use darker green if rat is false (default OR)
			int32_t color;
			//char *label;
			color = 0x0000ff00 ;
			//label = "termlist_intersect";
			g_stats.addStat_r ( 0 ,
					    m_posdbTable.m_t1 ,
					    m_posdbTable.m_t2 ,
					    color );
		}
		// accumulate total hits count over each docid split
		// (6 bytes per entry in the vote buffer)
		m_numTotalHits += m_posdbTable.m_docIdVoteBuf.length() / 6;
		// minus the shit we filtered out because of gbminint/gbmaxint/
		// gbmin/gbmax/gbsortby/gbrevsortby/gbsortbyint/gbrevsortbyint
		m_numTotalHits -= m_posdbTable.m_filtered;
		// error?
		if ( m_posdbTable.m_errno ) {
			// we do not need to store the intersection i guess..??
			m_posdbTable.freeMem();
			g_errno = m_posdbTable.m_errno;
			log("query: posdbtable had error = %s",
			    mstrerror(g_errno));
			sendReply ( m_slot , this , NULL , 0 , 0 ,true);
			return true;
		}
		// if we have more docid ranges remaining do more
		if ( m_ddd < m_dddEnd ) {
			m_phase = 0;
			goto loop;
		}
	}
	// phase 3: ok, we are done, get cluster recs of the winning docids
	if ( m_phase == 3 ) {
		m_phase++;
		// . this loads them using msg51 from clusterdb
		// . if m_r->m_doSiteClustering is false it just returns true
		// . this sets m_gotClusterRecs to true if we get them
		// . returns false if it blocks
		if ( ! setClusterRecs ( ) ) return false;
		// error setting clusterrecs?
		if ( g_errno ) goto hadError;
	}
	// process the cluster recs if we got them
	if ( m_gotClusterRecs && ! gotClusterRecs() ) goto hadError;
	// . all done! set stats and send back reply
	// . only sends back the cluster recs if m_gotClusterRecs is true
	estimateHitsAndSendReply();
	return true;
}
// . returns false if blocked, true if done // . to avoid running out of memory, generate the search results for // multiple smaller docid-ranges, one range at a time. bool Msg39::doDocIdSplitLoop ( ) { long long delta = MAX_DOCID / (long long)m_numDocIdSplits; for ( ; m_ddd < m_dddEnd ; ) { // the starting docid... long long d0 = m_ddd; // advance to point to the exclusive endpoint m_ddd += delta; // ensure this is exclusive of ddd since it will be // inclusive in the following iteration. long long d1 = m_ddd; // fix rounding errors if ( d1 + 20LL > MAX_DOCID ) { d1 = MAX_DOCID; m_ddd = MAX_DOCID; } // fix it m_r->m_minDocId = d0; m_r->m_maxDocId = d1; // -1; // exclude d1 // allow posdbtable re-initialization each time to set // the msg2 termlist ptrs anew, otherwise we core in // call to PosdbTable::init() below //m_posdbTable.m_initialized = false; // reset ourselves, partially, anyway, not tmpq etc. reset2(); // debug log log("msg39: docid split phase %lli-%lli",d0,d1); // wtf? if ( d0 >= d1 ) break; // use this //m_debug = true; //log("call1"); // . get the lists // . i think this always should block! // . it will also intersect the termlists to get the search // results and accumulate the winners into the "tree" if ( ! getLists() ) return false; //log("call2 g_errno=%li",(long)g_errno); // if there was an error, stop! if ( g_errno ) break; } // return error reply if we had an error if ( g_errno ) { log("msg39: Had error3: %s.", mstrerror(g_errno)); sendReply (m_slot,this,NULL,0,0 , true); return true; } if ( m_debug ) log("msg39: done with all docid range splits"); // all done. this will send reply back //estimateHits(); //addedLists(); // should we put cluster recs in the tree? //m_gotClusterRecs = ( g_conf.m_fullSplit && m_r->m_doSiteClustering ); m_gotClusterRecs = ( m_r->m_doSiteClustering ); // . before we send the top docids back, lookup their site hashes // in clusterdb so we can do filtering at this point. 
// BUT only do this if we are in a "full split" config, because that // way we can guarantee all clusterdb recs are local (on this host) // and should be in the page cache. the page cache should do ultra // quick lookups and no memcpy()'s for this operation. it should // be <<1ms to lookup thousands of docids. // . when doing innerLoopSiteClustering we always use top tree now // because our number of "top docids" can be somewhat unpredictably // large due to having a ton of results with the same "domain hash" // (see the "vcount" in IndexTable2.cpp) // . do NOT do if we are just "getting weights", phr and aff weights if ( m_gotClusterRecs ) { // . set the clusterdb recs in the top tree // . this calls estimateHits() in its reply wrapper when done return setClusterRecs ( ) ; } // if we did not call setClusterRecs, go on to estimate the hits estimateHits(); // no block, we are done return true; }