// . returns false if blocks true otherwise // 1. read all termlists for docid range // 2. intersect termlists to get the intersecting docids // 3. increment docid ranges and keep going // 4. when done return the top docids bool Msg39::controlLoop ( ) { loop: // error? if ( g_errno ) { hadError: log(LOG_LOGIC,"query: msg39: controlLoop: %s." , mstrerror(g_errno) ); sendReply ( m_slot , this , NULL , 0 , 0 , true ); return true; } if ( m_phase == 0 ) { // next phase m_phase++; // the starting docid... int64_t d0 = m_ddd; // int16_tcut int64_t delta = MAX_DOCID / (int64_t)m_r->m_numDocIdSplits; // advance to point to the exclusive endpoint m_ddd += delta; // ensure this is exclusive of ddd since it will be // inclusive in the following iteration. int64_t d1 = m_ddd; // fix rounding errors if ( d1 + 20LL > MAX_DOCID ) { d1 = MAX_DOCID; m_ddd = MAX_DOCID; } // fix it m_r->m_minDocId = d0; m_r->m_maxDocId = d1; // -1; // exclude d1 // allow posdbtable re-initialization each time to set // the msg2 termlist ptrs anew, otherwise we core in // call to PosdbTable::init() below //m_posdbTable.m_initialized = false; // reset ourselves, partially, anyway, not tmpq etc. reset2(); // debug log if ( ! m_r->m_forSectionStats && m_debug ) log("msg39: docid split phase %"INT64"-%"INT64"",d0,d1); // wtf? //if ( d0 >= d1 ) break; // load termlists for these docid ranges using msg2 from posdb if ( ! getLists() ) return false; } if ( m_phase == 1 ) { m_phase++; // intersect the lists we loaded using a thread if ( ! intersectLists() ) return false; // error? if ( g_errno ) goto hadError; } // sum up some stats if ( m_phase == 2 ) { m_phase++; if ( m_posdbTable.m_t1 ) { // . measure time to add the lists in bright green // . 
use darker green if rat is false (default OR) int32_t color; //char *label; color = 0x0000ff00 ; //label = "termlist_intersect"; g_stats.addStat_r ( 0 , m_posdbTable.m_t1 , m_posdbTable.m_t2 , color ); } // accumulate total hits count over each docid split m_numTotalHits += m_posdbTable.m_docIdVoteBuf.length() / 6; // minus the shit we filtered out because of gbminint/gbmaxint/ // gbmin/gbmax/gbsortby/gbrevsortby/gbsortbyint/gbrevsortbyint m_numTotalHits -= m_posdbTable.m_filtered; // error? if ( m_posdbTable.m_errno ) { // we do not need to store the intersection i guess..?? m_posdbTable.freeMem(); g_errno = m_posdbTable.m_errno; log("query: posdbtable had error = %s", mstrerror(g_errno)); sendReply ( m_slot , this , NULL , 0 , 0 ,true); return true; } // if we have more docid ranges remaining do more if ( m_ddd < m_dddEnd ) { m_phase = 0; goto loop; } } // ok, we are done, get cluster recs of the winning docids if ( m_phase == 3 ) { m_phase++; // . this loads them using msg51 from clusterdb // . if m_r->m_doSiteClustering is false it just returns true // . this sets m_gotClusterRecs to true if we get them if ( ! setClusterRecs ( ) ) return false; // error setting clusterrecs? if ( g_errno ) goto hadError; } // process the cluster recs if we got them if ( m_gotClusterRecs && ! gotClusterRecs() ) goto hadError; // . all done! set stats and send back reply // . only sends back the cluster recs if m_gotClusterRecs is true estimateHitsAndSendReply(); return true; }
// . validates the request, parses the query, decides how many docid ranges
//   to split the work into, then starts loading termlists
// . every validation failure logs, sends an error reply and returns
void Msg39::getDocIds2 ( Msg39Request *req ) {

	// we are busy now; remember the request we are serving
	m_inUse = true;
	m_r     = req;

	// debug output is on if the request or either conf switch asks for it
	m_debug = ( m_r->m_debug             ||
		    g_conf.m_logDebugQuery   ||
		    g_conf.m_logTimingQuery  );

	// the collection name must be non-empty and must resolve to a rec
	bool haveColl = ( m_r->size_coll > 0 );
	if ( haveColl )
		haveColl = ( g_collectiondb.getRec ( m_r->ptr_coll ) != NULL );
	if ( ! haveColl ) {
		g_errno = ENOCOLLREC;
		log(LOG_LOGIC,"query: msg39: getDocIds: %s." ,
		    mstrerror(g_errno) );
		sendReply ( m_slot , this , NULL , 0 , 0 , true );
		return;
	}

	// . parse the query into m_tmpq
	// . the caller already resolved the boolean flag auto-detection
	if ( ! m_tmpq.set2 ( m_r->ptr_query ,
			     m_r->m_language ,
			     m_r->m_queryExpansion ,
			     m_r->m_useQueryStopWords ) ) {
		log(LOG_LOGIC,"query: msg39: setQuery: %s." ,
		    mstrerror(g_errno) );
		sendReply ( m_slot , this , NULL , 0 , 0 , true );
		return;
	}

	QUICKPOLL ( m_r->m_niceness );

	// remember a truncated query in m_errno
	if ( m_tmpq.m_truncated ) m_errno = EQUERYTRUNCATED;

	// our term count must agree with what the msg3a sender computed
	if ( m_tmpq.getNumTerms() != m_r->m_nqt ) {
		g_errno = EBADENGINEER;
		log("query: Query parsing inconsistency for q=%s. "
		    "langid=%li. Check langids and m_queryExpansion parms "
		    "which are the only parms that could be different in "
		    "Query::set2()."
		    ,m_tmpq.m_orig
		    ,(long)m_r->m_language
		    );
		sendReply ( m_slot , this , NULL , 0 , 0 , true );
		return;
	}

	if ( m_debug )
		logf(LOG_DEBUG,"query: msg39: [%lu] Got request "
		     "for q=%s", (long) this,m_tmpq.m_orig);

	// start with an empty m_tt
	m_tt.reset();

	QUICKPOLL ( m_r->m_niceness );

	// . work out the number of docid ranges: one by default
	// . only split when the caller gave no docid range of its own
	//   (m_minDocId of -1); then use half the term count, at least one
	// . never more than ten
	long splitCount = 1;
	if ( m_r->m_minDocId == -1 ) {
		splitCount = m_tmpq.getNumTerms() / 2;
		if ( splitCount == 0 ) splitCount = 1;
	}
	if ( splitCount > 10 ) splitCount = 10;
	m_numDocIdSplits = splitCount;

	// a single range needs no split loop, just load the lists
	if ( m_numDocIdSplits == 1 ) {
		getLists();
		return;
	}

	// the docid range cursor covers the whole docid space
	m_ddd    = 0;
	m_dddEnd = MAX_DOCID;

	// . to prevent oom, get the winners of one docid range at a time
	// . false means it blocked
	if ( ! doDocIdSplitLoop() ) return;

	// it did not block. error?
	if ( g_errno ) {
		log(LOG_LOGIC,"query: msg39: doDocIdSplitLoop: %s." ,
		    mstrerror(g_errno) );
		sendReply ( m_slot , this , NULL , 0 , 0 , true );
		return;
	}

	// it might not have blocked! if all lists were in the tree and no
	// thread was used we arrive here after the reply was sent and "this"
	// was destroyed
	return;
}
// . returns false if blocked, true if done // . to avoid running out of memory, generate the search results for // multiple smaller docid-ranges, one range at a time. bool Msg39::doDocIdSplitLoop ( ) { long long delta = MAX_DOCID / (long long)m_numDocIdSplits; for ( ; m_ddd < m_dddEnd ; ) { // the starting docid... long long d0 = m_ddd; // advance to point to the exclusive endpoint m_ddd += delta; // ensure this is exclusive of ddd since it will be // inclusive in the following iteration. long long d1 = m_ddd; // fix rounding errors if ( d1 + 20LL > MAX_DOCID ) { d1 = MAX_DOCID; m_ddd = MAX_DOCID; } // fix it m_r->m_minDocId = d0; m_r->m_maxDocId = d1; // -1; // exclude d1 // allow posdbtable re-initialization each time to set // the msg2 termlist ptrs anew, otherwise we core in // call to PosdbTable::init() below //m_posdbTable.m_initialized = false; // reset ourselves, partially, anyway, not tmpq etc. reset2(); // debug log log("msg39: docid split phase %lli-%lli",d0,d1); // wtf? if ( d0 >= d1 ) break; // use this //m_debug = true; //log("call1"); // . get the lists // . i think this always should block! // . it will also intersect the termlists to get the search // results and accumulate the winners into the "tree" if ( ! getLists() ) return false; //log("call2 g_errno=%li",(long)g_errno); // if there was an error, stop! if ( g_errno ) break; } // return error reply if we had an error if ( g_errno ) { log("msg39: Had error3: %s.", mstrerror(g_errno)); sendReply (m_slot,this,NULL,0,0 , true); return true; } if ( m_debug ) log("msg39: done with all docid range splits"); // all done. this will send reply back //estimateHits(); //addedLists(); // should we put cluster recs in the tree? //m_gotClusterRecs = ( g_conf.m_fullSplit && m_r->m_doSiteClustering ); m_gotClusterRecs = ( m_r->m_doSiteClustering ); // . before we send the top docids back, lookup their site hashes // in clusterdb so we can do filtering at this point. 
// BUT only do this if we are in a "full split" config, because that // way we can guarantee all clusterdb recs are local (on this host) // and should be in the page cache. the page cache should do ultra // quick lookups and no memcpy()'s for this operation. it should // be <<1ms to lookup thousands of docids. // . when doing innerLoopSiteClustering we always use top tree now // because our number of "top docids" can be somewhat unpredictably // large due to having a ton of results with the same "domain hash" // (see the "vcount" in IndexTable2.cpp) // . do NOT do if we are just "getting weights", phr and aff weights if ( m_gotClusterRecs ) { // . set the clusterdb recs in the top tree // . this calls estimateHits() in its reply wrapper when done return setClusterRecs ( ) ; } // if we did not call setClusterRecs, go on to estimate the hits estimateHits(); // no block, we are done return true; }
void AP_Lists_preview::draw(void) { UT_return_if_fail(m_pFont); GR_Painter painter(m_gc); m_gc->setFont(m_pFont); UT_RGBColor clrGrey = UT_RGBColor(128,128,128); UT_RGBColor clrBlack = UT_RGBColor(0,0,0); UT_sint32 iWidth = m_gc->tlu(getWindowWidth()); UT_sint32 iHeight = m_gc->tlu(getWindowHeight()); UT_UCSChar ucs_label[50]; UT_sint32 iDescent = m_gc->getFontDescent(); UT_sint32 iAscent = m_gc->getFontAscent(); UT_sint32 iFont = iDescent + iAscent; m_iLine_height = iFont; // // clear our screen // if (m_bFirst == true) { painter.clearArea(0, 0, iWidth, iHeight); } m_gc->setColor(clrBlack); UT_sint32 yoff = m_gc->tlu(5) ; UT_sint32 xoff = m_gc->tlu(5) ; UT_sint32 i,ii,yloc,awidth,aheight,maxw; UT_sint32 twidth =0; UT_sint32 j,xy; float z,fwidth; // todo 6.5 should be the page width in inches float pagew = 2.0; aheight = m_gc->tlu(16); fwidth = static_cast<float>(m_gc->tdu(iWidth)); z = (float)((fwidth - 2.0*static_cast<float>(m_gc->tdu(xoff))) /pagew); UT_sint32 indent = m_gc->tlu(static_cast<UT_sint32>( z*(m_fAlign+m_fIndent))); if(indent < 0) indent = 0; maxw = 0; for(i=0; i<4; i++) { UT_UCSChar * lv = getLists()->getListLabel(i); UT_sint32 len =0; if(lv != NULL) { // // This code is here because UT_UCS_copy_char is broken // len = UT_MIN(UT_UCS4_strlen(lv),51); for(j=0; j<=len;j++) { ucs_label[j] = *lv++; } ucs_label[len] = 0; len = UT_UCS4_strlen(ucs_label); yloc = yoff + iAscent + (iHeight - 2*yoff -iFont)*i/4; // painter.drawChars(ucs_label,0,len,xoff+indent,yloc); twidth = m_gc->measureString(ucs_label,0,len,NULL); if(twidth > maxw) maxw = twidth; } } // // Work out where to put grey areas to represent text // UT_sint32 xx,yy; if(maxw > 0) maxw++; // UT_sint32 vspace = (iHeight - 2*yoff -iFont)*i/16; z = (float)((fwidth - 2.0*static_cast<float>(m_gc->tdu(xoff))) /(float)pagew); UT_sint32 ialign = m_gc->tlu(static_cast<UT_sint32>( z*m_fAlign)); xx = xoff + ialign; xy = xoff + ialign; if(xx < (xoff + maxw + indent)) xy = xoff + maxw + indent + m_gc->tlu(1); 
ii = 0; for(i=0; i<4; i++) { yloc = yoff + iAscent + (iHeight - 2*yoff -iFont)*i/4; for(j=0; j< 2; j++) { yy = yloc + m_gc->tlu(5) + j*m_gc->tlu(21); m_iLine_pos[ii++] = yy; } } // // Now finally draw the preview // UT_BidiCharType iDirection = getLists()->getBlock()->getDominantDirection(); for(i=0; i<8; i++) { // // First clear the line // painter.clearArea(0, m_iLine_pos[i], iWidth, iHeight); if((i & 1) == 0) { // // Draw the text // UT_UCSChar * lv = getLists()->getListLabel(i/2); UT_sint32 len =0; if(lv != NULL) { len = UT_MIN(UT_UCS4_strlen(lv),49); if(len > 1 && XAP_App::getApp()->theOSHasBidiSupport() == XAP_App::BIDI_SUPPORT_GUI) { UT_bidiReorderString(lv, len, iDirection, ucs_label); } else { for(j=0; j<=len;j++) ucs_label[j] = *lv++; } ucs_label[len] = 0; len = UT_UCS4_strlen(ucs_label); yloc = yoff + iAscent + (iHeight - 2*yoff -iFont)*i/8; if(iDirection == UT_BIDI_RTL) painter.drawChars(ucs_label,0,len,iWidth - xoff - indent - maxw,yloc); else painter.drawChars(ucs_label,0,len,xoff+indent,yloc); yy = m_iLine_pos[i]; awidth = iWidth - 2*xoff - xy; if(iDirection == UT_BIDI_RTL) painter.fillRect(clrGrey,xoff,yy,awidth,aheight); else painter.fillRect(clrGrey,xy,yy,awidth,aheight); } else { yy = m_iLine_pos[i]; awidth = iWidth - 2*xoff - xy; if(iDirection == UT_BIDI_RTL) painter.fillRect(clrGrey,xoff,yy,awidth,aheight); else painter.fillRect(clrGrey,xy,yy,awidth,aheight); } } else { yy = m_iLine_pos[i]; awidth = iWidth - 2*xoff - xx; if(iDirection == UT_BIDI_RTL) painter.fillRect(clrGrey,xoff,yy,awidth,aheight); else painter.fillRect(clrGrey,xy,yy,awidth,aheight); } } }