Esempi in C++ (Cpp) per getLists

Esempio n. 1

0

Mostra file

File: Msg39.cpp Progetto: DeadNumbers/open-source-search-engine

// . returns false if blocks true otherwise
// 1. read all termlists for docid range
// 2. intersect termlists to get the intersecting docids
// 3. increment docid ranges and keep going
// 4. when done return the top docids
bool Msg39::controlLoop ( ) {

 loop:
	
	// error?
	if ( g_errno ) {
	hadError:
		log(LOG_LOGIC,"query: msg39: controlLoop: %s." , 
		    mstrerror(g_errno) );
		sendReply ( m_slot , this , NULL , 0 , 0 , true );
		return true; 
	}

	if ( m_phase == 0 ) {
		// next phase
		m_phase++;
		// the starting docid...
		int64_t d0 = m_ddd;
		// int16_tcut
		int64_t delta = MAX_DOCID / (int64_t)m_r->m_numDocIdSplits;
		// advance to point to the exclusive endpoint
		m_ddd += delta;
		// ensure this is exclusive of ddd since it will be
		// inclusive in the following iteration.
		int64_t d1 = m_ddd;
		// fix rounding errors
		if ( d1 + 20LL > MAX_DOCID ) {
			d1    = MAX_DOCID;
			m_ddd = MAX_DOCID;
		}
		// fix it
		m_r->m_minDocId = d0;
		m_r->m_maxDocId = d1; // -1; // exclude d1
		// allow posdbtable re-initialization each time to set
		// the msg2 termlist ptrs anew, otherwise we core in
		// call to PosdbTable::init() below
		//m_posdbTable.m_initialized = false;
		// reset ourselves, partially, anyway, not tmpq etc.
		reset2();
		// debug log
		if ( ! m_r->m_forSectionStats && m_debug )
			log("msg39: docid split phase %"INT64"-%"INT64"",d0,d1);
		// wtf?
		//if ( d0 >= d1 ) break;
		// load termlists for these docid ranges using msg2 from posdb
		if ( ! getLists() ) return false;
	}

	if ( m_phase == 1 ) {
		m_phase++;
		// intersect the lists we loaded using a thread
		if ( ! intersectLists() ) return false;
		// error?
		if ( g_errno ) goto hadError;
	}

	// sum up some stats
	if ( m_phase == 2 ) {
		m_phase++;
		if ( m_posdbTable.m_t1 ) {
			// . measure time to add the lists in bright green
			// . use darker green if rat is false (default OR)
			int32_t color;
			//char *label;
			color = 0x0000ff00 ;
			//label = "termlist_intersect";
			g_stats.addStat_r ( 0 , 
					    m_posdbTable.m_t1 , 
					    m_posdbTable.m_t2 , color );
		}
		// accumulate total hits count over each docid split
		m_numTotalHits += m_posdbTable.m_docIdVoteBuf.length() / 6;
		// minus the shit we filtered out because of gbminint/gbmaxint/
		// gbmin/gbmax/gbsortby/gbrevsortby/gbsortbyint/gbrevsortbyint
		m_numTotalHits -= m_posdbTable.m_filtered;
		// error?
		if ( m_posdbTable.m_errno ) {
			// we do not need to store the intersection i guess..??
			m_posdbTable.freeMem();
			g_errno = m_posdbTable.m_errno;
			log("query: posdbtable had error = %s",
			    mstrerror(g_errno));
			sendReply ( m_slot , this , NULL , 0 , 0 ,true);
			return true;
		}
		// if we have more docid ranges remaining do more
		if ( m_ddd < m_dddEnd ) {
			m_phase = 0;
			goto loop;
		}
	}

	// ok, we are done, get cluster recs of the winning docids
	if ( m_phase == 3 ) {
		m_phase++;
		// . this loads them using msg51 from clusterdb
		// . if m_r->m_doSiteClustering is false it just returns true
		// . this sets m_gotClusterRecs to true if we get them
		if ( ! setClusterRecs ( ) ) return false;
		// error setting clusterrecs?
		if ( g_errno ) goto hadError;
	}

	// process the cluster recs if we got them
	if ( m_gotClusterRecs && ! gotClusterRecs() )
		goto hadError;

	// . all done! set stats and send back reply
	// . only sends back the cluster recs if m_gotClusterRecs is true
	estimateHitsAndSendReply();

	return true;
}

Esempio n. 2

0

Mostra file

File: Msg39.cpp Progetto: abhayprakash/open-source-search-engine

void Msg39::getDocIds2 ( Msg39Request *req ) {

	// flag it as in use
	m_inUse = true;

	// store it, might be redundant if called from getDocIds() above
	m_r = req;

	// a handy thing
	m_debug = false;
	if ( m_r->m_debug          ) m_debug = true;
	if ( g_conf.m_logDebugQuery  ) m_debug = true;
	if ( g_conf.m_logTimingQuery ) m_debug = true;

        // ensure it's size is ok
        if ( m_r->size_coll <= 0 ) {
		g_errno = ENOCOLLREC;
		log(LOG_LOGIC,"query: msg39: getDocIds: %s." , 
		    mstrerror(g_errno) );
		sendReply ( m_slot , this , NULL , 0 , 0 , true );
		return ; 
	}

        CollectionRec *cr = g_collectiondb.getRec ( m_r->ptr_coll );
        if ( ! cr ) {
		g_errno = ENOCOLLREC;
		log(LOG_LOGIC,"query: msg39: getDocIds: %s." , 
		    mstrerror(g_errno) );
		sendReply ( m_slot , this , NULL , 0 , 0 , true );
		return ; 
	}

	// . set our m_q class
	// . m_boolFlag is either 1 or 0 in this case, the caller did the
	//   auto-detect (boolFlag of 2) before calling us
	// . this now calls Query::addCompoundTerms() for us
	if ( ! m_tmpq.set2 ( m_r->ptr_query  , 
			     m_r->m_language ,
			     m_r->m_queryExpansion ,
			     m_r->m_useQueryStopWords ) ) {
		log(LOG_LOGIC,"query: msg39: setQuery: %s." , 
		    mstrerror(g_errno) );
		sendReply ( m_slot , this , NULL , 0 , 0 , true );
		return ; 
	}

	QUICKPOLL ( m_r->m_niceness );

	// set m_errno
	if ( m_tmpq.m_truncated ) m_errno = EQUERYTRUNCATED;
	// ensure matches with the msg3a sending us this request
	if ( m_tmpq.getNumTerms() != m_r->m_nqt ) {
		g_errno = EBADENGINEER;
		log("query: Query parsing inconsistency for q=%s. "
		    "langid=%li. Check langids and m_queryExpansion parms "
		    "which are the only parms that could be different in "
		    "Query::set2()."
		    ,m_tmpq.m_orig
		    ,(long)m_r->m_language
		    );
		sendReply ( m_slot , this , NULL , 0 , 0 , true );
		return ; 
	}
	// debug
	if ( m_debug )
		logf(LOG_DEBUG,"query: msg39: [%lu] Got request "
		     "for q=%s", (long) this,m_tmpq.m_orig);

	// reset this
	m_tt.reset();

	QUICKPOLL ( m_r->m_niceness );

	// assume not doing special docid splitting
	m_numDocIdSplits = 1;

	// . do not do splits if caller is already specifying a docid range
	//   like for gbdocid: queries i guess.
	// . make sure m_msg2 is non-NULL, because if it is NULL we are
	//   evaluating a query for a single docid for seo tools
	if ( m_r->m_minDocId == -1 ) { // && m_msg2 ) {
		long nt = m_tmpq.getNumTerms();
		m_numDocIdSplits = nt / 2;
		if ( m_numDocIdSplits == 0 ) m_numDocIdSplits = 1;
	}

	//if ( ! g_conf.m_doDocIdRangeSplitting )
	//	m_numDocIdSplits = 1;

	// limit to 10
	if ( m_numDocIdSplits > 10 ) 
		m_numDocIdSplits = 10;

	// . if caller already specified a docid range, then be loyal to that!
	// . or if we do not have enough query terms to warrant splitting
	if ( m_numDocIdSplits == 1 ) {
		getLists();
		return;
	}

	// . set up docid range cursor
	// . do twin splitting
	// . we do no do it this way any more... we subsplit each split
	//   into two halves...!!! see logic in getLists() below!!!
	//if ( m_r->m_stripe == 1 ) {
	//	m_ddd = MAX_DOCID / 2LL;
	//	m_dddEnd = MAX_DOCID + 1LL;
	//}
	//else if ( m_r->m_stripe == 0 ) {
	//	m_ddd = 0;
	//	m_dddEnd = MAX_DOCID / 2LL;
	//}
	// support triplets, etc. later
	//else {
	//	char *xx=NULL;*xx=0; 
	//}

	// do not do twin splitting if only one host per group
	//if ( g_hostdb.getNumStripes() == 1 ) {
	m_ddd    = 0;
	m_dddEnd = MAX_DOCID;
	//}


	// . otherwise, to prevent oom, split up docids into ranges
	//   and get winners of each range.
	if ( ! doDocIdSplitLoop() ) return;

	// error?
	if ( g_errno ) {
		log(LOG_LOGIC,"query: msg39: doDocIdSplitLoop: %s." , 
		    mstrerror(g_errno) );
		sendReply ( m_slot , this , NULL , 0 , 0 , true );
		return ; 
	}
	// it might not have blocked! if all lists in tree and used no thread
	// it will come here after sending the reply and destroying "this"
	return;
}

Esempio n. 3

0

Mostra file

File: Msg39.cpp Progetto: abhayprakash/open-source-search-engine

// . returns false if blocked, true if done
// . to avoid running out of memory, generate the search results for
//   multiple smaller docid-ranges, one range at a time.
bool Msg39::doDocIdSplitLoop ( ) {
	long long delta = MAX_DOCID / (long long)m_numDocIdSplits;
	for ( ; m_ddd < m_dddEnd ; ) {
		// the starting docid...
		long long d0 = m_ddd;
		// advance to point to the exclusive endpoint
		m_ddd += delta;
		// ensure this is exclusive of ddd since it will be
		// inclusive in the following iteration.
		long long d1 = m_ddd;
		// fix rounding errors
		if ( d1 + 20LL > MAX_DOCID ) {
			d1    = MAX_DOCID;
			m_ddd = MAX_DOCID;
		}
		// fix it
		m_r->m_minDocId = d0;
		m_r->m_maxDocId = d1; // -1; // exclude d1
		// allow posdbtable re-initialization each time to set
		// the msg2 termlist ptrs anew, otherwise we core in
		// call to PosdbTable::init() below
		//m_posdbTable.m_initialized = false;
		// reset ourselves, partially, anyway, not tmpq etc.
		reset2();
		// debug log
		log("msg39: docid split phase %lli-%lli",d0,d1);
		// wtf?
		if ( d0 >= d1 ) break;
		// use this
		//m_debug = true;
		//log("call1");
		// . get the lists
		// . i think this always should block!
		// . it will also intersect the termlists to get the search
		//   results and accumulate the winners into the "tree"
		if ( ! getLists() ) return false;
		//log("call2 g_errno=%li",(long)g_errno);
		// if there was an error, stop!
		if ( g_errno ) break;
	}

	// return error reply if we had an error
	if ( g_errno ) {
		log("msg39: Had error3: %s.", mstrerror(g_errno));
		sendReply (m_slot,this,NULL,0,0 , true);
		return true; 
	}

	if ( m_debug ) 
		log("msg39: done with all docid range splits");

	// all done. this will send reply back
	//estimateHits();
	//addedLists();

	// should we put cluster recs in the tree?
	//m_gotClusterRecs = ( g_conf.m_fullSplit && m_r->m_doSiteClustering );
	m_gotClusterRecs = ( m_r->m_doSiteClustering );
	
	// . before we send the top docids back, lookup their site hashes
	//   in clusterdb so we can do filtering at this point.
	//   BUT only do this if we are in a "full split" config, because that
	//   way we can guarantee all clusterdb recs are local (on this host)
	//   and should be in the page cache. the page cache should do ultra
	//   quick lookups and no memcpy()'s for this operation. it should
	//   be <<1ms to lookup thousands of docids.
	// . when doing innerLoopSiteClustering we always use top tree now
	//   because our number of "top docids" can be somewhat unpredictably 
	//   large due to having a ton of results with the same "domain hash" 
	//   (see the "vcount" in IndexTable2.cpp)
	// . do NOT do if we are just "getting weights", phr and aff weights
	if ( m_gotClusterRecs ) {
		// . set the clusterdb recs in the top tree
		// . this calls estimateHits() in its reply wrapper when done
		return setClusterRecs ( ) ;
	}

	// if we did not call setClusterRecs, go on to estimate the hits
	estimateHits();

	// no block, we are done
	return true;
}

Esempio n. 4

0

Mostra file

void AP_Lists_preview::draw(void)
{
	UT_return_if_fail(m_pFont);

	GR_Painter painter(m_gc);

	m_gc->setFont(m_pFont);
	
	UT_RGBColor clrGrey = UT_RGBColor(128,128,128);
	UT_RGBColor clrBlack = UT_RGBColor(0,0,0);
	UT_sint32 iWidth = m_gc->tlu(getWindowWidth());
	UT_sint32 iHeight = m_gc->tlu(getWindowHeight());
	UT_UCSChar ucs_label[50];

	UT_sint32 iDescent = m_gc->getFontDescent();
	UT_sint32 iAscent = m_gc->getFontAscent();
	UT_sint32 iFont = iDescent + iAscent;
	m_iLine_height = iFont;
	//
	// clear our screen
	//
	if (m_bFirst == true)
	{
		painter.clearArea(0, 0, iWidth, iHeight);
	}
	m_gc->setColor(clrBlack);
	UT_sint32 yoff = m_gc->tlu(5) ;
	UT_sint32 xoff = m_gc->tlu(5) ;
	UT_sint32 i,ii,yloc,awidth,aheight,maxw;
	UT_sint32 twidth =0;
	UT_sint32 j,xy;
	float z,fwidth;
	// todo 6.5 should be the page width in inches
	float pagew = 2.0;
	aheight = m_gc->tlu(16);
	fwidth = static_cast<float>(m_gc->tdu(iWidth));

	z = (float)((fwidth - 2.0*static_cast<float>(m_gc->tdu(xoff))) /pagew);
  UT_sint32 indent = m_gc->tlu(static_cast<UT_sint32>( z*(m_fAlign+m_fIndent)));

	if(indent < 0)
		indent = 0;
	maxw = 0;
	for(i=0; i<4; i++)
	{
		UT_UCSChar * lv = getLists()->getListLabel(i);
		UT_sint32 len =0;

		if(lv != NULL)
		{
			//
			// This code is here because UT_UCS_copy_char is broken
			//
			len = UT_MIN(UT_UCS4_strlen(lv),51);
			for(j=0; j<=len;j++)
			{
				ucs_label[j] = *lv++;
			}

			ucs_label[len] = 0;

			len = UT_UCS4_strlen(ucs_label);
			yloc = yoff + iAscent + (iHeight - 2*yoff -iFont)*i/4;
			//    painter.drawChars(ucs_label,0,len,xoff+indent,yloc);
			twidth = m_gc->measureString(ucs_label,0,len,NULL);
			if(twidth > maxw)
				maxw = twidth;
		}
	}
	//
	// Work out where to put grey areas to represent text
	//
	UT_sint32 xx,yy;
	if(maxw > 0)
		maxw++;

        // UT_sint32 vspace = (iHeight - 2*yoff -iFont)*i/16;
	z = (float)((fwidth - 2.0*static_cast<float>(m_gc->tdu(xoff))) /(float)pagew);
	UT_sint32 ialign = m_gc->tlu(static_cast<UT_sint32>( z*m_fAlign));

	xx = xoff + ialign;
	xy = xoff + ialign;

	if(xx < (xoff + maxw + indent))
		xy = xoff + maxw + indent + m_gc->tlu(1);
	ii = 0;

	for(i=0; i<4; i++)
	{
		yloc = yoff + iAscent + (iHeight - 2*yoff -iFont)*i/4;
		for(j=0; j< 2; j++)
		{
			yy = yloc + m_gc->tlu(5) + j*m_gc->tlu(21);
			m_iLine_pos[ii++] = yy;
		}
	}
	//
	// Now finally draw the preview
	//

	UT_BidiCharType iDirection = getLists()->getBlock()->getDominantDirection();

	for(i=0; i<8; i++)
	{
		//
		// First clear the line
		//
		painter.clearArea(0, m_iLine_pos[i], iWidth, iHeight);
		if((i & 1) == 0)
		{
			//
			// Draw the text
			//
			UT_UCSChar * lv = getLists()->getListLabel(i/2);
			UT_sint32 len =0;

			if(lv != NULL)
			{
				len = UT_MIN(UT_UCS4_strlen(lv),49);

				if(len > 1 && XAP_App::getApp()->theOSHasBidiSupport() == XAP_App::BIDI_SUPPORT_GUI)
				{
					UT_bidiReorderString(lv, len, iDirection, ucs_label);
				}
				else
				{
					for(j=0; j<=len;j++)
						ucs_label[j] = *lv++;
				}

				ucs_label[len] = 0;
				len = UT_UCS4_strlen(ucs_label);
				yloc = yoff + iAscent + (iHeight - 2*yoff -iFont)*i/8;

				if(iDirection == UT_BIDI_RTL)
					painter.drawChars(ucs_label,0,len,iWidth - xoff - indent - maxw,yloc);
				else
					painter.drawChars(ucs_label,0,len,xoff+indent,yloc);

				yy = m_iLine_pos[i];
				awidth = iWidth - 2*xoff - xy;

				if(iDirection == UT_BIDI_RTL)
					painter.fillRect(clrGrey,xoff,yy,awidth,aheight);
				else
					painter.fillRect(clrGrey,xy,yy,awidth,aheight);
			}
			else
			{
				yy = m_iLine_pos[i];
				awidth = iWidth - 2*xoff - xy;

				if(iDirection == UT_BIDI_RTL)
					painter.fillRect(clrGrey,xoff,yy,awidth,aheight);
				else
					painter.fillRect(clrGrey,xy,yy,awidth,aheight);
			}
		}
		else
		{
			yy = m_iLine_pos[i];
			awidth = iWidth - 2*xoff - xx;

			if(iDirection == UT_BIDI_RTL)
				painter.fillRect(clrGrey,xoff,yy,awidth,aheight);
			else
				painter.fillRect(clrGrey,xy,yy,awidth,aheight);
		}
	}
}