void Msg39::estimateHitsAndSendReply ( ) {

	// no longer in use
	m_inUse = false;

	// now this for the query loop on the QueryLogEntries.
	m_topDocId50 = 0LL;
	m_topScore50 = 0.0;

	// a little hack for the seo pipeline in xmldoc.cpp
	m_topDocId  = 0LL;
	m_topScore  = 0.0;
	m_topDocId2 = 0LL;
	m_topScore2 = 0.0;
	int32_t ti = m_tt.getHighNode();
	if ( ti >= 0 ) {
		TopNode *t = &m_tt.m_nodes[ti];
		m_topDocId = t->m_docId;
		m_topScore = t->m_score;
	}
	// try the 2nd one too
	int32_t ti2 = -1;
	if ( ti >= 0 ) ti2 = m_tt.getNext ( ti );
	if ( ti2 >= 0 ) {
		TopNode *t2 = &m_tt.m_nodes[ti2];
		m_topDocId2 = t2->m_docId;
		m_topScore2 = t2->m_score;
	}

	// convenience ptrs. we will store the docids/scores into these arrays
	int64_t *topDocIds;
	double  *topScores;
	key_t   *topRecs;

	// numDocIds counts docs in all tiers when using toptree.
	int32_t numDocIds = m_tt.m_numUsedNodes;

	// the msg39 reply we send back
	int32_t replySize;
	char   *reply;

	//m_numTotalHits = m_posdbTable.m_docIdVoteBuf.length() / 6;

	// make the reply?
	Msg39Reply mr;

	// this is what you want to look at if there is no seo.cpp module...
	if ( ! m_callback ) {
		// if we got clusterdb recs in here, use 'em
		if ( m_gotClusterRecs ) numDocIds = m_numVisible;

		// don't send more than the docs that are asked for
		if ( numDocIds > m_r->m_docsToGet )
			numDocIds = m_r->m_docsToGet;

		// # of QueryTerms in query
		int32_t nqt = m_tmpq.m_numTerms;

		// start setting the stuff
		mr.m_numDocIds = numDocIds;

		// copy # estimated hits into 8 bytes of reply
		//int64_t est = m_posdbTable.m_estimatedTotalHits;
		// ensure it has at least as many results as we got
		//if ( est < numDocIds ) est = numDocIds;
		// or if too big...
		//if ( numDocIds < m_r->m_docsToGet ) est = numDocIds;
		// . total estimated hits
		// . this is now an EXACT count!
		mr.m_estimatedHits = m_numTotalHits;

		// sanity check
		mr.m_nqt = nqt;

		// the m_errno if any
		mr.m_errno = m_errno;

		// shortcut
		PosdbTable *pt = &m_posdbTable;

		// the score info, in no particular order right now
		mr.ptr_scoreInfo  = pt->m_scoreInfoBuf.getBufStart();
		mr.size_scoreInfo = pt->m_scoreInfoBuf.length();

		// that has offset references into posdbtable::m_pairScoreBuf
		// and m_singleScoreBuf, so we need those too now
		mr.ptr_pairScoreBuf    = pt->m_pairScoreBuf.getBufStart();
		mr.size_pairScoreBuf   = pt->m_pairScoreBuf.length();
		mr.ptr_singleScoreBuf  = pt->m_singleScoreBuf.getBufStart();
		mr.size_singleScoreBuf = pt->m_singleScoreBuf.length();

		// save some time since seo.cpp gets from posdbtable directly,
		// so we can avoid serializing/copying this stuff at least
		if ( ! m_r->m_makeReply ) {
			mr.size_scoreInfo      = 0;
			mr.size_pairScoreBuf   = 0;
			mr.size_singleScoreBuf = 0;
		}

		//mr.m_sectionStats = pt->m_sectionStats;

		// reserve space for these guys, we fill them in below
		mr.ptr_docIds      = NULL;
		mr.ptr_scores      = NULL;
		mr.ptr_clusterRecs = NULL;

		// this is how much space to reserve
		mr.size_docIds = 8 * numDocIds;              // int64_t
		mr.size_scores = sizeof(double) * numDocIds; // double

		// if not doing site clustering, we won't have these perhaps...
		if ( m_gotClusterRecs )
			mr.size_clusterRecs = sizeof(key_t) * numDocIds;
		else
			mr.size_clusterRecs = 0;
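		// note: ptr_docIds/ptr_scores/ptr_clusterRecs stay NULL here
		// on purpose. serializeMsg() below reserves size_docIds etc.
		// bytes in the reply buffer and repoints them into it, and
		// the TopTree loop at the bottom of this function fills
		// those arrays in place.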
#define MAX_FACETS 20000

		/////////////////
		//
		// FACETS
		//
		/////////////////

		// We can have multiple gbfacet: terms in a query so
		// serialize all the QueryTerm::m_facetHashTables into
		// Msg39Reply::ptr_facetHashList.
		//
		// combine the facet hash lists of each query term into
		// a list of lists. each list is preceded by the query term
		// id of the query term (like gbfacet:xpathsitehash12345)
		// followed by a 4 byte length of the following 32-bit
		// facet values
		int32_t need = 0;
		for ( int32_t i = 0 ; i < m_tmpq.m_numTerms ; i++ ) {
			QueryTerm *qt = &m_tmpq.m_qterms[i];
			// skip if not facet
			if ( qt->m_fieldCode != FIELD_GBFACETSTR &&
			     qt->m_fieldCode != FIELD_GBFACETINT &&
			     qt->m_fieldCode != FIELD_GBFACETFLOAT )
				continue;
			HashTableX *ft = &qt->m_facetHashTable;
			if ( ft->m_numSlotsUsed == 0 ) continue;
			int32_t used = ft->m_numSlotsUsed;
			// limit for memory
			if ( used > (int32_t)MAX_FACETS ) {
				log("msg39: truncating facet list to 20000 "
				    "from %"INT32" for %s",used,qt->m_term);
				used = (int32_t)MAX_FACETS;
			}
			// store query term id 64 bit
			need += 8;
			// then size
			need += 4;
			// then buckets. keys and counts
			need += (4+sizeof(FacetEntry)) * used;
		}
		// allocate
		SafeBuf tmp;
		if ( ! tmp.reserve ( need ) ) {
			log("query: Could not allocate memory "
			    "to hold reply facets");
			sendReply(m_slot,this,NULL,0,0,true);
			return;
		}
		// point to there
		char *p = tmp.getBufStart();
		for ( int32_t i = 0 ; i < m_tmpq.m_numTerms ; i++ ) {
			QueryTerm *qt = &m_tmpq.m_qterms[i];
			// skip if not facet
			if ( qt->m_fieldCode != FIELD_GBFACETSTR &&
			     qt->m_fieldCode != FIELD_GBFACETINT &&
			     qt->m_fieldCode != FIELD_GBFACETFLOAT )
				continue;
			// get all the facet hashes and their counts
			HashTableX *ft = &qt->m_facetHashTable;
			// skip if none
			if ( ft->m_numSlotsUsed == 0 ) continue;
			// store query term id 64 bit
			*(int64_t *)p = qt->m_termId;
			p += 8;
			int32_t used = ft->getNumSlotsUsed();
			if ( used > (int32_t)MAX_FACETS )
				used = (int32_t)MAX_FACETS;
			// store count
			*(int32_t *)p = used;
			p += 4;
			int32_t count = 0;
			// for sanity check
			char *pend = p + (used * (4+sizeof(FacetEntry)));
			// serialize the key/val pairs
			for ( int32_t k = 0 ; k < ft->m_numSlots ; k++ ) {
				// skip empty buckets
				if ( ! ft->m_flags[k] ) continue;
				// store key. the hash of the facet value.
				*(int32_t *)p = ft->getKey32FromSlot(k);
				p += 4;
				// then store count
				//*(int32_t *)p = ft->getVal32FromSlot(k);
				//p += 4;
				// now this has a docid on it so we can
				// lookup the text of the facet in Msg40.cpp
				FacetEntry *fe;
				fe = (FacetEntry *)ft->getValFromSlot(k);
				// sanity
				// no, count can be zero if it's a range facet
				// that was never added to. we add those
				// empty FacetEntries only for range facets
				// in Posdb.cpp
				//if(fe->m_count == 0 ) {char *xx=NULL;*xx=0;}
				gbmemcpy ( p , fe , sizeof(FacetEntry) );
				p += sizeof(FacetEntry);
				// do not breach
				if ( ++count >= (int32_t)MAX_FACETS ) break;
			}
			// sanity check
			if ( p != pend ) { char *xx=NULL;*xx=0; }
			// do the next query term
		}
		// now point to that so it can be serialized below
		mr.ptr_facetHashList  = tmp.getBufStart();
		mr.size_facetHashList = p - tmp.getBufStart();//tmp.length();
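		// Resulting ptr_facetHashList layout, per facet query term
		// (as written above):
		//
		//   [termId  : int64]
		//   [nFacets : int32]
		//   [facetValueHash : int32][FacetEntry]  x nFacets
		//
		// Msg40.cpp uses the docid carried in each FacetEntry to
		// look up the text of the facet value.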
reply ) { log("query: Could not allocated memory " "to hold reply of docids to send back."); sendReply(m_slot,this,NULL,0,0,true); return; } topDocIds = (int64_t *) mr.ptr_docIds; topScores = (double *) mr.ptr_scores; topRecs = (key_t *) mr.ptr_clusterRecs; } int32_t docCount = 0; // loop over all results in the TopTree for ( int32_t ti = m_tt.getHighNode() ; ti >= 0 ; ti = m_tt.getPrev(ti) ) { // get the guy TopNode *t = &m_tt.m_nodes[ti]; // skip if clusterLevel is bad! if ( m_gotClusterRecs && t->m_clusterLevel != CR_OK ) continue; // if not sending back a reply... we were called from seo.cpp // State3f logic to evaluate a QueryLogEntry, etc. if ( m_callback ) { // skip results past #50 if ( docCount > 50 ) continue; // set this m_topScore50 = t->m_score; m_topDocId50 = t->m_docId; // that's it continue; } // get the docid ptr //char *diptr = t->m_docIdPtr; //int64_t docId = getDocIdFromPtr(diptr); // sanity check if ( t->m_docId < 0 ) { char *xx=NULL; *xx=0; } //add it to the reply topDocIds [docCount] = t->m_docId; topScores [docCount] = t->m_score; if ( m_tt.m_useIntScores ) topScores[docCount] = (double)t->m_intScore; // supply clusterdb rec? only for full splits if ( m_gotClusterRecs ) topRecs [docCount] = t->m_clusterRec; //topExplicits [docCount] = // getNumBitsOn(t->m_explicits) docCount++; // 50th score? set this for seo.cpp. if less than 50 results // we want the score of the last doc then. if ( docCount <= 50 ) m_topScore50 = t->m_score; if ( m_debug ) { logf(LOG_DEBUG,"query: msg39: [%"PTRFMT"] " "%03"INT32") docId=%012"UINT64" sum=%.02f", (PTRTYPE)this, docCount, t->m_docId,t->m_score); } //don't send more than the docs that are wanted if ( docCount >= numDocIds ) break; } if ( docCount > 300 && m_debug ) log("query: Had %"INT32" nodes in top tree",docCount); // this is sensitive info if ( m_debug ) { log(LOG_DEBUG, "query: msg39: [%"PTRFMT"] " "Intersected lists took %"INT64" (%"INT64") " "ms " "docIdsToGet=%"INT32" docIdsGot=%"INT32" " "q=%s", (PTRTYPE)this , m_posdbTable.m_addListsTime , gettimeofdayInMilliseconds() - m_startTime , m_r->m_docsToGet , numDocIds , m_tmpq.getQuery() ); } // if we blocked because we used a thread then call callback if // summoned from a msg3f handler and not a msg39 handler if ( m_callback ) { // if we blocked call user callback if ( m_blocked ) m_callback ( m_state ); // if not sending back a udp reply, return now return; } // now send back the reply sendReply(m_slot,this,reply,replySize,replySize,false); return; }
// . merge all the replies together
// . put final merged docids into m_docIds[],m_bitScores[],m_scores[],...
// . this calls Msg51 to get cluster levels when done merging
// . Msg51 remembers clusterRecs from previous call to avoid repeating lookups
// . returns false if blocked, true otherwise
// . sets g_errno and returns true on error
bool Msg3a::mergeLists ( ) {

	// time how long the merge takes
	if ( m_debug ) {
		logf( LOG_DEBUG, "query: msg3a: --- Final DocIds --- " );
		m_startTime = gettimeofdayInMilliseconds();
	}

	// reset our final docids count here in case we are a re-call
	m_numDocIds = 0;
	// a secondary count, how many unique docids we scanned, and not
	// necessarily added to the m_docIds[] array
	//m_totalDocCount = 0; // long docCount = 0;
	m_moreDocIdsAvail = true;

	// shortcut
	//long numSplits = m_numHosts;//indexdbSplit;

	// . point to the various docids, etc. in each split reply
	// . tcPtr = term count. how many required query terms does the doc
	//   have? formerly called topExplicits in IndexTable2.cpp
	long long *diPtr [MAX_INDEXDB_SPLIT];
	float     *rsPtr [MAX_INDEXDB_SPLIT];
	key_t     *ksPtr [MAX_INDEXDB_SPLIT];
	long long *diEnd [MAX_INDEXDB_SPLIT];
	for ( long j = 0 ; j < m_numHosts ; j++ ) {
		Msg39Reply *mr = m_reply[j];
		// if we have gbdocid:| in query this could be NULL
		if ( ! mr ) {
			diPtr[j] = NULL;
			diEnd[j] = NULL;
			rsPtr[j] = NULL;
			ksPtr[j] = NULL;
			continue;
		}
		diPtr [j] = (long long *)mr->ptr_docIds;
		rsPtr [j] = (float     *)mr->ptr_scores;
		ksPtr [j] = (key_t     *)mr->ptr_clusterRecs;
		diEnd [j] = (long long *)(mr->ptr_docIds +
					  mr->m_numDocIds * 8);
	}

	// clear if we had it
	if ( m_finalBuf ) {
		mfree ( m_finalBuf, m_finalBufSize, "Msg3aF" );
		m_finalBuf     = NULL;
		m_finalBufSize = 0;
	}

	//
	// HACK: START section stats merge
	//
	m_sectionStats.reset();
	long sneed = 0;
	for ( long j = 0 ; j < m_numHosts ; j++ ) {
		Msg39Reply *mr = m_reply[j];
		if ( ! mr ) continue;
		sneed += mr->size_siteHashList/4;
	}
	HashTableX dt;
	//char tmpBuf[5000];
	if ( sneed && ! dt.set(4,0,sneed,NULL,0,false,
			       m_r->m_niceness,"uniqsit") )
		return true;
	for ( long j = 0 ; sneed && j < m_numHosts ; j++ ) {
		Msg39Reply *mr = m_reply[j];
		if ( ! mr ) continue;
		SectionStats *src = &mr->m_sectionStats;
		SectionStats *dst = &m_sectionStats;
		dst->m_onSiteDocIds  += src->m_onSiteDocIds;
		dst->m_offSiteDocIds += src->m_offSiteDocIds;
		// now the list should be the unique site hashes that
		// had the section hash. we need to uniquify them again
		// here.
		long *p  = (long *)mr->ptr_siteHashList;
		long  np = mr->size_siteHashList / 4;
		// hash it up, no dups!
		for ( long k = 0 ; k < np ; k++ )
			dt.addKey(&p[k]);
		// update our count based on that
		dst->m_numUniqueSites = dt.getNumSlotsUsed();
	}
	if ( m_r->m_getSectionStats ) return true;
	//
	// HACK: END section stats merge
	//

	if ( m_docsToGet <= 0 ) { char *xx=NULL; *xx=0; }

	// . how much do we need to store final merged docids, etc.?
	// . docid=8 score=4 clusterRec=key_t scoreInfoPtr=DocIdScore*
	//   clusterLevel=1
	long need = m_docsToGet * (8+4+sizeof(key_t)+
				   sizeof(DocIdScore *)+1);
	// allocate it
	m_finalBuf     = (char *)mmalloc ( need , "finalBuf" );
	m_finalBufSize = need;
	// g_errno should be set if this fails
	if ( ! m_finalBuf ) return true;
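	// m_finalBuf is one allocation carved into five parallel arrays
	// below (docids, scores, cluster recs, cluster levels, score-info
	// ptrs); "need" above must account for every byte of that carving,
	// which the p != pend sanity check enforces.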
	// hook into it
	char *p = m_finalBuf;
	m_docIds        = (long long *)p; p += m_docsToGet * 8;
	m_scores        = (float *)p;    p += m_docsToGet * sizeof(float);
	m_clusterRecs   = (key_t *)p;    p += m_docsToGet * sizeof(key_t);
	m_clusterLevels = (char  *)p;    p += m_docsToGet * 1;
	m_scoreInfos    = (DocIdScore **)p;
	p += m_docsToGet * sizeof(DocIdScore *);

	// sanity check
	char *pend = m_finalBuf + need;
	if ( p != pend ) { char *xx = NULL; *xx = 0; }

	// . now allocate for hash table
	// . get at least twice as many slots as docids
	HashTableT<long long,char> htable;
	// returns false and sets g_errno on error
	if ( ! htable.set ( m_docsToGet * 2 ) ) return true;

	// hash table for doing site clustering, provided we
	// are fully split and we got the site recs now
	HashTableT<long long,long> htable2;
	if ( m_r->m_doSiteClustering &&
	     ! htable2.set ( m_docsToGet * 2 ) )
		return true;

	//
	// ***MERGE ALL SPLITS INTO m_docIds[], etc.***
	//
	// . merge all lists in m_replyDocIds[splitNum]
	// . we may be re-called later after m_docsToGet is increased
	//   if too many docids were clustered/filtered out after the call
	//   to Msg51.
 mergeLoop:

	// the winning docid will be diPtr[maxj]
	long maxj = -1;
	//Msg39Reply *mr;
	long hslot;

	// get the next highest-scoring docids from all split lists
	for ( long j = 0 ; j < m_numHosts ; j++ ) {
		// . skip exhausted lists
		// . these both should be NULL if reply was skipped because
		//   we did a gbdocid:| query
		if ( diPtr[j] >= diEnd[j] ) continue;
		// compare the score
		if ( maxj == -1 ) { maxj = j; continue; }
		if ( *rsPtr[j] < *rsPtr[maxj] ) continue;
		if ( *rsPtr[j] > *rsPtr[maxj] ) { maxj = j; continue; }
		// prefer lower docids on top
		if ( *diPtr[j] < *diPtr[maxj] ) { maxj = j; continue; }
	}

	if ( maxj == -1 ) {
		m_moreDocIdsAvail = false;
		goto doneMerge;
	}
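	// (The above is one step of a plain k-way merge: scan each shard's
	// list head, take the highest score, break ties toward the lower
	// docid. Repeating it via mergeLoop keeps the merged output in
	// global rank order.)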
	// only do this logic if we have clusterdb recs included
	if ( m_r->m_doSiteClustering &&
	     // if the clusterLevel was set to CR_*errorCode* then this key
	     // will be 0, so in that case, it might have been a not found
	     // or whatever, so let it through regardless
	     ksPtr[maxj]->n0 != 0LL &&
	     ksPtr[maxj]->n1 != 0 ) {
		// get the hostname hash, a long long
		long sh = g_clusterdb.getSiteHash26 ((char *)ksPtr[maxj]);
		// do we have enough from this hostname already?
		long slot = htable2.getSlot ( sh );
		// if this hostname already visible, do not over-display it...
		if ( slot >= 0 ) {
			// get the count
			long val = htable2.getValueFromSlot ( slot );
			// . if already 2 or more, give up
			// . if the site hash is 0, that usually means a
			//   "not found" in clusterdb, and the accompanying
			//   cluster level would be set as such, but since we
			//   did not copy the cluster levels over in the merge
			//   algo above, we don't know for sure... cluster
			//   recs are set to 0 in the Msg39.cpp clustering.
			if ( sh && val >= 2 ) goto skip;
			// inc the count
			val++;
			// store it
			htable2.setValue ( slot , val );
		}
		// . add it, this should be pre-allocated!
		// . returns false and sets g_errno on error
		else if ( ! htable2.addKey(sh,1) ) return true;
	}

	hslot = htable.getSlot ( *diPtr[maxj] );

	// . only add it to the final list if the docid is "unique"
	// . BUT since different event ids share the same docid, exception!
	if ( hslot < 0 ) {
		// always inc this
		//m_totalDocCount++;
		// only do this if we need more
		if ( m_numDocIds < m_docsToGet ) {
			// get DocIdScore class for this docid
			Msg39Reply *mr = m_reply[maxj];
			// point to the array of DocIdScores
			DocIdScore *ds = (DocIdScore *)mr->ptr_scoreInfo;
			long nds = mr->size_scoreInfo/sizeof(DocIdScore);
			DocIdScore *dp = NULL;
			for ( long i = 0 ; i < nds ; i++ ) {
				if ( ds[i].m_docId != *diPtr[maxj] )
					continue;
				dp = &ds[i];
				break;
			}
			// add the max to the final merged lists
			m_docIds [m_numDocIds] = *diPtr[maxj];
			// warn if expected scoring info is missing
			if ( ! dp ) {
				// this is empty if no scoring info supplied!
				if ( m_r->m_getDocIdScoringInfo )
					log("msg3a: got empty score info "
					    "for d=%lli",
					    m_docIds[m_numDocIds]);
				//char *xx=NULL; *xx=0; 261561804684
				// qry = www.yahoo
			}
			// point to the single DocIdScore for this docid
			m_scoreInfos[m_numDocIds] = dp;
			// reset this just in case
			if ( dp ) {
				dp->m_singleScores = NULL;
				dp->m_pairScores   = NULL;
			}
			// now fix DocIdScore::m_pairScores and m_singleScores
			// ptrs so they reference into the
			// Msg39Reply::ptr_pairScoreBuf and ptr_singleScoreBuf
			// like they should. it seems we do not free the
			// Msg39Replies so we should be ok referencing them.
			if ( dp && dp->m_singlesOffset >= 0 )
				dp->m_singleScores = (SingleScore *)
					(mr->ptr_singleScoreBuf +
					 dp->m_singlesOffset);
			if ( dp && dp->m_pairsOffset >= 0 )
				dp->m_pairScores = (PairScore *)
					(mr->ptr_pairScoreBuf +
					 dp->m_pairsOffset);
			// turn it into a float, that is what rscore_t is.
			// we do this to make it easier for
			// PostQueryRerank.cpp
			m_scores [m_numDocIds] = (float)*rsPtr[maxj];
			if ( m_r->m_doSiteClustering )
				m_clusterRecs[m_numDocIds] = *ksPtr[maxj];
			// clear this out
			//m_eventIdBits[m_numDocIds].clear();
			// set this for use below
			hslot = m_numDocIds;
			// point to next available slot to add to
			m_numDocIds++;
		}
		// if it has ALL the required query terms, count it
		//if ( *bsPtr[maxj] & 0x60 ) m_numAbove++;
		// . add it, this should be pre-allocated!
		// . returns false and sets g_errno on error
		if ( ! htable.addKey(*diPtr[maxj],1) ) return true;
	}

 skip:
	// increment the split pointers from which we took the max
	rsPtr[maxj]++;
	diPtr[maxj]++;
	ksPtr[maxj]++;
	// get the next highest docid and add it in
	if ( m_numDocIds < m_docsToGet ) goto mergeLoop;

 doneMerge:

	if ( m_debug ) {
		// show how long it took
		logf( LOG_DEBUG,"query: msg3a: [%lu] merged %li docs "
		      "from %li splits in %llu ms. ",
		      (unsigned long)this,
		      m_numDocIds,
		      (long)m_numHosts,
		      gettimeofdayInMilliseconds() - m_startTime );
		// show the final merged docids
		for ( long i = 0 ; i < m_numDocIds ; i++ ) {
			long sh = 0;
			if ( m_r->m_doSiteClustering )
				sh = g_clusterdb.getSiteHash26((char *)
						&m_clusterRecs[i]);
			// print out score_t
			logf(LOG_DEBUG,"query: msg3a: [%lu] "
			     "%03li) merged docId=%012llu "
			     "score=%.01f hosthash=0x%lx",
			     (unsigned long)this,
			     i,
			     m_docIds [i],
			     (float)m_scores [i],
			     sh );
		}
	}

	// if we had a full split, we should have gotten the cluster recs
	// from each split already
	memset ( m_clusterLevels , CR_OK , m_numDocIds );

	return true;
}
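// After mergeLists() returns, the merged results sit in the parallel
// arrays hooked into m_finalBuf. A sketch (assumed, not from the
// original source) of how a caller like Msg40.cpp can consume them,
// once Msg51 has refined m_clusterLevels[]:
//
//	for ( long i = 0 ; i < msg3a->m_numDocIds ; i++ ) {
//		// docids clustered/filtered out by Msg51 are skipped
//		if ( msg3a->m_clusterLevels[i] != CR_OK ) continue;
//		long long d = msg3a->m_docIds[i];
//		float     s = msg3a->m_scores[i];
//		// ... render the result for docid d with score s ...
//	}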