// . merge all the replies together
// . put final merged docids into m_docIds[],m_bitScores[],m_scores[],...
// . this calls Msg51 to get cluster levels when done merging
// . Msg51 remembers clusterRecs from previous call to avoid repeating lookups
// . returns false if blocked, true otherwise
// . sets g_errno and returns true on error
// . overall flow:
//   1. set up per-host merge cursors into each Msg39Reply's docid/score/
//      clusterRec arrays
//   2. (hack) merge per-host SectionStats and uniquify site hashes
//   3. allocate one flat buffer (m_finalBuf) and carve it into the parallel
//      output arrays m_docIds/m_scores/m_clusterRecs/m_clusterLevels/
//      m_scoreInfos
//   4. k-way merge loop (mergeLoop/skip/doneMerge gotos): repeatedly pick
//      the best remaining docid across all hosts, apply site clustering
//      caps and docid dedup, and append to the output arrays
bool Msg3a::mergeLists ( ) {

	// time how long the merge takes
	if ( m_debug ) {
		logf( LOG_DEBUG, "query: msg3a: --- Final DocIds --- " );
		m_startTime = gettimeofdayInMilliseconds();
	}

	// reset our final docids count here in case we are a re-call
	m_numDocIds = 0;
	// a secondary count, how many unique docids we scanned, and not
	// necessarily added to the m_docIds[] array
	//m_totalDocCount = 0; // long docCount = 0;
	// assume the source lists are not exhausted until the merge loop
	// below proves otherwise
	m_moreDocIdsAvail = true;

	// shortcut
	//long numSplits = m_numHosts;//indexdbSplit;

	// . point to the various docids, etc. in each split reply
	// . tcPtr = term count. how many required query terms does the doc
	//   have? formerly called topExplicits in IndexTable2.cpp
	// . diPtr/rsPtr/ksPtr are parallel merge cursors (docid, score,
	//   cluster rec) into host j's reply; diEnd marks docid exhaustion
	long long     *diPtr [MAX_INDEXDB_SPLIT];
	float         *rsPtr [MAX_INDEXDB_SPLIT];
	key_t         *ksPtr [MAX_INDEXDB_SPLIT];
	long long     *diEnd [MAX_INDEXDB_SPLIT];
	for ( long j = 0; j < m_numHosts ; j++ ) {
		Msg39Reply *mr =m_reply[j];
		// if we have gbdocid:| in query this could be NULL
		// (NULL cursors make diPtr[j] >= diEnd[j] true below, so the
		// merge loop just skips this host)
		if ( ! mr ) {
			diPtr[j] = NULL;
			diEnd[j] = NULL;
			rsPtr[j] = NULL;
			ksPtr[j] = NULL;
			continue;
		}
		diPtr [j] = (long long *)mr->ptr_docIds;
		rsPtr [j] = (float     *)mr->ptr_scores;
		ksPtr [j] = (key_t     *)mr->ptr_clusterRecs;
		// docids are 8 bytes each
		diEnd [j] = (long long *)(mr->ptr_docIds +
					  mr->m_numDocIds * 8);
	}

	// clear if we had it (we may be a re-call with a larger m_docsToGet)
	if ( m_finalBuf ) {
		mfree ( m_finalBuf, m_finalBufSize, "Msg3aF" );
		m_finalBuf     = NULL;
		m_finalBufSize = 0;
	}

	//
	// HACK: START section stats merge
	//
	m_sectionStats.reset();
	// total site-hash count over all replies; used to size the
	// uniquifying hash table, and doubles as a "any stats at all?" flag
	long sneed = 0;
	for ( long j = 0; j < m_numHosts ; j++ ) {
		Msg39Reply *mr = m_reply[j];
		if ( ! mr ) continue;
		// site hashes are 4-byte longs
		sneed += mr->size_siteHashList/4;
	}
	HashTableX dt;
	//char tmpBuf[5000];
	// returns false and sets g_errno on error
	if (sneed&&!dt.set(4,0,sneed,NULL,0,false,
			   m_r->m_niceness,"uniqsit"))
		return true;
	for ( long j = 0; sneed && j < m_numHosts ; j++ ) {
		Msg39Reply *mr =m_reply[j];
		if ( ! mr ) continue;
		// sum the per-host on-site/off-site doc counts into ours
		SectionStats *src = &mr->m_sectionStats;
		SectionStats *dst = &m_sectionStats;
		dst->m_onSiteDocIds  += src->m_onSiteDocIds;
		dst->m_offSiteDocIds += src->m_offSiteDocIds;
		// now the list should be the unique site hashes that
		// had the section hash. we need to uniquify them again
		// here. (each host deduped its own list; hosts can still
		// overlap with each other)
		long *p = (long *)mr->ptr_siteHashList;
		long np = mr->size_siteHashList / 4;
		for ( long k = 0 ; k < np ; k++ )
			// hash it up, no dups!
			dt.addKey(&p[k]);
		// update our count based on that
		dst->m_numUniqueSites = dt.getNumSlotsUsed();
	}
	// stats-only request: the caller just wanted m_sectionStats, so we
	// are done -- skip the docid merge entirely
	if ( m_r->m_getSectionStats ) return true;
	//
	// HACK: END section stats merge
	//

	// sanity: a non-positive request size is a caller bug; crash hard
	if ( m_docsToGet <= 0 ) { char *xx=NULL; *xx=0; }

	// . how much do we need to store final merged docids, etc.?
	// . docid=8 score=4 bitScore=1 clusterRecs=key_t clusterLevls=1
	// . NOTE(review): the breakdown comment above looks stale -- the
	//   actual formula below is docid=8, score=4, key_t, DocIdScore*,
	//   clusterLevel=1, matching the five arrays carved out below
	long need = m_docsToGet * (8+4+sizeof(key_t)+sizeof(DocIdScore *)+1);
	// allocate it
	m_finalBuf     = (char *)mmalloc ( need , "finalBuf" );
	m_finalBufSize = need;
	// g_errno should be set if this fails
	if ( ! m_finalBuf ) return true;
	// hook into it: carve the single allocation into the five parallel
	// output arrays, each m_docsToGet entries long
	char *p = m_finalBuf;
	m_docIds        = (long long *)p; p += m_docsToGet * 8;
	m_scores        = (float     *)p; p += m_docsToGet * sizeof(float);
	m_clusterRecs   = (key_t     *)p; p += m_docsToGet * sizeof(key_t);
	m_clusterLevels = (char      *)p; p += m_docsToGet * 1;
	m_scoreInfos    = (DocIdScore **)p; p+=m_docsToGet*sizeof(DocIdScore *);

	// sanity check: the carve-out must consume exactly "need" bytes
	char *pend = m_finalBuf + need;
	if ( p != pend ) { char *xx = NULL; *xx =0; }

	// . now allocate for hash table
	// . get at least twice as many slots as docids
	// . htable tracks docids already merged so duplicates (same docid
	//   returned by multiple hosts) are added only once
	HashTableT<long long,char> htable;
	// returns false and sets g_errno on error
	if ( ! htable.set ( m_docsToGet * 2 ) ) return true;
	// hash table for doing site clustering, provided we
	// are fully split and we got the site recs now
	// (maps site hash -> count of docids already shown for that site)
	HashTableT<long long,long> htable2;
	if ( m_r->m_doSiteClustering && ! htable2.set ( m_docsToGet * 2 ) )
		return true;

	//
	// ***MERGE ALL SPLITS INTO m_docIds[], etc.***
	//
	// . merge all lists in m_replyDocIds[splitNum]
	// . we may be re-called later after m_docsToGet is increased
	//   if too many docids were clustered/filtered out after the call
	//   to Msg51.
 mergeLoop:

	// the winning docid will be diPtr[maxj]
	long maxj = -1;
	//Msg39Reply *mr;
	long hslot;

	// get the next highest-scoring docids from all split lists
	// (each host's list is assumed already sorted best-first, so only
	// the head of each list is compared -- a standard k-way merge step)
	for ( long j = 0; j < m_numHosts; j++ ) {
		// . skip exhausted lists
		// . these both should be NULL if reply was skipped because
		//   we did a gbdocid:| query
		if ( diPtr[j] >= diEnd[j] ) continue;
		// compare the score
		if ( maxj == -1 ) { maxj = j; continue; }
		if ( *rsPtr[j] < *rsPtr[maxj] ) continue;
		if ( *rsPtr[j] > *rsPtr[maxj] ) { maxj = j; continue; }
		// prefer lower docids on top (deterministic tie-break when
		// scores are equal)
		if ( *diPtr[j] < *diPtr[maxj] ) { maxj = j; continue; }
	}

	// all host lists exhausted: remember that so a re-call knows there
	// is nothing more to pull, and finish up
	if ( maxj == -1 ) {
		m_moreDocIdsAvail = false;
		goto doneMerge;
	}

	// only do this logic if we have clusterdb recs included
	if ( m_r->m_doSiteClustering &&
	     // if the clusterLevel was set to CR_*errorCode* then this key
	     // will be 0, so in that case, it might have been a not found
	     // or whatever, so let it through regardless
	     ksPtr[maxj]->n0 != 0LL &&
	     ksPtr[maxj]->n1 != 0 ) {
		// get the hostname hash, a long long
		long sh = g_clusterdb.getSiteHash26 ((char *)ksPtr[maxj]);
		// do we have enough from this hostname already?
		long slot = htable2.getSlot ( sh );
		// if this hostname already visible, do not over-display it...
		if ( slot >= 0 ) {
			// get the count
			long val = htable2.getValueFromSlot ( slot );
			// . if already 2 or more, give up
			// . if the site hash is 0, that usually means a
			//   "not found" in clusterdb, and the accompanying
			//   cluster level would be set as such, but since we
			//   did not copy the cluster levels over in the merge
			//   algo above, we don't know for sure... cluster recs
			//   are set to 0 in the Msg39.cpp clustering.
			// . net effect: at most 2 results per (nonzero) site
			//   hash; excess ones jump to skip: so their cursors
			//   still advance
			if ( sh && val >= 2 ) goto skip;
			// inc the count
			val++;
			// store it
			htable2.setValue ( slot , val );
		}
		// . add it, this should be pre-allocated!
		// . returns false and sets g_errno on error
		else if ( ! htable2.addKey(sh,1) ) return true;
	}

	// have we already merged this docid from another host?
	hslot = htable.getSlot ( *diPtr[maxj] );

	// . only add it to the final list if the docid is "unique"
	// . BUT since different event ids share the same docid, exception!
	if ( hslot < 0 ) {
		// always inc this
		//m_totalDocCount++;
		// only do this if we need more
		if ( m_numDocIds < m_docsToGet ) {
			// get DocIdScore class for this docid
			Msg39Reply *mr = m_reply[maxj];
			// point to the array of DocIdScores
			DocIdScore *ds = (DocIdScore *)mr->ptr_scoreInfo;
			long nds = mr->size_scoreInfo/sizeof(DocIdScore);
			// linear scan for the winning docid's scoring info;
			// dp stays NULL if the reply carried none
			DocIdScore *dp = NULL;
			for ( long i = 0 ; i < nds ; i++ ) {
				if ( ds[i].m_docId != *diPtr[maxj] ) continue;
				dp = &ds[i];
				break;
			}
			// add the max to the final merged lists
			m_docIds [m_numDocIds] = *diPtr[maxj];

			// wtf?
			if ( ! dp ) {
				// this is empty if no scoring info
				// supplied! only worth complaining about if
				// the request actually asked for scoring info
				if ( m_r->m_getDocIdScoringInfo )
					log("msg3a: CRAP! got empty score "
					    "info for "
					    "d=%lli",
					    m_docIds[m_numDocIds]);
				//char *xx=NULL; *xx=0;  261561804684
				// qry = www.yahoo
			}
			// point to the single DocIdScore for this docid
			m_scoreInfos[m_numDocIds] = dp;

			// reset this just in case (they are re-pointed into
			// the reply buffers just below)
			if ( dp ) {
				dp->m_singleScores = NULL;
				dp->m_pairScores   = NULL;
			}

			// now fix DocIdScore::m_pairScores and m_singleScores
			// ptrs so they reference into the
			// Msg39Reply::ptr_pairScoreBuf and ptr_singleSingleBuf
			// like they should. it seems we do not free the
			// Msg39Replies so we should be ok referencing them.
			if ( dp && dp->m_singlesOffset >= 0 )
				dp->m_singleScores =
					(SingleScore *)(mr->ptr_singleScoreBuf+
							dp->m_singlesOffset) ;
			if ( dp && dp->m_pairsOffset >= 0 )
				dp->m_pairScores =
					(PairScore *)(mr->ptr_pairScoreBuf +
						      dp->m_pairsOffset );

			// turn it into a float, that is what rscore_t is.
			// we do this to make it easier for PostQueryRerank.cpp
			m_scores [m_numDocIds]=(float)*rsPtr[maxj];
			if ( m_r->m_doSiteClustering )
				m_clusterRecs[m_numDocIds]= *ksPtr[maxj];
			// clear this out
			//m_eventIdBits[m_numDocIds].clear();
			// set this for use below
			// NOTE(review): nothing after this point appears to
			// read hslot before the next mergeLoop iteration
			// reassigns it -- looks vestigial, confirm before
			// removing
			hslot = m_numDocIds;
			// point to next available slot to add to
			m_numDocIds++;
		}
		// if it has ALL the required query terms, count it
		//if ( *bsPtr[maxj] & 0x60 ) m_numAbove++;
		// . add it, this should be pre-allocated!
		// . returns false and sets g_errno on error
		if ( ! htable.addKey(*diPtr[maxj],1) ) return true;
	}

 skip:
	// increment the split pointers from which we took the max
	// (reached both on add and on cluster-cap rejection, so rejected
	// docids are consumed too)
	// NOTE(review): ksPtr[maxj] is incremented even when the reply had
	// no cluster recs (ksPtr NULL) -- it is only ever dereferenced under
	// m_doSiteClustering, but confirm ptr_clusterRecs is always non-NULL
	// when that flag is set
	rsPtr[maxj]++;
	diPtr[maxj]++;
	ksPtr[maxj]++;
	// get the next highest docid and add it in
	if ( m_numDocIds < m_docsToGet ) goto mergeLoop;

 doneMerge:

	if ( m_debug ) {
		// show how long it took
		logf( LOG_DEBUG,"query: msg3a: [%lu] merged %li docs from %li "
		      "splits in %llu ms. "
		      ,
		      (unsigned long)this,
		      m_numDocIds, (long)m_numHosts,
		      gettimeofdayInMilliseconds() - m_startTime );
		// show the final merged docids
		for ( long i = 0 ; i < m_numDocIds ; i++ ) {
			long sh = 0;
			if ( m_r->m_doSiteClustering )
				sh=g_clusterdb.getSiteHash26((char *)
							     &m_clusterRecs[i]);
			// print out score_t
			logf(LOG_DEBUG,"query: msg3a: [%lu] "
			     "%03li) merged docId=%012llu "
			     "score=%.01f hosthash=0x%lx",
			     (unsigned long)this,
			     i,
			     m_docIds [i] ,
			     (float)m_scores [i] ,
			     sh );
		}
	}
	// if we had a full split, we should have gotten the cluster recs
	// from each split already, so mark every merged docid's cluster
	// level OK up front
	memset ( m_clusterLevels , CR_OK , m_numDocIds );
	return true;
}