C++ (Cpp) Index::documentLength Exemples

Langage de programmation: C++ (Cpp)

Class/Type: Index

Méthode/Fonction: documentLength

Exemples sur hotexamples.com : 1

C++ (Cpp) Index::documentLength - 1 exemple trouvé. Il s'agit de l'exemple réel le mieux noté d'Index::documentLength, extrait de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

Find(30)

Add(26)

FindAdd(21)

acquireReader(21)

GetCount(15)

end(13)

PickKeys(10)

Z(9)

Y(9)

GetId(9)

X(9)

e(7)

Remove(5)

erase(5)

docCount(5)

GetKeys(5)

Clear(4)

AddObserver(4)

begin(4)

Contains(4)

docs(4)

c(3)

PreFetch(3)

buildIndex(3)

cbegin(3)

IsWatched(3)

acquireWriter(3)

cend(3)

SetParentId(2)

atMaxId(2)

data(2)

clear(2)

createIndex(2)

crbegin(2)

crend(2)

at(2)

IsEmpty(2)

GetSearch(2)

doc_size(2)

document(2)

bucket_count(2)

ForEachInRectForMWM(2)

applyVariableChange(2)

equal_range(2)

atVariationStart(2)

Unlink(2)

close(1)

builtin_class(1)

batch_kmer(1)

clearData(1)

Méthodes fréquemment utilisées

Find (30)

Add (26)

FindAdd (21)

acquireReader (21)

GetCount (15)

end (13)

PickKeys (10)

Z (9)

Y (9)

GetId (9)

Méthodes fréquemment utilisées

X (9)

e (7)

Remove (5)

erase (5)

docCount (5)

GetKeys (5)

Clear (4)

AddObserver (4)

begin (4)

Contains (4)

docs (4)

c (3)

PreFetch (3)

buildIndex (3)

cbegin (3)

IsWatched (3)

acquireWriter (3)

cend (3)

SetParentId (2)

atMaxId (2)

Méthodes fréquemment utilisées

docs (4)

c (3)

PreFetch (3)

buildIndex (3)

cbegin (3)

IsWatched (3)

acquireWriter (3)

cend (3)

SetParentId (2)

atMaxId (2)

data (2)

clear (2)

createIndex (2)

crbegin (2)

crend (2)

at (2)

IsEmpty (2)

GetSearch (2)

doc_size (2)

document (2)

bucket_count (2)

ForEachInRectForMWM (2)

applyVariableChange (2)

equal_range (2)

atVariationStart (2)

Unlink (2)

close (1)

builtin_class (1)

batch_kmer (1)

clearData (1)

Méthodes fréquemment utilisées

data (2)

clear (2)

createIndex (2)

crbegin (2)

crend (2)

at (2)

IsEmpty (2)

GetSearch (2)

doc_size (2)

document (2)

bucket_count (2)

ForEachInRectForMWM (2)

applyVariableChange (2)

equal_range (2)

atVariationStart (2)

Unlink (2)

close (1)

builtin_class (1)

batch_kmer (1)

clearData (1)

CloseIndexFile (1)

closeIndex (1)

docLengthAvg (1)

exp (1)

endScan (1)

empty (1)

dumpAsciiLists (1)

dump (1)

documentLength (1)

doc_path (1)

docLength (1)

column (1)

docInfoList (1)

deleteIndex (1)

debugPrint (1)

cross_alignment (1)

createFromOnDiskIndex (1)

constant (1)

compact (1)

debug (1)

Exemple #1

0

Afficher le fichier

Fichier : IndexWriter.cpp Projet : busjaeger/cs410sp12

void IndexWriter::_addInvertedListData( indri::utility::greedy_vector<WriterIndexContext*>& lists, indri::index::TermData* termData, indri::utility::Buffer& listBuffer, UINT64& endOffset ) { indri::utility::greedy_vector<WriterIndexContext*>::iterator iter; const int minimumSkip = 1<<12; // 4k int documentsWritten = 0; const float topdocsFraction = 0.01f; bool hasTopdocs = termData->corpus.documentCount > TOPDOCS_DOCUMENT_COUNT; bool isFrequent = termData->corpus.totalCount > FREQUENT_TERM_COUNT; int topdocsCount = hasTopdocs ? int(termData->corpus.documentCount * 0.01) : 0; int topdocsSpace = hasTopdocs ? ((topdocsCount*3*sizeof(UINT32)) + sizeof(int)) : 0; // write a control byte char control = (hasTopdocs ? 0x01 : 0) | (isFrequent ? 0x02 : 0); _invertedOutput->write( &control, 1 ); UINT64 initialPosition = _invertedOutput->tell(); // leave some room for the topdocs list if( hasTopdocs ) { _invertedOutput->seek( topdocsSpace + initialPosition ); } // maintain a list of top documents std::priority_queue<DocListIterator::TopDocument, std::vector<DocListIterator::TopDocument>, DocListIterator::TopDocument::greater> topdocs; double threshold = 0; int lastDocument = 0; int positions = 0; int docs = 0; // for each matching list: for( iter = lists.begin(); iter != lists.end(); ++iter ) { indri::index::DocListFileIterator::DocListData* listData = (*iter)->iterator->currentEntry(); DocListIterator* iterator = listData->iterator; Index* index = (*iter)->index; indri::utility::RVLCompressStream stream( listBuffer ); int listDocs = 0; int listPositions = 0; while( !iterator->finished() ) { // get the latest entry from the list DocListIterator::DocumentData* documentData = iterator->currentEntry(); // add to document counter docs++; listDocs++; // update the topdocs list if( hasTopdocs ) { int length = index->documentLength( documentData->document ); int count = documentData->positions.size(); // compute DocListIterator::TopDocument::greater (current, top()) // if false, no 
reason to insert this entry. // note that the test is inverted. // int(length * threshold) <= count is equivalent to // count/length > topdocs.top().count/topdocs.top().length // but we use < to force breaking a tie in favor of keeping // the first seen document. if( int(length * threshold) < count || topdocs.size() < topdocsCount ) { // form a topdocs entry for this document DocListIterator::TopDocument topDocument( documentData->document, count, length ); topdocs.push( topDocument ); while( topdocs.size() > topdocsCount ) topdocs.pop(); threshold = topdocs.top().count / double(topdocs.top().length); } } if( listBuffer.position() > minimumSkip ) { // time to write in a skip _writeBatch( _invertedOutput, documentData->document, listBuffer.position(), listBuffer ); // delta encode documents by batch lastDocument = 0; } assert( documentData->document > lastDocument ); // write this entry out to the list stream << documentData->document - lastDocument; stream << (int) documentData->positions.size(); lastDocument = documentData->document; int lastPosition = 0; for( int i=0; i<documentData->positions.size(); i++ ) { stream << (documentData->positions[i] - lastPosition); lastPosition = documentData->positions[i]; positions++; listPositions++; } iterator->nextEntry(); } indri::index::TermData* td = iterator->termData(); assert( listPositions == td->corpus.totalCount ); assert( listDocs == td->corpus.documentCount ); } assert( docs == termData->corpus.documentCount ); assert( positions == termData->corpus.totalCount ); // write in the final skip info _writeBatch( _invertedOutput, -1, listBuffer.position(), listBuffer ); UINT64 finalPosition = _invertedOutput->tell(); if( hasTopdocs ) { _invertedOutput->seek( initialPosition ); _invertedOutput->write( &topdocsCount, sizeof(int) ); assert( topdocs.size() == topdocsCount ); // write these into the topdocs list in order from smallest fraction to largest fraction, // where fraction = c(w;D)/|D| while( topdocs.size() ) { 
DocListIterator::TopDocument topDocument = topdocs.top(); _invertedOutput->write( &topDocument.document, sizeof(int) ); _invertedOutput->write( &topDocument.count, sizeof(int) ); _invertedOutput->write( &topDocument.length, sizeof(int) ); topdocs.pop(); } assert( (_invertedOutput->tell() - initialPosition) == topdocsSpace ); _invertedOutput->seek( finalPosition ); } endOffset = finalPosition; }