int main( int argc, char** argv ) { using plist_type = surf::postings_list<surf::compression_codec::optpfor,128>; // test small uncompressed lists for(size_t i=0;i<500;i++) { size_t n = 1 + rand()%20; std::vector< std::pair<uint64_t,uint64_t> > A; uint64_t cur_id = rand()%5000; for(size_t j=0;j<n;j++) { cur_id += rand()%5000; uint64_t cur_freq = 1 + rand() % 50; A.emplace_back(cur_id,cur_freq); } plist_type pl(A); auto itr = pl.begin(); auto end = pl.end(); size_t j=0; while( itr != end) { auto id = itr.docid(); auto freq = itr.freq(); if(id != A[j].first && freq != A[j].second) { std::cerr << "ERROR: uncompressed list"; } j++; ++itr; } } // test larger compressed lists for(size_t i=0;i<500;i++) { size_t n = 1 + rand()%20000; std::vector< std::pair<uint64_t,uint64_t> > A; uint64_t cur_id = rand()%500; for(size_t j=0;j<n;j++) { cur_id += rand()%500; uint64_t cur_freq = 1 + rand() % 50; A.emplace_back(cur_id,cur_freq); } plist_type pl(A); auto itr = pl.begin(); auto end = pl.end(); size_t j=0; while( itr != end) { auto id = itr.docid(); auto freq = itr.freq(); if(id != A[j].first && freq != A[j].second) { std::cerr << "ERROR: uncompressed list"; } j++; ++itr; } } }
// Moves the cursor forward until docid() is at least `lower_bound`.
//
// If `lower_bound` exceeds the maximum of the last block, m_cur_docid is set
// to m_universe (presumably the end-of-list sentinel; confirm with the
// class) and the call returns without touching the block buffer.
//
// Precondition (asserted): `lower_bound` is not behind the current docid,
// unless the cursor is still at position 0.
void DS2I_ALWAYSINLINE next_geq(uint64_t lower_bound)
{
    assert(lower_bound >= m_cur_docid || position() == 0);

    if (DS2I_UNLIKELY(lower_bound > m_cur_block_max)) {
        // Target lies past every block: nothing to decode.
        if (lower_bound > block_max(m_blocks - 1)) {
            m_cur_docid = m_universe;
            return;
        }

        // Linear scan over the following blocks for the first one whose
        // maximum reaches the bound.
        // binary search seems to perform worse here
        uint64_t target_block = m_cur_block + 1;
        for (; block_max(target_block) < lower_bound; ++target_block) {
        }

        decode_docs_block(target_block);
    }

    // Step through the decoded block; each buffered value plus one is added
    // to the running docid.
    for (; docid() < lower_bound;) {
        ++m_pos_in_block;
        m_cur_docid += m_docs_buf[m_pos_in_block] + 1;
        assert(m_pos_in_block < m_cur_block_size);
    }
}