/// Checks, for every indexed field in DocHelper::fields, that the reader's hasNorms() answer
/// agrees with both the field's omit-norms flag and membership in DocHelper::noNorms.
/// For fields without norms it additionally checks that norms() returns an empty array and
/// that the buffer-filling norms() overload writes the encoded value of 1.0 for every document.
///
/// @param reader reader whose norms are inspected.
void checkNorms(IndexReaderPtr reader) {
    // test omit norms
    for (int32_t fieldIndex = 0; fieldIndex < DocHelper::fields.size(); ++fieldIndex) {
        FieldPtr f = DocHelper::fields[fieldIndex];

        // Non-indexed fields are not checked.
        if (!f->isIndexed()) {
            continue;
        }

        BOOST_CHECK_EQUAL(reader->hasNorms(f->name()), !f->getOmitNorms());
        BOOST_CHECK_EQUAL(reader->hasNorms(f->name()), !DocHelper::noNorms.contains(f->name()));

        // Fields that do have norms need no further inspection.
        if (reader->hasNorms(f->name())) {
            continue;
        }

        // test for fake norms of 1.0 or null depending on the flag
        ByteArray norms = reader->norms(f->name());
        uint8_t norm1 = DefaultSimilarity::encodeNorm(1.0);
        BOOST_CHECK(!norms);

        // Size the buffer to one byte per document, then let the reader fill it from offset 0.
        norms.resize(reader->maxDoc());
        reader->norms(f->name(), norms, 0);

        for (int32_t j = 0; j < reader->maxDoc(); ++j) {
            BOOST_CHECK_EQUAL(norms[j], norm1);
        }
    }
}
/// Builds a StringIndex for the field named by the cache entry: an array mapping each document
/// number to a term ordinal, plus the list of term texts indexed by ordinal.
///
/// Ordinal 0 is reserved for an empty string so that documents with no term in the field
/// (whose slot in the ordinal array is never written) refer to it.
///
/// @param reader reader supplying the term enumeration and postings.
/// @param key    cache entry; only its `field` member is read here.
/// @return a StringIndex wrapped in boost::any.
/// @throws LuceneException rethrown after both term iterators have been closed.
boost::any StringIndexCache::createValue(IndexReaderPtr reader, EntryPtr key) {
    EntryPtr cacheEntry(key);
    String fieldName(cacheEntry->field);

    Collection<int32_t> docToOrdinal(Collection<int32_t>::newInstance(reader->maxDoc()));
    Collection<String> ordinalToTerm(Collection<String>::newInstance(reader->maxDoc() + 1));
    TermDocsPtr postings(reader->termDocs());
    TermEnumPtr termCursor(reader->terms(newLucene<Term>(fieldName)));

    // Current term number.
    int32_t ordinal = 0;

    // An entry for documents that have no terms in this field. Should a document with no terms
    // be at top or bottom? This puts them at the top - if it is changed, FieldDocSortedHitQueue
    // needs to change as well.
    ordinalToTerm[ordinal++] = L"";

    // Holds any exception raised while scanning so the iterators can be closed before rethrowing.
    LuceneException pending;
    try {
        bool hasMore = true;
        while (hasMore) {
            TermPtr current(termCursor->term());

            // Keep going only while the enumeration is still inside this field and there is
            // room left in the term array.
            bool usable = current && current->field() == fieldName && ordinal < ordinalToTerm.size();
            if (!usable) {
                break;
            }

            // Store term text.
            ordinalToTerm[ordinal] = current->text();

            postings->seek(termCursor);
            while (postings->next()) {
                docToOrdinal[postings->doc()] = ordinal;
            }

            ++ordinal;
            hasMore = termCursor->next();
        }
    } catch (LuceneException& e) {
        pending = e;
    }
    postings->close();
    termCursor->close();
    pending.throwException();

    if (ordinal == 0) {
        // If there are no terms, make the term array have a single null entry.
        // (The counter was already advanced past the placeholder above; branch kept as is.)
        ordinalToTerm = Collection<String>::newInstance(1);
    } else if (ordinal < ordinalToTerm.size()) {
        // If there are fewer terms than documents, trim off the dead array space.
        ordinalToTerm.resize(ordinal);
    }

    return newLucene<StringIndex>(docToOrdinal, ordinalToTerm);
}
/// Builds the per-document double array for the field named by the cache entry.
///
/// When the entry carries no parser, the request is forwarded to the wrapping FieldCache using
/// the default double parser, falling back to the numeric-utils double parser if the first
/// attempt raises NumberFormatException. Otherwise every term of the field is parsed with the
/// supplied parser and the value is written to the slot of each document containing that term.
///
/// @param reader reader supplying the term enumeration and postings.
/// @param key    cache entry; its `field` and `custom` members are read here.
/// @return a Collection<double> with reader->maxDoc() slots (or the wrapper's result) as boost::any.
/// @throws LuceneException rethrown after both term iterators have been closed.
boost::any DoubleCache::createValue(IndexReaderPtr reader, EntryPtr key) {
    EntryPtr cacheEntry(key);
    String fieldName(cacheEntry->field);
    DoubleParserPtr parser(VariantUtils::get<DoubleParserPtr>(cacheEntry->custom));

    if (!parser) {
        FieldCachePtr owner(_wrapper);
        boost::any resolved;
        try {
            resolved = owner->getDoubles(reader, fieldName, FieldCache::DEFAULT_DOUBLE_PARSER());
        } catch (NumberFormatException&) {
            resolved = owner->getDoubles(reader, fieldName, FieldCache::NUMERIC_UTILS_DOUBLE_PARSER());
        }
        return resolved;
    }

    Collection<double> values;
    TermDocsPtr postings(reader->termDocs());
    TermEnumPtr termCursor(reader->terms(newLucene<Term>(fieldName)));

    // Holds any exception raised while scanning so the iterators can be closed before rethrowing.
    LuceneException pending;
    try {
        bool hasMore = true;
        while (hasMore) {
            TermPtr current(termCursor->term());
            bool inField = current && current->field() == fieldName;
            if (!inField) {
                break;
            }

            double parsed = parser->parseDouble(current->text());

            // Late init: allocate only once a term has been parsed successfully.
            if (!values) {
                values = Collection<double>::newInstance(reader->maxDoc());
            }

            postings->seek(termCursor);
            while (postings->next()) {
                values[postings->doc()] = parsed;
            }

            hasMore = termCursor->next();
        }
    } catch (StopFillCacheException&) {
        // The scan simply ends here; whatever was filled so far is kept.
    } catch (LuceneException& e) {
        pending = e;
    }
    postings->close();
    termCursor->close();
    pending.throwException();

    // No values were stored: still hand back an array sized to the reader.
    if (!values) {
        values = Collection<double>::newInstance(reader->maxDoc());
    }
    return values;
}
/// Builds the per-document string array for the field named by the cache entry: each term's
/// text is written to the slot of every document that contains that term.
///
/// @param reader reader supplying the term enumeration and postings.
/// @param key    cache entry; only its `field` member is read here.
/// @return a Collection<String> with reader->maxDoc() slots, wrapped in boost::any.
/// @throws LuceneException rethrown after both term iterators have been closed.
boost::any StringCache::createValue(IndexReaderPtr reader, EntryPtr key) {
    EntryPtr cacheEntry(key);
    String fieldName(cacheEntry->field);

    Collection<String> values(Collection<String>::newInstance(reader->maxDoc()));
    TermDocsPtr postings(reader->termDocs());
    TermEnumPtr termCursor(reader->terms(newLucene<Term>(fieldName)));

    // Holds any exception raised while scanning so the iterators can be closed before rethrowing.
    LuceneException pending;
    try {
        bool hasMore = true;
        while (hasMore) {
            TermPtr current(termCursor->term());
            bool inField = current && current->field() == fieldName;
            if (!inField) {
                break;
            }

            String text(current->text());
            postings->seek(termCursor);
            while (postings->next()) {
                values[postings->doc()] = text;
            }

            hasMore = termCursor->next();
        }
    } catch (LuceneException& e) {
        pending = e;
    }
    postings->close();
    termCursor->close();
    pending.throwException();

    return values;
}
/// Opens a reader on the directory and checks that both maxDoc() and numDocs() equal the
/// expected count, then closes the reader.
///
/// @param dir     directory holding the index to open.
/// @param numDocs expected value for both counters.
static void verifyNumDocs(DirectoryPtr dir, int32_t numDocs) {
    // The second argument to open() is passed as true, exactly as before.
    IndexReaderPtr reader(IndexReader::open(dir, true));

    BOOST_CHECK_EQUAL(reader->maxDoc(), numDocs);
    BOOST_CHECK_EQUAL(reader->numDocs(), numDocs);

    reader->close();
}
void modifyNormsForF1(const IndexReaderPtr& ir) { int32_t n = ir->maxDoc(); for (int32_t i = 0; i < n; i += 3) { // modify for every third doc int32_t k = (i * 3) % modifiedNorms.size(); double origNorm = modifiedNorms[i]; double newNorm = modifiedNorms[k]; modifiedNorms[i] = newNorm; modifiedNorms[k] = origNorm; ir->setNorm(i, L"f1", newNorm); ir->setNorm(k, L"f1", origNorm); } }
/// Checks, for every indexed field in DocHelper::fields, that the reader's hasNorms() answer
/// agrees with both the field's omit-norms flag and membership in DocHelper::noNorms.
/// For fields without norms it additionally checks that norms() returns an empty array and
/// that the buffer-filling norms() overload writes the encoded value of 1.0 for every document.
///
/// @param reader reader whose norms are inspected.
void checkNorms(IndexReaderPtr reader) {
    Collection<FieldPtr>::iterator field = DocHelper::fields.begin();
    for (; field != DocHelper::fields.end(); ++field) {
        // Non-indexed fields are not checked.
        if (!(*field)->isIndexed()) {
            continue;
        }

        BOOST_CHECK_EQUAL(reader->hasNorms((*field)->name()), !(*field)->getOmitNorms());
        BOOST_CHECK_EQUAL(reader->hasNorms((*field)->name()), !DocHelper::noNorms.contains((*field)->name()));

        // Fields that do have norms need no further inspection.
        if (reader->hasNorms((*field)->name())) {
            continue;
        }

        // test for fake norms of 1.0 or null depending on the flag
        ByteArray norms = reader->norms((*field)->name());
        uint8_t norm1 = DefaultSimilarity::encodeNorm(1.0);
        BOOST_CHECK(!norms);

        // Allocate one byte per document, then let the reader fill the buffer from offset 0.
        norms = ByteArray::newInstance(reader->maxDoc());
        reader->norms((*field)->name(), norms, 0);

        for (int32_t j = 0; j < reader->maxDoc(); ++j) {
            BOOST_CHECK_EQUAL(norms[j], norm1);
        }
    }
}
/// Opens a reader on the directory and compares each document's "count" field with the
/// string form of (document number + startAt).
///
/// Every document is examined even after a mismatch has been seen.
///
/// @param directory directory holding the index to open.
/// @param startAt   offset added to the document number to form the expected value.
/// @return true if at least one document's "count" value differed from the expected string,
///         false if all of them matched.
static bool verifyIndex(DirectoryPtr directory, int32_t startAt) {
    bool mismatchSeen = false;

    IndexReaderPtr reader = IndexReader::open(directory, true);
    const int32_t docCount = reader->maxDoc();

    int32_t docId = 0;
    while (docId < docCount) {
        DocumentPtr stored = reader->document(docId);
        if (stored->getField(L"count")->stringValue() != StringUtils::toString(docId + startAt)) {
            mismatchSeen = true;
        }
        ++docId;
    }

    reader->close();
    return mismatchSeen;
}
/// Builds the per-document byte array for the field named by the cache entry.
///
/// When the entry carries no parser, the request is forwarded to the wrapping FieldCache with
/// the default byte parser. Otherwise every term of the field is parsed with the supplied
/// parser and the value is written to the slot of each document containing that term.
///
/// @param reader reader supplying the term enumeration and postings.
/// @param key    cache entry; its `field` and `custom` members are read here.
/// @return a Collection<uint8_t> with reader->maxDoc() slots (or the wrapper's result) as boost::any.
/// @throws LuceneException rethrown after both term iterators have been closed.
boost::any ByteCache::createValue(IndexReaderPtr reader, EntryPtr key) {
    EntryPtr cacheEntry(key);
    String fieldName(cacheEntry->field);
    ByteParserPtr parser(VariantUtils::get<ByteParserPtr>(cacheEntry->custom));

    if (!parser) {
        return FieldCachePtr(_wrapper)->getBytes(reader, fieldName, FieldCache::DEFAULT_BYTE_PARSER());
    }

    Collection<uint8_t> values(Collection<uint8_t>::newInstance(reader->maxDoc()));
    TermDocsPtr postings(reader->termDocs());
    TermEnumPtr termCursor(reader->terms(newLucene<Term>(fieldName)));

    // Holds any exception raised while scanning so the iterators can be closed before rethrowing.
    LuceneException pending;
    try {
        bool hasMore = true;
        while (hasMore) {
            TermPtr current(termCursor->term());
            bool inField = current && current->field() == fieldName;
            if (!inField) {
                break;
            }

            uint8_t parsed = parser->parseByte(current->text());
            postings->seek(termCursor);
            while (postings->next()) {
                values[postings->doc()] = parsed;
            }

            hasMore = termCursor->next();
        }
    } catch (StopFillCacheException&) {
        // The scan simply ends here; whatever was filled so far is kept.
    } catch (LuceneException& e) {
        pending = e;
    }
    postings->close();
    termCursor->close();
    pending.throwException();

    return values;
}