/// Indexes the given data. bool XapianIndex::indexDocument(Tokenizer &tokens, const std::set<std::string> &labels, unsigned int &docId) { unsigned int dataLength = 0; bool indexed = false; XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false); if (pDatabase == NULL) { cerr << "Bad index " << m_databaseName << endl; return false; } try { // Get the document const Document *pDocument = tokens.getDocument(); if (pDocument == NULL) { #ifdef DEBUG cout << "XapianIndex::indexDocument: no document" << endl; #endif return false; } // Cache the document's properties DocumentInfo docInfo(pDocument->getTitle(), pDocument->getLocation(), pDocument->getType(), pDocument->getLanguage()); docInfo.setTimestamp(pDocument->getTimestamp()); docInfo.setLocation(Url::canonicalizeUrl(docInfo.getLocation())); const char *pData = pDocument->getData(dataLength); if (pData != NULL) { m_stemLanguage = scanDocument(pData, dataLength, docInfo); } Xapian::Document doc; Xapian::termcount termPos = 0; #ifdef DEBUG cout << "XapianIndex::indexDocument: adding terms" << endl; #endif // Add the tokenizer's terms to the Xapian document addPostingsToDocument(tokens, doc, "", termPos, m_stemMode); // Add labels for (set<string>::const_iterator labelIter = labels.begin(); labelIter != labels.end(); ++labelIter) { doc.add_term(limitTermLength(string("XLABEL:") + *labelIter)); } if (addCommonTerms(docInfo, doc, termPos) == true) { setDocumentData(docInfo, doc, m_stemLanguage); Xapian::WritableDatabase *pIndex = pDatabase->writeLock(); if (pIndex != NULL) { // Add this document to the Xapian index docId = pIndex->add_document(doc); indexed = true; } } } catch (const Xapian::Error &error) { cerr << "Couldn't index document: " << error.get_type() << ": " << error.get_msg() << endl; } catch (...) { cerr << "Couldn't index document, unknown exception occured" << endl; } pDatabase->unlock(); return indexed; }
/// Updates the given document; true if success. bool XapianIndex::updateDocument(unsigned int docId, Tokenizer &tokens) { unsigned int dataLength = 0; bool updated = false; const Document *pDocument = tokens.getDocument(); if (pDocument == NULL) { return false; } XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false); if (pDatabase == NULL) { cerr << "Bad index " << m_databaseName << endl; return false; } const char *pData = pDocument->getData(dataLength); if (pData == NULL) { return false; } // Cache the document's properties DocumentInfo docInfo(pDocument->getTitle(), pDocument->getLocation(), pDocument->getType(), pDocument->getLanguage()); docInfo.setTimestamp(pDocument->getTimestamp()); docInfo.setLocation(Url::canonicalizeUrl(docInfo.getLocation())); // Don't scan the document if a language is specified m_stemLanguage = Languages::toEnglish(pDocument->getLanguage()); if (m_stemLanguage.empty() == true) { m_stemLanguage = scanDocument(pData, dataLength, docInfo); } try { set<string> labels; Xapian::Document doc; Xapian::termcount termPos = 0; // Add the tokenizer's terms to the document addPostingsToDocument(tokens, doc, "", termPos, m_stemMode); // Get the document's labels if (getDocumentLabels(docId, labels) == true) { // Add labels for (set<string>::const_iterator labelIter = labels.begin(); labelIter != labels.end(); ++labelIter) { doc.add_term(limitTermLength(string("XLABEL:") + *labelIter)); } } if (addCommonTerms(docInfo, doc, termPos) == true) { setDocumentData(docInfo, doc, m_stemLanguage); Xapian::WritableDatabase *pIndex = pDatabase->writeLock(); if (pIndex != NULL) { // Update the document in the database pIndex->replace_document(docId, doc); updated = true; } } } catch (const Xapian::Error &error) { cerr << "Couldn't update document: " << error.get_type() << ": " << error.get_msg() << endl; } catch (...) { cerr << "Couldn't update document, unknown exception occured" << endl; } pDatabase->unlock(); return updated; }