bool OnDiskHandler::fileMoved(const string &fileName, const string &previousFileName) { bool handledEvent = false; #ifdef DEBUG cout << "OnDiskHandler::fileMoved: " << fileName << endl; #endif pthread_mutex_lock(&m_mutex); unsigned int oldDocId = m_index.hasDocument(string("file://") + previousFileName); if (oldDocId > 0) { DocumentInfo docInfo; if (m_index.getDocumentInfo(oldDocId, docInfo) == true) { // Change the location docInfo.setLocation(string("file://") + fileName); handledEvent = replaceFile(oldDocId, docInfo); } } pthread_mutex_unlock(&m_mutex); return handledEvent; }
// // Updates a document's properties. // void IndexTree::updateDocumentInfo(unsigned int docId, const DocumentInfo &docInfo) { if (docId == 0) { return; } // Go through the list of indexed documents TreeModel::Children children = m_refStore->children(); for (TreeModel::Children::iterator iter = children.begin(); iter != children.end(); ++iter) { TreeModel::Row row = *iter; if (docId == row[m_indexColumns.m_id]) { row[m_indexColumns.m_text] = to_utf8(docInfo.getTitle()); row[m_indexColumns.m_type] = to_utf8(docInfo.getType()); row[m_indexColumns.m_language] = to_utf8(docInfo.getLanguage()); row[m_indexColumns.m_timestamp] = to_utf8(docInfo.getTimestamp()); #ifdef DEBUG cout << "IndexTree::updateDocumentInfo: language now " << docInfo.getLanguage() << endl; #endif break; } } }
bool ThreadsManager::index_document(const DocumentInfo &docInfo) { string location(docInfo.getLocation()); if (location.empty() == true) { // Nothing to do return false; } // If the document is a mail message, we can't index it again Url urlObj(location); if (urlObj.getProtocol() == "mailbox") { return false; } // Is the document being indexed/updated ? if (write_lock_lists() == true) { bool beingProcessed = true; if (m_beingIndexed.find(location) == m_beingIndexed.end()) { m_beingIndexed.insert(location); beingProcessed = false; } unlock_lists(); if (beingProcessed == true) { // FIXME: we may have to set labels on this document return false; } } // Is it an update ? IndexInterface *pIndex = PinotSettings::getInstance().getIndex(m_defaultIndexLocation); if (pIndex == NULL) { return false; } unsigned int docId = pIndex->hasDocument(docInfo.getLocation()); if (docId > 0) { // Yes, it is start_thread(new IndexingThread(docInfo, docId, m_defaultIndexLocation)); } else { // This is a new document start_thread(new IndexingThread(docInfo, docId, m_defaultIndexLocation)); } delete pIndex; return true; }
/// Returns a document's properties. bool XapianIndex::getDocumentInfo(unsigned int docId, DocumentInfo &docInfo) const { bool foundDocument = false; if (docId == 0) { return false; } XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false); if (pDatabase == NULL) { cerr << "Bad index " << m_databaseName << endl; return false; } try { Xapian::Database *pIndex = pDatabase->readLock(); if (pIndex != NULL) { Xapian::Document doc = pIndex->get_document(docId); // Get the current document data string record = doc.get_data(); if (record.empty() == false) { string language = Languages::toLocale(StringManip::extractField(record, "language=", "")); docInfo = DocumentInfo(StringManip::extractField(record, "caption=", "\n"), StringManip::extractField(record, "url=", "\n"), StringManip::extractField(record, "type=", "\n"), language); docInfo.setTimestamp(StringManip::extractField(record, "timestamp=", "\n")); #ifdef DEBUG cout << "XapianIndex::getDocumentInfo: language is " << docInfo.getLanguage() << endl; #endif foundDocument = true; } } } catch (const Xapian::Error &error) { cerr << "Couldn't get document properties: " << error.get_msg() << endl; } catch (...) { cerr << "Couldn't get document properties, unknown exception occured" << endl; } pDatabase->unlock(); return foundDocument; }
bool MetaDataBackup::setAttribute(const DocumentInfo &docInfo, const string &name, const string &value, bool noXAttr) { string url(docInfo.getLocation()); string urlWithIPath(docInfo.getLocation(true)); #ifdef HAVE_ATTR_XATTR_H Url urlObj(url); // If the file is local and isn't a nested document, use an extended attribute if ((noXAttr == false) && (urlObj.isLocal() == true) && (docInfo.getInternalPath().empty() == true)) { string fileName(url.substr(urlObj.getProtocol().length() + 3)); string attrName("pinot." + name); // Set an attribute, and add an entry in the table if (setxattr(fileName.c_str(), attrName.c_str(), value.c_str(), (size_t)value.length(), 0) != 0) { #ifdef DEBUG cout << "MetaDataBackup::setAttribute: setxattr failed with " << strerror(errno) << endl; #endif } } #endif bool update = false, success = false; // Is there already such an item for this URL ? SQLResults *results = executeStatement("SELECT Url FROM MetaDataBackup \ WHERE Url='%q' AND Name='%q';", Url::escapeUrl(urlWithIPath).c_str(), name.c_str()); if (results != NULL) { SQLRow *row = results->nextRow(); if (row != NULL) { // Yes, there is update = true; delete row; } delete results; } if (update == false) { results = executeStatement("INSERT INTO MetaDataBackup \ VALUES('%q', '%q', '%q');", Url::escapeUrl(urlWithIPath).c_str(), name.c_str(), value.c_str()); }
/// Indexes one document.
///
/// Assigns the next document ID, records the file name and the detected
/// language (only when detection yields exactly one candidate), then reads
/// the stream word by word. A word with a known lemma is indexed under every
/// entry that lemma_backward lists for that lemma; any other word is indexed
/// under its normalized form.
///
/// @param in        stream positioned at the start of the document's text
/// @param filename  path of the document; re-opened here for language detection
void SimpleIndex::insert(ifstream *in, string filename)
{
	// Get next doc id
	int docid = doccount++;

	// Save filename info
	doc_info.set(docid, "filename", filename);

	// Get document language ...
	ifstream tempstream;
	tempstream.open(filename);
	// NOTE(review): ownership of the returned set isn't visible from here,
	// so it is neither freed nor kept - confirm against recognize().
	set<string> *langs = lang_rec.recognize(&tempstream);

	// Only set language if result is unambigous
	string lang;
	if ((langs != nullptr) && (langs->size() == 1))
	{
		lang = *(langs->begin());
	}
	tempstream.close();

	doc_info.set(docid, "language", lang);
#ifdef DEBUG
	cerr << "Got " << lang << endl;
#endif

	// Store in the list of all doc ids
	doc_ids.add(docid);

	// Read the whole file word by word to the end.
	// The extraction itself is the loop condition: testing eof() beforehand
	// lets one more iteration run after the last word, which indexes the
	// previous word a second time (or an empty word for an empty file).
	string word;
	while (*in >> word)
	{
		string nword = u.normalize(word);

		// Look the lemma up once; operator[] is kept so that unknown words
		// still get an empty entry as before
		const auto &lemma = lemma_forward[lang][nword];

		if (lemma != "")
		{
			// This trace is deliberately unconditional, as in the original
			cout << "Got lemma " << lemma << " for word " << nword << endl;

			// Add every form recorded for this lemma to the index
			for (const auto &form : lemma_backward[lang][lemma])
			{
				auto &posting = inverted_index[form];

				posting.first++;
				posting.second.add(docid);
			}
		}
		else
		{
#ifdef DEBUG
			cout << "Got no lemma for word " << nword << endl;
#endif
			auto &posting = inverted_index[nword];

			posting.first++;
			posting.second.add(docid);
		}
	}
}
bool XapianIndex::prepareDocument(const DocumentInfo &info, Xapian::Document &doc, Xapian::termcount &termPos) const { string title(info.getTitle()); string location(info.getLocation()); Url urlObj(location); // Add a magic term :-) doc.add_term(MAGIC_TERM); // Index the title with and without prefix S if (title.empty() == false) { Document titleDoc; titleDoc.setData(title.c_str(), title.length()); Tokenizer titleTokens(&titleDoc); addTermsToDocument(titleTokens, doc, "S", termPos, STORE_UNSTEM); titleTokens.rewind(); addTermsToDocument(titleTokens, doc, "", termPos, m_stemMode); } // Index the full URL with prefix U doc.add_term(limitTermLength(string("U") + location, true)); // ...the host name and included domains with prefix H string hostName(StringManip::toLowerCase(urlObj.getHost())); if (hostName.empty() == false) { doc.add_term(limitTermLength(string("H") + hostName, true)); string::size_type dotPos = hostName.find('.'); while (dotPos != string::npos) { doc.add_term(limitTermLength(string("H") + hostName.substr(dotPos + 1), true)); // Next dotPos = hostName.find('.', dotPos + 1); } } // ...and the file name with prefix P string fileName(urlObj.getFile()); if (fileName.empty() == false) { doc.add_term(limitTermLength(string("P") + StringManip::toLowerCase(fileName), true)); } // Finally, add the language code with prefix L doc.add_term(string("L") + Languages::toCode(m_stemLanguage)); setDocumentData(doc, info, m_stemLanguage); return true; }
/// Updates a document's properties. bool DBusXapianIndex::updateDocumentInfo(unsigned int docId, const DocumentInfo &docInfo) { bool updated = false; DBusGConnection *pBus = getBusConnection(); if (pBus == NULL) { return false; } DBusGProxy *pBusProxy = getBusProxy(pBus); if (pBusProxy == NULL) { cerr << "DBusXapianIndex::updateDocumentInfo: couldn't get bus proxy" << endl; return false; } GError *pError = NULL; const char *pTitle = docInfo.getTitle().c_str(); const char *pLocation = docInfo.getLocation().c_str(); const char *pType = docInfo.getType().c_str(); string language(Languages::toEnglish(docInfo.getLanguage())); const char *pLanguage = language.c_str(); if (dbus_g_proxy_call(pBusProxy, "SetDocumentInfo", &pError, G_TYPE_UINT, docId, G_TYPE_STRING, pTitle, G_TYPE_STRING, pLocation, G_TYPE_STRING, pType, G_TYPE_STRING, pLanguage, G_TYPE_INVALID, G_TYPE_UINT, &docId, G_TYPE_INVALID) == TRUE) { updated = true; } else { if (pError != NULL) { cerr << "DBusXapianIndex::updateDocumentInfo: " << pError->message << endl; g_error_free(pError); } } g_object_unref(pBusProxy); // FIXME: don't we have to call dbus_g_connection_unref(pBus); ? return updated; }
void DirectoryScannerThread::foundFile(const DocumentInfo &docInfo) { if ((docInfo.getLocation().empty() == true) || (m_done == true)) { return; } stringstream labelStream; // This identifies the source labelStream << "X-SOURCE" << m_sourceId; #ifdef DEBUG cout << "DirectoryScannerThread::foundFile: source label for " << docInfo.getLocation() << " is " << labelStream.str() << endl; #endif m_signalFileFound(docInfo, labelStream.str(), false); }
void XapianIndex::setDocumentData(const DocumentInfo &info, Xapian::Document &doc, const string &language) const { string title(info.getTitle()); string timestamp(info.getTimestamp()); char timeStr[64]; time_t timeT = TimeConverter::fromTimestamp(timestamp); // Set the document data omindex-style string record = "url="; record += info.getLocation(); // The sample will be generated at query time record += "\nsample="; record += "\ncaption="; if (badField(title) == true) { // Modify the title if necessary string::size_type pos = title.find("="); while (pos != string::npos) { title[pos] = ' '; pos = title.find("=", pos + 1); } #ifdef DEBUG cout << "XapianIndex::setDocumentData: modified title" << endl; #endif } record += title; record += "\ntype="; record += info.getType(); // Append a timestamp, in a format compatible with Omega record += "\nmodtime="; snprintf(timeStr, 64, "%ld", timeT); record += timeStr; // ...and the language record += "\nlanguage="; record += StringManip::toLowerCase(language); #ifdef DEBUG cout << "XapianIndex::setDocumentData: document data is " << record << endl; #endif doc.set_data(record); // Add this value to allow sorting by date doc.add_value(0, StringManip::integerToBinaryString((uint32_t)timeT)); }
/// Restores the inverted index from a saved dump.
///
/// Each line is read as "<count>\t<doclist>\t<word>", where the doclist is a
/// '|'-separated list of "<docnum>:<filename>:<count>:<language>" entries.
/// Counts are added to the entry for the word; each listed document is
/// registered along with its file name and, when present, its language.
/// Document entries without a number or a count are ignored.
///
/// @param infile  stream positioned at the start of the dump
void SimpleIndex::restore_index(ifstream *infile)
{
	string line;

	// Reading is the loop condition: testing eof() beforehand runs the body
	// once more after the last line and creates an entry for the empty word.
	while (getline(*infile, line))
	{
		if (line.empty())
		{
			// Blank lines carry no entry
			continue;
		}

		stringstream linestream(line);
		string scount, sdoclist, sword;
		getline(linestream, scount, '\t');
		getline(linestream, sdoclist, '\t');
		getline(linestream, sword);

		inverted_index[sword].first += atoi(scount.c_str());

		stringstream docstream(sdoclist);
		string sdocinfo;
		while (getline(docstream, sdocinfo, '|'))
		{
			string sdocnum, sdocfilename, sdoccount, sdoclang;
			stringstream docinfostream(sdocinfo);
			getline(docinfostream, sdocnum, ':');
			getline(docinfostream, sdocfilename, ':');
			getline(docinfostream, sdoccount, ':');
			getline(docinfostream, sdoclang, ':');

			if (sdocnum.empty() || sdoccount.empty())
			{
				// Incomplete entry
				continue;
			}

			int docnum = atoi(sdocnum.c_str());
			doc_ids.add(docnum);
			inverted_index[sword].second.add(docnum, atoi(sdoccount.c_str()));

			if (!sdocfilename.empty())
			{
				doc_info.set(docnum, "filename", sdocfilename);
			}
			else
			{
				// Placeholder for entries saved without a file name
				doc_info.set(docnum, "filename", string("Lost in translation"));
			}

			if (!sdoclang.empty())
			{
				doc_info.set(docnum, "language", sdoclang);
			}
		}
	}
}
/// Stores a document's properties as its data record and sets its date value.
///
/// The record is a list of "name=value" lines: url, sample (left empty),
/// caption, type, timestamp and language. Value slot 0 gets the timestamp
/// converted to a number and printed in decimal, to allow sorting by date.
void XapianIndex::setDocumentData(Xapian::Document &doc, const DocumentInfo &info,
	const string &language) const
{
	string title(info.getTitle());
	string timestamp(info.getTimestamp());
	char timeStr[64];

	// Set the document data omindex-style
	string record = "url=";
	record += info.getLocation();
	// The sample will be generated at query time
	record += "\nsample=";
	record += "\ncaption=";
	if (badField(title) == true)
	{
		// Modify the title if necessary: blank out every '='
		string::size_type pos = title.find("=");
		while (pos != string::npos)
		{
			title[pos] = ' ';
			pos = title.find("=", pos + 1);
		}
#ifdef DEBUG
		cout << "XapianIndex::setDocumentData: modified title" << endl;
#endif
	}
	record += title;
	record += "\ntype=";
	record += info.getType();
	// Append a timestamp
	record += "\ntimestamp=";
	record += timestamp;
	// ...and the language
	record += "\nlanguage=";
	record += language;
#ifdef DEBUG
	cout << "XapianIndex::setDocumentData: document data is " << record << endl;
#endif
	doc.set_data(record);

	// Add this value to allow sorting by date.
	// The conversion result is cast to long and printed with %ld: passing
	// it to %d mismatches the argument wherever it is wider than int.
	snprintf(timeStr, sizeof(timeStr), "%ld",
		static_cast<long>(TimeConverter::fromTimestamp(timestamp)));
	doc.add_value(0, timeStr);
}
/// Updates a document's properties. bool XapianIndex::updateDocumentInfo(unsigned int docId, const DocumentInfo &docInfo) { bool updated = false; if (docId == 0) { return false; } XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false); if (pDatabase == NULL) { cerr << "Bad index " << m_databaseName << endl; return false; } try { Xapian::WritableDatabase *pIndex = pDatabase->writeLock(); if (pIndex != NULL) { Xapian::Document doc = pIndex->get_document(docId); #ifdef DEBUG cout << "XapianIndex::updateDocumentInfo: language is " << docInfo.getLanguage() << endl; #endif // Update the document data with the current language setDocumentData(doc, docInfo, docInfo.getLanguage()); pIndex->replace_document(docId, doc); updated = true; } } catch (const Xapian::Error &error) { cerr << "Couldn't update document properties: " << error.get_msg() << endl; } catch (...) { cerr << "Couldn't update document properties, unknown exception occured" << endl; } pDatabase->unlock(); return updated; }
bool OnDiskHandler::replaceFile(unsigned int docId, DocumentInfo &docInfo) { FilterWrapper wrapFilter(&m_index); // Unindex the destination file wrapFilter.unindexDocument(docInfo.getLocation()); // Update the document info return m_index.updateDocumentInfo(docId, docInfo); }
/// Orders documents by their "url" field, then by their "ipath" field.
bool DocumentInfo::operator<(const DocumentInfo& other) const
{
	string thisUrl(getField("url"));
	string otherUrl(other.getField("url"));

	if (thisUrl < otherUrl)
	{
		return true;
	}
	if (thisUrl != otherUrl)
	{
		// This URL sorts after the other one
		return false;
	}

	// Same URL: the internal path decides
	return getField("ipath") < other.getField("ipath");
}
bool OnDiskHandler::directoryMoved(const string &dirName, const string &previousDirName) { set<unsigned int> docIdList; bool handledEvent = false; #ifdef DEBUG cout << "OnDiskHandler::directoryMoved: " << dirName << endl; #endif pthread_mutex_lock(&m_mutex); if (m_index.listDocumentsInDirectory(previousDirName, docIdList) == true) { for (set<unsigned int>::const_iterator iter = docIdList.begin(); iter != docIdList.end(); ++iter) { DocumentInfo docInfo; if (m_index.getDocumentInfo(*iter, docInfo) == true) { string newLocation(docInfo.getLocation()); string::size_type pos = newLocation.find(previousDirName); if (pos != string::npos) { newLocation.replace(pos, previousDirName.length(), dirName); // Change the location docInfo.setLocation(newLocation); replaceFile(*iter, docInfo); } } } handledEvent = true; } #ifdef DEBUG else cout << "OnDiskHandler::directoryMoved: no documents in " << previousDirName << endl; #endif pthread_mutex_unlock(&m_mutex); return handledEvent; }
/// Queues a document for indexing.
///
/// Returns an empty string if the document has no location or if an
/// IndexingThread was started for it. Otherwise returns a status message
/// saying why it was refused: mail message, blacklisted, or already being
/// indexed.
ustring ThreadsManager::index_document(const DocumentInfo &docInfo)
{
	string location(docInfo.getLocation());

	if (location.empty() == true)
	{
		// Nothing to do
		return "";
	}

	// If the document is a mail message, we can't index it again
	Url urlObj(location);
	if (urlObj.getProtocol() == "mailbox")
	{
		return _("Can't index mail here");
	}

	// Is the document blacklisted ?
	// This is checked before the location is marked as being indexed:
	// checking afterwards left a refused location in the list with no
	// thread started for it.
	if (PinotSettings::getInstance().isBlackListed(location) == true)
	{
		ustring status(location);
		status += " ";
		status += _("is blacklisted");
		return status;
	}

	// Is the document being indexed/updated ?
	if (write_lock_lists() == true)
	{
		bool beingProcessed = true;

		if (m_beingIndexed.find(location) == m_beingIndexed.end())
		{
			m_beingIndexed.insert(location);
			beingProcessed = false;
		}

		unlock_lists();

		if (beingProcessed == true)
		{
			// FIXME: we may have to set labels on this document
			ustring status(location);
			status += " ";
			status += _("is already being indexed");
			return status;
		}
	}

	start_thread(new IndexingThread(docInfo, m_defaultIndexLocation));

	return "";
}
void XapianIndex::setDocumentData(const DocumentInfo &info, Xapian::Document &doc, const string &language) const { time_t timeT = TimeConverter::fromTimestamp(info.getTimestamp()); // Add this value to allow sorting by date doc.add_value(0, StringManip::integerToBinaryString((uint32_t)timeT)); DocumentInfo docCopy(info); docCopy.setLanguage(language); doc.set_data(XapianDatabase::propsToRecord(&docCopy)); }
/// Loads the settings of one document type for the current warehouse.
///
/// Returns a default-constructed DocumentInfo if the query reports no rows.
/// Throws a heap-allocated SQLException if the query fails.
DocumentInfo DocumentInfoModel::getDocumentInfo(QString type)
{
	DocumentInfo info;

	query = new QSqlQuery(Database::getInstance().db);
	query->prepare("SELECT documents.type, documents.name, numbering, after_text, family "
		"FROM documents, wh_numbering WHERE documents.type = wh_numbering.type AND warehouse = ? AND documents.type = ?");
	query->addBindValue(ApplicationManager::getInstance()->getWarehouse()->getId());
	query->addBindValue(type);
	query->exec();

	if (this->isQueryError(query))
	{
		throw new SQLException("DocumentInfoModel::getDocumentInfo", query);
	}

	if (query->size() <= 0)
	{
		// No such document type for this warehouse
		delete query;
		return info;
	}

	// Only the first row is used
	query->first();
	info.setType(query->value(0).toString());
	info.setName(query->value(1).toString());
	info.setNumbering(query->value(2).toString());
	info.setAfterText(query->value(3).toString());
	info.setFamily(this->getFamily(query->value(4).toString()));

	delete query;

	return info;
}
/// Loads the settings of every document type for the current warehouse.
///
/// Returns one DocumentInfo per row of the query, in the order the query
/// yields them. Throws a heap-allocated SQLException if the query fails.
QVector<DocumentInfo> DocumentInfoModel::getDocumentsInfo()
{
	QVector<DocumentInfo> docsInfo;

	query = new QSqlQuery(Database::getInstance().db);
	query->prepare("SELECT documents.type, name, numbering, after_text, family "
		"FROM documents, wh_numbering WHERE documents.type = wh_numbering.type AND warehouse = ?");
	query->addBindValue(ApplicationManager::getInstance()->getWarehouse()->getId());
	query->exec();

	if (this->isQueryError(query))
	{
		throw new SQLException("DocumentInfoModel::getDocumentsInfo", query);
	}

	// The same object is refilled for every row, then copied into the result
	DocumentInfo rowInfo;
	for (bool hasRow = query->next(); hasRow; hasRow = query->next())
	{
		rowInfo.setType(query->value(0).toString());
		rowInfo.setName(query->value(1).toString());
		rowInfo.setNumbering(query->value(2).toString());
		rowInfo.setAfterText(query->value(3).toString());
		rowInfo.setFamily(this->getFamily(query->value(4).toString()));

		docsInfo.push_back(rowInfo);
	}

	delete query;

	return docsInfo;
}
void DirectoryScannerThread::foundFile(const DocumentInfo &docInfo) { char labelStr[64]; if ((docInfo.getLocation().empty() == true) || (m_done == true)) { return; } // This identifies the source snprintf(labelStr, 64, "X-SOURCE%u", m_sourceId); m_signalFileFound(docInfo, labelStr, false); }
/// Updates a document's properties. bool XapianIndex::updateDocumentInfo(unsigned int docId, const DocumentInfo &docInfo) { bool updated = false; if (docId == 0) { return false; } XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false); if (pDatabase == NULL) { cerr << "Bad index " << m_databaseName << endl; return false; } try { Xapian::WritableDatabase *pIndex = pDatabase->writeLock(); if (pIndex != NULL) { Xapian::Document doc = pIndex->get_document(docId); Xapian::termcount termPos = 0; // Update the document data with the current language removeCommonTerms(doc); m_stemLanguage = Languages::toEnglish(docInfo.getLanguage()); addCommonTerms(docInfo, doc, termPos); setDocumentData(docInfo, doc, m_stemLanguage); pIndex->replace_document(docId, doc); updated = true; } } catch (const Xapian::Error &error) { cerr << "Couldn't update document properties: " << error.get_type() << ": " << error.get_msg() << endl; } catch (...) { cerr << "Couldn't update document properties, unknown exception occured" << endl; } pDatabase->unlock(); return updated; }
/// Prepares a thread that indexes a document, or updates it if docId is
/// non-zero. Robots directives are always ignored on updates; for new
/// documents the configured setting applies.
IndexingThread::IndexingThread(const DocumentInfo &docInfo, const string &labelName,
	unsigned int docId) :
	DownloadingThread(docInfo.getLocation(), false),
	m_docInfo(docInfo),
	m_labelName(labelName),
	m_docId(docId)
{
	m_indexLocation = PinotSettings::getInstance().m_indexLocation;

	// A non-zero document ID means this is an update
	m_update = (m_docId > 0);
	if (m_update == false)
	{
		m_ignoreRobotsDirectives = PinotSettings::getInstance().m_ignoreRobotsDirectives;
	}
	else
	{
		// Ignore robots directives on updates
		m_ignoreRobotsDirectives = true;
	}
}
/// Guesses a document's language and records it in the document's properties.
///
/// Language detection looks at no more than the first 2048 bytes of the
/// data. The first candidate other than "unknown" for which a Xapian stemmer
/// can be created is chosen. If there is none, the language is left empty.
///
/// @param pData       the document's data
/// @param dataLength  number of bytes available at pData
/// @param info        receives the chosen language
/// @return the chosen language, possibly empty
string XapianIndex::scanDocument(const char *pData, unsigned int dataLength,
	DocumentInfo &info)
{
	vector<string> candidates;
	string language;

	// Try to determine the document's language.
	// The length is capped with min(): max() asked the detector to read
	// 2048 bytes even when the buffer holds fewer.
	LanguageDetector lang;
	lang.guessLanguage(pData, min(dataLength, (unsigned int)2048), candidates);

	// See which of these languages is suitable for stemming
	for (vector<string>::iterator langIter = candidates.begin();
		langIter != candidates.end(); ++langIter)
	{
		if (*langIter == "unknown")
		{
			continue;
		}

		try
		{
			// Constructing the stemmer is the suitability test
			Xapian::Stem stemmer(*langIter);
		}
		catch (const Xapian::Error &error)
		{
			cerr << "Couldn't create stemmer: " << error.get_type()
				<< ": " << error.get_msg() << endl;
			continue;
		}

		language = *langIter;
		break;
	}
#ifdef DEBUG
	cout << "XapianIndex::scanDocument: language " << language << endl;
#endif

	// Update the document's properties
	info.setLanguage(language);

	return language;
}
/// Handles a file or directory reported by a scanner.
///
/// A file is queued for indexing. A directory is queued for crawling as a
/// monitored location that is not a source, named after the document's
/// location with its first 7 characters dropped, and crawling is started.
void DaemonState::on_message_filefound(DocumentInfo docInfo, bool isDirectory)
{
	if (isDirectory == true)
	{
		PinotSettings::IndexableLocation newLocation;

		// NOTE(review): skipping 7 characters presumably strips a "file://"
		// prefix - confirm every caller passes such a location
		newLocation.m_name = docInfo.getLocation().substr(7);
		newLocation.m_monitor = true;
		newLocation.m_isSource = false;
#ifdef DEBUG
		cout << "DaemonState::on_message_filefound: new directory " << newLocation.m_name << endl;
#endif

		// Queue this directory for crawling
		m_crawlQueue.push(newLocation);
		start_crawling();
	}
	else
	{
		queue_index(docInfo);
	}
}
void DaemonState::on_message_filefound(const DocumentInfo &docInfo, const string &sourceLabel, bool isDirectory) { if (isDirectory == false) { DocumentInfo docCopy(docInfo); set<string> labels; // Insert a label that identifies the source labels.insert(sourceLabel); docCopy.setLabels(labels); queue_index(docCopy); } else { string location(docInfo.getLocation()); crawlLocation(location.substr(7), false, true); #ifdef DEBUG cout << "DaemonState::on_message_filefound: new directory " << location.substr(7) << endl; #endif } }
void DBusServletThread::doWork(void) { XapianIndex index(PinotSettings::getInstance().m_daemonIndexLocation); DBusError error; const char *pSender = dbus_message_get_sender(m_pRequest); bool processedMessage = true, flushIndex = false; if ((m_pServer == NULL) || (m_pConnection == NULL) || (m_pRequest == NULL)) { return; } dbus_error_init(&error); #ifdef DEBUG if (pSender != NULL) { cout << "DBusServletThread::doWork: called by " << pSender << endl; } else { cout << "DBusServletThread::doWork: called by unknown sender" << endl; } #endif if (dbus_message_is_method_call(m_pRequest, "de.berlios.Pinot", "DeleteLabel") == TRUE) { char *pLabel = NULL; if (dbus_message_get_args(m_pRequest, &error, DBUS_TYPE_STRING, &pLabel, DBUS_TYPE_INVALID) == TRUE) { #ifdef DEBUG cout << "DBusServletThread::doWork: received DeleteLabel " << pLabel << endl; #endif // Delete the label flushIndex = index.deleteLabel(pLabel); // Prepare the reply m_pReply = newDBusReply(m_pRequest); if (m_pReply != NULL) { dbus_message_append_args(m_pReply, DBUS_TYPE_STRING, &pLabel, DBUS_TYPE_INVALID); } } } else if (dbus_message_is_method_call(m_pRequest, "de.berlios.Pinot", "GetDocumentInfo") == TRUE) { unsigned int docId = 0; if (dbus_message_get_args(m_pRequest, &error, DBUS_TYPE_UINT32, &docId, DBUS_TYPE_INVALID) == TRUE) { DocumentInfo docInfo; #ifdef DEBUG cout << "DBusServletThread::doWork: received GetDocumentInfo " << docId << endl; #endif if (index.getDocumentInfo(docId, docInfo) == true) { // Prepare the reply m_pReply = newDBusReply(m_pRequest); if (m_pReply != NULL) { string language(Languages::toEnglish(docInfo.getLanguage())); const char *pTitle = docInfo.getTitle().c_str(); const char *pLocation = docInfo.getLocation().c_str(); const char *pType = docInfo.getType().c_str(); const char *pLanguage = language.c_str(); dbus_message_append_args(m_pReply, DBUS_TYPE_STRING, &pTitle, DBUS_TYPE_STRING, &pLocation, DBUS_TYPE_STRING, &pType, DBUS_TYPE_STRING, &pLanguage, DBUS_TYPE_INVALID); } } else 
{ m_pReply = dbus_message_new_error(m_pRequest, "de.berlios.Pinot.GetDocumentInfo", "Unknown document"); } } } else if (dbus_message_is_method_call(m_pRequest, "de.berlios.Pinot", "GetDocumentLabels") == TRUE) { unsigned int docId = 0; if (dbus_message_get_args(m_pRequest, &error, DBUS_TYPE_UINT32, &docId, DBUS_TYPE_INVALID) == TRUE) { set<string> labels; #ifdef DEBUG cout << "DBusServletThread::doWork: received GetDocumentLabels " << docId << endl; #endif if (index.getDocumentLabels(docId, labels) == true) { m_pArray = g_ptr_array_new(); for (set<string>::const_iterator labelIter = labels.begin(); labelIter != labels.end(); ++labelIter) { string labelName(*labelIter); g_ptr_array_add(m_pArray, const_cast<char*>(labelName.c_str())); #ifdef DEBUG cout << "DBusServletThread::doWork: adding label " << m_pArray->len << " " << labelName << endl; #endif } // Prepare the reply m_pReply = newDBusReply(m_pRequest); if (m_pReply != NULL) { dbus_message_append_args(m_pReply, DBUS_TYPE_ARRAY, DBUS_TYPE_STRING, &m_pArray->pdata, m_pArray->len, DBUS_TYPE_INVALID); } } else { m_pReply = dbus_message_new_error(m_pRequest, "de.berlios.Pinot.GetDocumentLabels", " failed"); } } } else if (dbus_message_is_method_call(m_pRequest, "de.berlios.Pinot", "GetStatistics") == TRUE) { CrawlHistory history(PinotSettings::getInstance().m_historyDatabase); unsigned int crawledFilesCount = history.getItemsCount(CrawlHistory::CRAWLED); unsigned int docsCount = index.getDocumentsCount(); #ifdef DEBUG cout << "DBusServletThread::doWork: received GetStatistics" << endl; #endif // Prepare the reply m_pReply = newDBusReply(m_pRequest); if (m_pReply != NULL) { dbus_message_append_args(m_pReply, DBUS_TYPE_UINT32, &crawledFilesCount, DBUS_TYPE_UINT32, &docsCount, DBUS_TYPE_INVALID); } } else if (dbus_message_is_method_call(m_pRequest, "de.berlios.Pinot", "RenameLabel") == TRUE) { char *pOldLabel = NULL; char *pNewLabel = NULL; if (dbus_message_get_args(m_pRequest, &error, DBUS_TYPE_STRING, &pOldLabel, 
DBUS_TYPE_STRING, &pNewLabel, DBUS_TYPE_INVALID) == TRUE) { #ifdef DEBUG cout << "DBusServletThread::doWork: received RenameLabel " << pOldLabel << ", " << pNewLabel << endl; #endif // Rename the label flushIndex = index.renameLabel(pOldLabel, pNewLabel); // Prepare the reply m_pReply = newDBusReply(m_pRequest); if (m_pReply != NULL) { dbus_message_append_args(m_pReply, DBUS_TYPE_STRING, &pNewLabel, DBUS_TYPE_INVALID); } } } else if (dbus_message_is_method_call(m_pRequest, "de.berlios.Pinot", "SetDocumentInfo") == TRUE) { char *pTitle = NULL; char *pLocation = NULL; char *pType = NULL; char *pLanguage = NULL; unsigned int docId = 0; if (dbus_message_get_args(m_pRequest, &error, DBUS_TYPE_UINT32, &docId, DBUS_TYPE_STRING, &pTitle, DBUS_TYPE_STRING, &pLocation, DBUS_TYPE_STRING, &pType, DBUS_TYPE_STRING, &pLanguage, DBUS_TYPE_INVALID) == TRUE) { DocumentInfo docInfo(pTitle, pLocation, pType, ((pLanguage != NULL) ? Languages::toLocale(pLanguage) : "")); #ifdef DEBUG cout << "DBusServletThread::doWork: received SetDocumentInfo " << docId << ", " << pTitle << ", " << pLocation << ", " << pType << ", " << pLanguage << endl; #endif // Update the document info flushIndex = index.updateDocumentInfo(docId, docInfo); // Prepare the reply m_pReply = newDBusReply(m_pRequest); if (m_pReply != NULL) { dbus_message_append_args(m_pReply, DBUS_TYPE_UINT32, &docId, DBUS_TYPE_INVALID); } } } else if (dbus_message_is_method_call(m_pRequest, "de.berlios.Pinot", "SetDocumentLabels") == TRUE) { char **ppLabels = NULL; dbus_uint32_t labelsCount = 0; unsigned int docId = 0; gboolean resetLabels = TRUE; if (dbus_message_get_args(m_pRequest, &error, DBUS_TYPE_UINT32, &docId, DBUS_TYPE_ARRAY, DBUS_TYPE_STRING, &ppLabels, &labelsCount, DBUS_TYPE_BOOLEAN, &resetLabels, DBUS_TYPE_INVALID) == TRUE) { set<string> labels; for (dbus_uint32_t labelIndex = 0; labelIndex < labelsCount; ++labelIndex) { if (ppLabels[labelIndex] == NULL) { break; } labels.insert(ppLabels[labelIndex]); } #ifdef DEBUG cout 
<< "DBusServletThread::doWork: received SetDocumentLabels on ID " << docId << ", " << labelsCount << " labels" << ", " << resetLabels << endl; #endif // Set labels flushIndex = index.setDocumentLabels(docId, labels, ((resetLabels == TRUE) ? true : false)); // Free container types g_strfreev(ppLabels); // Prepare the reply m_pReply = newDBusReply(m_pRequest); if (m_pReply != NULL) { dbus_message_append_args(m_pReply, DBUS_TYPE_UINT32, &docId, DBUS_TYPE_INVALID); } } } else if (dbus_message_is_method_call(m_pRequest, "de.berlios.Pinot", "SetDocumentsLabels") == TRUE) { char **ppDocIds = NULL; char **ppLabels = NULL; dbus_uint32_t idsCount = 0; dbus_uint32_t labelsCount = 0; gboolean resetLabels = TRUE; if (dbus_message_get_args(m_pRequest, &error, DBUS_TYPE_ARRAY, DBUS_TYPE_STRING, &ppDocIds, &idsCount, DBUS_TYPE_ARRAY, DBUS_TYPE_STRING, &ppLabels, &labelsCount, DBUS_TYPE_BOOLEAN, &resetLabels, DBUS_TYPE_INVALID) == TRUE) { set<unsigned int> docIds; set<string> labels; for (dbus_uint32_t idIndex = 0; idIndex < idsCount; ++idIndex) { if (ppDocIds[idIndex] == NULL) { break; } docIds.insert((unsigned int)atoi(ppDocIds[idIndex])); } for (dbus_uint32_t labelIndex = 0; labelIndex < labelsCount; ++labelIndex) { if (ppLabels[labelIndex] == NULL) { break; } labels.insert(ppLabels[labelIndex]); } #ifdef DEBUG cout << "DBusServletThread::doWork: received SetDocumentLabels on " << docIds.size() << " IDs, " << labelsCount << " labels" << ", " << resetLabels << endl; #endif // Set labels flushIndex = index.setDocumentsLabels(docIds, labels, ((resetLabels == TRUE) ? 
true : false)); // Free container types g_strfreev(ppDocIds); g_strfreev(ppLabels); // Prepare the reply m_pReply = newDBusReply(m_pRequest); if (m_pReply != NULL) { dbus_message_append_args(m_pReply, DBUS_TYPE_BOOLEAN, &flushIndex, DBUS_TYPE_INVALID); } } } else if (dbus_message_is_method_call(m_pRequest, "de.berlios.Pinot", "SimpleQuery") == TRUE) { char *pSearchText = NULL; dbus_uint32_t maxHits = 0; if (dbus_message_get_args(m_pRequest, &error, DBUS_TYPE_STRING, &pSearchText, DBUS_TYPE_UINT32, &maxHits, DBUS_TYPE_INVALID) == TRUE) { XapianEngine engine(PinotSettings::getInstance().m_daemonIndexLocation); bool replyWithError = true; #ifdef DEBUG cout << "DBusServletThread::doWork: received SimpleQuery " << pSearchText << ", " << maxHits << endl; #endif if (pSearchText != NULL) { QueryProperties queryProps("DBUS", pSearchText); // Run the query engine.setMaxResultsCount(maxHits); if (engine.runQuery(queryProps) == true) { const vector<Result> &resultsList = engine.getResults(); vector<string> docIds; m_pArray = g_ptr_array_new(); for (vector<Result>::const_iterator resultIter = resultsList.begin(); resultIter != resultsList.end(); ++resultIter) { // We only need the document ID unsigned int docId = index.hasDocument(resultIter->getLocation()); if (docId > 0) { char docIdStr[64]; snprintf(docIdStr, 64, "%u", docId); docIds.push_back(docIdStr); } } for (vector<string>::const_iterator docIter = docIds.begin(); docIter != docIds.end(); ++docIter) { #ifdef DEBUG cout << "DBusServletThread::doWork: adding result " << m_pArray->len << " " << *docIter << endl; #endif g_ptr_array_add(m_pArray, const_cast<char*>(docIter->c_str())); } // Prepare the reply m_pReply = newDBusReply(m_pRequest); if (m_pReply != NULL) { dbus_message_append_args(m_pReply, DBUS_TYPE_ARRAY, DBUS_TYPE_STRING, &m_pArray->pdata, m_pArray->len, DBUS_TYPE_INVALID); replyWithError = false; } } } if (replyWithError == true) { m_pReply = dbus_message_new_error(m_pRequest, "de.berlios.Pinot.SimpleQuery", 
"Query failed"); } } } else if (dbus_message_is_method_call(m_pRequest, "de.berlios.Pinot", "Stop") == TRUE) { if (dbus_message_get_args(m_pRequest, &error, DBUS_TYPE_INVALID) == TRUE) { int exitStatus = EXIT_SUCCESS; #ifdef DEBUG cout << "DBusServletThread::doWork: received Stop" << endl; #endif // Prepare the reply m_pReply = newDBusReply(m_pRequest); if (m_pReply != NULL) { dbus_message_append_args(m_pReply, DBUS_TYPE_INT32, &exitStatus, DBUS_TYPE_INVALID); } m_mustQuit = true; } } else if (dbus_message_is_method_call(m_pRequest, "de.berlios.Pinot", "UpdateDocument") == TRUE) { unsigned int docId = 0; if (dbus_message_get_args(m_pRequest, &error, DBUS_TYPE_UINT32, &docId, DBUS_TYPE_INVALID) == TRUE) { DocumentInfo docInfo; #ifdef DEBUG cout << "DBusServletThread::doWork: received UpdateDocument " << docId << endl; #endif if (index.getDocumentInfo(docId, docInfo) == true) { // Update document m_pServer->queue_index(docInfo); } // Prepare the reply m_pReply = newDBusReply(m_pRequest); if (m_pReply != NULL) { dbus_message_append_args(m_pReply, DBUS_TYPE_UINT32, &docId, DBUS_TYPE_INVALID); } } } else { #ifdef DEBUG cout << "DBusServletThread::doWork: foreign message for/from " << dbus_message_get_interface(m_pRequest) << " " << dbus_message_get_member(m_pRequest) << endl; #endif processedMessage = false; } // Did an error occur ? if (error.message != NULL) { #ifdef DEBUG cout << "DBusServletThread::doWork: error occured: " << error.message << endl; #endif // Use the error message as reply m_pReply = dbus_message_new_error(m_pRequest, error.name, error.message); } dbus_error_free(&error); if (flushIndex == true) { // Flush now for the sake of the client application index.flush(); } // Send a reply ? if ((m_pConnection != NULL) && (m_pReply != NULL)) { dbus_connection_send(m_pConnection, m_pReply, NULL); dbus_connection_flush(m_pConnection); #ifdef DEBUG cout << "DBusServletThread::doWork: sent reply" << endl; #endif dbus_message_unref(m_pReply); } }
bool DirectoryScannerThread::scanEntry(const string &entryName, CrawlHistory &history) { CrawlHistory::CrawlStatus status = CrawlHistory::UNKNOWN; time_t itemDate; struct stat fileStat; int statSuccess = 0; bool scanSuccess = true; if (entryName.empty() == true) { #ifdef DEBUG cout << "DirectoryScannerThread::scanEntry: no name" << endl; #endif return false; } // Skip . .. and dotfiles Url urlObj("file://" + entryName); if (urlObj.getFile()[0] == '.') { #ifdef DEBUG cout << "DirectoryScannerThread::scanEntry: skipped dotfile " << urlObj.getFile() << endl; #endif return false; } if (m_followSymLinks == false) { statSuccess = lstat(entryName.c_str(), &fileStat); } else { // Stat the files pointed to by symlinks statSuccess = stat(entryName.c_str(), &fileStat); } // Is this item in the database already ? bool itemExists = history.hasItem("file://" + entryName, status, itemDate); if (statSuccess == -1) { #ifdef DEBUG cout << "DirectoryScannerThread::scanEntry: stat failed with error " << errno << " " << strerror(errno) << endl; #endif scanSuccess = false; } // Is it a file or a directory ? else if (S_ISLNK(fileStat.st_mode)) { // This won't happen when m_followSymLinks is true #ifdef DEBUG cout << "DirectoryScannerThread::scanEntry: skipped symlink" << endl; #endif return false; } else if (S_ISREG(fileStat.st_mode)) { DocumentInfo docInfo; bool reportFile = false; docInfo.setLocation("file://" + entryName); // Is this file blacklisted ? 
// We have to check early so that if necessary the file's status stays at CRAWLING // and it is removed from the index at the end of this crawl if (PinotSettings::getInstance().isBlackListed(entryName) == false) { if (itemExists == false) { // Record it history.insertItem(docInfo.getLocation(), CrawlHistory::CRAWLED, m_sourceId, fileStat.st_mtime); #ifdef DEBUG cout << "DirectoryScannerThread::scanEntry: reporting new file " << entryName << endl; #endif reportFile = true; } else { // Update the record history.updateItem(docInfo.getLocation(), CrawlHistory::CRAWLED, fileStat.st_mtime); // Was it last crawled after it was modified ? if (itemDate < fileStat.st_mtime) { #ifdef DEBUG cout << "DirectoryScannerThread::scanEntry: reporting modified file " << entryName << endl; #endif // No, crawl and index it again reportFile = true; } } } if (reportFile == true) { Url urlObj(docInfo.getLocation()); docInfo.setTitle(urlObj.getFile()); docInfo.setTimestamp(TimeConverter::toTimestamp(fileStat.st_mtime)); docInfo.setSize(fileStat.st_size); foundFile(docInfo); } } else if (S_ISDIR(fileStat.st_mode)) { // Can we scan this directory ? if (((m_maxLevel == 0) || (m_currentLevel < m_maxLevel)) && (PinotSettings::getInstance().isBlackListed(entryName) == false)) { ++m_currentLevel; // Open the directory DIR *pDir = opendir(entryName.c_str()); if (pDir != NULL) { #ifdef DEBUG cout << "DirectoryScannerThread::scanEntry: entering " << entryName << endl; #endif if (m_pMonitor != NULL) { // Monitor first so that we don't miss events m_pMonitor->addLocation(entryName, true); } // Iterate through this directory's entries struct dirent *pDirEntry = readdir(pDir); while ((m_done == false) && (pDirEntry != NULL)) { char *pEntryName = pDirEntry->d_name; // Skip . .. 
and dotfiles if ((pEntryName != NULL) && (pEntryName[0] != '.')) { string subEntryName(entryName); if (entryName[entryName.length() - 1] != '/') { subEntryName += "/"; } subEntryName += pEntryName; // Scan this entry if (scanEntry(subEntryName, history) == false) { #ifdef DEBUG cout << "DirectoryScannerThread::scanEntry: failed to open " << subEntryName << endl; #endif } } // Next entry pDirEntry = readdir(pDir); } #ifdef DEBUG cout << "DirectoryScannerThread::scanEntry: done with " << entryName << endl; #endif // Close the directory closedir(pDir); --m_currentLevel; } else { #ifdef DEBUG cout << "DirectoryScannerThread::scanEntry: opendir failed with error " << errno << " " << strerror(errno) << endl; #endif scanSuccess = false; } } } else { #ifdef DEBUG cout << "DirectoryScannerThread::scanEntry: unknown entry type" << endl; #endif scanSuccess = false; } if (scanSuccess == false) { time_t timeNow = time(NULL); // Record this error if (itemExists == false) { history.insertItem("file://" + entryName, CrawlHistory::ERROR, m_sourceId, timeNow); } else { history.updateItem("file://" + entryName, CrawlHistory::ERROR, timeNow); } } return scanSuccess; }
void IndexBrowserThread::doWork(void) { set<unsigned int> docIDList; set<string> docLabels; unsigned int numDocs = 0; const map<string, string> &indexesMap = PinotSettings::getInstance().getIndexes(); map<string, string>::const_iterator mapIter = indexesMap.find(m_indexName); if (mapIter == indexesMap.end()) { m_status = _("Index"); m_status += " "; m_status += m_indexName; m_status += " "; m_status += _("doesn't exist"); return; } // Get the index at that location IndexInterface *pIndex = PinotSettings::getInstance().getIndex(mapIter->second); if ((pIndex == NULL) || (pIndex->isGood() == false)) { m_status = _("Index error on"); m_status += " "; m_status += mapIter->second; if (pIndex != NULL) { delete pIndex; } return; } m_indexDocsCount = pIndex->getDocumentsCount(m_labelName); if (m_indexDocsCount == 0) { #ifdef DEBUG cout << "IndexBrowserThread::doWork: no documents" << endl; #endif return; } #ifdef DEBUG cout << "IndexBrowserThread::doWork: " << m_maxDocsCount << " off " << m_indexDocsCount << " documents to browse, starting at " << m_startDoc << endl; #endif if (m_labelName.empty() == true) { pIndex->listDocuments(docIDList, m_maxDocsCount, m_startDoc); } else { pIndex->listDocumentsWithLabel(m_labelName, docIDList, m_maxDocsCount, m_startDoc); } m_documentsList.reserve(m_maxDocsCount); for (set<unsigned int>::iterator iter = docIDList.begin(); iter != docIDList.end(); ++iter) { if (m_done == true) { break; } // Get the document ID unsigned int docId = (*iter); // ...and the document URL string url = XapianDatabase::buildUrl(mapIter->second, docId); DocumentInfo docInfo; if (pIndex->getDocumentInfo(docId, docInfo) == true) { string type = docInfo.getType(); if (type.empty() == true) { type = "text/html"; } IndexedDocument indexedDoc(docInfo.getTitle(), url, docInfo.getLocation(), type, docInfo.getLanguage()); indexedDoc.setTimestamp(docInfo.getTimestamp()); indexedDoc.setSize(docInfo.getSize()); // Insert that document m_documentsList.push_back(indexedDoc); 
++numDocs; } #ifdef DEBUG else cout << "IndexBrowserThread::doWork: couldn't retrieve document " << docId << endl; #endif } delete pIndex; }
bool XapianEngine::queryDatabase(Xapian::Database *pIndex, Xapian::Query &query, const string &stemLanguage, unsigned int startDoc, const QueryProperties &queryProps) { Timer timer; unsigned int maxResultsCount = queryProps.getMaximumResultsCount(); bool completedQuery = false; if (pIndex == NULL) { return false; } // Start an enquire session on the database Xapian::Enquire enquire(*pIndex); timer.start(); try { AbstractGenerator abstractGen(pIndex, 50); vector<string> seedTerms; // Give the query object to the enquire session enquire.set_query(query); // How should results be sorted ? if (queryProps.getSortOrder() == QueryProperties::RELEVANCE) { // By relevance, only enquire.set_sort_by_relevance_then_value(4); #ifdef DEBUG cout << "XapianEngine::queryDatabase: sorting by relevance first" << endl; #endif } else if (queryProps.getSortOrder() == QueryProperties::DATE) { // By date, and then by relevance enquire.set_sort_by_value_then_relevance(4); #ifdef DEBUG cout << "XapianEngine::queryDatabase: sorting by date and time first" << endl; #endif } // Get the top results of the query Xapian::MSet matches = enquire.get_mset(startDoc, maxResultsCount, (2 * maxResultsCount) + 1); m_resultsCountEstimate = matches.get_matches_estimated(); if (matches.empty() == false) { #ifdef DEBUG cout << "XapianEngine::queryDatabase: found " << matches.size() << "/" << maxResultsCount << " results found from position " << startDoc << endl; cout << "XapianEngine::queryDatabase: estimated " << matches.get_matches_lower_bound() << "/" << m_resultsCountEstimate << "/" << matches.get_matches_upper_bound() << endl; #endif // Get the results for (Xapian::MSetIterator mIter = matches.begin(); mIter != matches.end(); ++mIter) { Xapian::docid docId = *mIter; Xapian::Document doc(mIter.get_document()); // What terms did this document match ? 
seedTerms.clear(); for (Xapian::TermIterator termIter = enquire.get_matching_terms_begin(docId); termIter != enquire.get_matching_terms_end(docId); ++termIter) { char firstChar = (*termIter)[0]; if (isupper(((int)firstChar)) == 0) { seedTerms.push_back(*termIter); #ifdef DEBUG cout << "XapianEngine::queryDatabase: matched term " << *termIter << endl; #endif } else if (firstChar == 'Z') { string stemmed((*termIter).substr(1)); string::size_type stemmedLen = stemmed.length(); // Which of this document's terms stem to this ? Xapian::TermIterator docTermIter = pIndex->termlist_begin(docId); if (docTermIter != pIndex->termlist_end(docId)) { for (docTermIter.skip_to(stemmed); docTermIter != pIndex->termlist_end(docId); ++docTermIter) { // Is this a potential unstem ? if (strncasecmp((*docTermIter).c_str(), stemmed.c_str(), stemmedLen) != 0) { // No, no point looking at the next terms break; } #ifdef DEBUG cout << "XapianEngine::queryDatabase: matched unstem " << *docTermIter << endl; #endif // FIXME: check this term stems to stemmed ! seedTerms.push_back(*docTermIter); } } } } DocumentInfo thisResult; thisResult.setExtract(abstractGen.generateAbstract(docId, seedTerms)); thisResult.setScore((float)mIter.get_percent()); #ifdef DEBUG cout << "XapianEngine::queryDatabase: found document ID " << docId << endl; #endif XapianDatabase::recordToProps(doc.get_data(), &thisResult); // XapianDatabase stored the language in English thisResult.setLanguage(Languages::toLocale(thisResult.getLanguage())); string url(thisResult.getLocation()); if (url.empty() == true) { // Hmmm this shouldn't be empty... 
// Use this instead, even though the document isn't cached in the index thisResult.setLocation(XapianDatabase::buildUrl(m_databaseName, docId)); } // We don't know the index ID, just the document ID thisResult.setIsIndexed(0, docId); // Add this result m_resultsList.push_back(thisResult); } } completedQuery = true; } catch (const Xapian::Error &error) { cerr << "Couldn't run query: " << error.get_type() << ": " << error.get_msg() << endl; } cout << "Ran query \"" << queryProps.getFreeQuery() << "\" in " << timer.stop() << " ms" << endl; try { m_expandTerms.clear(); // Expand the query ? if (m_expandDocuments.empty() == false) { Xapian::RSet expandDocs; for (set<string>::const_iterator docIter = m_expandDocuments.begin(); docIter != m_expandDocuments.end(); ++docIter) { string uniqueTerm(string("U") + XapianDatabase::limitTermLength(Url::escapeUrl(Url::canonicalizeUrl(*docIter)), true)); // Only one document may have this term Xapian::PostingIterator postingIter = pIndex->postlist_begin(uniqueTerm); if (postingIter != pIndex->postlist_end(uniqueTerm)) { expandDocs.add_document(*postingIter); } } #ifdef DEBUG cout << "XapianEngine::queryDatabase: expand from " << expandDocs.size() << " documents" << endl; #endif // Get 10 non-prefixed terms string allowedPrefixes("RS"); TermDecider expandDecider(pIndex, ((stemLanguage.empty() == true) ? NULL : &m_stemmer), FileStopper::get_stopper(Languages::toCode(stemLanguage)), allowedPrefixes, query); Xapian::ESet expandTerms = enquire.get_eset(10, expandDocs, &expandDecider); #ifdef DEBUG cout << "XapianEngine::queryDatabase: " << expandTerms.size() << " expand terms" << endl; #endif for (Xapian::ESetIterator termIter = expandTerms.begin(); termIter != expandTerms.end(); ++termIter) { string expandTerm(*termIter); char firstChar = expandTerm[0]; // Is this prefixed ? 
if (allowedPrefixes.find(firstChar) != string::npos) { expandTerm.erase(0, 1); } m_expandTerms.insert(expandTerm); } } } catch (const Xapian::Error &error) { cerr << "Couldn't run query: " << error.get_type() << ": " << error.get_msg() << endl; } // Be tolerant of errors as long as we got some results if ((completedQuery == true) || (m_resultsList.empty() == false)) { return true; } return false; }