예제 #1
0
void QueryingThread::doWork(void)
{
	PinotSettings &settings = PinotSettings::getInstance();

	// Get the SearchEngine
	SearchEngineInterface *pEngine = SearchEngineFactory::getSearchEngine(m_engineName, m_engineOption);
	if (pEngine == NULL)
	{
		m_status = _("Couldn't create search engine");
		m_status += " ";
		m_status += m_engineDisplayableName;
		return;
	}

	// Set the maximum number of results
	pEngine->setMaxResultsCount(m_queryProps.getMaximumResultsCount());

	// Set up the proxy
	DownloaderInterface *pDownloader = pEngine->getDownloader();
	if ((pDownloader != NULL) &&
		(settings.m_proxyEnabled == true) &&
		(settings.m_proxyAddress.empty() == false))
	{
		char portStr[64];

		pDownloader->setSetting("proxyaddress", settings.m_proxyAddress);
		snprintf(portStr, 64, "%u", settings.m_proxyPort);
		pDownloader->setSetting("proxyport", portStr);
		pDownloader->setSetting("proxytype", settings.m_proxyType);
	}

	// Run the query
	if (pEngine->runQuery(m_queryProps) == false)
	{
		m_status = _("Couldn't run query on search engine");
		m_status += " ";
		m_status += m_engineDisplayableName;
	}
	else
	{
		IndexInterface *pDocsIndex = NULL;
		IndexInterface *pDaemonIndex = NULL;
		const vector<Result> &resultsList = pEngine->getResults();
		unsigned int indexId = 0;
		bool isIndexQuery = false;

		m_resultsList.clear();
		m_resultsList.reserve(resultsList.size());
		m_resultsCharset = pEngine->getResultsCharset();

		// Are we querying an index ?
		if (m_engineName == "xapian")
		{
			// Internal index ?
			if (m_engineOption == settings.m_docsIndexLocation)
			{
				indexId = settings.getIndexId(_("My Web Pages"));
				isIndexQuery = true;
			}
			else if (m_engineOption == settings.m_daemonIndexLocation)
			{
				indexId = settings.getIndexId(_("My Documents"));
				isIndexQuery = true;
			}
		}

		// Will we have to query internal indices ?
		if (isIndexQuery == false)
		{
			pDocsIndex = settings.getIndex(settings.m_docsIndexLocation);
			pDaemonIndex = settings.getIndex(settings.m_daemonIndexLocation);
		}

		// Copy the results list
		for (vector<Result>::const_iterator resultIter = resultsList.begin();
			resultIter != resultsList.end(); ++resultIter)
		{
			Result current(*resultIter);
			string title(_("No title"));
			string location(current.getLocation());
			string language(current.getLanguage());
			unsigned int docId = 0;

			// The title may contain formatting
			if (current.getTitle().empty() == false)
			{
				title = FilterUtils::stripMarkup(current.getTitle());
			}
			current.setTitle(title);
#ifdef DEBUG
			cout << "QueryingThread::doWork: title is " << title << endl;
#endif

			// Use the query's language if the result's is unknown
			if (language.empty() == true)
			{
				language = m_queryProps.getLanguage();
			}
			current.setLanguage(language);

			if (isIndexQuery == true)
			{
				unsigned int tmpId = 0;

				// The index engine should have set this
				docId = current.getIsIndexed(tmpId);
			}

			// Is this in one of the indexes ?
			if ((pDocsIndex != NULL) &&
				(pDocsIndex->isGood() == true))
			{
				docId = pDocsIndex->hasDocument(location);
				if (docId > 0)
				{
					indexId = settings.getIndexId(_("My Web Pages"));
				}
			}
			if ((pDaemonIndex != NULL) &&
				(pDaemonIndex->isGood() == true) &&
				(docId == 0))
			{
				docId = pDaemonIndex->hasDocument(location);
				if (docId > 0)
				{
					indexId = settings.getIndexId(_("My Documents"));
				}
			}

			if (docId > 0)
			{
				current.setIsIndexed(indexId, docId);
#ifdef DEBUG
				cout << "QueryingThread::doWork: found in index " << indexId << endl;
#endif
			}
#ifdef DEBUG
			else cout << "QueryingThread::doWork: not found in any index" << endl;
#endif

			m_resultsList.push_back(current);
		}

		if (pDocsIndex != NULL)
		{
			delete pDocsIndex;
		}
		if (pDaemonIndex != NULL)
		{
			delete pDaemonIndex;
		}
	}

	delete pEngine;
}
예제 #2
0
void EngineQueryThread::doWork(void)
{
	PinotSettings &settings = PinotSettings::getInstance();

	// Get the SearchEngine
	SearchEngineInterface *pEngine = SearchEngineFactory::getSearchEngine(m_engineName, m_engineOption);
	if (pEngine == NULL)
	{
		m_errorNum = UNKNOWN_ENGINE;
		m_errorParam = m_engineDisplayableName;
		return;
	}

	// Set up the proxy
	DownloaderInterface *pDownloader = pEngine->getDownloader();
	if ((pDownloader != NULL) &&
		(settings.m_proxyEnabled == true) &&
		(settings.m_proxyAddress.empty() == false))
	{
		char portStr[64];

		pDownloader->setSetting("proxyaddress", settings.m_proxyAddress);
		snprintf(portStr, 64, "%u", settings.m_proxyPort);
		pDownloader->setSetting("proxyport", portStr);
		pDownloader->setSetting("proxytype", settings.m_proxyType);
	}

	if (m_listingIndex == false)
	{
		pEngine->setLimitSet(m_limitToDocsSet);
	}

	// Run the query
	pEngine->setDefaultOperator(SearchEngineInterface::DEFAULT_OP_AND);
	if (pEngine->runQuery(m_queryProps, m_startDoc) == false)
	{
		m_errorNum = QUERY_FAILED;
		m_errorParam = m_engineDisplayableName;
	}
	else
	{
		const vector<DocumentInfo> &resultsList = pEngine->getResults();

		m_documentsList.clear();
		m_documentsList.reserve(resultsList.size());
		m_documentsCount = pEngine->getResultsCountEstimate();
#ifdef DEBUG
		cout << "EngineQueryThread::doWork: " << resultsList.size() << " off " << m_documentsCount
			<< " results to process, starting at position " << m_startDoc << endl;
#endif

		m_resultsCharset = pEngine->getResultsCharset();
		if (m_listingIndex == false)
		{
			processResults(resultsList);
		}
		else
		{
			processResults(resultsList,
				PinotSettings::getInstance().getIndexIdByName(m_engineDisplayableName));
		}

		// Any spelling correction ?
		string correctedFreeQuery(pEngine->getSpellingCorrection());
		if (correctedFreeQuery.empty() == false)
		{
			m_correctedSpelling = true;
			m_queryProps.setFreeQuery(correctedFreeQuery);
		}
	}

	delete pEngine;
}
예제 #3
0
// pinot-search entry point.
//
// After the options, exactly three arguments are expected: the engine type,
// the engine option, and the query. For the default query type the third
// argument is the query text itself; with -q or -u it is the name of a file
// the query is loaded from.
//
// Short options handled here (long equivalents come from g_longOptions):
//   -a ADDRESS  proxy address          -p PORT  proxy port
//   -t TYPE     proxy type             -m COUNT maximum number of results
//   -c FILE     export results as CSV  -x FILE  export results as XML
//   -q          query type XESAM_QL    -u       query type XESAM_UL
//   -h          print help and exit    -v       print version and exit
// The proxy is only configured when both an address and a port were given.
// Either export option suppresses printing of results to stdout.
//
// Returns EXIT_FAILURE on an unknown option, a wrong number of arguments, an
// engine that can't be obtained or a query file that can't be loaded, and
// EXIT_SUCCESS otherwise.
// NOTE(review): a query that fails to run, or that returns no results, is
// reported on stderr but still exits with EXIT_SUCCESS; this is kept as is
// since scripts may rely on it - confirm whether that is intended.
int main(int argc, char **argv)
{
	QueryProperties::QueryType queryType = QueryProperties::XAPIAN_QP;
	string engineType, option, csvExport, xmlExport, proxyAddress, proxyPort, proxyType;
	unsigned int maxResultsCount = 10;
	int longOptionIndex = 0;
	int optionChar = 0;
	bool printResults = true;

	// Look at the options
	while ((optionChar = getopt_long(argc, argv, "c:hm:a:p:qt:uvx:", g_longOptions, &longOptionIndex)) != -1)
	{
		switch (optionChar)
		{
			case 'a':
				if (optarg != NULL)
				{
					proxyAddress = optarg;
				}
				break;
			case 'c':
				if (optarg != NULL)
				{
					csvExport = optarg;
					printResults = false;
				}
				break;
			case 'h':
				printHelp();
				return EXIT_SUCCESS;
			case 'm':
				if (optarg != NULL)
				{
					maxResultsCount = (unsigned int )atoi(optarg);
				}
				break;
			case 'p':
				if (optarg != NULL)
				{
					proxyPort = optarg;
				}
				break;
			case 'q':
				queryType = QueryProperties::XESAM_QL;
				break;
			case 't':
				if (optarg != NULL)
				{
					proxyType = optarg;
				}
				break;
			case 'u':
				queryType = QueryProperties::XESAM_UL;
				break;
			case 'v':
				cout << "pinot-search - " << PACKAGE_STRING << "\n\n"
					<< "This is free software.  You may redistribute copies of it under the terms of\n"
					<< "the GNU General Public License <http://www.gnu.org/licenses/old-licenses/gpl-2.0.html>.\n"
					<< "There is NO WARRANTY, to the extent permitted by law." << endl;
				return EXIT_SUCCESS;
			case 'x':
				if (optarg != NULL)
				{
					xmlExport = optarg;
					printResults = false;
				}
				break;
			default:
				return EXIT_FAILURE;
		}
	}

	// No argument at all: show the help rather than an error
	if (argc == 1)
	{
		printHelp();
		return EXIT_SUCCESS;
	}

	// Exactly three non-option arguments must remain
	if ((argc < 4) ||
		(argc - optind != 3))
	{
		cerr << "Not enough parameters" << endl;
		return EXIT_FAILURE;
	}

	MIMEScanner::initialize();
	DownloaderInterface::initialize();

	engineType = argv[optind];
	option = argv[optind + 1];
	char *pQueryInput = argv[optind + 2];

	// Which SearchEngine ?
	SearchEngineInterface *pEngine = SearchEngineFactory::getSearchEngine(engineType, option);
	if (pEngine == NULL)
	{
		cerr << "Couldn't obtain search engine instance" << endl;

		DownloaderInterface::shutdown();
		MIMEScanner::shutdown();

		return EXIT_FAILURE;
	}

	// Set up the proxy
	DownloaderInterface *pDownloader = pEngine->getDownloader();
	if ((pDownloader != NULL) &&
		(proxyAddress.empty() == false) &&
		(proxyPort.empty() == false))
	{
		pDownloader->setSetting("proxyaddress", proxyAddress);
		pDownloader->setSetting("proxyport", proxyPort);
		pDownloader->setSetting("proxytype", proxyType);
	}

	// Set the query
	QueryProperties queryProps("pinot-search", "", queryType);
	if (queryType == QueryProperties::XAPIAN_QP)
	{
		queryProps.setFreeQuery(pQueryInput);
	}
	else
	{
		string fileContents;

		// Load the query from file
		if (loadFile(pQueryInput, fileContents) == false)
		{
			cerr << "Couldn't load query from file " << pQueryInput << endl;

			// The engine already exists at this point: release it and clean
			// up in the same order as the normal exit path does
			delete pEngine;

			XapianDatabaseFactory::closeAll();
			DownloaderInterface::shutdown();
			MIMEScanner::shutdown();

			return EXIT_FAILURE;
		}

		queryProps.setFreeQuery(fileContents);
	}

	queryProps.setMaximumResultsCount(maxResultsCount);
	if (pEngine->runQuery(queryProps) == true)
	{
		// Try getting a list of links; bound by reference to avoid a copy
		const vector<DocumentInfo> &resultsList = pEngine->getResults();
		if (resultsList.empty() == false)
		{
			if (printResults == true)
			{
				unsigned int count = 0;

				cout << "Matching documents are :" << endl;

				vector<DocumentInfo>::const_iterator resultIter = resultsList.begin();
				while (resultIter != resultsList.end())
				{
					string rawUrl(resultIter->getLocation());
					Url thisUrl(rawUrl);

					cout << count << " Raw URL  : '" << rawUrl << "'"<< endl;
					cout << count << " Protocol : " << thisUrl.getProtocol() << endl;
					cout << count << " Host     : " << thisUrl.getHost() << endl;
					cout << count << " Location : " << thisUrl.getLocation() << "/" << thisUrl.getFile() << endl;
					cout << count << " Title    : " << resultIter->getTitle() << endl;
					cout << count << " Type     : " << resultIter->getType() << endl;
					cout << count << " Language : " << resultIter->getLanguage() << endl;
					cout << count << " Extract  : " << resultIter->getExtract() << endl;
					cout << count << " Score    : " << resultIter->getScore() << endl;
					count++;

					// Next
					++resultIter;
				}
			}
			else
			{
				// Both exports may be requested in the same run
				string engineName(SearchEngineFactory::getSearchEngineName(engineType, option));

				if (csvExport.empty() == false)
				{
					CSVExporter exporter(csvExport, queryProps);

					exporter.exportResults(engineName, maxResultsCount, resultsList);
				}

				if (xmlExport.empty() == false)
				{
					OpenSearchExporter exporter(xmlExport, queryProps);

					exporter.exportResults(engineName, maxResultsCount, resultsList);
				}
			}
		}
		else
		{
			cerr << "Couldn't get a results list !" << endl;
		}
	}
	else
	{
		cerr << "Couldn't run query on search engine " << engineType << endl;
	}

	delete pEngine;

	XapianDatabaseFactory::closeAll();
	DownloaderInterface::shutdown();
	MIMEScanner::shutdown();

	return EXIT_SUCCESS;
}
예제 #4
0
void QueryingThread::doWork(void)
{
	PinotSettings &settings = PinotSettings::getInstance();

	// Get the SearchEngine
	SearchEngineInterface *pEngine = SearchEngineFactory::getSearchEngine(m_engineName, m_engineOption);
	if (pEngine == NULL)
	{
		m_status = _("Couldn't create search engine");
		m_status += " ";
		m_status += m_engineDisplayableName;
		return;
	}

	// Set up the proxy
	DownloaderInterface *pDownloader = pEngine->getDownloader();
	if ((pDownloader != NULL) &&
		(settings.m_proxyEnabled == true) &&
		(settings.m_proxyAddress.empty() == false))
	{
		char portStr[64];

		pDownloader->setSetting("proxyaddress", settings.m_proxyAddress);
		snprintf(portStr, 64, "%u", settings.m_proxyPort);
		pDownloader->setSetting("proxyport", portStr);
		pDownloader->setSetting("proxytype", settings.m_proxyType);
	}

	// Run the query
	if (pEngine->runQuery(m_queryProps, m_startDoc) == false)
	{
		m_status = _("Couldn't run query on search engine");
		m_status += " ";
		m_status += m_engineDisplayableName;
	}
	else
	{
		const vector<DocumentInfo> &resultsList = pEngine->getResults();

		m_documentsList.clear();
		m_documentsList.reserve(resultsList.size());
		m_documentsCount = pEngine->getResultsCountEstimate();
#ifdef DEBUG
		cout << "QueryingThread::doWork: " << resultsList.size() << " off " << m_documentsCount
			<< " results to process, starting at position " << m_startDoc << endl;
#endif

		m_resultsCharset = pEngine->getResultsCharset();
		if (m_listingIndex == false)
		{
			processResults(resultsList);
		}
		else
		{
			processResults(resultsList,
				PinotSettings::getInstance().getIndexId(m_engineDisplayableName));
		}
	}

	delete pEngine;
}