/// Returns a SearchEngine of the specified type; NULL if unavailable.
SearchEngineInterface *SearchEngineFactory::getSearchEngine(const string &type, const string &option)
{
    SearchEngineInterface *myEngine = NULL;

    // Choice by type
    if ((type == "sherlock") ||
            (type == "opensearch"))
    {
        myEngine = new PluginWebEngine(option);
    }
    else if (type == "xapian")
    {
        myEngine = new XapianEngine(option);
    }
#ifdef HAS_GOOGLEAPI
    else if (type == "googleapi")
    {
        myEngine = new GoogleAPIEngine();
        myEngine->setKey(option);
    }
#endif
#ifdef HAS_OSAPI
    else if (type == "objectssearchapi")
    {
        myEngine = new ObjectsSearchAPIEngine();
    }
#endif

    return myEngine;
}
Ejemplo n.º 2
0
void ExpandQueryThread::doWork(void)
{
	// Get the SearchEngine
	SearchEngineInterface *pEngine = SearchEngineFactory::getSearchEngine(m_engineName, m_engineOption);
	if (pEngine == NULL)
	{
		m_status = _("Couldn't create search engine");
		m_status += " ";
		m_status += m_queryProps.getName();
		return;
	}

	// Set the maximum number of results
	pEngine->setMaxResultsCount(m_queryProps.getMaximumResultsCount());
	// Set whether to expand the query
	pEngine->setQueryExpansion(m_relevantDocs);

	// Run the query
	if (pEngine->runQuery(m_queryProps) == false)
	{
		m_status = _("Couldn't run query on search engine");
		m_status += " ";
		m_status += m_engineName;
	}
	else
	{
		// Copy the expand terms
		const set<string> &expandTerms = pEngine->getExpandTerms();
		copy(expandTerms.begin(), expandTerms.end(),
			inserter(m_expandTerms, m_expandTerms.begin()));
	}

	delete pEngine;
}
Ejemplo n.º 3
0
void ExpandQueryThread::doWork(void)
{
	// Get the SearchEngine
	SearchEngineInterface *pEngine = SearchEngineFactory::getSearchEngine("xapian", "MERGED");
	if (pEngine == NULL)
	{
		m_errorNum = UNKNOWN_ENGINE;
		m_errorParam = m_queryProps.getName();
		return;
	}

	// Expand the query
	pEngine->setExpandSet(m_expandFromDocsSet);

	// Run the query
	pEngine->setDefaultOperator(SearchEngineInterface::DEFAULT_OP_AND);
	if (pEngine->runQuery(m_queryProps) == false)
	{
		m_errorNum = QUERY_FAILED;
	}
	else
	{
		// Copy the expand terms
		const set<string> &expandTerms = pEngine->getExpandTerms();
		copy(expandTerms.begin(), expandTerms.end(),
			inserter(m_expandTerms, m_expandTerms.begin()));
	}

	delete pEngine;
}
Ejemplo n.º 4
0
void QueryingThread::doWork(void)
{
	// Get the SearchEngine
	SearchEngineInterface *engine = SearchEngineFactory::getSearchEngine(m_engineName, m_engineOption);
	if (engine == NULL)
	{
		m_status = _("Couldn't create search engine");
		m_status += " ";
		m_status += m_engineDisplayableName;
		return;
	}
	// Set the maximum number of results
	engine->setMaxResultsCount(m_queryProps.getMaximumResultsCount());

	// Run the query
	if (engine->runQuery(m_queryProps) == false)
	{
		m_status = _("Couldn't run query on search engine");
		m_status += " ";
		m_status += m_engineDisplayableName;
	}
	else
	{
		const vector<Result> &resultsList = engine->getResults();

		m_resultsList.clear();
		m_resultsList.reserve(resultsList.size());
		m_resultsCharset = engine->getResultsCharset();

		// Copy the results list
		for (vector<Result>::const_iterator resultIter = resultsList.begin();
			resultIter != resultsList.end(); ++resultIter)
		{
			string title = _("No title");
			string extract = HtmlTokenizer::stripTags(resultIter->getExtract());

			// The title may contain formatting
			if (resultIter->getTitle().empty() == false)
			{
				title = HtmlTokenizer::stripTags(resultIter->getTitle());
			}

			string language = resultIter->getLanguage();
			if (language.empty() == true)
			{
				// Use the query's language
				language = m_queryProps.getLanguage();
			}

			m_resultsList.push_back(Result(resultIter->getLocation(),
				title,
				extract,
				language,
				resultIter->getScore()));
		}
	}
	delete engine;
}
Ejemplo n.º 5
0
void ExpandQueryThread::doWork(void)
{
	IndexInterface *pIndex = PinotSettings::getInstance().getIndex("MERGED");
	set<unsigned int> relevantDocIds;

	if ((pIndex == NULL) ||
		(pIndex->isGood() == false))
	{
		m_status = _("Index error on");
		m_status += " MERGED";
		if (pIndex != NULL)
		{
			delete pIndex;
		}
		return;
	}

	for (set<string>::iterator locationIter = m_relevantDocs.begin();
		locationIter != m_relevantDocs.end(); ++locationIter)
	{
		relevantDocIds.insert(pIndex->hasDocument(*locationIter));
	}

	delete pIndex;

	// Get the SearchEngine
	SearchEngineInterface *pEngine = SearchEngineFactory::getSearchEngine("xapian", "MERGED");
	if (pEngine == NULL)
	{
		m_status = _("Couldn't create search engine");
		m_status += " ";
		m_status += m_queryProps.getName();
		return;
	}

	// Set the maximum number of results
	pEngine->setMaxResultsCount(m_queryProps.getMaximumResultsCount());
	// Set whether to expand the query
	pEngine->setQueryExpansion(relevantDocIds);

	// Run the query
	if (pEngine->runQuery(m_queryProps) == false)
	{
		m_status = _("Couldn't run query on search engine");
	}
	else
	{
		// Copy the expand terms
		const set<string> &expandTerms = pEngine->getExpandTerms();
		copy(expandTerms.begin(), expandTerms.end(),
			inserter(m_expandTerms, m_expandTerms.begin()));
	}

	delete pEngine;
}
Ejemplo n.º 6
0
bool DBusServletThread::runQuery(QueryProperties &queryProps, vector<string> &docIds)
{
	PinotSettings &settings = PinotSettings::getInstance();

	docIds.clear();

	SearchEngineInterface *pEngine = ModuleFactory::getSearchEngine(settings.m_defaultBackend,
		settings.m_daemonIndexLocation);
	if (pEngine == NULL)
	{
		return false;
	}

	// Run the query
	pEngine->setDefaultOperator(SearchEngineInterface::DEFAULT_OP_AND);
	if (pEngine->runQuery(queryProps) == false)
	{
		delete pEngine;

		return false;
	}

	const vector<DocumentInfo> &resultsList = pEngine->getResults();
	if (resultsList.empty() == true)
	{
#ifdef DEBUG
		cout << "DBusServletThread::runQuery: trying again" << endl;
#endif
		// Try again, this time with OR as default operator
		pEngine->setDefaultOperator(SearchEngineInterface::DEFAULT_OP_OR);
		if (pEngine->runQuery(queryProps) == false)
		{
			delete pEngine;

			return false;
		}
	}

	for (vector<DocumentInfo>::const_iterator resultIter = resultsList.begin();
		resultIter != resultsList.end(); ++resultIter)
	{
		unsigned int indexId = 0;
		unsigned int docId = resultIter->getIsIndexed(indexId);

		// We only need the document ID
		if (docId > 0)
		{
			char docIdStr[64];

			snprintf(docIdStr, 64, "%u", docId);
			docIds.push_back(docIdStr);
		}
	}

	delete pEngine;

	return true;
}
Ejemplo n.º 7
0
void QueryingThread::doWork(void)
{
	PinotSettings &settings = PinotSettings::getInstance();

	// Get the SearchEngine
	SearchEngineInterface *pEngine = SearchEngineFactory::getSearchEngine(m_engineName, m_engineOption);
	if (pEngine == NULL)
	{
		m_status = _("Couldn't create search engine");
		m_status += " ";
		m_status += m_engineDisplayableName;
		return;
	}

	// Set the maximum number of results
	pEngine->setMaxResultsCount(m_queryProps.getMaximumResultsCount());

	// Set up the proxy
	DownloaderInterface *pDownloader = pEngine->getDownloader();
	if ((pDownloader != NULL) &&
		(settings.m_proxyEnabled == true) &&
		(settings.m_proxyAddress.empty() == false))
	{
		char portStr[64];

		pDownloader->setSetting("proxyaddress", settings.m_proxyAddress);
		snprintf(portStr, 64, "%u", settings.m_proxyPort);
		pDownloader->setSetting("proxyport", portStr);
		pDownloader->setSetting("proxytype", settings.m_proxyType);
	}

	// Run the query
	if (pEngine->runQuery(m_queryProps) == false)
	{
		m_status = _("Couldn't run query on search engine");
		m_status += " ";
		m_status += m_engineDisplayableName;
	}
	else
	{
		IndexInterface *pDocsIndex = NULL;
		IndexInterface *pDaemonIndex = NULL;
		const vector<Result> &resultsList = pEngine->getResults();
		unsigned int indexId = 0;
		bool isIndexQuery = false;

		m_resultsList.clear();
		m_resultsList.reserve(resultsList.size());
		m_resultsCharset = pEngine->getResultsCharset();

		// Are we querying an index ?
		if (m_engineName == "xapian")
		{
			// Internal index ?
			if (m_engineOption == settings.m_docsIndexLocation)
			{
				indexId = settings.getIndexId(_("My Web Pages"));
				isIndexQuery = true;
			}
			else if (m_engineOption == settings.m_daemonIndexLocation)
			{
				indexId = settings.getIndexId(_("My Documents"));
				isIndexQuery = true;
			}
		}

		// Will we have to query internal indices ?
		if (isIndexQuery == false)
		{
			pDocsIndex = settings.getIndex(settings.m_docsIndexLocation);
			pDaemonIndex = settings.getIndex(settings.m_daemonIndexLocation);
		}

		// Copy the results list
		for (vector<Result>::const_iterator resultIter = resultsList.begin();
			resultIter != resultsList.end(); ++resultIter)
		{
			Result current(*resultIter);
			string title(_("No title"));
			string location(current.getLocation());
			string language(current.getLanguage());
			unsigned int docId = 0;

			// The title may contain formatting
			if (current.getTitle().empty() == false)
			{
				title = FilterUtils::stripMarkup(current.getTitle());
			}
			current.setTitle(title);
#ifdef DEBUG
			cout << "QueryingThread::doWork: title is " << title << endl;
#endif

			// Use the query's language if the result's is unknown
			if (language.empty() == true)
			{
				language = m_queryProps.getLanguage();
			}
			current.setLanguage(language);

			if (isIndexQuery == true)
			{
				unsigned int tmpId = 0;

				// The index engine should have set this
				docId = current.getIsIndexed(tmpId);
			}

			// Is this in one of the indexes ?
			if ((pDocsIndex != NULL) &&
				(pDocsIndex->isGood() == true))
			{
				docId = pDocsIndex->hasDocument(location);
				if (docId > 0)
				{
					indexId = settings.getIndexId(_("My Web Pages"));
				}
			}
			if ((pDaemonIndex != NULL) &&
				(pDaemonIndex->isGood() == true) &&
				(docId == 0))
			{
				docId = pDaemonIndex->hasDocument(location);
				if (docId > 0)
				{
					indexId = settings.getIndexId(_("My Documents"));
				}
			}

			if (docId > 0)
			{
				current.setIsIndexed(indexId, docId);
#ifdef DEBUG
				cout << "QueryingThread::doWork: found in index " << indexId << endl;
#endif
			}
#ifdef DEBUG
			else cout << "QueryingThread::doWork: not found in any index" << endl;
#endif

			m_resultsList.push_back(current);
		}

		if (pDocsIndex != NULL)
		{
			delete pDocsIndex;
		}
		if (pDaemonIndex != NULL)
		{
			delete pDaemonIndex;
		}
	}

	delete pEngine;
}
Ejemplo n.º 8
0
int main(int argc, char **argv)
{
	QueryProperties::QueryType queryType = QueryProperties::XAPIAN_QP;
	string engineType, option, csvExport, xmlExport, proxyAddress, proxyPort, proxyType;
	unsigned int maxResultsCount = 10; 
	int longOptionIndex = 0;
	bool printResults = true;

	// Look at the options
	int optionChar = getopt_long(argc, argv, "c:hm:a:p:qt:uvx:", g_longOptions, &longOptionIndex);
	while (optionChar != -1)
	{
		switch (optionChar)
		{
			case 'a':
				if (optarg != NULL)
				{
					proxyAddress = optarg;
				}
				break;
			case 'c':
				if (optarg != NULL)
				{
					csvExport = optarg;
					printResults = false;
				}
				break;
			case 'h':
				printHelp();
				return EXIT_SUCCESS;
			case 'm':
				if (optarg != NULL)
				{
					maxResultsCount = (unsigned int )atoi(optarg);
				}
				break;
			case 'p':
				if (optarg != NULL)
				{
					proxyPort = optarg;
				}
				break;
			case 'q':
				queryType = QueryProperties::XESAM_QL;
				break;
			case 't':
				if (optarg != NULL)
				{
					proxyType = optarg;
				}
				break;
			case 'u':
				queryType = QueryProperties::XESAM_UL;
				break;
			case 'v':
				cout << "pinot-search - " << PACKAGE_STRING << "\n\n"
					<< "This is free software.  You may redistribute copies of it under the terms of\n"
					<< "the GNU General Public License <http://www.gnu.org/licenses/old-licenses/gpl-2.0.html>.\n"
					<< "There is NO WARRANTY, to the extent permitted by law." << endl;
				return EXIT_SUCCESS;
			case 'x':
				if (optarg != NULL)
				{
					xmlExport = optarg;
					printResults = false;
				}
				break;
			default:
				return EXIT_FAILURE;
		}

		// Next option
		optionChar = getopt_long(argc, argv, "c:hm:a:p:qt:uvx:", g_longOptions, &longOptionIndex);
	}

	if (argc == 1)
	{
		printHelp();
		return EXIT_SUCCESS;
	}

	if ((argc < 4) ||
		(argc - optind != 3))
	{
		cerr << "Not enough parameters" << endl;
		return EXIT_FAILURE;
	}

	MIMEScanner::initialize();
	DownloaderInterface::initialize();

	engineType = argv[optind];
	option = argv[optind + 1];
	char *pQueryInput = argv[optind + 2];

	// Which SearchEngine ?
	SearchEngineInterface *pEngine = SearchEngineFactory::getSearchEngine(engineType, option);
	if (pEngine == NULL)
	{
		cerr << "Couldn't obtain search engine instance" << endl;

		DownloaderInterface::shutdown();
		MIMEScanner::shutdown();

		return EXIT_FAILURE;
	}

	// Set up the proxy
	DownloaderInterface *pDownloader = pEngine->getDownloader();
	if ((pDownloader != NULL) &&
		(proxyAddress.empty() == false) &&
		(proxyPort.empty() == false))
	{
		pDownloader->setSetting("proxyaddress", proxyAddress);
		pDownloader->setSetting("proxyport", proxyPort);
		pDownloader->setSetting("proxytype", proxyType);
	}

	// Set the query
	QueryProperties queryProps("pinot-search", "", queryType);
	if (queryType == QueryProperties::XAPIAN_QP)
	{
		queryProps.setFreeQuery(pQueryInput);
	}
	else
	{
		string fileContents;

		// Load the query from file
		if (loadFile(pQueryInput, fileContents) == false)
		{
			cerr << "Couldn't load query from file " << pQueryInput << endl;

			DownloaderInterface::shutdown();
			MIMEScanner::shutdown();

			return EXIT_FAILURE;
		}

		queryProps.setFreeQuery(fileContents);
	}

	queryProps.setMaximumResultsCount(maxResultsCount);
	if (pEngine->runQuery(queryProps) == true)
	{
		string resultsPage;

		// Try getting a list of links
		const vector<DocumentInfo> resultsList = pEngine->getResults();
		if (resultsList.empty() == false)
		{
			if (printResults == true)
			{
				unsigned int count = 0;

				cout << "Matching documents are :" << endl;

				vector<DocumentInfo>::const_iterator resultIter = resultsList.begin();
				while (resultIter != resultsList.end())
				{
					string rawUrl(resultIter->getLocation());
					Url thisUrl(rawUrl);

					cout << count << " Raw URL  : '" << rawUrl << "'"<< endl;
					cout << count << " Protocol : " << thisUrl.getProtocol() << endl;
					cout << count << " Host     : " << thisUrl.getHost() << endl;
					cout << count << " Location : " << thisUrl.getLocation() << "/" << thisUrl.getFile() << endl;
					cout << count << " Title    : " << resultIter->getTitle() << endl;
					cout << count << " Type     : " << resultIter->getType() << endl;
					cout << count << " Language : " << resultIter->getLanguage() << endl;
					cout << count << " Extract  : " << resultIter->getExtract() << endl;
					cout << count << " Score    : " << resultIter->getScore() << endl;
					count++;

					// Next
					resultIter++;
				}
			}
			else
			{
				string engineName(SearchEngineFactory::getSearchEngineName(engineType, option));

				if (csvExport.empty() == false)
				{
					CSVExporter exporter(csvExport, queryProps);

					exporter.exportResults(engineName, maxResultsCount, resultsList);
				}

				if (xmlExport.empty() == false)
				{
					OpenSearchExporter exporter(xmlExport, queryProps);

					exporter.exportResults(engineName, maxResultsCount, resultsList);
				}
			}
		}
		else
		{
			cerr << "Couldn't get a results list !" << endl;
		}
	}
	else
	{
		cerr << "Couldn't run query on search engine " << engineType << endl;
	}

	delete pEngine;

	XapianDatabaseFactory::closeAll();
	DownloaderInterface::shutdown();
	MIMEScanner::shutdown();

	return EXIT_SUCCESS;
}
Ejemplo n.º 9
0
int main(int argc, char **argv)
{
	string type, option;
	bool bDownloadResults = false;

	if (argc < 5)
	{
		cerr << "Usage: " << argv[0] << " <search engine name> <option> <search string> <max results> [DOWNLOAD]" << endl;
		return EXIT_FAILURE;
	}
	if (argc > 5)
	{
		string flag = argv[5];
		if (flag == "DOWNLOAD")
		{
			bDownloadResults = true;
		}
	}

	// Which SearchEngine ?
	type = argv[1];
	option = argv[2];
	SearchEngineInterface *myEngine = SearchEngineFactory::getSearchEngine(type, option);
	if (myEngine == NULL)
	{
		cerr << "Couldn't obtain search engine instance" << endl;
		return EXIT_FAILURE;
	}

	// How many results ?
	unsigned int count = atoi(argv[4]);
	myEngine->setMaxResultsCount(count);

	QueryProperties queryProps("senginetest", argv[3], "", "", "");
	bool bOK = myEngine->runQuery(queryProps);
	if (bOK == true)
	{
		string resultsPage;

		// Try getting a list of links
		const vector<Result> resultsList = myEngine->getResults();
		if (resultsList.empty() == false)
		{
			unsigned int count = 0;

			cout << "Matching documents are :" << endl;

			vector<Result>::const_iterator resultIter = resultsList.begin();
			while (resultIter != resultsList.end())
			{
				string rawUrl = (*resultIter).getLocation();
				Url thisUrl(rawUrl);

				cout << count << " Raw URL  : '" << rawUrl << "'"<< endl;
				cout << count << " Protocol : " << thisUrl.getProtocol() << endl;
				cout << count << " Host     : " << thisUrl.getHost() << endl;
				cout << count << " Location : " << thisUrl.getLocation() << "/" << thisUrl.getFile() << endl;
				cout << count << " Title    : " << HtmlTokenizer::stripTags((*resultIter).getTitle()) << endl;
				cout << count << " Extract  : " << HtmlTokenizer::stripTags((*resultIter).getExtract()) << endl;
				cout << count << " Score    : " << (*resultIter).getScore() << endl;

				if (bDownloadResults == true)
				{
					// Set the name of the file to which this page will be saved
					char num[16];
					sprintf(num, "%d", count);
					string url = (*resultIter).getLocation();
					string file = num;
					file += "_";
					file += thisUrl.getHost();
					file += ".html";

					if (type == "googleapi")
					{
						// Fetch the page from the Google cache
						fetchCachedPage(url, file, option);
					}
					else
					{
						fetchPage(url, file);
					}
				}
				count++;

				// Next
				resultIter++;
			}
		}
		else
		{
			cerr << "Couldn't get a results list !" << endl;
		}
	}
	else
	{
		cerr << "Couldn't run query on search engine " << argv[1] << endl;
	}

	delete myEngine;

	return EXIT_SUCCESS;
}
Ejemplo n.º 10
0
void EngineQueryThread::doWork(void)
{
	PinotSettings &settings = PinotSettings::getInstance();

	// Get the SearchEngine
	SearchEngineInterface *pEngine = SearchEngineFactory::getSearchEngine(m_engineName, m_engineOption);
	if (pEngine == NULL)
	{
		m_errorNum = UNKNOWN_ENGINE;
		m_errorParam = m_engineDisplayableName;
		return;
	}

	// Set up the proxy
	DownloaderInterface *pDownloader = pEngine->getDownloader();
	if ((pDownloader != NULL) &&
		(settings.m_proxyEnabled == true) &&
		(settings.m_proxyAddress.empty() == false))
	{
		char portStr[64];

		pDownloader->setSetting("proxyaddress", settings.m_proxyAddress);
		snprintf(portStr, 64, "%u", settings.m_proxyPort);
		pDownloader->setSetting("proxyport", portStr);
		pDownloader->setSetting("proxytype", settings.m_proxyType);
	}

	if (m_listingIndex == false)
	{
		pEngine->setLimitSet(m_limitToDocsSet);
	}

	// Run the query
	pEngine->setDefaultOperator(SearchEngineInterface::DEFAULT_OP_AND);
	if (pEngine->runQuery(m_queryProps, m_startDoc) == false)
	{
		m_errorNum = QUERY_FAILED;
		m_errorParam = m_engineDisplayableName;
	}
	else
	{
		const vector<DocumentInfo> &resultsList = pEngine->getResults();

		m_documentsList.clear();
		m_documentsList.reserve(resultsList.size());
		m_documentsCount = pEngine->getResultsCountEstimate();
#ifdef DEBUG
		cout << "EngineQueryThread::doWork: " << resultsList.size() << " off " << m_documentsCount
			<< " results to process, starting at position " << m_startDoc << endl;
#endif

		m_resultsCharset = pEngine->getResultsCharset();
		if (m_listingIndex == false)
		{
			processResults(resultsList);
		}
		else
		{
			processResults(resultsList,
				PinotSettings::getInstance().getIndexIdByName(m_engineDisplayableName));
		}

		// Any spelling correction ?
		string correctedFreeQuery(pEngine->getSpellingCorrection());
		if (correctedFreeQuery.empty() == false)
		{
			m_correctedSpelling = true;
			m_queryProps.setFreeQuery(correctedFreeQuery);
		}
	}

	delete pEngine;
}
Ejemplo n.º 11
0
int main(int argc, char **argv)
{
	QueryProperties::QueryType queryType = QueryProperties::XAPIAN_QP;
	string engineType, option, csvExport, xmlExport, stemLanguage;
	unsigned int maxResultsCount = 10; 
	int longOptionIndex = 0;
	bool printResults = true;
	bool sortByDate = false;
	bool locationOnly = false;
	bool isStoredQuery = false;

	// Look at the options
	int optionChar = getopt_long(argc, argv, "c:dhlm:rs:vx:", g_longOptions, &longOptionIndex);
	while (optionChar != -1)
	{
		switch (optionChar)
		{
			case 'c':
				if (optarg != NULL)
				{
					csvExport = optarg;
					printResults = false;
				}
				break;
			case 'd':
				sortByDate = true;
				break;
			case 'h':
				printHelp();
				return EXIT_SUCCESS;
			case 'l':
				locationOnly = true;
				break;
			case 'm':
				if (optarg != NULL)
				{
					maxResultsCount = (unsigned int )atoi(optarg);
				}
				break;
			case 'r':
				isStoredQuery = true;
				break;
			case 's':
				if (optarg != NULL)
				{
					stemLanguage = optarg;
				}
				break;
			case 'v':
				clog << "pinot-search - " << PACKAGE_STRING << "\n\n"
					<< "This is free software.  You may redistribute copies of it under the terms of\n"
					<< "the GNU General Public License <http://www.gnu.org/licenses/old-licenses/gpl-2.0.html>.\n"
					<< "There is NO WARRANTY, to the extent permitted by law." << endl;
				return EXIT_SUCCESS;
			case 'x':
				if (optarg != NULL)
				{
					xmlExport = optarg;
					printResults = false;
				}
				break;
			default:
				return EXIT_FAILURE;
		}

		// Next option
		optionChar = getopt_long(argc, argv, "c:dhlm:rs:vx:", g_longOptions, &longOptionIndex);
	}

#if defined(ENABLE_NLS)
	bindtextdomain(GETTEXT_PACKAGE, PACKAGE_LOCALE_DIR);
	bind_textdomain_codeset(GETTEXT_PACKAGE, "UTF-8");
	textdomain(GETTEXT_PACKAGE);
#endif //ENABLE_NLS

	if (argc == 1)
	{
		printHelp();
		return EXIT_SUCCESS;
	}

	if ((argc < 4) ||
		(argc - optind != 3))
	{
		clog << "Wrong number of parameters" << endl;
		return EXIT_FAILURE;
	}

	// This will create the necessary directories on the first run
	PinotSettings &settings = PinotSettings::getInstance();
	string confDirectory(PinotSettings::getConfigurationDirectory());

	if (MIMEScanner::initialize(PinotSettings::getHomeDirectory() + "/.local",
		string(SHARED_MIME_INFO_PREFIX)) == false)
	{
		clog << "Couldn't load MIME settings" << endl;
	}
	DownloaderInterface::initialize();
	ModuleFactory::loadModules(string(LIBDIR) + string("/pinot/backends"));
	ModuleFactory::loadModules(confDirectory + "/backends");

	// Localize language names
	Languages::setIntlName(0, _("Unknown"));
	Languages::setIntlName(1, _("Danish"));
	Languages::setIntlName(2, _("Dutch"));
	Languages::setIntlName(3, _("English"));
	Languages::setIntlName(4, _("Finnish"));
	Languages::setIntlName(5, _("French"));
	Languages::setIntlName(6, _("German"));
	Languages::setIntlName(7, _("Hungarian"));
	Languages::setIntlName(8, _("Italian"));
	Languages::setIntlName(9, _("Norwegian"));
	Languages::setIntlName(10, _("Portuguese"));
	Languages::setIntlName(11, _("Romanian"));
	Languages::setIntlName(12, _("Russian"));
	Languages::setIntlName(13, _("Spanish"));
	Languages::setIntlName(14, _("Swedish"));
	Languages::setIntlName(15, _("Turkish"));

	// Load the settings
	settings.load(PinotSettings::LOAD_ALL);

	engineType = argv[optind];
	option = argv[optind + 1];
	char *pQueryInput = argv[optind + 2];

	// Set the query
	QueryProperties queryProps("pinot-search", "", queryType);
	if (queryType == QueryProperties::XAPIAN_QP)
	{
		if (isStoredQuery == true)
		{
			const map<string, QueryProperties> &queries = settings.getQueries();
			map<string, QueryProperties>::const_iterator queryIter = queries.find(pQueryInput);
			if (queryIter != queries.end())
			{
				queryProps = queryIter->second;
			}
			else
			{
				clog << "Couldn't find stored query " << pQueryInput << endl;

				DownloaderInterface::shutdown();
				MIMEScanner::shutdown();

				return EXIT_FAILURE;
			}
		}
		else
		{
			queryProps.setFreeQuery(pQueryInput);
		}
	}
	queryProps.setStemmingLanguage(stemLanguage);
	queryProps.setMaximumResultsCount(maxResultsCount);
	if (sortByDate == true)
	{
		queryProps.setSortOrder(QueryProperties::DATE);
	}

	// Which SearchEngine ?
	SearchEngineInterface *pEngine = ModuleFactory::getSearchEngine(engineType, option);
	if (pEngine == NULL)
	{
		clog << "Couldn't obtain search engine instance" << endl;

		DownloaderInterface::shutdown();
		MIMEScanner::shutdown();

		return EXIT_FAILURE;
	}

	// Set up the proxy
	WebEngine *pWebEngine = dynamic_cast<WebEngine *>(pEngine);
	if (pWebEngine != NULL)
	{
		DownloaderInterface *pDownloader = pWebEngine->getDownloader();
		if ((pDownloader != NULL) &&
			(settings.m_proxyEnabled == true) &&
			(settings.m_proxyAddress.empty() == false))
		{
			char portStr[64];

			pDownloader->setSetting("proxyaddress", settings.m_proxyAddress);
			snprintf(portStr, 64, "%u", settings.m_proxyPort);
			pDownloader->setSetting("proxyport", portStr);
			pDownloader->setSetting("proxytype", settings.m_proxyType);
		}

		pWebEngine->setEditableValues(settings.m_editablePluginValues);
	}

	pEngine->setDefaultOperator(SearchEngineInterface::DEFAULT_OP_AND);
	if (pEngine->runQuery(queryProps) == true)
	{
		string resultsPage;
		unsigned int estimatedResultsCount = pEngine->getResultsCountEstimate();

		const vector<DocumentInfo> &resultsList = pEngine->getResults();
		if (resultsList.empty() == false)
		{
			if (printResults == true)
			{
				unsigned int count = 0;

				if (locationOnly == false)
				{
					clog << "Showing " << resultsList.size() << " results of about " << estimatedResultsCount << endl;
				}

				vector<DocumentInfo>::const_iterator resultIter = resultsList.begin();
				while (resultIter != resultsList.end())
				{
					string rawUrl(resultIter->getLocation(true));

					if (locationOnly == false)
					{
						clog << count << " Location : '" << rawUrl << "'"<< endl;
						clog << count << " Title    : " << resultIter->getTitle() << endl;
						clog << count << " Type     : " << resultIter->getType() << endl;
						clog << count << " Language : " << resultIter->getLanguage() << endl;
						clog << count << " Date     : " << resultIter->getTimestamp() << endl;
						clog << count << " Size     : " << resultIter->getSize() << endl;
						clog << count << " Extract  : " << resultIter->getExtract() << endl;
						clog << count << " Score    : " << resultIter->getScore() << endl;
					}
					else
					{
						clog << rawUrl << endl;
					}
					++count;

					// Next
					++resultIter;
				}
			}
			else
			{
				string engineName(ModuleFactory::getSearchEngineName(engineType, option));

				if (csvExport.empty() == false)
				{
					CSVExporter exporter(csvExport, queryProps);

					exporter.exportResults(engineName, maxResultsCount, resultsList);
				}

				if (xmlExport.empty() == false)
				{
					OpenSearchExporter exporter(xmlExport, queryProps);

					exporter.exportResults(engineName, maxResultsCount, resultsList);
				}
			}
		}
		else
		{
			clog << "No results" << endl;
		}
	}
	else
	{
		clog << "Couldn't run query on search engine " << engineType << endl;
	}

	delete pEngine;

	ModuleFactory::unloadModules();
	DownloaderInterface::shutdown();
	MIMEScanner::shutdown();

	return EXIT_SUCCESS;
}
Ejemplo n.º 12
0
void QueryingThread::doWork(void)
{
	PinotSettings &settings = PinotSettings::getInstance();

	// Get the SearchEngine
	SearchEngineInterface *pEngine = SearchEngineFactory::getSearchEngine(m_engineName, m_engineOption);
	if (pEngine == NULL)
	{
		m_status = _("Couldn't create search engine");
		m_status += " ";
		m_status += m_engineDisplayableName;
		return;
	}

	// Set up the proxy
	DownloaderInterface *pDownloader = pEngine->getDownloader();
	if ((pDownloader != NULL) &&
		(settings.m_proxyEnabled == true) &&
		(settings.m_proxyAddress.empty() == false))
	{
		char portStr[64];

		pDownloader->setSetting("proxyaddress", settings.m_proxyAddress);
		snprintf(portStr, 64, "%u", settings.m_proxyPort);
		pDownloader->setSetting("proxyport", portStr);
		pDownloader->setSetting("proxytype", settings.m_proxyType);
	}

	// Run the query
	if (pEngine->runQuery(m_queryProps, m_startDoc) == false)
	{
		m_status = _("Couldn't run query on search engine");
		m_status += " ";
		m_status += m_engineDisplayableName;
	}
	else
	{
		const vector<DocumentInfo> &resultsList = pEngine->getResults();

		m_documentsList.clear();
		m_documentsList.reserve(resultsList.size());
		m_documentsCount = pEngine->getResultsCountEstimate();
#ifdef DEBUG
		cout << "QueryingThread::doWork: " << resultsList.size() << " off " << m_documentsCount
			<< " results to process, starting at position " << m_startDoc << endl;
#endif

		m_resultsCharset = pEngine->getResultsCharset();
		if (m_listingIndex == false)
		{
			processResults(resultsList);
		}
		else
		{
			processResults(resultsList,
				PinotSettings::getInstance().getIndexId(m_engineDisplayableName));
		}
	}

	delete pEngine;
}