Beispiel #1
0
// Normalises line breaks in the fetched lyrics text, unescapes it via
// unescapeHtmlUtf8 and then passes it on to LyricsFetcher::postProcess.
void MetrolyricsFetcher::postProcess(std::string &data) const
{
	// Line breaks may show up twice in the markup: once as the "&#10;"
	// entity and once as a "<br />" tag. Only the tag is reliably there,
	// so the entity is dropped and the tag alone becomes the newline;
	// otherwise every break would be doubled.
	const char newline_entity[] = "&#10;";
	const char break_tag[] = "<br />";

	boost::replace_all(data, newline_entity, "");
	boost::replace_all(data, break_tag, "\n");

	data = unescapeHtmlUtf8(data);
	LyricsFetcher::postProcess(data);
}
Beispiel #2
0
// Cleans up the fetched lyrics text in place:
//  1. removes the first "[ ... ]" span (the "[ from ... ]" info),
//  2. drops "&#10;" entities and turns "<br />" tags into newlines,
//  3. runs the text through unescapeHtmlUtf8,
//  4. passes the result on to LyricsFetcher::postProcess.
void MetrolyricsFetcher::postProcess(std::string &data)
{
	// throw away [ from ... ] info. The closing bracket is searched for
	// starting at the opening one, and nothing is removed unless both are
	// present; an unmatched '[' (or a ']' that only appears earlier in
	// the text) must not wipe out the rest of the lyrics.
	const size_t open = data.find('[');
	if (open != std::string::npos)
	{
		const size_t close = data.find(']', open);
		if (close != std::string::npos)
			data.erase(open, close - open + 1);
	}
	// some of lyrics have both \n chars and <br />, html tags
	// are always present whereas \n chars are not, so we need to
	// throw them away to avoid having line breaks doubled.
	boost::replace_all(data, "&#10;", "");
	boost::replace_all(data, "<br />", "\n");
	data = unescapeHtmlUtf8(data);
	LyricsFetcher::postProcess(data);
}
Beispiel #3
0
void MetrolyricsFetcher::postProcess(std::string &data)
{
	// some of lyrics have both \n chars and <br />, html tags
	// are always present whereas \n chars are not, so we need to
	// throw them away to avoid having line breaks doubled.
	Regex::RE end("&#10;");
	Regex::RE br("<br />");

	end.ReplaceAll("", data);
	br.ReplaceAll("\n", data);

	data = unescapeHtmlUtf8(data);
	LyricsFetcher::postProcess(data);
}
Beispiel #4
0
// Looks up a lyrics page through a Google "I'm Feeling Lucky" query and
// then delegates the actual download to LyricsFetcher::fetch.
//
// artist, title - appended verbatim to the query string.
//
// Returns a Result whose first member is false and whose second member
// holds an error message when the search request fails or yields no usable
// link; otherwise returns whatever LyricsFetcher::fetch returns.
LyricsFetcher::Result GoogleLyricsFetcher::fetch(const std::string &artist, const std::string &title)
{
	Result result;
	result.first = false;

	// Query: <artist>+<title>+%2B<site keyword>
	std::string query;
	query.append(artist).append("+").append(title).append("+%2B");
	query += siteKeyword();

	std::string google_url = "http://www.google.com/search?hl=en&ie=UTF-8&oe=UTF-8&q=";
	google_url.append(query).append("&btnI=I%27m+Feeling+Lucky");

	// The search URL is passed twice, exactly as the request helper is
	// called everywhere else in this function's original form.
	std::string response;
	const CURLcode code = Curl::perform(response, google_url, google_url);
	if (code != CURLE_OK)
	{
		result.second = curl_easy_strerror(code);
		return result;
	}

	// The response is expected to contain a `<A HREF="...">here</A>` link;
	// its target is the page to fetch.
	auto urls = getContent("<A HREF=\"(.*?)\">here</A>", response);
	if (!urls.empty() && isURLOk(urls[0]))
	{
		// URL is set from this local's buffer, so the string has to stay
		// alive until the delegated fetch below has returned.
		std::string target = unescapeHtmlUtf8(urls[0]);
		URL = target.c_str();
		return LyricsFetcher::fetch("", "");
	}

	result.second = msgNotFound;
	return result;
}
Beispiel #5
0
// Builds a plain-text artist summary out of an XML response.
//
// data - the raw response text to parse.
//
// Returns a Result whose first member is true on success, with the second
// member holding the text: description, then optional "Similar artists" and
// "Similar tags" lists, then the artist's url. On failure the first member
// is false and the second holds an error message (an invalid response, or
// the curl error from downloading the full description).
Service::Result ArtistInfo::processData(const std::string &data)
{
	size_t a, b;
	Service::Result result;
	result.first = false;
	
	boost::regex rx("<content>(.*?)</content>");
	boost::smatch what;
	if (boost::regex_search(data, what, rx))
	{
		// copy the match now, `what` is reused by the searches below
		std::string desc = what[1];
		// if there is a description...
		if (desc.length() > 0)
		{
			// ...locate the link to wiki on last.fm...
			rx.assign("<link rel=\"original\" href=\"(.*?)\"");
			if (boost::regex_search(data, what, rx))
			{
				// ...try to get the content of it...
				std::string wiki;
				CURLcode code = Curl::perform(wiki, what[1]);
				
				if (code != CURLE_OK)
				{
					result.second = curl_easy_strerror(code);
					return result;
				}
				else
				{
					// ...and filter it to get the whole description.
					// If the page has no such div, the short description
					// taken from <content> is kept.
					rx.assign("<div id=\"wiki\">(.*?)</div>");
					if (boost::regex_search(wiki, what, rx))
						desc = unescapeHtmlUtf8(what[1]);
				}
			}
			else
			{
				// otherwise, get rid of CDATA wrapper.
				rx.assign("<!\\[CDATA\\[(.*)\\]\\]>");
				desc = boost::regex_replace(desc, rx, "\\1");
			}
			stripHtmlTags(desc);
			boost::trim(desc);
			result.second += desc;
		}
		else
			result.second += "No description available for this artist.";
	}
	else
	{
		result.second = msgInvalidResponse;
		return result;
	}
	
	// Appends one "\n * name (url)" line per remaining match; capture 1 is
	// the name and capture 2 the url.
	auto add_similars = [&result](boost::sregex_iterator &it, const boost::sregex_iterator &last) {
		for (; it != last; ++it)
		{
			std::string value = it->str(1);
			std::string url = it->str(2);
			stripHtmlTags(value);
			stripHtmlTags(url);
			result.second += "\n * ";
			result.second += value;
			result.second += " (";
			result.second += url;
			result.second += ")";
		}
	};
	
	a = data.find("<similar>");
	b = data.find("</similar>");
	// a < b: a closing tag located before the opening one would make
	// [begin+a, begin+b) an invalid iterator range.
	if (a != std::string::npos && b != std::string::npos && a < b)
	{
		rx.assign("<artist>.*?<name>(.*?)</name>.*?<url>(.*?)</url>.*?</artist>");
		auto it = boost::sregex_iterator(data.begin()+a, data.begin()+b, rx);
		auto last = boost::sregex_iterator();
		if (it != last)
			result.second += "\n\nSimilar artists:\n";
		add_similars(it, last);
	}
	
	a = data.find("<tags>");
	b = data.find("</tags>");
	// same ordering guard as for the <similar> section
	if (a != std::string::npos && b != std::string::npos && a < b)
	{
		rx.assign("<tag>.*?<name>(.*?)</name>.*?<url>(.*?)</url>.*?</tag>");
		auto it = boost::sregex_iterator(data.begin()+a, data.begin()+b, rx);
		auto last = boost::sregex_iterator();
		if (it != last)
			result.second += "\n\nSimilar tags:\n";
		add_similars(it, last);
	}
	
	// get artist we look for, it's the one before similar artists
	rx.assign("<name>.*?</name>.*?<url>(.*?)</url>.*?<similar>");
	
	if (boost::regex_search(data, what, rx))
	{
		std::string url = what[1];
		stripHtmlTags(url);
		result.second += "\n\n";
		// add only url
		result.second += url;
	}
	
	result.first = true;
	return result;
}