void MetrolyricsFetcher::postProcess(std::string &data) const { // some of lyrics have both \n chars and <br />, html tags // are always present whereas \n chars are not, so we need to // throw them away to avoid having line breaks doubled. boost::replace_all(data, " ", ""); boost::replace_all(data, "<br />", "\n"); data = unescapeHtmlUtf8(data); LyricsFetcher::postProcess(data); }
void MetrolyricsFetcher::postProcess(std::string &data) { // throw away [ from ... ] info size_t i = data.find('['), j = data.find(']'); if (i != std::string::npos && i != std::string::npos) data.replace(i, j-i+1, ""); // some of lyrics have both \n chars and <br />, html tags // are always present whereas \n chars are not, so we need to // throw them away to avoid having line breaks doubled. boost::replace_all(data, " ", ""); boost::replace_all(data, "<br />", "\n"); data = unescapeHtmlUtf8(data); LyricsFetcher::postProcess(data); }
void MetrolyricsFetcher::postProcess(std::string &data) { // some of lyrics have both \n chars and <br />, html tags // are always present whereas \n chars are not, so we need to // throw them away to avoid having line breaks doubled. Regex::RE end(" "); Regex::RE br("<br />"); end.ReplaceAll("", data); br.ReplaceAll("\n", data); data = unescapeHtmlUtf8(data); LyricsFetcher::postProcess(data); }
// Looks up a lyrics page through a Google "I'm Feeling Lucky" query and
// then lets LyricsFetcher::fetch do the actual download.
//
// The query is "<artist>+<title>+%2B<siteKeyword()>". The first link
// extracted from the response with the `<A HREF="...">here</A>` pattern is
// taken as the target page, provided isURLOk() accepts it.
//
// Returns a Result whose `first` is false and whose `second` carries the
// error text when the request fails or no acceptable link is found;
// otherwise returns whatever LyricsFetcher::fetch yields.
LyricsFetcher::Result GoogleLyricsFetcher::fetch(const std::string &artist, const std::string &title)
{
	Result result;
	result.first = false;

	// assemble the whole request url in one buffer
	std::string google_url = "http://www.google.com/search?hl=en&ie=UTF-8&oe=UTF-8&q=";
	google_url += artist;
	google_url += "+";
	google_url += title;
	google_url += "+%2B";
	google_url += siteKeyword();
	google_url += "&btnI=I%27m+Feeling+Lucky";

	// the url is passed twice, exactly as the second and third argument
	std::string data;
	CURLcode code = Curl::perform(data, google_url, google_url);
	if (code != CURLE_OK)
	{
		result.second = curl_easy_strerror(code);
		return result;
	}

	auto urls = getContent("<A HREF=\"(.*?)\">here</A>", data);
	if (urls.empty() || !isURLOk(urls[0]))
	{
		result.second = msgNotFound;
		return result;
	}

	// the response body is no longer needed, reuse the buffer for the link
	data = unescapeHtmlUtf8(urls[0]);

	// NOTE(review): if URL is a raw pointer, it refers to the buffer of the
	// local `data` and becomes dangling once this function returns --
	// confirm that nothing reads URL after the call below.
	URL = data.c_str();

	// URL is set above; the base implementation is invoked with empty
	// artist and title.
	return LyricsFetcher::fetch("", "");
}
// Builds the artist info text out of the raw service response in `data`.
//
// On success result.first is true and result.second holds, in this order:
//  - the description, or a placeholder when the <content> element is empty,
//  - a "Similar artists" list, if any entries are found inside <similar>,
//  - a "Similar tags" list, if any entries are found inside <tags>,
//  - the url that precedes the <similar> element, if there is one.
//
// On failure result.first is false and result.second holds the message:
// msgInvalidResponse when `data` has no <content> element, or the curl
// error string when downloading the linked page fails.
Service::Result ArtistInfo::processData(const std::string &data)
{
	Service::Result result;
	result.first = false;

	boost::regex rx("<content>(.*?)</content>");
	boost::smatch what;
	if (!boost::regex_search(data, what, rx))
	{
		result.second = msgInvalidResponse;
		return result;
	}

	std::string desc = what[1];
	if (desc.empty())
		result.second += "No description available for this artist.";
	else
	{
		// there is a description, locate the link to wiki on last.fm...
		rx.assign("<link rel=\"original\" href=\"(.*?)\"");
		if (boost::regex_search(data, what, rx))
		{
			// ...try to get the content of it...
			std::string wiki;
			CURLcode code = Curl::perform(wiki, what[1]);
			if (code != CURLE_OK)
			{
				result.second = curl_easy_strerror(code);
				return result;
			}
			// ...and filter it to get the whole description. If the
			// page has no matching <div>, the text taken from
			// <content> is kept.
			rx.assign("<div id=\"wiki\">(.*?)</div>");
			if (boost::regex_search(wiki, what, rx))
				desc = unescapeHtmlUtf8(what[1]);
		}
		else
		{
			// otherwise, get rid of CDATA wrapper.
			rx.assign("<!\\[CDATA\\[(.*)\\]\\]>");
			desc = boost::regex_replace(desc, rx, "\\1");
		}
		stripHtmlTags(desc);
		boost::trim(desc);
		result.second += desc;
	}

	// Appends `header` followed by one "\n * name (url)" line for every
	// match of `item_pattern` (capture 1 = name, capture 2 = url) found
	// between `open_tag` and `close_tag`. Appends nothing when either tag
	// is missing or when there are no matches.
	auto add_list = [&data, &result](const char *open_tag, const char *close_tag,
	                                 const char *item_pattern, const char *header) {
		const size_t begin = data.find(open_tag);
		if (begin == std::string::npos)
			return;
		// The closing tag is looked up only behind the opening one, so
		// the iterator range handed to the regex iterator below can
		// never be reversed.
		const size_t end = data.find(close_tag, begin);
		if (end == std::string::npos)
			return;

		const boost::regex item_rx(item_pattern);
		boost::sregex_iterator it(data.begin()+begin, data.begin()+end, item_rx);
		const boost::sregex_iterator last;
		if (it != last)
			result.second += header;
		for (; it != last; ++it)
		{
			std::string value = it->str(1);
			std::string url = it->str(2);
			stripHtmlTags(value);
			stripHtmlTags(url);
			result.second += "\n * ";
			result.second += value;
			result.second += " (";
			result.second += url;
			result.second += ")";
		}
	};

	add_list("<similar>", "</similar>",
	         "<artist>.*?<name>(.*?)</name>.*?<url>(.*?)</url>.*?</artist>",
	         "\n\nSimilar artists:\n");
	add_list("<tags>", "</tags>",
	         "<tag>.*?<name>(.*?)</name>.*?<url>(.*?)</url>.*?</tag>",
	         "\n\nSimilar tags:\n");

	// get artist we look for, it's the one before similar artists
	rx.assign("<name>.*?</name>.*?<url>(.*?)</url>.*?<similar>");
	if (boost::regex_search(data, what, rx))
	{
		std::string url = what[1];
		stripHtmlTags(url);
		result.second += "\n\n";
		// add only url
		result.second += url;
	}

	result.first = true;
	return result;
}