/**
 * JNI entry point backing HtmlUtils.shrinkHtml.
 *
 * Feeds the input through three passes, in order: stripHtmlTags, unescapeHtml and
 * shrinkWhiteSpace. Each pass writes into a freshly produced buffer which this
 * function releases with std::free once the next pass has consumed it.
 *
 * @param env    JNI environment.
 * @param cls    Calling class (unused).
 * @param orig   Input string; a null reference yields an empty string.
 * @param f_trim Forwarded unchanged to shrinkWhiteSpace.
 * @return `orig` itself when none of the passes reported a change; otherwise a new
 *         string built from the last pass's buffer. Returns NULL when the characters
 *         of `orig` could not be obtained from the VM.
 */
JNIEXPORT jstring JNICALL Java_info_narazaki_android_lib_text_HtmlUtils_shrinkHtml(JNIEnv *env, jclass cls, jstring orig, jboolean f_trim)
{
    if (orig == NULL) {
        // Use a real jchar buffer instead of reinterpreting a char literal as jchar*.
        const jchar empty_chars[1] = { 0 };
        return env->NewString(empty_chars, 0);
    }

    const jchar* orig_str = env->GetStringChars(orig, NULL);
    if (orig_str == NULL) {
        // GetStringChars may fail; do not hand a null buffer to the text passes.
        return NULL;
    }
    const jsize orig_len = env->GetStringLength(orig);

    jchar* strip_result_str = NULL;
    jsize strip_result_len = 0;
    jchar* unescape_result_str = NULL;
    jsize unescape_result_len = 0;
    jchar* shrink_result_str = NULL;
    jsize shrink_result_len = 0;

    // Pass 1: tag stripping (the trailing `true` is the same flag stripAllHtmls exposes as conv_br).
    bool changed_strip = stripHtmlTags(orig_str, orig_len, &strip_result_str, &strip_result_len, true);
    // The Java characters are no longer needed once the first pass has produced its own buffer.
    env->ReleaseStringChars(orig, orig_str);

    // Pass 2: entity unescaping.
    bool changed_unescape = unescapeHtml(strip_result_str, strip_result_len, &unescape_result_str, &unescape_result_len);
    std::free(strip_result_str);

    // Pass 3: whitespace shrinking.
    bool changed_shrink = shrinkWhiteSpace(unescape_result_str, unescape_result_len, &shrink_result_str, &shrink_result_len, f_trim);
    std::free(unescape_result_str);

    if (!changed_strip && !changed_unescape && !changed_shrink) {
        // Nothing was modified: hand the caller's string back instead of allocating a copy.
        std::free(shrink_result_str);
        return orig;
    }

    jstring result = env->NewString(shrink_result_str, shrink_result_len);
    std::free(shrink_result_str);
    return result;
}
/**
 * JNI entry point backing HtmlUtils.stripAllHtmls.
 *
 * Feeds the input through stripHtmlTags and then unescapeHtml. Each pass writes into
 * a freshly produced buffer which this function releases with std::free.
 *
 * @param env     JNI environment.
 * @param cls     Calling class (unused).
 * @param orig    Input string; a null reference yields an empty string.
 * @param conv_br Forwarded unchanged as the last argument of stripHtmlTags.
 * @return `orig` itself when neither pass reported a change; otherwise a new string
 *         built from the unescaped buffer. Returns NULL when the characters of `orig`
 *         could not be obtained from the VM.
 */
JNIEXPORT jstring JNICALL Java_info_narazaki_android_lib_text_HtmlUtils_stripAllHtmls(JNIEnv *env, jclass cls, jstring orig, jboolean conv_br)
{
    if (orig == NULL) {
        // Use a real jchar buffer instead of reinterpreting a char literal as jchar*.
        const jchar empty_chars[1] = { 0 };
        return env->NewString(empty_chars, 0);
    }

    const jchar* orig_str = env->GetStringChars(orig, NULL);
    if (orig_str == NULL) {
        // GetStringChars may fail; do not hand a null buffer to the text passes.
        return NULL;
    }
    const jsize orig_len = env->GetStringLength(orig);

    jchar* strip_result_str = NULL;
    jsize strip_result_len = 0;
    jchar* result_str = NULL;
    jsize result_len = 0;

    // Pass 1: tag stripping.
    bool changed_strip = stripHtmlTags(orig_str, orig_len, &strip_result_str, &strip_result_len, conv_br);
    // The Java characters are no longer needed once the first pass has produced its own buffer.
    env->ReleaseStringChars(orig, orig_str);

    // Pass 2: entity unescaping.
    bool changed_unescape = unescapeHtml(strip_result_str, strip_result_len, &result_str, &result_len);
    std::free(strip_result_str);

    if (!changed_strip && !changed_unescape) {
        // Nothing was modified: hand the caller's string back instead of allocating a copy.
        std::free(result_str);
        return orig;
    }

    jstring result = env->NewString(result_str, result_len);
    std::free(result_str);
    return result;
}
/// Fetches lyrics in two steps: the base LyricsFetcher::fetch is run first, and on
/// success its result.second is passed to Curl::perform as the address of the page
/// that holds the actual lyrics.
///
/// @param artist forwarded to LyricsFetcher::fetch
/// @param title  forwarded to LyricsFetcher::fetch
/// @return a pair whose first member tells whether lyrics were obtained; second holds
///         the lyrics on success and an error/status message otherwise. When the base
///         fetch fails its result is returned untouched.
LyricsFetcher::Result LyricwikiFetcher::fetch(const std::string &artist, const std::string &title)
{
	LyricsFetcher::Result result = LyricsFetcher::fetch(artist, title);
	if (!result.first)
		return result;

	// Assume failure until the second stage has produced lyrics.
	result.first = false;

	std::string data;
	CURLcode code = Curl::perform(data, result.second, "", true);
	if (code != CURLE_OK)
	{
		result.second = curl_easy_strerror(code);
		return result;
	}

	auto lyrics = getContent("<div class='lyricbox'><script>.*?</script>(.*?)<!--", data);
	if (lyrics.empty())
	{
		result.second = msgNotFound;
		return result;
	}

	std::transform(lyrics.begin(), lyrics.end(), lyrics.begin(), unescapeHtmlUtf8);

	bool license_restriction = std::any_of(lyrics.begin(), lyrics.end(), [](const std::string &s) {
		return s.find("Unfortunately, we are not licensed to display the full lyrics for this song at the moment.") != std::string::npos;
	});
	if (license_restriction)
	{
		result.second = "License restriction";
		return result;
	}

	Regex::RE br("<br />");
	data.clear();
	for (auto &fragment : lyrics)
	{
		br.ReplaceAll("\n", fragment);
		stripHtmlTags(fragment);
		Regex::RE::Trim(fragment);
		if (fragment.empty())
			continue;
		// Put the separator only between fragments that were actually emitted, so
		// that a trailing fragment which ends up empty does not leave a dangling
		// separator at the end of the output.
		if (!data.empty())
			data += "\n\n----------\n\n";
		data += fragment;
	}

	result.second = data;
	result.first = true;
	return result;
}
/// Default clean-up applied to fetched text, performed in place.
///
/// @param data text to clean; it is first passed through stripHtmlTags and then has
///             leading and trailing whitespace removed by boost::trim.
void LyricsFetcher::postProcess(std::string &data) const
{
	// Order matters: trimming runs on the text left after the tag pass.
	stripHtmlTags(data);
	boost::trim(data);
}
/// Builds a plain-text artist summary out of a service response.
///
/// The output consists of the artist description, followed by optional
/// "Similar artists" and "Similar tags" lists and finally the artist's own url.
///
/// @param data raw response body to be parsed with regular expressions
/// @return a pair whose first member is true on success, with second holding the
///         formatted text. On failure first is false and second holds either
///         msgInvalidResponse (no <content> element found) or the curl error
///         string (fetching the linked wiki page failed).
Service::Result ArtistInfo::processData(const std::string &data)
{
	Service::Result result;
	result.first = false;

	boost::regex rx("<content>(.*?)</content>");
	boost::smatch what;
	if (!boost::regex_search(data, what, rx))
	{
		result.second = msgInvalidResponse;
		return result;
	}

	std::string desc = what[1];
	// if there is a description...
	if (desc.length() > 0)
	{
		// ...locate the link to wiki on last.fm...
		rx.assign("<link rel=\"original\" href=\"(.*?)\"");
		if (boost::regex_search(data, what, rx))
		{
			// ...try to get the content of it...
			std::string wiki;
			CURLcode code = Curl::perform(wiki, what[1]);
			if (code != CURLE_OK)
			{
				result.second = curl_easy_strerror(code);
				return result;
			}
			// ...and filter it to get the whole description.
			rx.assign("<div id=\"wiki\">(.*?)</div>");
			if (boost::regex_search(wiki, what, rx))
				desc = unescapeHtmlUtf8(what[1]);
		}
		else
		{
			// otherwise, get rid of CDATA wrapper.
			rx.assign("<!\\[CDATA\\[(.*)\\]\\]>");
			desc = boost::regex_replace(desc, rx, "\\1");
		}
		stripHtmlTags(desc);
		boost::trim(desc);
		result.second += desc;
	}
	else
		result.second += "No description available for this artist.";

	// Appends one " * name (url)" line per match in [it, last).
	auto add_similars = [&result](boost::sregex_iterator &it, const boost::sregex_iterator &last) {
		for (; it != last; ++it)
		{
			std::string value = it->str(1);
			std::string url = it->str(2);
			stripHtmlTags(value);
			stripHtmlTags(url);
			result.second += "\n * ";
			result.second += value;
			result.second += " (";
			result.second += url;
			result.second += ")";
		}
	};

	// Lists every match of `pattern` found between open_tag and close_tag under `heading`.
	auto add_section = [&](const char *open_tag, const char *close_tag, const char *pattern, const char *heading) {
		const size_t a = data.find(open_tag);
		if (a == std::string::npos)
			return;
		// Look for the closing tag only after the opening one; a closing tag placed
		// earlier in the data would otherwise produce an inverted iterator range.
		const size_t b = data.find(close_tag, a);
		if (b == std::string::npos)
			return;
		rx.assign(pattern);
		auto it = boost::sregex_iterator(data.begin()+a, data.begin()+b, rx);
		auto last = boost::sregex_iterator();
		if (it != last)
			result.second += heading;
		add_similars(it, last);
	};

	add_section("<similar>", "</similar>",
	            "<artist>.*?<name>(.*?)</name>.*?<url>(.*?)</url>.*?</artist>",
	            "\n\nSimilar artists:\n");
	add_section("<tags>", "</tags>",
	            "<tag>.*?<name>(.*?)</name>.*?<url>(.*?)</url>.*?</tag>",
	            "\n\nSimilar tags:\n");

	// get artist we look for, it's the one before similar artists
	rx.assign("<name>.*?</name>.*?<url>(.*?)</url>.*?<similar>");
	if (boost::regex_search(data, what, rx))
	{
		std::string url = what[1];
		stripHtmlTags(url);
		result.second += "\n\n";
		// add only url
		result.second += url;
	}

	result.first = true;
	return result;
}
/// Default clean-up applied to fetched text, performed in place.
///
/// @param data text to clean; it is first passed through stripHtmlTags and then
///             through Regex::RE::Trim.
void LyricsFetcher::postProcess(std::string &data)
{
	// Order matters: trimming runs on the text left after the tag pass.
	stripHtmlTags(data);
	Regex::RE::Trim(data);
}