/**
 * JNI entry point for HtmlUtils.shrinkHtml.
 *
 * Feeds the input through three passes, in order: stripHtmlTags (called with its
 * last argument set to true), unescapeHtml, and shrinkWhiteSpace (which receives
 * f_trim). Each pass writes into a buffer that is released here with std::free.
 *
 * @param env    JNI environment.
 * @param cls    Calling class (unused).
 * @param orig   Input string; a null reference yields an empty Java string.
 * @param f_trim Forwarded unchanged to shrinkWhiteSpace.
 * @return orig itself when none of the passes reported a change, otherwise a new
 *         Java string built from the final buffer. NULL is returned when the
 *         characters of orig cannot be obtained or NewString fails.
 */
JNIEXPORT jstring JNICALL Java_info_narazaki_android_lib_text_HtmlUtils_shrinkHtml(JNIEnv *env, jclass cls,
        jstring orig, jboolean f_trim) {
    if (orig == NULL) {
        // Use a real jchar buffer: a char literal cast to jchar* has the wrong type and alignment.
        static const jchar empty_str[1] = { 0 };
        return env->NewString(empty_str, 0);
    }

    const jchar* orig_str = env->GetStringChars(orig, NULL);
    if (orig_str == NULL) {
        // GetStringChars returns NULL on failure; do not hand a null buffer to the passes below.
        return NULL;
    }
    const jsize orig_len = env->GetStringLength(orig);

    // Pass 1: tag stripping. The Java characters are no longer needed once it has run.
    jchar* strip_result_str = NULL;
    jsize strip_result_len = 0;
    const bool changed_strip = stripHtmlTags(orig_str, orig_len, &strip_result_str, &strip_result_len, true);
    env->ReleaseStringChars(orig, orig_str);

    // Pass 2: entity unescaping on the output of pass 1.
    jchar* unescape_result_str = NULL;
    jsize unescape_result_len = 0;
    const bool changed_unescape =
            unescapeHtml(strip_result_str, strip_result_len, &unescape_result_str, &unescape_result_len);
    std::free(strip_result_str);

    // Pass 3: whitespace shrinking on the output of pass 2.
    jchar* shrink_result_str = NULL;
    jsize shrink_result_len = 0;
    const bool changed_shrink = shrinkWhiteSpace(unescape_result_str, unescape_result_len, &shrink_result_str,
            &shrink_result_len, f_trim);
    std::free(unescape_result_str);

    // Nothing changed: return the caller's own reference instead of allocating a copy.
    if (!changed_strip && !changed_unescape && !changed_shrink) {
        std::free(shrink_result_str);
        return orig;
    }

    jstring result = env->NewString(shrink_result_str, shrink_result_len);
    std::free(shrink_result_str);
    return result;
}
/**
 * JNI entry point for HtmlUtils.stripAllHtmls.
 *
 * Feeds the input through stripHtmlTags (which receives conv_br as its last
 * argument) and then unescapeHtml. Each pass writes into a buffer that is
 * released here with std::free.
 *
 * @param env     JNI environment.
 * @param cls     Calling class (unused).
 * @param orig    Input string; a null reference yields an empty Java string.
 * @param conv_br Forwarded unchanged to stripHtmlTags.
 * @return orig itself when neither pass reported a change, otherwise a new Java
 *         string built from the final buffer. NULL is returned when the
 *         characters of orig cannot be obtained or NewString fails.
 */
JNIEXPORT jstring JNICALL Java_info_narazaki_android_lib_text_HtmlUtils_stripAllHtmls(JNIEnv *env, jclass cls,
        jstring orig, jboolean conv_br) {
    if (orig == NULL) {
        // Use a real jchar buffer: a char literal cast to jchar* has the wrong type and alignment.
        static const jchar empty_str[1] = { 0 };
        return env->NewString(empty_str, 0);
    }

    const jchar* orig_str = env->GetStringChars(orig, NULL);
    if (orig_str == NULL) {
        // GetStringChars returns NULL on failure; do not hand a null buffer to the passes below.
        return NULL;
    }
    const jsize orig_len = env->GetStringLength(orig);

    // Pass 1: tag stripping. The Java characters are no longer needed once it has run.
    jchar* strip_result_str = NULL;
    jsize strip_result_len = 0;
    const bool changed_strip = stripHtmlTags(orig_str, orig_len, &strip_result_str, &strip_result_len, conv_br);
    env->ReleaseStringChars(orig, orig_str);

    // Pass 2: entity unescaping on the output of pass 1.
    jchar* result_str = NULL;
    jsize result_len = 0;
    const bool changed_unescape = unescapeHtml(strip_result_str, strip_result_len, &result_str, &result_len);
    std::free(strip_result_str);

    // Nothing changed: return the caller's own reference instead of allocating a copy.
    if (!changed_strip && !changed_unescape) {
        std::free(result_str);
        return orig;
    }

    jstring result = env->NewString(result_str, result_len);
    std::free(result_str);
    return result;
}
Exemple #3
0
/// Fetches lyrics for the given artist and title.
///
/// The base LyricsFetcher::fetch runs first. When it succeeds, its result.second is
/// handed to Curl::perform as the request target (presumably the URL of the page that
/// holds the full lyrics -- confirm against the base class), and the lyric boxes found
/// in that page are cleaned up and concatenated.
///
/// @param artist artist name, forwarded to the base fetcher.
/// @param title  song title, forwarded to the base fetcher.
/// @return pair whose first member tells whether lyrics were obtained; second holds the
///         lyrics on success and an error message otherwise.
LyricsFetcher::Result LyricwikiFetcher::fetch(const std::string &artist, const std::string &title)
{
	LyricsFetcher::Result result = LyricsFetcher::fetch(artist, title);
	// The base fetcher failed: pass its result through untouched.
	if (!result.first)
		return result;

	// Assume failure until the lyrics have been fully assembled.
	result.first = false;

	std::string data;
	CURLcode code = Curl::perform(data, result.second, "", true);
	if (code != CURLE_OK)
	{
		result.second = curl_easy_strerror(code);
		return result;
	}

	auto lyrics = getContent("<div class='lyricbox'><script>.*?</script>(.*?)<!--", data);
	if (lyrics.empty())
	{
		result.second = msgNotFound;
		return result;
	}
	std::transform(lyrics.begin(), lyrics.end(), lyrics.begin(), unescapeHtmlUtf8);

	// The site replaces the lyrics with a notice when it lacks a licence for the song.
	bool license_restriction = std::any_of(lyrics.begin(), lyrics.end(), [](const std::string &s) {
		return s.find("Unfortunately, we are not licensed to display the full lyrics for this song at the moment.") != std::string::npos;
	});
	if (license_restriction)
	{
		result.second = "License restriction";
		return result;
	}

	Regex::RE br("<br />");
	data.clear();
	for (auto &entry : lyrics)
	{
		// Turn explicit line breaks into newlines before the remaining tags are removed.
		br.ReplaceAll("\n", entry);
		stripHtmlTags(entry);
		Regex::RE::Trim(entry);
		if (entry.empty())
			continue;
		// Put the separator *between* non-empty entries only, so that empty
		// trailing entries cannot leave a dangling separator at the end.
		if (!data.empty())
			data += "\n\n----------\n\n";
		data += entry;
	}

	result.second = data;
	result.first = true;
	return result;
}
Exemple #4
0
/// Cleans up fetched text in place.
///
/// @param data text to clean; it is modified directly.
void LyricsFetcher::postProcess(std::string &data) const
{
	// Presumably removes HTML markup (helper is defined elsewhere -- confirm).
	stripHtmlTags(data);
	// Drop leading and trailing whitespace.
	boost::trim(data);
}
/// Builds a plain-text artist summary from the service response in `data`.
///
/// The text is accumulated in result.second in this order: the artist description
/// (or a fixed "no description" message), a "Similar artists" list, a "Similar tags"
/// list, and finally the first <url> that precedes the <similar> section.
///
/// @param data raw response body; it is searched with regular expressions, not parsed.
/// @return pair whose first member is true on success. On failure first is false and
///         second holds either msgInvalidResponse (no <content> element was found) or
///         the curl error text (fetching the linked wiki page failed).
Service::Result ArtistInfo::processData(const std::string &data)
{
	Service::Result result;
	result.first = false;

	boost::regex rx("<content>(.*?)</content>");
	boost::smatch what;
	if (!boost::regex_search(data, what, rx))
	{
		result.second = msgInvalidResponse;
		return result;
	}

	std::string desc = what[1];
	if (desc.empty())
	{
		result.second += "No description available for this artist.";
	}
	else
	{
		// if there is a description, locate the link to wiki on last.fm...
		rx.assign("<link rel=\"original\" href=\"(.*?)\"");
		if (boost::regex_search(data, what, rx))
		{
			// ...try to get the content of it...
			std::string wiki;
			CURLcode code = Curl::perform(wiki, what[1]);
			if (code != CURLE_OK)
			{
				result.second = curl_easy_strerror(code);
				return result;
			}
			// ...and filter it to get the whole description. If the wiki block
			// is missing, the description from <content> is kept.
			rx.assign("<div id=\"wiki\">(.*?)</div>");
			if (boost::regex_search(wiki, what, rx))
				desc = unescapeHtmlUtf8(what[1]);
		}
		else
		{
			// otherwise, get rid of CDATA wrapper.
			rx.assign("<!\\[CDATA\\[(.*)\\]\\]>");
			desc = boost::regex_replace(desc, rx, "\\1");
		}
		stripHtmlTags(desc);
		boost::trim(desc);
		result.second += desc;
	}

	// Appends one "\n * name (url)" line per match; capture 1 is the name, capture 2 the url.
	auto add_similars = [&result](boost::sregex_iterator &it, const boost::sregex_iterator &last) {
		for (; it != last; ++it)
		{
			std::string value = it->str(1);
			std::string url = it->str(2);
			stripHtmlTags(value);
			stripHtmlTags(url);
			result.second += "\n * ";
			result.second += value;
			result.second += " (";
			result.second += url;
			result.second += ")";
		}
	};

	size_t a = data.find("<similar>");
	size_t b = data.find("</similar>");
	// a < b: a closing tag that precedes the opening one would otherwise yield an
	// invalid iterator range (undefined behaviour) for the regex iterator.
	if (a != std::string::npos && b != std::string::npos && a < b)
	{
		rx.assign("<artist>.*?<name>(.*?)</name>.*?<url>(.*?)</url>.*?</artist>");
		auto it = boost::sregex_iterator(data.begin()+a, data.begin()+b, rx);
		auto last = boost::sregex_iterator();
		if (it != last)
			result.second += "\n\nSimilar artists:\n";
		add_similars(it, last);
	}

	a = data.find("<tags>");
	b = data.find("</tags>");
	// Same ordering guard as for the <similar> section.
	if (a != std::string::npos && b != std::string::npos && a < b)
	{
		rx.assign("<tag>.*?<name>(.*?)</name>.*?<url>(.*?)</url>.*?</tag>");
		auto it = boost::sregex_iterator(data.begin()+a, data.begin()+b, rx);
		auto last = boost::sregex_iterator();
		if (it != last)
			result.second += "\n\nSimilar tags:\n";
		add_similars(it, last);
	}

	// get artist we look for, it's the one before similar artists
	rx.assign("<name>.*?</name>.*?<url>(.*?)</url>.*?<similar>");
	if (boost::regex_search(data, what, rx))
	{
		std::string url = what[1];
		stripHtmlTags(url);
		result.second += "\n\n";
		// add only url
		result.second += url;
	}

	result.first = true;
	return result;
}
Exemple #6
0
/// Cleans up fetched text in place.
///
/// @param data text to clean; it is modified directly.
void LyricsFetcher::postProcess(std::string &data)
{
	// Presumably removes HTML markup (helper is defined elsewhere -- confirm).
	stripHtmlTags(data);
	// Project helper; presumably trims surrounding whitespace -- confirm.
	Regex::RE::Trim(data);
}