Esempio n. 1
0
int main(int argc, char * argv[])
{
	std::ifstream fs("C:\\Lookup\\112.127.141.86.html");
	std::string in;
	load_file(in, fs);
	fs.close();
	//static CRegexpT <char> regexp1("\\d+");
	static CRegexpT <char> regexp1("target=\\\"_blank\\\"\\>(.+?)\\</a\\>\\</td\\>");
	// loop
	MatchResult result1 = regexp1.Match(in.c_str());

	while( result1.IsMatched() )
	{  
		//GetEnd匹配成功后,获取所匹配到的子字符串的结束位置。如果匹配失败,则返回负值。
		//GetStart匹配成功后,获取所匹配到的子字符串的开始位置。如果匹配失败,则返回负值。
		printf("%.*s\n", result1.GetEnd() - result1.GetStart(), in.c_str() + result1.GetStart());
		// get next
		result1 = regexp1.Match(in.c_str(), result1.GetEnd());  //返回匹配结果 MatchResult 对象。
		// 通过 MatchResult 对象,可以得知是否匹配成功。如果成功,通过 MatchResult 对象可以获取捕获信息。

	}

	// text
	char * text = "http://www.cppprog.com/2009/0112/48.html";
	// declare
	static CRegexpT <char> regexp("\\d+");
	// loop
	MatchResult result = regexp.Match(text);
	//IsMatched返回非零值表示匹配成功,返回 0 表示匹配失败。
	while( result.IsMatched() )
	{  
		//GetEnd匹配成功后,获取所匹配到的子字符串的结束位置。如果匹配失败,则返回负值。
		//GetStart匹配成功后,获取所匹配到的子字符串的开始位置。如果匹配失败,则返回负值。
		printf("%.*s\n", result.GetEnd() - result.GetStart(), text + result.GetStart());
		// get next
		result = regexp.Match(text, result.GetEnd());  //返回匹配结果 MatchResult 对象。
		// 通过 MatchResult 对象,可以得知是否匹配成功。如果成功,通过 MatchResult 对象可以获取捕获信息。

	}
	return 0;
}
pfc::string8 provider_azlyrics::lookup_one(unsigned p_index, const metadb_handle_ptr & p_meta, threaded_process_status & p_status, abort_callback & p_abort)
{
	TRACK_CALL_TEXT("provider_azlyrics::lookup_one");

	const float threshold = 0.8f;

	// Regular Expression Class
	CRegexpT<char> regexp;

	// Buffer
	pfc::string8 buff;

	try
	{
		TRACK_CALL_TEXT("Try");
		// Init fetcher
		curl_wrapper_simple fetcher(&m_config_item);

		const metadb_handle_ptr & p = p_meta;

		if (p.is_empty())
		{
			return "";
		}

		pfc::string8_fast artist, title, album;
		static_api_ptr_t<titleformat_compiler> compiler;
		service_ptr_t<titleformat_object> script;

		file_info_impl info;
		p->get_info(info);

		// Get count of artists
		t_size count = info.meta_get_count_by_name("artist");

		// Get TITLE
		title = info.meta_get("title", 0);

		// Iterate through all artists listed
		for (int j = 0; j < count; j++)
		{
			TRACK_CALL_TEXT("For");
			// Get Artist
			artist = info.meta_get("artist", j);

			console::printf("%s - %s", artist, title);

			// Search the lyrics
			pfc::string8_fast url("http://search.azlyrics.com/search.php?q=");

			url += fetcher.quote(artist);
			url += "+";
			url += fetcher.quote(title);

			// Get it now
			try
			{
				fetcher.fetch(url, buff);
			}
			catch (pfc::exception & e)
			{
				console_error(e.what());
				continue;
			}
			catch (...)
			{
				continue;
			}

			if (buff.get_length() == 0)
				continue;

			int resultStart = buff.find_first("<b>1.</b>");
			int startUrl = buff.find_first("<a href=\"", resultStart) + 9;
			int endUrl = buff.find_first("\"", startUrl);

			url = pfc::string8_fast(buff.get_ptr()+startUrl, endUrl - startUrl);

			// Get it now
			try
			{
				fetcher.fetch(url, buff);
			}
			catch (pfc::exception & e)
			{
				console_error(e.what());
				continue;
			}
			catch (...)
			{
				continue;
			}

			if (buff.get_length() == 0)
				continue;

			const char * regex_lyrics = "<!-- END OF RINGTONE 1 -->\\s*?<b>\"(?P<title>.*?)\"</b><br>\\s*?<br>\\s\\s(?P<lyrics>.*?)\\s<br>";

			// expression for extract lyrics
			regexp.Compile(regex_lyrics, IGNORECASE | SINGLELINE);

			int noGroup = regexp.GetNamedGroupNumber("lyrics");
			int noTitle = regexp.GetNamedGroupNumber("title");

			MatchResult result = regexp.Match(buff.get_ptr());

			if (result.IsMatched())
			{
				int nStart = result.GetGroupStart(noTitle);
				int nEnd = result.GetGroupEnd(noTitle);

				pfc::string8_fast songTitle(buff.get_ptr() +nStart, nEnd - nStart);

				int levDist = LD(title, title.get_length(), songTitle, songTitle.get_length());

				float good = 1.0f - ((float)levDist / title.get_length());

				if (good < threshold)
					return "";

				nStart = result.GetGroupStart(noGroup);
				nEnd = result.GetGroupEnd(noGroup);

				pfc::string8 lyric(buff.get_ptr() + nStart, nEnd - nStart);

				convert_html_to_plain(lyric);

				if (lyric.get_length() > 0)
				{
					string_helper::remove_end_linebreaks(lyric);

					return lyric;
				}
			}
		}
	}
	catch (pfc::exception & e)
	{
		console_error(e.what());
		return "";
	}
	catch (...)
	{
		return "";
	}

	return "";
}
//************************************************************************
//*                              AZ Lyrics                               *
//************************************************************************
pfc::string_list_impl * provider_azlyrics::lookup(unsigned p_index, metadb_handle_list_cref p_meta, threaded_process_status & p_status, abort_callback & p_abort)
{
	TRACK_CALL_TEXT("provider_azlyrics::lookup");
	const float threshold = 0.8f;

	// Regular Expression Class
	CRegexpT<char> regexp;

	// Buffer
	pfc::string8 buff;
	pfc::string_list_impl * str_list = new pfc::string_list_impl;

	try
	{
		// Init fetcher
		curl_wrapper_simple fetcher(&m_config_item);

		for (t_size i = 0; i < p_meta.get_count(); ++i)
		{
			if (p_abort.is_aborting())
				break;

			// Sleep
			how_to_sleep(i);
			// Clear buff
			buff.reset();

			const metadb_handle_ptr & p = p_meta.get_item(i);

			if (p.is_empty())
			{
				str_list->add_item("");
				continue;
			}

			// Set progress
			pfc::string8_fast path = file_path_canonical(p->get_path());

			// add subsong index?
			if (p->get_subsong_index() > 0)
			{
				path.add_string(" /index:");
				path.add_string(pfc::format_uint(p->get_subsong_index()));
			}

			p_status.set_item_path(path);
			p_status.set_progress(i + 1, p_meta.get_count());

			pfc::string8_fast artist, title;

			file_info_impl info;
			p->get_info(info);

			// Get count of artists
			t_size count = info.meta_get_count_by_name("artist");

			// Get TITLE
			title = info.meta_get("title", 0);

			bool found = false;

			// Iterate through all artists listed
			for (int j = 0; j < count && !found; j++)
			{
				// Get Artist
				artist = info.meta_get("artist", j);

				// Search the lyrics
				pfc::string8_fast url("http://search.azlyrics.com/search.php?q=");

				url += fetcher.quote(artist);
				url += "+";
				url += fetcher.quote(title);

				// Get it now
				try
				{
					fetcher.fetch(url, buff);
				}
				catch (pfc::exception & e)
				{
					console_error(e.what());
					continue;
				}
				catch (...)
				{
					continue;
				}

				int resultStart = buff.find_first("<b>1.</b>");
				int startUrl = buff.find_first("<a href=\"", resultStart) + 9;
				int endUrl = buff.find_first("\"", startUrl);

				url = pfc::string8_fast(buff.get_ptr()+startUrl, endUrl - startUrl);

				// Get it now
				try
				{
					fetcher.fetch(url, buff);
				}
				catch (pfc::exception & e)
				{
					console_error(e.what());
					continue;
				}
				catch (...)
				{
					continue;
				}

				const char * regex_lyrics = "<!-- END OF RINGTONE 1 -->\\s*?<b>\"(?P<title>.*?)\"</b><br>\\s*?<br>\\s\\s(?P<lyrics>.*?)\\s<br>";

				// expression for extract lyrics
				regexp.Compile(regex_lyrics, IGNORECASE | SINGLELINE);

				int noGroup = regexp.GetNamedGroupNumber("lyrics");
				int noTitle = regexp.GetNamedGroupNumber("title");

				// match
				MatchResult result = regexp.Match(buff.get_ptr());

				if (result.IsMatched())
				{
					int nStart = result.GetGroupStart(noTitle);
					int nEnd = result.GetGroupEnd(noTitle);

					pfc::string8_fast songTitle(buff.get_ptr() +nStart, nEnd - nStart);

					int levDist = LD(title, title.get_length(), songTitle, songTitle.get_length());

					float good = 1.0f - ((float)levDist / title.get_length());

					if (good < threshold)
						continue;

					nStart = result.GetGroupStart(noGroup);
					nEnd = result.GetGroupEnd(noGroup);

					pfc::string8 lyric(buff.get_ptr() + nStart, nEnd - nStart);

					convert_html_to_plain(lyric);

					if (lyric.get_length() > 0)
					{
						string_helper::remove_end_linebreaks(lyric);

						str_list->add_item(lyric);
						found = true;
						continue;
					}
				}
			}
			if (found)
				continue;
			else
				str_list->add_item("");
		}
	}
	catch (pfc::exception & e)
	{
		console_error(e.what());
		delete str_list;
		return NULL;
	}
	catch (...)
	{
		delete str_list;
		return NULL;
	}

	return str_list;
}
pfc::string8 provider_darklyrics::lookup_one(unsigned p_index, const metadb_handle_ptr & p_meta, threaded_process_status & p_status, abort_callback & p_abort)
{
	TRACK_CALL_TEXT("provider_darklyrics::lookup_one");

	const float threshold = 0.8f;
	
	const pfc::string8 site = "darklyrics.com";

	// Regular Expression Class
	CRegexpT<char> regexp;
	MatchResult match;

	// Buffer
	pfc::string8 buff;

	try
	{
		// Init fetcher
		curl_wrapper_simple fetcher(&m_config_item);

		const metadb_handle_ptr & p = p_meta;

		if (p.is_empty())
		{
			return "";
		}

		pfc::string8_fast artist, title, album, keywords;

		file_info_impl info;
		p->get_info(info);

		// Get count of artists
		t_size count = info.meta_get_count_by_name("album");

		if (count == 0)
			return "";
		// Get Album
		album = info.meta_get("album", 0);

		count = info.meta_get_count_by_name("title");

		if (count == 0)
			return "";

		// Get TITLE
		title = info.meta_get("title", 0);

		count = info.meta_get_count_by_name("artist");

		// Iterate through all artists listed
		for (int j = 0; j < count; j++)
		{
			// Get Artist
			artist = info.meta_get("artist", j);		//Fetching from HTTP

			keywords = artist;
			keywords += "+";
			keywords += album;

			keywords.replace_char(' ', '+');

			// Get it now
			try
			{
				fetcher.fetch_googleluck(site, keywords, buff);
			}
			catch (pfc::exception & e)
			{
				console_error(e.what());
				continue;
			}
			catch (...)
			{
				continue;
			}

			const char * regex_ahref = "<a\\shref=\"#(?P<no>\\d+)\">(?P<text>.+?)</a>";

			// expression for extract lyrics
			regexp.Compile(regex_ahref, IGNORECASE);

			// match
			MatchResult result = regexp.Match(buff.get_ptr());

			int noGroup = regexp.GetNamedGroupNumber("no");
			int textGroup = regexp.GetNamedGroupNumber("text");

			int jump_to = 0;
			pfc::string8_fast compare = title;
			compare.insert_chars(0, ". ");
			float good;
			float best = 0.0f;


			while (result.IsMatched())
			{
				int gStart = result.GetGroupStart(noGroup);
				int gEnd = result.GetGroupEnd(noGroup);
				pfc::string8_fast temp(buff.get_ptr() + gStart, gEnd - gStart);
				int no = StrToIntA(temp);

				gStart = result.GetGroupStart(textGroup);
				gEnd = result.GetGroupEnd(textGroup);

				temp = pfc::string8_fast(buff.get_ptr()+gStart, gEnd - gStart);

				int levDist = LD(compare, compare.get_length(), temp, temp.get_length());

				good = 1.0f - (levDist / (float)compare.get_length());

				if (good >= threshold && good > best)
				{
					jump_to = no;
					best = good;
				}
				result = regexp.Match(buff.get_ptr(),result.GetEnd());
			}

			if (jump_to == 0)
			{
				continue;
			}

			char regex_lyrics[100];

			sprintf(regex_lyrics, "<a\\s+name=%d><font*(.*?)</font*(.*?)>(?P<lyrics>.+?)<font", jump_to);

			// expression for extract lyrics
			regexp.Compile(regex_lyrics, IGNORECASE | SINGLELINE);

			noGroup = regexp.GetNamedGroupNumber("lyrics");

			result = regexp.Match(buff.get_ptr());

			if (result.IsMatched())
			{
				int nStart = result.GetGroupStart(noGroup);
				int nEnd = result.GetGroupEnd(noGroup);
				pfc::string8 lyric(buff.get_ptr() + nStart, nEnd - nStart);

				convert_html_to_plain(lyric);

				if (lyric.get_length() > 0)
				{
					string_helper::remove_beginning_linebreaks(lyric);
					string_helper::remove_end_linebreaks(lyric);
					string_helper::remove_beginning(lyric, ' ');
					string_helper::remove_beginning(lyric, '\t');
					return lyric;
				}
			}
		}
	}
	catch (pfc::exception & e)
	{
		console_error(e.what());
		return "";
	}
	catch (...)
	{
		return "";
	}

	return "";
}
//************************************************************************
//*                             Dark Lyrics                              *
//************************************************************************
pfc::string_list_impl * provider_darklyrics::lookup(unsigned p_index, metadb_handle_list_cref p_meta, threaded_process_status & p_status, abort_callback & p_abort)
{
	TRACK_CALL_TEXT("provider_darklyrics::lookup");

	const float threshold = 0.8f;

	const pfc::string8 site = "darklyrics.com";

	// Regular Expression Class
	CRegexpT<char> regexp;
	MatchResult match;

	// Buffer
	pfc::string8 buff;
	pfc::string_list_impl * str_list = new pfc::string_list_impl;

	try
	{
		// Init fetcher
		curl_wrapper_simple fetcher(&m_config_item);

		for (t_size i = 0; i < p_meta.get_count(); ++i)
		{
			if (p_abort.is_aborting())
				break;

			// Sleep
			how_to_sleep(i);
			// Clear buff
			buff.reset();

			const metadb_handle_ptr & p = p_meta.get_item(i);

			if (p.is_empty())
			{
				str_list->add_item("");
				continue;
			}

			// Set progress
			pfc::string8_fast path = file_path_canonical(p->get_path());

			// add subsong index?
			if (p->get_subsong_index() > 0)
			{
				path.add_string(" /index:");
				path.add_string(pfc::format_uint(p->get_subsong_index()));
			}

			p_status.set_item_path(path);
			p_status.set_progress(i + 1, p_meta.get_count());

			pfc::string8_fast artist, title, album, keywords;

			file_info_impl info;
			p->get_info(info);

			// Get count of artists
			t_size count = info.meta_get_count_by_name("album");

			if (count == 0)
				continue;
			// Get Album
			album = info.meta_get("album", 0);

			count = info.meta_get_count_by_name("title");

			if (count == 0)
				continue;

			// Get TITLE
			title = info.meta_get("title", 0);

			count = info.meta_get_count_by_name("artist");

			bool found = false;

			// Iterate through all artists listed
			for (int j = 0; j < count && !found; j++)
			{
				// Get Artist
				artist = info.meta_get("artist", j);

				keywords = artist;
				keywords += "+";
				keywords += album;

				keywords.replace_char(' ', '+');

				// Get it now
				try
				{
					fetcher.fetch_googleluck(site, keywords, buff);
				}
				catch (pfc::exception & e)
				{
					console_error(e.what());
					continue;
				}
				catch (...)
				{
					continue;
				}

				const char * regex_ahref = "<a\\shref=\"#(?P<no>\\d+)\">(?P<text>.+?)</a>";

				// expression for extract lyrics
				regexp.Compile(regex_ahref, IGNORECASE | SINGLELINE);

				// match
				MatchResult result = regexp.Match(buff.get_ptr());

				int noGroup = regexp.GetNamedGroupNumber("no");
				int textGroup = regexp.GetNamedGroupNumber("text");

				int jump_to = 0;
				pfc::string8_fast compare = title;
				compare.insert_chars(0, ". ");
				float good;
				float best = 0.0f;

				while (result.IsMatched())
				{
					int gStart = result.GetGroupStart(noGroup);
					int gEnd = result.GetGroupEnd(noGroup);
					pfc::string8_fast temp(buff.get_ptr() + gStart, gEnd - gStart);
					int no = StrToIntA(temp);

					gStart = result.GetGroupStart(textGroup);
					gEnd = result.GetGroupEnd(textGroup);

					temp = pfc::string8_fast(buff.get_ptr()+gStart, gEnd - gStart);

					if (temp.find_first(title) != -1)
					{
						jump_to = no;
						break;
					}

					int levDist = LD(compare, compare.get_length(), temp, temp.get_length());

					good = 1.0f - (levDist / (float)compare.get_length());

					if (good >= threshold && good > best)
					{
						jump_to = no;
						best = good;
					}

					result = regexp.Match(buff.get_ptr(),result.GetEnd());
				}

				if (jump_to == 0)
				{
					continue;
				}

				char regex_lyrics[100];

				sprintf(regex_lyrics, "<a\\s+name=%d><font*(.*?)</font*(.*?)>(?P<lyrics>.+?)<font", jump_to);

				// expression for extract lyrics
				regexp.Compile(regex_lyrics, IGNORECASE | SINGLELINE);

				noGroup = regexp.GetNamedGroupNumber("lyrics");

				result = regexp.Match(buff.get_ptr());

				if (result.IsMatched())
				{
					int nStart = result.GetGroupStart(noGroup);
					int nEnd = result.GetGroupEnd(noGroup);
					pfc::string8 lyric(buff.get_ptr() + nStart, nEnd - nStart);

					convert_html_to_plain(lyric);

					if (lyric.get_length() > 0)
					{
						string_helper::remove_beginning_linebreaks(lyric);
						string_helper::remove_end_linebreaks(lyric);
						string_helper::remove_beginning(lyric, ' ');
						string_helper::remove_beginning(lyric, '\t');

						console::print(lyric);

						str_list->add_item(lyric);
						found = true;
						continue;
					}
				}
			}
			if (found)
				continue;
			else
				str_list->add_item("");
		}
	}
	catch (pfc::exception & e)
	{
		console_error(e.what());
		delete str_list;
		return NULL;
	}
	catch (...)
	{
		delete str_list;
		return NULL;
	}

	return str_list;
}
pfc::string8 provider_lyricsplugin::lookup_one(unsigned p_index, const metadb_handle_ptr & p_meta, threaded_process_status & p_status, abort_callback & p_abort)
{
	// Regular Expression Class
	CRegexpT<char> regexp;
	MatchResult match;

	// Buffer
	pfc::string8 buff;

	try
	{
		// Init fetcher
		curl_wrapper_simple fetcher(&m_config_item);

		// Clear buff
		buff.reset();

		const metadb_handle_ptr & p = p_meta;

		if (p.is_empty())
		{
			return "";
		}

		pfc::string8_fast artist, title;
		static_api_ptr_t<titleformat_compiler> compiler;
		service_ptr_t<titleformat_object> script;

		file_info_impl info;
		p->get_info(info);

		// Get count of artists
		t_size count = info.meta_get_count_by_name("artist");

		// Get TITLE
		compiler->compile_safe(script, "[%title%]");
		p->format_title(NULL, title, script, NULL);

		bool found = false;

		// Iterate through all artists listed
		for (int j = 0; j < count && !found; j++)
		{
			// Get Artist
			artist = info.meta_get("artist", j);
			// Fetching from HTTP
			// Set HTTP Address
			pfc::string8_fast url("http://www.squirrelscript.net/mediamonkey/Lyricator/lyrics.php?artist=");

			// URL = http://www.squirrelscript.net/mediamonkey/Lyricator/lyrics.php?artist=<Artist>&title=<Title>

			url += fetcher.quote(artist);
			url += "&title=";
			url += fetcher.quote(title);

			// Get it now
			try
			{
				fetcher.fetch(url, buff);
			}
			catch (pfc::exception & e)
			{
				console_error(e.what());
				continue;
			}
			catch (...)
			{
				continue;
			}

			const char * regex_lyricbox = "<div\\s+id\\s*?=\\s*?\"lyrics\"\\s*?>[\\r\\n]*(.*?)[\\r\\n]*</div>";

			// expression for extract lyrics
			regexp.Compile(regex_lyricbox, SINGLELINE);

			// match
			MatchResult result = regexp.Match(buff.get_ptr());

			// Get Group
			if (result.IsMatched())
			{
				int nStart = result.GetGroupStart(1);
				int nEnd = result.GetGroupEnd(1);
				int index;
				pfc::string8_fast lyric(buff.get_ptr() + nStart, nEnd - nStart);

				convert_html_to_plain(lyric);

				index = lyric.find_first("www.tunerankings.com");

				if (index == 0)
				{
					continue;
				}
				else if (index != -1)
				{
					lyric.remove_chars(index, 20);
				}

				if (string_trim(lyric).get_length() > 0)
				{
					return lyric;
				}
			}
		}
	}
	catch (pfc::exception & e)
	{
		console_error(e.what());
		return "";
	}
	catch (...)
	{
		return "";
	}

	return "";
}
pfc::string8 provider_leoslyrics::lookup_one(unsigned p_index, const metadb_handle_ptr & p_meta, threaded_process_status & p_status, abort_callback & p_abort)
{
	TRACK_CALL_TEXT("provider_leoslyrics::lookup_one");
	// Regular Expression Class
	CRegexpT<char> regexp;

	// Buffer
	pfc::string8 buff;

	try
	{
		// Init fetcher
		curl_wrapper_simple fetcher(&m_config_item);

		const metadb_handle_ptr & p = p_meta;

		if (p.is_empty())
		{
			return "";
		}

		pfc::string8_fast artist, title, album;

		file_info_impl info;
		p->get_info(info);

		// Get count of artists
		t_size count = info.meta_get_count_by_name("artist");

		// Get TITLE
		static_api_ptr_t<titleformat_compiler> compiler;
		service_ptr_t<titleformat_object> script;

		compiler->compile_safe(script, "%title%");
		p->format_title(NULL, title, script, NULL);

		// Iterate through all artists listed
		for (int j = 0; j < count; j++)
		{
			// Get Artist
			artist = info.meta_get("artist", j);

			//Fetching from HTTP
			// Set HTTP Address
			pfc::string8_fast url("http://77.79.210.222/api_search.php?auth=LeosLyrics5&artist=");
			pfc::string8_fast host("api.leoslyrics.com");

			//URL = http://77.79.210.222/api_search.php?auth=LeosLyrics5&artist=<artist>&songtitle=<title>

			url += fetcher.quote(artist);
			url += "&songtitle=";
			url += fetcher.quote(title);


			// Get it now
			try
			{
				fetcher.fetch_host(host, url, buff, p_abort);
			}
			catch (pfc::exception & e)
			{
				console_error(e.what());
				continue;
			}
			catch (...)
			{
				continue;
			}

			const char * regex_hid = "code=\"(?P<code>.).*?hid=\"(?P<hid>.*?)\"";

			// expression for extract lyrics
			regexp.Compile(regex_hid, IGNORECASE | SINGLELINE);

			pfc::string8_fast hid;

			int codeGroup = regexp.GetNamedGroupNumber("code");
			int hidGroup = regexp.GetNamedGroupNumber("hid");

			MatchResult result = regexp.Match(buff.get_ptr());

			if (result.IsMatched())
			{
				int nStart = result.GetGroupStart(codeGroup);
				int nEnd = result.GetGroupEnd(codeGroup);

				pfc::string8_fast code(buff.get_ptr() + nStart, nEnd-nStart);

				if (code.find_first("0") == -1)
					continue;

				nStart = result.GetGroupStart(hidGroup);
				nEnd = result.GetGroupEnd(hidGroup);

				hid = pfc::string8_fast(buff.get_ptr() + nStart, nEnd - nStart);

				url = "http://77.79.210.222/api_search.php?auth=LeosLyrics5&hid=";
				url += hid;

				//URL = http://77.79.210.222/api_search.php?auth=LeosLyrics5&hid=<songID>

				try
				{
					fetcher.fetch_host(host, url, buff, p_abort);
				}
				catch (pfc::exception & e)
				{
					console_error(e.what());
					continue;
				}
				catch (...)
				{
					continue;
				}

				const char * regex_hid = "<text>\\s(?P<lyrics>.*?)\\s</text>";

				// expression for extract lyrics
				regexp.Compile(regex_hid, IGNORECASE | SINGLELINE);

				int lyricsGroup = regexp.GetNamedGroupNumber("lyrics");

				result = regexp.Match(buff.get_ptr());

				if (result.IsMatched())
				{
					nStart = result.GetGroupStart(lyricsGroup);
					nEnd = result.GetGroupEnd(lyricsGroup);

					pfc::string8 lyric(buff.get_ptr() + nStart, nEnd - nStart);

					if (lyric.get_length() > 0)
					{
						string_helper::remove_beginning_linebreaks(lyric);
						string_helper::remove_end_linebreaks(lyric);

						return lyric;
					}
				}
			}
		}
	}
	catch (pfc::exception & e)
	{
		console_error(e.what());
		return "";
	}
	catch (...)
	{
		return "";
	}

	return "";
}
//************************************************************************
//*                           Leo's Lyrics                               *
//************************************************************************
pfc::string_list_impl * provider_leoslyrics::lookup(unsigned p_index, metadb_handle_list_cref p_meta, threaded_process_status & p_status, abort_callback & p_abort)
{
	TRACK_CALL_TEXT("provider_leoslyrics::lookup");

	// Regular Expression Class
	CRegexpT<char> regexp;

	// Buffer
	pfc::string8 buff;
	pfc::string_list_impl * str_list = new pfc::string_list_impl;

	try
	{
		// Init fetcher
		curl_wrapper_simple fetcher(&m_config_item);

		for (t_size i = 0; i < p_meta.get_count(); ++i)
		{
			if (p_abort.is_aborting())
				break;

			// Sleep
			how_to_sleep(i);
			// Clear buff
			buff.reset();

			const metadb_handle_ptr & p = p_meta.get_item(i);

			if (p.is_empty())
			{
				str_list->add_item("");
				continue;
			}

			// Set progress
			pfc::string8_fast path = file_path_canonical(p->get_path());

			// add subsong index?
			if (p->get_subsong_index() > 0)
			{
				path.add_string(" /index:");
				path.add_string(pfc::format_uint(p->get_subsong_index()));
			}

			p_status.set_item_path(path);
			p_status.set_progress(i + 1, p_meta.get_count());

			pfc::string8_fast artist, title;

			file_info_impl info;
			p->get_info(info);

			// Get count of artists
			t_size count = info.meta_get_count_by_name("artist");

			// Get TITLE
			static_api_ptr_t<titleformat_compiler> compiler;
			service_ptr_t<titleformat_object> script;
	
			compiler->compile_safe(script, "%title%");
			p->format_title(NULL, title, script, NULL);

			bool found = false;

			// Iterate through all artists listed
			for (int j = 0; j < count && !found; j++)
			{
				// Get Artist
				artist = info.meta_get("artist", j);


				pfc::string8_fast url("http://77.79.210.222/api_search.php?auth=LeosLyrics5&artist=");
				pfc::string8_fast host("api.leoslyrics.com");

				//URL = http://77.79.210.222/api_search.php?auth=LeosLyrics5&artist=<artist>&songtitle=<title>

				url += fetcher.quote(artist);
				url += "&songtitle=";
				url += fetcher.quote(title);

				// Get it now
				try
				{
					fetcher.fetch_host(host, url, buff, p_abort);
				}
				catch (pfc::exception & e)
				{
					console_error(e.what());
					continue;
				}
				catch (...)
				{
					continue;
				}

				const char * regex_hid = "code=\"(?P<code>.).*?hid=\"(?P<hid>.*?)\"";

				// expression for extract lyrics
				regexp.Compile(regex_hid, IGNORECASE | SINGLELINE);

				pfc::string8_fast hid;

				int codeGroup = regexp.GetNamedGroupNumber("code");
				int hidGroup = regexp.GetNamedGroupNumber("hid");

				MatchResult result = regexp.Match(buff.get_ptr());

				if (result.IsMatched())
				{
					int nStart = result.GetGroupStart(codeGroup);
					int nEnd = result.GetGroupEnd(codeGroup);

					pfc::string8_fast code(buff.get_ptr() + nStart, nEnd-nStart);

					if (code.find_first("0") == -1)
						continue;

					nStart = result.GetGroupStart(hidGroup);
					nEnd = result.GetGroupEnd(hidGroup);

					hid = pfc::string8_fast(buff.get_ptr() + nStart, nEnd - nStart);
				
					url = "http://77.79.210.222/api_search.php?auth=LeosLyrics5&hid=";
					url += hid;

					//URL = http://77.79.210.222/api_search.php?auth=LeosLyrics5&hid=<songID>

					try
					{
						fetcher.fetch_host(host, url, buff, p_abort);
					}
					catch (pfc::exception & e)
					{
						console_error(e.what());
						continue;
					}
					catch (...)
					{
						continue;
					}

					const char * regex_hid = "<text>\\s(?P<lyrics>.*?)\\s</text>";

					// expression for extract lyrics
					regexp.Compile(regex_hid, IGNORECASE | SINGLELINE);

					int lyricsGroup = regexp.GetNamedGroupNumber("lyrics");

					result = regexp.Match(buff.get_ptr());

					if (result.IsMatched())
					{
						nStart = result.GetGroupStart(lyricsGroup);
						nEnd = result.GetGroupEnd(lyricsGroup);

						pfc::string8 lyric(buff.get_ptr() + nStart, nEnd - nStart);

						if (lyric.get_length() > 0)
						{
							found = true;

							string_helper::remove_beginning_linebreaks(lyric);
							string_helper::remove_end_linebreaks(lyric);

							str_list->add_item(lyric);
							continue;
						}
					}
				}
			}
			if (found)
				continue;
			else
				str_list->add_item("");
		}
	}
	catch (pfc::exception & e)
	{
		console_error(e.what());
		delete str_list;
		return NULL;
	}
	catch (...)
	{
		delete str_list;
		return NULL;
	}

	return str_list;
}
pfc::string8 provider_lyricwiki::lookup_one(unsigned p_index, const metadb_handle_ptr & p_meta, threaded_process_status & p_status, abort_callback & p_abort)
{
	TRACK_CALL_TEXT("provider_lyricwiki::lookup_one");
	// Regular Expression Class
	CRegexpT<char> regexp;

	// Buffer
	pfc::string8 buff;

	try
	{
		// Init fetcher
		curl_wrapper_simple fetcher(&m_config_item);

		const metadb_handle_ptr & p = p_meta;

		if (p.is_empty())
		{
			return "";
		}

		pfc::string8_fast artist, title, album;

		file_info_impl info;
		p->get_info(info);

		// Get count of artists
		t_size count = info.meta_get_count_by_name("artist");

		// Get TITLE
		static_api_ptr_t<titleformat_compiler> compiler;
		service_ptr_t<titleformat_object> script;

		compiler->compile_safe(script, "$replace($caps2(%title%),' ','_')");
		p->format_title(NULL, title, script, NULL);

		// Iterate through all artists listed
		for (int j = 0; j < count; j++)
		{
			// Get Artist
			artist = info.meta_get("artist", j);

			artist.replace_char(' ', '_');

			//Fetching from HTTP
			// Set HTTP Address
			pfc::string8_fast url("http://lyrics.wikia.com/index.php?title=");

			//URL = http://lyrics.wikia.com/index.php?title=Blackmore%27s_Night:I_Guess_It_Doesn%27t_Matter_Anymore&action=edit

			url += fetcher.quote(artist);
			url += ":";
			url += fetcher.quote(title);


			// Get it now
			try
			{
				fetcher.fetch(url, buff);
			}
			catch (pfc::exception & e)
			{
				console_error(e.what());
				continue;
			}
			catch (...)
			{
				continue;
			}

			const char * regex_lyrics = "'lyricbox'(?P<instrumental>.*?)</div>(?P<lyrics>.*?)<!--";

			// expression for extract lyrics
			regexp.Compile(regex_lyrics, IGNORECASE | SINGLELINE);

			int noGroup = regexp.GetNamedGroupNumber("lyrics");
			int instGroup = regexp.GetNamedGroupNumber("instrumental");

			MatchResult result = regexp.Match(buff.get_ptr());

			if (result.IsMatched())
			{
				int nStart = result.GetGroupStart(instGroup);
				int nEnd = result.GetGroupEnd(instGroup);

				pfc::string8_fast test(buff.get_ptr() + nStart, nEnd-nStart);

				if (test.find_first("Instrumental") != -1)
				{
					return "[Instrumental]";
				}

				nStart = result.GetGroupStart(noGroup);
				nEnd = result.GetGroupEnd(noGroup);

				pfc::string8 lyric(buff.get_ptr() + nStart, nEnd - nStart);

				convert_html_to_plain(lyric);

				if (lyric.get_length() > 0)
				{
					string_helper::remove_beginning_linebreaks(lyric);
					string_helper::remove_end_linebreaks(lyric);
					return lyric;
				}
			}
		}
	}
	catch (pfc::exception & e)
	{
		console_error(e.what());
		return "";
	}
	catch (...)
	{
		return "";
	}

	return "";
}
Esempio n. 10
0
//************************************************************************
//*                              LyricWiki                               *
//************************************************************************
pfc::string_list_impl * provider_lyricwiki::lookup(unsigned p_index, metadb_handle_list_cref p_meta, threaded_process_status & p_status, abort_callback & p_abort)
{
	TRACK_CALL_TEXT("provider_lyricwiki::lookup");

	// Regular Expression Class
	CRegexpT<char> regexp;

	// Buffer
	pfc::string8 buff;
	pfc::string_list_impl * str_list = new pfc::string_list_impl;

	try
	{
		// Init fetcher
		curl_wrapper_simple fetcher(&m_config_item);

		for (t_size i = 0; i < p_meta.get_count(); ++i)
		{
			if (p_abort.is_aborting())
				break;

			// Sleep
			how_to_sleep(i);
			// Clear buff
			buff.reset();

			const metadb_handle_ptr & p = p_meta.get_item(i);

			if (p.is_empty())
			{
				str_list->add_item("");
				continue;
			}

			// Set progress
			pfc::string8_fast path = file_path_canonical(p->get_path());

			// add subsong index?
			if (p->get_subsong_index() > 0)
			{
				path.add_string(" /index:");
				path.add_string(pfc::format_uint(p->get_subsong_index()));
			}

			p_status.set_item_path(path);
			p_status.set_progress(i + 1, p_meta.get_count());

			pfc::string8_fast artist, title;

			file_info_impl info;
			p->get_info(info);

			// Get count of artists
			t_size count = info.meta_get_count_by_name("artist");

			// Get TITLE
			static_api_ptr_t<titleformat_compiler> compiler;
			service_ptr_t<titleformat_object> script;
	
			compiler->compile_safe(script, "$replace($caps2(%title%),' ','_')");
			p->format_title(NULL, title, script, NULL);

			bool found = false;

			// Iterate through all artists listed
			for (int j = 0; j < count && !found; j++)
			{
				// Get Artist
				artist = info.meta_get("artist", j);

				artist.replace_char(' ', '_');

				//Fetching from HTTP
				// Set HTTP Address
				pfc::string8_fast url("http://lyrics.wikia.com/index.php?title=");

				//URL = http://lyrics.wikia.com/index.php?title=Blackmore%27s_Night:I_Guess_It_Doesn%27t_Matter_Anymore&action=edit

				url += fetcher.quote(artist);
				url += ":";
				url += fetcher.quote(title);

				// Get it now
				try
				{
					fetcher.fetch(url, buff);
				}
				catch (pfc::exception & e)
				{
					console_error(e.what());
					continue;
				}
				catch (...)
				{
					continue;
				}

				const char * regex_lyrics = "'lyricbox'(?P<instrumental>.*?)</div>(?P<lyrics>.*?)<!--";

				// expression for extract lyrics
				regexp.Compile(regex_lyrics, IGNORECASE | SINGLELINE);

				int noGroup = regexp.GetNamedGroupNumber("lyrics");
				int instGroup = regexp.GetNamedGroupNumber("instrumental");

				MatchResult result = regexp.Match(buff.get_ptr());

				if (result.IsMatched())
				{
					int nStart = result.GetGroupStart(instGroup);
					int nEnd = result.GetGroupEnd(instGroup);

					pfc::string8_fast test(buff.get_ptr() + nStart, nEnd-nStart);

					if (test.find_first("Instrumental") != -1)
					{
						found = true;

						str_list->add_item("[Instrumental]");
						continue;
					}

					nStart = result.GetGroupStart(noGroup);
					nEnd = result.GetGroupEnd(noGroup);

					pfc::string8 lyric(buff.get_ptr() + nStart, nEnd - nStart);

					convert_html_to_plain(lyric);

					if (lyric.get_length() > 0)
					{
						found = true;

						string_helper::remove_beginning_linebreaks(lyric);
						string_helper::remove_end_linebreaks(lyric);

						str_list->add_item(lyric);
						continue;
					}
				}
			}
			if (found)
				continue;
			else
				str_list->add_item("");
		}
	}
	catch (pfc::exception & e)
	{
		console_error(e.what());
		delete str_list;
		return NULL;
	}
	catch (...)
	{
		delete str_list;
		return NULL;
	}

	return str_list;
}