// 仅用来搜索关键字,结果保存在char* resultFile中 void SearchKeysInFile(const char* file,const char * keys,const char * resultFile) { // text //char * text = "12.5, a1.1, 0.123, 178"; // declare //static CRegexpT <char> regexp("\\b\\d+\\.\\d+"); //char * key=m_Keyword.GetBuffer(); static CRegexpT <char> regexp(keys); CContext * pContext; //读文件,分块查找 std::ifstream ifs; ifs.open(file,ifstream::binary); ifs.seekg(0,ifstream::beg); std::ofstream ofs; ofs.open(resultFile,ofstream::binary); ofs.clear(); //ofs.seekg(0,ofstream::beg); char * pBuffer=new char[BLOCK_SIZE]; while(!ifs.eof()) { ifs.read(pBuffer,BLOCK_SIZE); ////////////////////////////////////////////////////////////////////////// //分块查找 // prepare pContext= regexp.PrepareMatch(pBuffer); // loop MatchResult result = regexp.Match(pContext); while( result.IsMatched() ) { // 写入结果文件 // printf("%.*s\n", result.GetEnd() - result.GetStart(), pBuffer + result.GetStart()); // 先转换,再写入 ofs.write(pBuffer + result.GetStart(),result.GetEnd() - result.GetStart()); ofs<<endl; ofs<<"——————————————————————"<<endl; // get next result = regexp.Match(pContext); } } // 搜索成功结束 ifs.close(); ofs.close(); // release regexp.ReleaseContext(pContext); delete[] pBuffer; }
int test_all_number( const char *string ) { //declare static CRegexpT <char> regexp("\\d+"); //test MatchResult result = regexp.MatchExact( string ); //matched or not return result.IsMatched(); }
bool Foam::regExp::match(const std::string& str) const { bool isExactMatch = false; if (preg_ && !str.empty()) { const MatchResult result = preg_->MatchExact(str.c_str()); isExactMatch = (0 < result.IsMatched()); } return isExactMatch; }
// Applies a regular-expression replacement to `Data`.
//
// Data            : text to process.
// lpRegexp        : pattern to compile.
// dwRegexpType    : flags passed to the CRegexpT constructor.
// lpRegexpReplace : replacement expression.
//
// Returns the replaced text, or an empty CString when the pattern does not
// match `Data` at all.
CString CCommands::XApplicationEvents::ReplaceRegexpData(
    IN CString Data,
    IN LPCTSTR lpRegexp,
    IN DWORD dwRegexpType,
    IN LPCTSTR lpRegexpReplace)
{
    CRegexpT<TCHAR> pattern(lpRegexp, dwRegexpType);
    CString replaced;

    // Probe first: without a match the result stays empty.
    MatchResult probe = pattern.Match(Data);
    if (!probe.IsMatched())
    {
        return replaced;
    }

    // Replace() hands back a buffer that must be given back via ReleaseString().
    TCHAR* rawText = pattern.Replace(Data, lpRegexpReplace);
    replaced = rawText;
    pattern.ReleaseString(rawText);

    return replaced;
}
std::string::size_type Foam::regExp::find(const std::string& str) const { std::string::size_type pos = std::string::npos; if (preg_ && !str.empty()) { const MatchResult result = preg_->Match(str.c_str()); if (0 < result.IsMatched()) { pos = result.GetStart(); } } return pos; }
int main(int argc, char * argv[]) { std::ifstream fs("C:\\Lookup\\112.127.141.86.html"); std::string in; load_file(in, fs); fs.close(); //static CRegexpT <char> regexp1("\\d+"); static CRegexpT <char> regexp1("target=\\\"_blank\\\"\\>(.+?)\\</a\\>\\</td\\>"); // loop MatchResult result1 = regexp1.Match(in.c_str()); while( result1.IsMatched() ) { //GetEnd匹配成功后,获取所匹配到的子字符串的结束位置。如果匹配失败,则返回负值。 //GetStart匹配成功后,获取所匹配到的子字符串的开始位置。如果匹配失败,则返回负值。 printf("%.*s\n", result1.GetEnd() - result1.GetStart(), in.c_str() + result1.GetStart()); // get next result1 = regexp1.Match(in.c_str(), result1.GetEnd()); //返回匹配结果 MatchResult 对象。 // 通过 MatchResult 对象,可以得知是否匹配成功。如果成功,通过 MatchResult 对象可以获取捕获信息。 } // text char * text = "http://www.cppprog.com/2009/0112/48.html"; // declare static CRegexpT <char> regexp("\\d+"); // loop MatchResult result = regexp.Match(text); //IsMatched返回非零值表示匹配成功,返回 0 表示匹配失败。 while( result.IsMatched() ) { //GetEnd匹配成功后,获取所匹配到的子字符串的结束位置。如果匹配失败,则返回负值。 //GetStart匹配成功后,获取所匹配到的子字符串的开始位置。如果匹配失败,则返回负值。 printf("%.*s\n", result.GetEnd() - result.GetStart(), text + result.GetStart()); // get next result = regexp.Match(text, result.GetEnd()); //返回匹配结果 MatchResult 对象。 // 通过 MatchResult 对象,可以得知是否匹配成功。如果成功,通过 MatchResult 对象可以获取捕获信息。 } return 0; }
bool Foam::regExp::match(const string& str, List<string>& groups) const { bool isMatch = false; if (preg_ && !str.empty()) { const MatchResult results = preg_->MatchExact(str.c_str()); isMatch = (0 < results.IsMatched()); if (isMatch) { int const notFound = -1; int start, end; const int groupsCount = results.MaxGroupNumber(); groups.setSize(groupsCount); for (int i = 0; groupsCount > i; ++i) { start = results.GetGroupStart(i); end = results.GetGroupEnd(i); if ((notFound < start) && (notFound < end)) { groups[i] = str.substr(start, end - start); } else { groups[i].clear(); } } } } if (!isMatch) { groups.clear(); } return isMatch; }
// Extracts the text captured by one group of a regular expression.
//
// Data         : text to search.
// lpRegexp     : pattern to compile.
// dwRegexpType : flags passed to the CRegexpT constructor.
// sNamedGroup  : name of the capture group to return; when NULL, capture
//                group 1 is returned instead.
//
// Returns the captured text, or an empty CString when the pattern does not
// match, the named group does not exist, or the requested group did not take
// part in the match.
CString CCommands::XApplicationEvents::GetRegexpData(
    IN CString Data,
    IN LPCTSTR lpRegexp,
    IN DWORD dwRegexpType,
    LPCTSTR sNamedGroup /*= NULL*/)
{
    CRegexpT<TCHAR> regexp(lpRegexp, dwRegexpType);
    CString strRetData;

    MatchResult result = regexp.Match(Data);
    if (!result.IsMatched())
    {
        return strRetData;
    }

    // Pick the group: \1 by default, otherwise resolve the group name.
    int iGroup = 1;
    if (sNamedGroup != NULL)
    {
        iGroup = regexp.GetNamedGroupNumber(sNamedGroup);
        if (iGroup < 0)
        {
            return CString();
        }
    }

    const int iStart = result.GetGroupStart(iGroup);
    const int iEnd = result.GetGroupEnd(iGroup);

    // The same guard now covers named groups too: a group that did not
    // participate reports negative offsets, which must not reach Mid().
    if (iStart < 0 || iEnd < 0)
    {
        return CString();
    }

    strRetData = Data.Mid(iStart, iEnd - iStart);
    return strRetData;
}
// Tests whether `pPattern` matches anywhere in `pText`.
//
// pPattern  : regular expression, compiled with NO_FLAG.
// pText     : text to search.
// lastError : receives an error description (at most SCRIPT_MAX_LINE_LEN
//             characters) when the function returns -1.
//
// Returns 1 on a match, 0 on no match, and -1 if an exception was raised, in
// which case the fault is also counted in the profile statistics.
int Str_RegExMatch( LPCTSTR pPattern, LPCTSTR pText, TCHAR * lastError )
{
    try
    {
        CRegexp expressionformatch(pPattern, NO_FLAG);
        MatchResult result = expressionformatch.Match(pText);
        return result.IsMatched() ? 1 : 0;
    }
    catch (const std::bad_alloc & e)    // by const reference: no copy, no slicing
    {
        strcpylen(lastError, e.what(), SCRIPT_MAX_LINE_LEN);
        CurrentProfileData.Count(PROFILE_STAT_FAULTS, 1);
        return -1;
    }
    catch ( ... )
    {
        strcpylen(lastError, "Unknown", SCRIPT_MAX_LINE_LEN);
        CurrentProfileData.Count(PROFILE_STAT_FAULTS, 1);
        return -1;
    }
}
pfc::string8 provider_darklyrics::lookup_one(unsigned p_index, const metadb_handle_ptr & p_meta, threaded_process_status & p_status, abort_callback & p_abort) { TRACK_CALL_TEXT("provider_darklyrics::lookup_one"); const float threshold = 0.8f; const pfc::string8 site = "darklyrics.com"; // Regular Expression Class CRegexpT<char> regexp; MatchResult match; // Buffer pfc::string8 buff; try { // Init fetcher curl_wrapper_simple fetcher(&m_config_item); const metadb_handle_ptr & p = p_meta; if (p.is_empty()) { return ""; } pfc::string8_fast artist, title, album, keywords; file_info_impl info; p->get_info(info); // Get count of artists t_size count = info.meta_get_count_by_name("album"); if (count == 0) return ""; // Get Album album = info.meta_get("album", 0); count = info.meta_get_count_by_name("title"); if (count == 0) return ""; // Get TITLE title = info.meta_get("title", 0); count = info.meta_get_count_by_name("artist"); // Iterate through all artists listed for (int j = 0; j < count; j++) { // Get Artist artist = info.meta_get("artist", j); //Fetching from HTTP keywords = artist; keywords += "+"; keywords += album; keywords.replace_char(' ', '+'); // Get it now try { fetcher.fetch_googleluck(site, keywords, buff); } catch (pfc::exception & e) { console_error(e.what()); continue; } catch (...) { continue; } const char * regex_ahref = "<a\\shref=\"#(?P<no>\\d+)\">(?P<text>.+?)</a>"; // expression for extract lyrics regexp.Compile(regex_ahref, IGNORECASE); // match MatchResult result = regexp.Match(buff.get_ptr()); int noGroup = regexp.GetNamedGroupNumber("no"); int textGroup = regexp.GetNamedGroupNumber("text"); int jump_to = 0; pfc::string8_fast compare = title; compare.insert_chars(0, ". 
"); float good; float best = 0.0f; while (result.IsMatched()) { int gStart = result.GetGroupStart(noGroup); int gEnd = result.GetGroupEnd(noGroup); pfc::string8_fast temp(buff.get_ptr() + gStart, gEnd - gStart); int no = StrToIntA(temp); gStart = result.GetGroupStart(textGroup); gEnd = result.GetGroupEnd(textGroup); temp = pfc::string8_fast(buff.get_ptr()+gStart, gEnd - gStart); int levDist = LD(compare, compare.get_length(), temp, temp.get_length()); good = 1.0f - (levDist / (float)compare.get_length()); if (good >= threshold && good > best) { jump_to = no; best = good; } result = regexp.Match(buff.get_ptr(),result.GetEnd()); } if (jump_to == 0) { continue; } char regex_lyrics[100]; sprintf(regex_lyrics, "<a\\s+name=%d><font*(.*?)</font*(.*?)>(?P<lyrics>.+?)<font", jump_to); // expression for extract lyrics regexp.Compile(regex_lyrics, IGNORECASE | SINGLELINE); noGroup = regexp.GetNamedGroupNumber("lyrics"); result = regexp.Match(buff.get_ptr()); if (result.IsMatched()) { int nStart = result.GetGroupStart(noGroup); int nEnd = result.GetGroupEnd(noGroup); pfc::string8 lyric(buff.get_ptr() + nStart, nEnd - nStart); convert_html_to_plain(lyric); if (lyric.get_length() > 0) { string_helper::remove_beginning_linebreaks(lyric); string_helper::remove_end_linebreaks(lyric); string_helper::remove_beginning(lyric, ' '); string_helper::remove_beginning(lyric, '\t'); return lyric; } } } } catch (pfc::exception & e) { console_error(e.what()); return ""; } catch (...) { return ""; } return ""; }
//************************************************************************ //* Dark Lyrics * //************************************************************************ pfc::string_list_impl * provider_darklyrics::lookup(unsigned p_index, metadb_handle_list_cref p_meta, threaded_process_status & p_status, abort_callback & p_abort) { TRACK_CALL_TEXT("provider_darklyrics::lookup"); const float threshold = 0.8f; const pfc::string8 site = "darklyrics.com"; // Regular Expression Class CRegexpT<char> regexp; MatchResult match; // Buffer pfc::string8 buff; pfc::string_list_impl * str_list = new pfc::string_list_impl; try { // Init fetcher curl_wrapper_simple fetcher(&m_config_item); for (t_size i = 0; i < p_meta.get_count(); ++i) { if (p_abort.is_aborting()) break; // Sleep how_to_sleep(i); // Clear buff buff.reset(); const metadb_handle_ptr & p = p_meta.get_item(i); if (p.is_empty()) { str_list->add_item(""); continue; } // Set progress pfc::string8_fast path = file_path_canonical(p->get_path()); // add subsong index? 
if (p->get_subsong_index() > 0) { path.add_string(" /index:"); path.add_string(pfc::format_uint(p->get_subsong_index())); } p_status.set_item_path(path); p_status.set_progress(i + 1, p_meta.get_count()); pfc::string8_fast artist, title, album, keywords; file_info_impl info; p->get_info(info); // Get count of artists t_size count = info.meta_get_count_by_name("album"); if (count == 0) continue; // Get Album album = info.meta_get("album", 0); count = info.meta_get_count_by_name("title"); if (count == 0) continue; // Get TITLE title = info.meta_get("title", 0); count = info.meta_get_count_by_name("artist"); bool found = false; // Iterate through all artists listed for (int j = 0; j < count && !found; j++) { // Get Artist artist = info.meta_get("artist", j); keywords = artist; keywords += "+"; keywords += album; keywords.replace_char(' ', '+'); // Get it now try { fetcher.fetch_googleluck(site, keywords, buff); } catch (pfc::exception & e) { console_error(e.what()); continue; } catch (...) { continue; } const char * regex_ahref = "<a\\shref=\"#(?P<no>\\d+)\">(?P<text>.+?)</a>"; // expression for extract lyrics regexp.Compile(regex_ahref, IGNORECASE | SINGLELINE); // match MatchResult result = regexp.Match(buff.get_ptr()); int noGroup = regexp.GetNamedGroupNumber("no"); int textGroup = regexp.GetNamedGroupNumber("text"); int jump_to = 0; pfc::string8_fast compare = title; compare.insert_chars(0, ". 
"); float good; float best = 0.0f; while (result.IsMatched()) { int gStart = result.GetGroupStart(noGroup); int gEnd = result.GetGroupEnd(noGroup); pfc::string8_fast temp(buff.get_ptr() + gStart, gEnd - gStart); int no = StrToIntA(temp); gStart = result.GetGroupStart(textGroup); gEnd = result.GetGroupEnd(textGroup); temp = pfc::string8_fast(buff.get_ptr()+gStart, gEnd - gStart); if (temp.find_first(title) != -1) { jump_to = no; break; } int levDist = LD(compare, compare.get_length(), temp, temp.get_length()); good = 1.0f - (levDist / (float)compare.get_length()); if (good >= threshold && good > best) { jump_to = no; best = good; } result = regexp.Match(buff.get_ptr(),result.GetEnd()); } if (jump_to == 0) { continue; } char regex_lyrics[100]; sprintf(regex_lyrics, "<a\\s+name=%d><font*(.*?)</font*(.*?)>(?P<lyrics>.+?)<font", jump_to); // expression for extract lyrics regexp.Compile(regex_lyrics, IGNORECASE | SINGLELINE); noGroup = regexp.GetNamedGroupNumber("lyrics"); result = regexp.Match(buff.get_ptr()); if (result.IsMatched()) { int nStart = result.GetGroupStart(noGroup); int nEnd = result.GetGroupEnd(noGroup); pfc::string8 lyric(buff.get_ptr() + nStart, nEnd - nStart); convert_html_to_plain(lyric); if (lyric.get_length() > 0) { string_helper::remove_beginning_linebreaks(lyric); string_helper::remove_end_linebreaks(lyric); string_helper::remove_beginning(lyric, ' '); string_helper::remove_beginning(lyric, '\t'); console::print(lyric); str_list->add_item(lyric); found = true; continue; } } } if (found) continue; else str_list->add_item(""); } } catch (pfc::exception & e) { console_error(e.what()); delete str_list; return NULL; } catch (...) { delete str_list; return NULL; } return str_list; }
// Looks up the lyrics of a single track through the Lyricator lyrics.php
// service.
//
// For every "artist" tag value the URL
//   .../lyrics.php?artist=<Artist>&title=<Title>
// is fetched and the content of the <div id="lyrics"> element is extracted.
//
// p_meta : track to look up.
// p_index, p_status, p_abort : not used by this implementation.
//
// Returns the lyrics as plain text, or an empty string when nothing usable is
// found or an exception is raised.
pfc::string8 provider_lyricsplugin::lookup_one(unsigned p_index, const metadb_handle_ptr & p_meta, threaded_process_status & p_status, abort_callback & p_abort)
{
    // Regular Expression Class
    CRegexpT<char> regexp;

    // Buffer
    pfc::string8 buff;

    try
    {
        // Init fetcher
        curl_wrapper_simple fetcher(&m_config_item);

        // Clear buff
        buff.reset();

        const metadb_handle_ptr & p = p_meta;
        if (p.is_empty())
        {
            return "";
        }

        pfc::string8_fast artist, title;
        static_api_ptr_t<titleformat_compiler> compiler;
        service_ptr_t<titleformat_object> script;

        file_info_impl info;
        p->get_info(info);

        // Get count of artists
        const t_size count = info.meta_get_count_by_name("artist");

        // Get TITLE
        compiler->compile_safe(script, "[%title%]");
        p->format_title(NULL, title, script, NULL);

        // Iterate through all artists listed; the first usable result is
        // returned directly from inside the loop.
        for (t_size j = 0; j < count; j++)
        {
            artist = info.meta_get("artist", j);

            // URL = http://www.squirrelscript.net/mediamonkey/Lyricator/lyrics.php?artist=<Artist>&title=<Title>
            pfc::string8_fast url("http://www.squirrelscript.net/mediamonkey/Lyricator/lyrics.php?artist=");
            url += fetcher.quote(artist);
            url += "&title=";
            url += fetcher.quote(title);

            // Fetch the page; on any failure move on to the next artist.
            try
            {
                fetcher.fetch(url, buff);
            }
            catch (const pfc::exception & e)
            {
                console_error(e.what());
                continue;
            }
            catch (...)
            {
                continue;
            }

            // Expression for the lyrics box.
            const char * regex_lyricbox = "<div\\s+id\\s*?=\\s*?\"lyrics\"\\s*?>[\\r\\n]*(.*?)[\\r\\n]*</div>";
            regexp.Compile(regex_lyricbox, SINGLELINE);

            MatchResult result = regexp.Match(buff.get_ptr());
            if (!result.IsMatched())
            {
                continue;
            }

            // Group 1 holds the lyrics markup.
            int nStart = result.GetGroupStart(1);
            int nEnd = result.GetGroupEnd(1);
            pfc::string8_fast lyric(buff.get_ptr() + nStart, nEnd - nStart);

            convert_html_to_plain(lyric);

            // Text that starts with the site reference is rejected; a
            // reference found elsewhere is cut out (20 characters).
            int index = lyric.find_first("www.tunerankings.com");
            if (index == 0)
            {
                continue;
            }
            else if (index != -1)
            {
                lyric.remove_chars(index, 20);
            }

            if (string_trim(lyric).get_length() > 0)
            {
                return lyric;
            }
        }
    }
    catch (const pfc::exception & e)
    {
        console_error(e.what());
        return "";
    }
    catch (...)
    {
        return "";
    }

    return "";
}
// Looks up the lyrics of a single track through the Leo's Lyrics API.
//
// Two requests are made per "artist" tag value: a search
// (api_search.php?...&artist=<artist>&songtitle=<title>) whose response
// carries a result code and a song id ("hid"), then a second request with that
// id whose <text> element holds the lyrics.
//
// p_meta  : track to look up.
// p_abort : passed on to fetcher.fetch_host().
// p_index, p_status : not used by this implementation.
//
// Returns the lyrics, or an empty string when nothing is found or an
// exception is raised.
pfc::string8 provider_leoslyrics::lookup_one(unsigned p_index, const metadb_handle_ptr & p_meta, threaded_process_status & p_status, abort_callback & p_abort)
{
    TRACK_CALL_TEXT("provider_leoslyrics::lookup_one");

    // Regular Expression Class
    CRegexpT<char> regexp;

    // Buffer
    pfc::string8 buff;

    try
    {
        // Init fetcher
        curl_wrapper_simple fetcher(&m_config_item);

        const metadb_handle_ptr & p = p_meta;
        if (p.is_empty())
        {
            return "";
        }

        pfc::string8_fast artist, title;
        file_info_impl info;
        p->get_info(info);

        // Get count of artists
        const t_size count = info.meta_get_count_by_name("artist");

        // Get TITLE
        static_api_ptr_t<titleformat_compiler> compiler;
        service_ptr_t<titleformat_object> script;
        compiler->compile_safe(script, "%title%");
        p->format_title(NULL, title, script, NULL);

        // Iterate through all artists listed
        for (t_size j = 0; j < count; j++)
        {
            artist = info.meta_get("artist", j);

            // URL = http://77.79.210.222/api_search.php?auth=LeosLyrics5&artist=<artist>&songtitle=<title>
            pfc::string8_fast url("http://77.79.210.222/api_search.php?auth=LeosLyrics5&artist=");
            pfc::string8_fast host("api.leoslyrics.com");
            url += fetcher.quote(artist);
            url += "&songtitle=";
            url += fetcher.quote(title);

            // Run the search; on any failure move on to the next artist.
            try
            {
                fetcher.fetch_host(host, url, buff, p_abort);
            }
            catch (const pfc::exception & e)
            {
                console_error(e.what());
                continue;
            }
            catch (...)
            {
                continue;
            }

            // Expression for the result code and the song id.
            const char * regex_hid = "code=\"(?P<code>.).*?hid=\"(?P<hid>.*?)\"";
            regexp.Compile(regex_hid, IGNORECASE | SINGLELINE);

            pfc::string8_fast hid;
            int codeGroup = regexp.GetNamedGroupNumber("code");
            int hidGroup = regexp.GetNamedGroupNumber("hid");

            MatchResult result = regexp.Match(buff.get_ptr());
            if (!result.IsMatched())
            {
                continue;
            }

            int nStart = result.GetGroupStart(codeGroup);
            int nEnd = result.GetGroupEnd(codeGroup);
            pfc::string8_fast code(buff.get_ptr() + nStart, nEnd - nStart);

            // Only a code containing "0" is followed up.
            if (code.find_first("0") == -1)
                continue;

            nStart = result.GetGroupStart(hidGroup);
            nEnd = result.GetGroupEnd(hidGroup);
            hid = pfc::string8_fast(buff.get_ptr() + nStart, nEnd - nStart);

            // URL = http://77.79.210.222/api_search.php?auth=LeosLyrics5&hid=<songID>
            url = "http://77.79.210.222/api_search.php?auth=LeosLyrics5&hid=";
            url += hid;

            try
            {
                fetcher.fetch_host(host, url, buff, p_abort);
            }
            catch (const pfc::exception & e)
            {
                console_error(e.what());
                continue;
            }
            catch (...)
            {
                continue;
            }

            // Expression for the lyrics text. Named separately so it no
            // longer shadows regex_hid above.
            const char * regex_text = "<text>\\s(?P<lyrics>.*?)\\s</text>";
            regexp.Compile(regex_text, IGNORECASE | SINGLELINE);

            int lyricsGroup = regexp.GetNamedGroupNumber("lyrics");
            result = regexp.Match(buff.get_ptr());

            if (result.IsMatched())
            {
                nStart = result.GetGroupStart(lyricsGroup);
                nEnd = result.GetGroupEnd(lyricsGroup);
                pfc::string8 lyric(buff.get_ptr() + nStart, nEnd - nStart);

                if (lyric.get_length() > 0)
                {
                    string_helper::remove_beginning_linebreaks(lyric);
                    string_helper::remove_end_linebreaks(lyric);
                    return lyric;
                }
            }
        }
    }
    catch (const pfc::exception & e)
    {
        console_error(e.what());
        return "";
    }
    catch (...)
    {
        return "";
    }

    return "";
}
//************************************************************************ //* Leo's Lyrics * //************************************************************************ pfc::string_list_impl * provider_leoslyrics::lookup(unsigned p_index, metadb_handle_list_cref p_meta, threaded_process_status & p_status, abort_callback & p_abort) { TRACK_CALL_TEXT("provider_leoslyrics::lookup"); // Regular Expression Class CRegexpT<char> regexp; // Buffer pfc::string8 buff; pfc::string_list_impl * str_list = new pfc::string_list_impl; try { // Init fetcher curl_wrapper_simple fetcher(&m_config_item); for (t_size i = 0; i < p_meta.get_count(); ++i) { if (p_abort.is_aborting()) break; // Sleep how_to_sleep(i); // Clear buff buff.reset(); const metadb_handle_ptr & p = p_meta.get_item(i); if (p.is_empty()) { str_list->add_item(""); continue; } // Set progress pfc::string8_fast path = file_path_canonical(p->get_path()); // add subsong index? if (p->get_subsong_index() > 0) { path.add_string(" /index:"); path.add_string(pfc::format_uint(p->get_subsong_index())); } p_status.set_item_path(path); p_status.set_progress(i + 1, p_meta.get_count()); pfc::string8_fast artist, title; file_info_impl info; p->get_info(info); // Get count of artists t_size count = info.meta_get_count_by_name("artist"); // Get TITLE static_api_ptr_t<titleformat_compiler> compiler; service_ptr_t<titleformat_object> script; compiler->compile_safe(script, "%title%"); p->format_title(NULL, title, script, NULL); bool found = false; // Iterate through all artists listed for (int j = 0; j < count && !found; j++) { // Get Artist artist = info.meta_get("artist", j); pfc::string8_fast url("http://77.79.210.222/api_search.php?auth=LeosLyrics5&artist="); pfc::string8_fast host("api.leoslyrics.com"); //URL = http://77.79.210.222/api_search.php?auth=LeosLyrics5&artist=<artist>&songtitle=<title> url += fetcher.quote(artist); url += "&songtitle="; url += fetcher.quote(title); // Get it now try { fetcher.fetch_host(host, url, buff, 
p_abort); } catch (pfc::exception & e) { console_error(e.what()); continue; } catch (...) { continue; } const char * regex_hid = "code=\"(?P<code>.).*?hid=\"(?P<hid>.*?)\""; // expression for extract lyrics regexp.Compile(regex_hid, IGNORECASE | SINGLELINE); pfc::string8_fast hid; int codeGroup = regexp.GetNamedGroupNumber("code"); int hidGroup = regexp.GetNamedGroupNumber("hid"); MatchResult result = regexp.Match(buff.get_ptr()); if (result.IsMatched()) { int nStart = result.GetGroupStart(codeGroup); int nEnd = result.GetGroupEnd(codeGroup); pfc::string8_fast code(buff.get_ptr() + nStart, nEnd-nStart); if (code.find_first("0") == -1) continue; nStart = result.GetGroupStart(hidGroup); nEnd = result.GetGroupEnd(hidGroup); hid = pfc::string8_fast(buff.get_ptr() + nStart, nEnd - nStart); url = "http://77.79.210.222/api_search.php?auth=LeosLyrics5&hid="; url += hid; //URL = http://77.79.210.222/api_search.php?auth=LeosLyrics5&hid=<songID> try { fetcher.fetch_host(host, url, buff, p_abort); } catch (pfc::exception & e) { console_error(e.what()); continue; } catch (...) { continue; } const char * regex_hid = "<text>\\s(?P<lyrics>.*?)\\s</text>"; // expression for extract lyrics regexp.Compile(regex_hid, IGNORECASE | SINGLELINE); int lyricsGroup = regexp.GetNamedGroupNumber("lyrics"); result = regexp.Match(buff.get_ptr()); if (result.IsMatched()) { nStart = result.GetGroupStart(lyricsGroup); nEnd = result.GetGroupEnd(lyricsGroup); pfc::string8 lyric(buff.get_ptr() + nStart, nEnd - nStart); if (lyric.get_length() > 0) { found = true; string_helper::remove_beginning_linebreaks(lyric); string_helper::remove_end_linebreaks(lyric); str_list->add_item(lyric); continue; } } } } if (found) continue; else str_list->add_item(""); } } catch (pfc::exception & e) { console_error(e.what()); delete str_list; return NULL; } catch (...) { delete str_list; return NULL; } return str_list; }
// Looks up the lyrics of a single track on lyrics.wikia.com.
//
// For every "artist" tag value the page
//   http://lyrics.wikia.com/index.php?title=<Artist>:<Title>
// is fetched (spaces replaced by '_') and the text following the 'lyricbox'
// element is extracted.
//
// p_meta : track to look up.
// p_index, p_status, p_abort : not used by this implementation.
//
// Returns the lyrics as plain text, "[Instrumental]" when the page marks the
// track as instrumental, or an empty string when nothing is found or an
// exception is raised.
pfc::string8 provider_lyricwiki::lookup_one(unsigned p_index, const metadb_handle_ptr & p_meta, threaded_process_status & p_status, abort_callback & p_abort)
{
    TRACK_CALL_TEXT("provider_lyricwiki::lookup_one");

    // Regular Expression Class
    CRegexpT<char> regexp;

    // Buffer
    pfc::string8 buff;

    try
    {
        // Init fetcher
        curl_wrapper_simple fetcher(&m_config_item);

        const metadb_handle_ptr & p = p_meta;
        if (p.is_empty())
        {
            return "";
        }

        pfc::string8_fast artist, title;
        file_info_impl info;
        p->get_info(info);

        // Get count of artists
        const t_size count = info.meta_get_count_by_name("artist");

        // Get TITLE (formatted by the title-format script below)
        static_api_ptr_t<titleformat_compiler> compiler;
        service_ptr_t<titleformat_object> script;
        compiler->compile_safe(script, "$replace($caps2(%title%),' ','_')");
        p->format_title(NULL, title, script, NULL);

        // Iterate through all artists listed
        for (t_size j = 0; j < count; j++)
        {
            artist = info.meta_get("artist", j);
            artist.replace_char(' ', '_');

            // URL = http://lyrics.wikia.com/index.php?title=<Artist>:<Title>
            // e.g.  ...?title=Blackmore%27s_Night:I_Guess_It_Doesn%27t_Matter_Anymore
            pfc::string8_fast url("http://lyrics.wikia.com/index.php?title=");
            url += fetcher.quote(artist);
            url += ":";
            url += fetcher.quote(title);

            // Fetch the page; on any failure move on to the next artist.
            try
            {
                fetcher.fetch(url, buff);
            }
            catch (const pfc::exception & e)
            {
                console_error(e.what());
                continue;
            }
            catch (...)
            {
                continue;
            }

            // Expression for the lyrics box and the text that follows it.
            const char * regex_lyrics = "'lyricbox'(?P<instrumental>.*?)</div>(?P<lyrics>.*?)<!--";
            regexp.Compile(regex_lyrics, IGNORECASE | SINGLELINE);

            int noGroup = regexp.GetNamedGroupNumber("lyrics");
            int instGroup = regexp.GetNamedGroupNumber("instrumental");

            MatchResult result = regexp.Match(buff.get_ptr());
            if (!result.IsMatched())
            {
                continue;
            }

            int nStart = result.GetGroupStart(instGroup);
            int nEnd = result.GetGroupEnd(instGroup);
            pfc::string8_fast test(buff.get_ptr() + nStart, nEnd - nStart);

            // The box itself flags instrumental tracks.
            if (test.find_first("Instrumental") != -1)
            {
                return "[Instrumental]";
            }

            nStart = result.GetGroupStart(noGroup);
            nEnd = result.GetGroupEnd(noGroup);
            pfc::string8 lyric(buff.get_ptr() + nStart, nEnd - nStart);

            convert_html_to_plain(lyric);

            if (lyric.get_length() > 0)
            {
                string_helper::remove_beginning_linebreaks(lyric);
                string_helper::remove_end_linebreaks(lyric);
                return lyric;
            }
        }
    }
    catch (const pfc::exception & e)
    {
        console_error(e.what());
        return "";
    }
    catch (...)
    {
        return "";
    }

    return "";
}
//************************************************************************ //* AZ Lyrics * //************************************************************************ pfc::string_list_impl * provider_azlyrics::lookup(unsigned p_index, metadb_handle_list_cref p_meta, threaded_process_status & p_status, abort_callback & p_abort) { TRACK_CALL_TEXT("provider_azlyrics::lookup"); const float threshold = 0.8f; // Regular Expression Class CRegexpT<char> regexp; // Buffer pfc::string8 buff; pfc::string_list_impl * str_list = new pfc::string_list_impl; try { // Init fetcher curl_wrapper_simple fetcher(&m_config_item); for (t_size i = 0; i < p_meta.get_count(); ++i) { if (p_abort.is_aborting()) break; // Sleep how_to_sleep(i); // Clear buff buff.reset(); const metadb_handle_ptr & p = p_meta.get_item(i); if (p.is_empty()) { str_list->add_item(""); continue; } // Set progress pfc::string8_fast path = file_path_canonical(p->get_path()); // add subsong index? if (p->get_subsong_index() > 0) { path.add_string(" /index:"); path.add_string(pfc::format_uint(p->get_subsong_index())); } p_status.set_item_path(path); p_status.set_progress(i + 1, p_meta.get_count()); pfc::string8_fast artist, title; file_info_impl info; p->get_info(info); // Get count of artists t_size count = info.meta_get_count_by_name("artist"); // Get TITLE title = info.meta_get("title", 0); bool found = false; // Iterate through all artists listed for (int j = 0; j < count && !found; j++) { // Get Artist artist = info.meta_get("artist", j); // Search the lyrics pfc::string8_fast url("http://search.azlyrics.com/search.php?q="); url += fetcher.quote(artist); url += "+"; url += fetcher.quote(title); // Get it now try { fetcher.fetch(url, buff); } catch (pfc::exception & e) { console_error(e.what()); continue; } catch (...) 
{ continue; } int resultStart = buff.find_first("<b>1.</b>"); int startUrl = buff.find_first("<a href=\"", resultStart) + 9; int endUrl = buff.find_first("\"", startUrl); url = pfc::string8_fast(buff.get_ptr()+startUrl, endUrl - startUrl); // Get it now try { fetcher.fetch(url, buff); } catch (pfc::exception & e) { console_error(e.what()); continue; } catch (...) { continue; } const char * regex_lyrics = "<!-- END OF RINGTONE 1 -->\\s*?<b>\"(?P<title>.*?)\"</b><br>\\s*?<br>\\s\\s(?P<lyrics>.*?)\\s<br>"; // expression for extract lyrics regexp.Compile(regex_lyrics, IGNORECASE | SINGLELINE); int noGroup = regexp.GetNamedGroupNumber("lyrics"); int noTitle = regexp.GetNamedGroupNumber("title"); // match MatchResult result = regexp.Match(buff.get_ptr()); if (result.IsMatched()) { int nStart = result.GetGroupStart(noTitle); int nEnd = result.GetGroupEnd(noTitle); pfc::string8_fast songTitle(buff.get_ptr() +nStart, nEnd - nStart); int levDist = LD(title, title.get_length(), songTitle, songTitle.get_length()); float good = 1.0f - ((float)levDist / title.get_length()); if (good < threshold) continue; nStart = result.GetGroupStart(noGroup); nEnd = result.GetGroupEnd(noGroup); pfc::string8 lyric(buff.get_ptr() + nStart, nEnd - nStart); convert_html_to_plain(lyric); if (lyric.get_length() > 0) { string_helper::remove_end_linebreaks(lyric); str_list->add_item(lyric); found = true; continue; } } } if (found) continue; else str_list->add_item(""); } } catch (pfc::exception & e) { console_error(e.what()); delete str_list; return NULL; } catch (...) { delete str_list; return NULL; } return str_list; }
// Looks up the lyrics of a single track on azlyrics.com.
//
// For every "artist" tag value the site search is queried with
// "<artist>+<title>", the link of the first result ("<b>1.</b>") is followed,
// and the lyrics are extracted from that page, provided the page title is
// similar enough to the track title (similarity derived from LD(), at least
// `threshold`).
//
// p_meta : track to look up.
// p_index, p_status, p_abort : not used by this implementation.
//
// Returns the lyrics as plain text, or an empty string when nothing suitable
// is found or an exception is raised.
pfc::string8 provider_azlyrics::lookup_one(unsigned p_index, const metadb_handle_ptr & p_meta, threaded_process_status & p_status, abort_callback & p_abort)
{
    TRACK_CALL_TEXT("provider_azlyrics::lookup_one");

    const float threshold = 0.8f;

    // Regular Expression Class
    CRegexpT<char> regexp;

    // Buffer
    pfc::string8 buff;

    try
    {
        TRACK_CALL_TEXT("Try");

        // Init fetcher
        curl_wrapper_simple fetcher(&m_config_item);

        const metadb_handle_ptr & p = p_meta;
        if (p.is_empty())
        {
            return "";
        }

        pfc::string8_fast artist, title;

        // NOTE(review): compiler/script are not used below; kept because it
        // has not been verified that constructing them has no required effect.
        static_api_ptr_t<titleformat_compiler> compiler;
        service_ptr_t<titleformat_object> script;

        file_info_impl info;
        p->get_info(info);

        // Get count of artists
        const t_size count = info.meta_get_count_by_name("artist");

        // Get TITLE
        title = info.meta_get("title", 0);

        // Iterate through all artists listed
        for (t_size j = 0; j < count; j++)
        {
            TRACK_CALL_TEXT("For");

            artist = info.meta_get("artist", j);

            // Pass C strings: string objects must not go through the
            // printf-style variable argument list.
            console::printf("%s - %s", artist.get_ptr(), title.get_ptr());

            // Search the lyrics
            pfc::string8_fast url("http://search.azlyrics.com/search.php?q=");
            url += fetcher.quote(artist);
            url += "+";
            url += fetcher.quote(title);

            // Fetch the search page; on failure move on to the next artist.
            try
            {
                fetcher.fetch(url, buff);
            }
            catch (const pfc::exception & e)
            {
                console_error(e.what());
                continue;
            }
            catch (...)
            {
                continue;
            }

            if (buff.get_length() == 0)
                continue;

            // Locate the link of the first search result. Each lookup is
            // checked: a missing marker (-1) must not be turned into offsets
            // for a bogus URL.
            int resultStart = buff.find_first("<b>1.</b>");
            if (resultStart == -1)
                continue;

            int hrefPos = buff.find_first("<a href=\"", resultStart);
            if (hrefPos == -1)
                continue;

            int startUrl = hrefPos + 9;     // skip past <a href="
            int endUrl = buff.find_first("\"", startUrl);
            if (endUrl == -1)
                continue;

            url = pfc::string8_fast(buff.get_ptr() + startUrl, endUrl - startUrl);

            // Fetch the lyrics page.
            try
            {
                fetcher.fetch(url, buff);
            }
            catch (const pfc::exception & e)
            {
                console_error(e.what());
                continue;
            }
            catch (...)
            {
                continue;
            }

            if (buff.get_length() == 0)
                continue;

            // Expression for the song title and the lyrics.
            const char * regex_lyrics = "<!-- END OF RINGTONE 1 -->\\s*?<b>\"(?P<title>.*?)\"</b><br>\\s*?<br>\\s\\s(?P<lyrics>.*?)\\s<br>";
            regexp.Compile(regex_lyrics, IGNORECASE | SINGLELINE);

            int noGroup = regexp.GetNamedGroupNumber("lyrics");
            int noTitle = regexp.GetNamedGroupNumber("title");

            MatchResult result = regexp.Match(buff.get_ptr());
            if (!result.IsMatched())
            {
                continue;
            }

            int nStart = result.GetGroupStart(noTitle);
            int nEnd = result.GetGroupEnd(noTitle);
            pfc::string8_fast songTitle(buff.get_ptr() + nStart, nEnd - nStart);

            // A page whose title is too different is skipped and the next
            // artist is tried, matching the list-based lookup().
            int levDist = LD(title, title.get_length(), songTitle, songTitle.get_length());
            float good = 1.0f - ((float)levDist / title.get_length());
            if (good < threshold)
                continue;

            nStart = result.GetGroupStart(noGroup);
            nEnd = result.GetGroupEnd(noGroup);
            pfc::string8 lyric(buff.get_ptr() + nStart, nEnd - nStart);

            convert_html_to_plain(lyric);

            if (lyric.get_length() > 0)
            {
                string_helper::remove_end_linebreaks(lyric);
                return lyric;
            }
        }
    }
    catch (const pfc::exception & e)
    {
        console_error(e.what());
        return "";
    }
    catch (...)
    {
        return "";
    }

    return "";
}
//************************************************************************ //* LyricWiki * //************************************************************************ pfc::string_list_impl * provider_lyricwiki::lookup(unsigned p_index, metadb_handle_list_cref p_meta, threaded_process_status & p_status, abort_callback & p_abort) { TRACK_CALL_TEXT("provider_lyricwiki::lookup"); // Regular Expression Class CRegexpT<char> regexp; // Buffer pfc::string8 buff; pfc::string_list_impl * str_list = new pfc::string_list_impl; try { // Init fetcher curl_wrapper_simple fetcher(&m_config_item); for (t_size i = 0; i < p_meta.get_count(); ++i) { if (p_abort.is_aborting()) break; // Sleep how_to_sleep(i); // Clear buff buff.reset(); const metadb_handle_ptr & p = p_meta.get_item(i); if (p.is_empty()) { str_list->add_item(""); continue; } // Set progress pfc::string8_fast path = file_path_canonical(p->get_path()); // add subsong index? if (p->get_subsong_index() > 0) { path.add_string(" /index:"); path.add_string(pfc::format_uint(p->get_subsong_index())); } p_status.set_item_path(path); p_status.set_progress(i + 1, p_meta.get_count()); pfc::string8_fast artist, title; file_info_impl info; p->get_info(info); // Get count of artists t_size count = info.meta_get_count_by_name("artist"); // Get TITLE static_api_ptr_t<titleformat_compiler> compiler; service_ptr_t<titleformat_object> script; compiler->compile_safe(script, "$replace($caps2(%title%),' ','_')"); p->format_title(NULL, title, script, NULL); bool found = false; // Iterate through all artists listed for (int j = 0; j < count && !found; j++) { // Get Artist artist = info.meta_get("artist", j); artist.replace_char(' ', '_'); //Fetching from HTTP // Set HTTP Address pfc::string8_fast url("http://lyrics.wikia.com/index.php?title="); //URL = http://lyrics.wikia.com/index.php?title=Blackmore%27s_Night:I_Guess_It_Doesn%27t_Matter_Anymore&action=edit url += fetcher.quote(artist); url += ":"; url += fetcher.quote(title); // Get it now try 
{ fetcher.fetch(url, buff); } catch (pfc::exception & e) { console_error(e.what()); continue; } catch (...) { continue; } const char * regex_lyrics = "'lyricbox'(?P<instrumental>.*?)</div>(?P<lyrics>.*?)<!--"; // expression for extract lyrics regexp.Compile(regex_lyrics, IGNORECASE | SINGLELINE); int noGroup = regexp.GetNamedGroupNumber("lyrics"); int instGroup = regexp.GetNamedGroupNumber("instrumental"); MatchResult result = regexp.Match(buff.get_ptr()); if (result.IsMatched()) { int nStart = result.GetGroupStart(instGroup); int nEnd = result.GetGroupEnd(instGroup); pfc::string8_fast test(buff.get_ptr() + nStart, nEnd-nStart); if (test.find_first("Instrumental") != -1) { found = true; str_list->add_item("[Instrumental]"); continue; } nStart = result.GetGroupStart(noGroup); nEnd = result.GetGroupEnd(noGroup); pfc::string8 lyric(buff.get_ptr() + nStart, nEnd - nStart); convert_html_to_plain(lyric); if (lyric.get_length() > 0) { found = true; string_helper::remove_beginning_linebreaks(lyric); string_helper::remove_end_linebreaks(lyric); str_list->add_item(lyric); continue; } } } if (found) continue; else str_list->add_item(""); } } catch (pfc::exception & e) { console_error(e.what()); delete str_list; return NULL; } catch (...) { delete str_list; return NULL; } return str_list; }