BOOL GoogleLyricsProvider::GetNextResult(Result& result) { m_result.clear(); m_resultAdditionalInfo.clear(); ASSERT(m_curResult > -1);//Should be Initialized ASSERT(!m_Artist.empty() && !m_Track.empty()); if (m_Artist.empty() || m_Track.empty()) return FALSE; if (m_curResult == 0) { TRACE(_T("@3 GoogleLyricsProvider::GetResult(). Dowloading Google Page\r\n")); //std::tstring query = _T("http://www.google.com/search?q="); std::tstring query = _T("allintitle:\""); query += m_Artist; query += _T("\" \""); query += m_Track; switch (m_request.service) { case SRV_TrackLyrics: query += _T("\" lyrics"); break; case SRV_TrackTablatures: query += _T("\" chords"); break; default: ASSERT(0); return FALSE; } std::tstring fixString; URLEncode(fixString, query.c_str()); query = _T("http://www.google.com/search?q="); query += fixString; //URLEncode(fixString, m_Artist.c_str()); //query += fixString; //query += _T("\"+\""); //URLEncode(fixString, m_Track.c_str(); //query += fixString; //switch (m_request.service) //{ //case SRV_TrackLyrics: // query += _T("\"+lyrics"); // break; //case SRV_TrackTablatures: // query += _T("\"+chords"); // break; //default: // ASSERT(0); // return FALSE; //} //fixString.clear(); //URLEncode(fixString, query.c_str()); //CHAR bf[1000]; //WideCharToMultiByte(CP_ACP, 0, query.c_str(), -1, bf, 1000, 0, 0); //CHAR url[1000]; //DWORD bfLen = 1000; //BOOL ret = InternetCanonicalizeUrlA(bf, url, &bfLen, 0); //query = bf; //query = _T("http://www.google.com/search?q=allintitle%3A%22%CE%98%CE%B1%CE%BD%CE%AC%CF%83%CE%B7%CF%82%2B%CE%A0%CE%B1%CF%80%CE%B1%CE%BA%CF%89%CE%BD%CF%83%CF%84%CE%B1%CE%BD%CF%84%CE%AF%CE%BD%CE%BF%CF%85%22%2B%22%CE%89%CE%BC%CE%B5%CF%81%CE%BF%CF%82%2B%CE%8E%CF%80%CE%BD%CE%BF%CF%82%22%2Blyrics"); std::string page; if (DownloadWebPage(page, m_hNet, query.c_str())) { const INT cLinkLen = 1000; CHAR linkBuffer[cLinkLen]; LPCSTR startPos = page.c_str(); while (startPos != NULL) { startPos = strstr(startPos, "h3 class=\"r\">"); if (startPos) { 
startPos = strstr(startPos, "http:"); if (startPos) { LPCSTR endPos = NULL; if (startPos[-1] == '"') endPos = strchr(startPos + 1, '"'); else endPos = strchr(startPos + 1, '&'); if (endPos) { if (endPos - startPos < (INT)cLinkLen) { strncpy(linkBuffer, startPos, endPos - startPos); linkBuffer[endPos - startPos] = 0; if (IsBlackListed(linkBuffer) == FALSE) m_googleLinks.push_back(linkBuffer); } else TRACE(_T("@1 GoogleLyricsProvider::NextResult. link bigger than expected (FAILED)\r\n")); } else TRACE(_T("@1 GoogleLyricsProvider::NextResult. Can't find endPos '%0.150s'\r\n"), CA2CT(&startPos[-1])); startPos = endPos; } else TRACE(_T("@1 GoogleLyricsProvider::NextResult. Can't find startPos (2) (FAILED)\r\n")); } else TRACE(_T("@1 GoogleLyricsProvider::NextResult. Can't find startPos (1) (FAILED)\r\n")); } if (m_googleLinks.empty()) { startPos = page.c_str(); //=== This happens on iGoogle while (startPos != NULL) { startPos = strstr(startPos, " class=r>"); if (startPos) { startPos = strstr(startPos, "http:"); if (startPos) { LPCSTR endPos = NULL; if (startPos[-1] == '"') endPos = strchr(startPos + 1, '"'); else endPos = strchr(startPos + 1, '&'); if (endPos) { if (endPos - startPos < (INT)cLinkLen) { strncpy(linkBuffer, startPos, endPos - startPos); linkBuffer[endPos - startPos] = 0; if (IsBlackListed(linkBuffer) == FALSE) m_googleLinks.push_back(linkBuffer); } else TRACE(_T("@1 GoogleLyricsProvider::NextResult. (r) link bigger than expected (FAILED)\r\n")); } else TRACE(_T("@1 GoogleLyricsProvider::NextResult. (r) Can't find endPos '%0.150s'\r\n"), CA2CT(&startPos[-1])); startPos = endPos; } else TRACE(_T("@1 GoogleLyricsProvider::NextResult. (r) Can't find startPos (2) (FAILED)\r\n")); } else TRACE(_T("@1 GoogleLyricsProvider::NextResult. (r) Can't find startPos (1) (FAILED)\r\n")); } } } else TRACE(_T("@1 GoogleLyricsProvider::NextResult(). 
Dowloading Google Page (FAILED)\r\n")); } std::wstring page; while (m_curResult < (INT)m_googleLinks.size()) { BOOL bResultFound = FALSE; if (DownloadWebPageUnicode(page, m_hNet, (LPCTSTR)CA2CT(m_googleLinks[m_curResult].c_str()))) { RemoveEnclosedString(page, _T("<!--"), _T("-->")); RemoveEnclosedString(page, _T("<head"), _T("/head>")); RemoveEnclosedString(page, _T("<script"), _T("/script>")); RemoveEnclosedString(page, _T("<a href="), _T("/a>")); ReplaceHtmlEntities(page); if (ExtractLyrics(page)) { //InlineHTML2Text(pResult.get()); //m_result = pResult.get(); m_result = page; LPCTSTR delims = _T(" \t\n\r"); m_result.erase(0, m_result.find_first_not_of(delims)); //Trim Left m_result.erase(m_result.find_last_not_of(delims) + 1); //Trim Right if (m_result.size() > 40) { //Keep Additional Info (The Web Site Provider) LPCSTR pLink = m_googleLinks[m_curResult].c_str(); LPCSTR sp = &pLink[7]; LPCSTR ep = strchr(&pLink[8], '/'); if (ep != 0) { CHAR SourceSiteU[MAX_PATH]; strncpy(SourceSiteU, sp, INT(ep - sp) > 99 ? 99 : INT(ep - sp)); SourceSiteU[INT(ep - sp) > 99 ? 99 : INT(ep - sp)] = 0; m_resultAdditionalInfo = (LPCTSTR)CA2CT(SourceSiteU); } else m_resultAdditionalInfo.clear(); bResultFound = TRUE; } } else TRACE(_T("@1 GoogleLyricsProvider::GetResult(). ExtractLyrics (FAILED)\r\n")); } else TRACE(_T("@1 GoogleLyricsProvider::GetResult(). Dowloading Lyrics Page (FAILED) '%s'\r\n"), (LPCTSTR)CA2CT(m_googleLinks[m_curResult].c_str())); m_curResult++; if (bResultFound) break; } if (m_result.empty()) return FALSE; result.main = m_result.c_str(); result.additionalInfo = m_resultAdditionalInfo.c_str(); result.service = m_request.service; return TRUE; }
// Unit test for the web page utilities: ReplaceHtmlEntities, DownloadWebPage,
// DownloadWebPageUnicode, GetWindowsCodePageA, Ansi2Unicode and
// GetTextFromHtmlFragment. The download checks need network access and are
// skipped when InternetOpen fails. Always returns TRUE; individual failures
// are reported through the UNITTEST macro.
BOOL TestWebPageUtilities()
{
    UNITTESTSTART;

    // repEntitiesTest holds (input, expected) pairs. The bound "i + 1 < count"
    // keeps the expected-value access in range even if the table has an odd
    // number of entries.
    const size_t entityEntryCount = sizeof(repEntitiesTest) / sizeof(repEntitiesTest[0]);
    for (size_t i = 0; i + 1 < entityEntryCount; i += 2)
    {
        std::wstring test(repEntitiesTest[i]);
        ReplaceHtmlEntities(test);
        UNITTEST(test == repEntitiesTest[i+1]);
    }

    HINTERNET hNet = InternetOpen(_T("UnitTest"), INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL, NULL);
    std::string page;
    if (hNet)
    {
        page.clear();
        // Invalid URLs must fail and leave the output untouched.
        UNITTEST(!DownloadWebPage(page, hNet, _T("123567")));
        UNITTEST(!DownloadWebPage(page, hNet, _T("")));
        UNITTEST(page.empty());
        if (DownloadWebPage(page, hNet, _T("http://www.in.gr/")))
        {
            UNITTEST(!page.empty());
        }
        std::wstring pageU;
        if (DownloadWebPageUnicode(pageU, hNet, _T("http://www.in.gr/")))
        {
            // NOTE(review): this checks the ANSI page from the previous download,
            // which is the input of the conversion below - confirm pageU was not meant.
            UNITTEST(!page.empty());
            // Converting the raw page with its declared code page (1252 when
            // none is detected) must match the Unicode download.
            std::wstring UnicodeAnsiPage;
            INT cp = GetWindowsCodePageA(page.c_str());
            if (cp == 0)
                cp = 1252;
            Ansi2Unicode(UnicodeAnsiPage, page, cp);
            UNITTEST(UnicodeAnsiPage == pageU);
        }
        InternetCloseHandle(hNet);
    }

    // Code page detection from the charset declaration.
    page = "<html content=\"text/html; charset=windows-452\">This is a & test</html>";
    UNITTEST(GetWindowsCodePageA(page.c_str()) == 452);
    UNITTEST(GetWindowsCodePageA("content=\"text/html; charset=UTF-8\">") == 65001);
    UNITTEST(GetWindowsCodePageA("content=\"text/html; charset=utf-8\">") == 65001);
    UNITTEST(GetWindowsCodePageA("content=\"text/html; charset=iso-8859-7\"") == 28597);
    UNITTEST(GetWindowsCodePageA("content=\"text/html; charset=iso-8859-1\"") == 28591);

    std::wstring pageU;
    Ansi2Unicode(pageU, page);
    UNITTEST(ReplaceHtmlEntities(pageU) == 1);
    UNITTEST(pageU == _T("<html content=\"text/html; charset=windows-452\">This is a & test</html>"));

    // The fragment end is derived from the wide string's own length, so it
    // stays inside pageU even if the entity replacement changed its size.
    std::tstring text;
    GetTextFromHtmlFragment(text, pageU.c_str(), &pageU.c_str()[pageU.size() - 1]);
    UNITTEST(text == _T("This is a & test"));
    return TRUE;
}
// Performs the audioscrobbler handshake.
//
// Requests
//   http://post.audioscrobbler.com/?hs=true&p=<protocol>&c=<client-id>&v=<client-ver>&u=<user>&t=<timestamp>&a=<auth>
// where <auth> is the lower-cased MD5 of (m_MD5password + timestamp), then
// parses the newline-separated response: line 0 is the status, lines 1-3 are
// the session id, the now-playing URL and the submission URL.
//
// sessionID, nowPlayingURL, submissionURL - cleared on entry, filled from the
//     response lines 1-3 when the status line is "OK".
// Returns TRUE when the state ends as ST_Idle (status "OK"). Otherwise returns
// FALSE with m_error set to ERR_BadAuth, ERR_Banned, ERR_BadTime, ERR_Failed
// or ERR_CommunicationError.
BOOL LastFMServices::HandShake(std::basic_string<TCHAR>& sessionID, std::basic_string<TCHAR>& nowPlayingURL, std::basic_string<TCHAR>& submissionURL)
{
    sessionID.clear();
    nowPlayingURL.clear();
    submissionURL.clear();
    if (m_username.empty() || m_MD5password.empty())
    {
        m_error = ERR_BadAuth;
        return FALSE;
    }

    SYSTEMTIME st;
    GetSystemTime(&st);
    UINT ts = SystemTime2UnixTimeStamp(st);

    // _snprintf / _sntprintf do not write a terminator when the output is
    // truncated, so both buffers are terminated explicitly.
    const INT cAuthLen = 100;
    CHAR authStr[cAuthLen];
    _snprintf(authStr, cAuthLen, "%s%u", (LPCSTR)CT2CA(m_MD5password.c_str()), ts);
    authStr[cAuthLen - 1] = 0;

    cMD5 md5;
    const INT cUrlLen = 2000;
    TCHAR url[cUrlLen];
    _sntprintf(url, cUrlLen, _T("http://post.audioscrobbler.com/?hs=true&p=%s&c=%s&v=%s&u=%s&t=%u&a=%.32s"),
        sProtocolVer,
        sClientID,
        sClientVer,
        m_username.c_str(),
        ts,
        (LPCTSTR)CA2CT(_strlwr(md5.CalcMD5FromString(authStr))));
    url[cUrlLen - 1] = 0;
    TRACE(_T("About to request: '%s'\r\n"), url);

    m_state = ST_Connecting;
    std::string page;
    if (DownloadWebPage(page, m_hNet, url))
    {
        // Start from a failure state: if the response contains no status line
        // the loop below never runs and a stale ERR_None from an earlier call
        // must not be reported as success.
        m_error = ERR_Failed;
        m_errorString = std::string();

        std::string delimiter = "\n";
        std::string line;
        INT pos = getToken(page, 0, delimiter, line);
        INT count = 0;
        while (pos != -1)
        {
            switch (count)
            {
            case 0://Status line
                m_errorString = line;
                if (line == "OK")
                    m_error = ERR_None;
                else if (line == "BANNED")
                    m_error = ERR_Banned;
                else if (line == "BADAUTH")
                    m_error = ERR_BadAuth;
                else if (line == "BADTIME")
                    m_error = ERR_BadTime;
                else
                    m_error = ERR_Failed;
                break;
            case 1:
                sessionID = (LPCTSTR)CA2CT(line.c_str());
                break;
            case 2:
                nowPlayingURL = (LPCTSTR)CA2CT(line.c_str());
                break;
            case 3:
                submissionURL = (LPCTSTR)CA2CT(line.c_str());
                break;
            default:
                break;
            }
            // Nothing after a failed status line is meaningful.
            if (m_error != ERR_None)
                break;
            count++;
            pos = getToken(page, pos, delimiter, line);
        }

        if (m_error == ERR_None)
        {
            TRACE(_T("@4 LastFMServices::HandShake. success.\r\n"));
            m_state = ST_Idle;
        }
        else
        {
            // Explicit LPCTSTR cast: never pass the conversion object itself
            // through the variadic TRACE.
            TRACE(_T("@4 LastFMServices::HandShake. fail: '%s'.\r\n"), (LPCTSTR)CA2CT(GetErrorString()));
            m_state = ST_Error;
        }
    }
    else //---Download page failed
    {
        m_error = ERR_CommunicationError;
        m_state = ST_Error;
        TRACE(_T("@4 LastFMServices::HandShake. fail: Communication Error.\r\n"));
    }
    return m_state == ST_Idle;
}