// Produces the next lyrics/tablature candidate for the current artist and track.
//
// When m_curResult is 0, the Google result page for an "allintitle:" query is
// downloaded and every result link that is not black-listed is appended to
// m_googleLinks. The stored links are then downloaded one at a time, starting
// at index m_curResult, until one page yields extracted text longer than 40
// characters. m_curResult is advanced past every link that was tried.
//
// result   Receives the text (main), the host name taken from the link
//          (additionalInfo) and the requested service. Not modified on failure.
// Returns  TRUE when a candidate was stored in result. FALSE when artist or
//          track is empty, the requested service is not handled, or none of
//          the remaining links produced a usable text.
BOOL GoogleLyricsProvider::GetNextResult(Result& result)
{
	m_result.clear();
	m_resultAdditionalInfo.clear();
	ASSERT(m_curResult > -1);//Should be Initialized
	ASSERT(!m_Artist.empty() && !m_Track.empty());
	if (m_Artist.empty() || m_Track.empty())
		return FALSE;

	if (m_curResult == 0)
	{
		TRACE(_T("@3 GoogleLyricsProvider::GetResult(). Dowloading Google Page\r\n"));
		// Search expression: allintitle:"<artist>" "<track>" <keyword>
		std::tstring query = _T("allintitle:\"");
		query += m_Artist;
		query += _T("\" \"");
		query += m_Track;
		switch (m_request.service)
		{
		case SRV_TrackLyrics:
			query += _T("\" lyrics");
			break;
		case SRV_TrackTablatures:
			query += _T("\" chords");
			break;
		default:
			ASSERT(0);
			return FALSE;
		}
		// The complete expression is URL-encoded in one step and appended to the search URL.
		std::tstring fixString;
		URLEncode(fixString, query.c_str());
		query = _T("http://www.google.com/search?q=");
		query += fixString;

		std::string page;
		if (DownloadWebPage(page, m_hNet, query.c_str()))
		{
			// Result links follow one of two markers. The second marker is only
			// tried when the first one produced no links (this happens on iGoogle).
			static const LPCSTR markers[2] = { "h3 class=\"r\">", " class=r>" };
			// Prefix inserted into the trace text so both passes stay distinguishable.
			static const LPCTSTR traceTags[2] = { _T(""), _T("(r) ") };
			const INT cLinkLen = 1000;
			CHAR linkBuffer[cLinkLen];
			for (INT pass = 0; pass < 2; pass++)
			{
				if (pass > 0 && !m_googleLinks.empty())
					break;
				LPCTSTR tag = traceTags[pass];
				LPCSTR startPos = page.c_str();
				while (startPos != NULL)
				{
					startPos = strstr(startPos, markers[pass]);
					if (startPos == NULL)
					{
						// Also the normal way out of the loop: no further marker in the page.
						TRACE(_T("@1 GoogleLyricsProvider::NextResult. %sCan't find startPos (1) (FAILED)\r\n"), tag);
						break;
					}
					startPos = strstr(startPos, "http:");
					if (startPos == NULL)
					{
						TRACE(_T("@1 GoogleLyricsProvider::NextResult. %sCan't find startPos (2) (FAILED)\r\n"), tag);
						break;
					}
					// startPos[-1] is valid: "http:" is always found behind the
					// first character of the marker. A quoted link ends at the
					// closing quote, an unquoted one at the next '&'.
					LPCSTR endPos = strchr(startPos + 1, startPos[-1] == '"' ? '"' : '&');
					if (endPos == NULL)
					{
						// The explicit cast is required: a CA2CT object must not be
						// passed through a variable argument list.
						TRACE(_T("@1 GoogleLyricsProvider::NextResult. %sCan't find endPos '%0.150s'\r\n"), tag, (LPCTSTR)CA2CT(&startPos[-1]));
						break;
					}
					if (endPos - startPos < (INT)cLinkLen)
					{
						strncpy(linkBuffer, startPos, endPos - startPos);
						linkBuffer[endPos - startPos] = 0;
						if (IsBlackListed(linkBuffer) == FALSE)
							m_googleLinks.push_back(linkBuffer);
					}
					else
						TRACE(_T("@1 GoogleLyricsProvider::NextResult. %slink bigger than expected (FAILED)\r\n"), tag);
					startPos = endPos;
				}
			}
		}
		else
			TRACE(_T("@1 GoogleLyricsProvider::NextResult(). Dowloading Google Page (FAILED)\r\n"));

	}

	std::wstring page;
	BOOL bResultFound = FALSE;
	while (!bResultFound && m_curResult < (INT)m_googleLinks.size())
	{
		LPCSTR pLink = m_googleLinks[m_curResult].c_str();
		if (DownloadWebPageUnicode(page, m_hNet, (LPCTSTR)CA2CT(pLink)))
		{
			// Strip the parts of the document that are removed before extraction.
			RemoveEnclosedString(page, _T("<!--"), _T("-->"));
			RemoveEnclosedString(page, _T("<head"), _T("/head>"));
			RemoveEnclosedString(page, _T("<script"), _T("/script>"));
			RemoveEnclosedString(page, _T("<a href="), _T("/a>"));
			ReplaceHtmlEntities(page);
			if (ExtractLyrics(page))
			{
				m_result = page;
				LPCTSTR delims = _T(" \t\n\r");
				m_result.erase(0, m_result.find_first_not_of(delims));		//Trim Left
				m_result.erase(m_result.find_last_not_of(delims) + 1);		//Trim Right
				if (m_result.size() > 40)
				{
					//Keep Additional Info (The Web Site Provider)
					m_resultAdditionalInfo.clear();
					// Offsets 7 and 8 skip "http://" and the first host character;
					// they are only touched when the link is long enough.
					if (strlen(pLink) > 7)
					{
						LPCSTR sp = &pLink[7];
						LPCSTR ep = strchr(&pLink[8], '/');
						if (ep != 0)
						{
							const INT cMaxSiteLen = 99;
							const INT siteLen = INT(ep - sp) > cMaxSiteLen ? cMaxSiteLen : INT(ep - sp);
							CHAR SourceSiteU[MAX_PATH];
							strncpy(SourceSiteU, sp, siteLen);
							SourceSiteU[siteLen] = 0;
							m_resultAdditionalInfo = (LPCTSTR)CA2CT(SourceSiteU);
						}
					}
					bResultFound = TRUE;
				}
				else
				{
					// Too short to be a real result. Drop it so that it cannot be
					// returned by mistake when all remaining links fail.
					m_result.clear();
				}
			}
			else
				TRACE(_T("@1 GoogleLyricsProvider::GetResult(). ExtractLyrics (FAILED)\r\n"));
		}
		else
			TRACE(_T("@1 GoogleLyricsProvider::GetResult(). Dowloading Lyrics Page (FAILED) '%s'\r\n"), (LPCTSTR)CA2CT(pLink));

		// The link counts as consumed whether or not it produced a result.
		m_curResult++;
	}
	if (!bResultFound)
		return FALSE;
	result.main = m_result.c_str();
	result.additionalInfo = m_resultAdditionalInfo.c_str();
	result.service = m_request.service;
	return TRUE;
}
// Example no. 2
// 0
// Exercises the web page helper routines: ReplaceHtmlEntities,
// DownloadWebPage, DownloadWebPageUnicode, GetWindowsCodePageA, Ansi2Unicode
// and GetTextFromHtmlFragment. The download checks are skipped when
// InternetOpen does not return a handle.
//
// Returns TRUE in every case; the individual checks are made through the
// UNITTEST macro.
BOOL TestWebPageUtilities()
{
	UNITTESTSTART;
	// repEntitiesTest is read as (input, expected output) pairs. The condition
	// "i + 1 < count" keeps the second index inside the table even when the
	// table has an odd number of entries.
	const size_t entityCaseCount = sizeof(repEntitiesTest) / sizeof(repEntitiesTest[0]);
	for (size_t i = 0; i + 1 < entityCaseCount; i += 2)
	{
		std::wstring test(repEntitiesTest[i]);
		ReplaceHtmlEntities(test);
		UNITTEST(test == repEntitiesTest[i + 1]);
	}

	HINTERNET hNet = InternetOpen(_T("UnitTest"), INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL, NULL);
	std::string page;
	if (hNet)
	{
		// Invalid addresses must fail and must leave the output empty.
		UNITTEST(!DownloadWebPage(page, hNet, _T("123567")));
		UNITTEST(!DownloadWebPage(page, hNet, _T("")));
		UNITTEST(page.empty());

		const BOOL bAnsiDownloaded = DownloadWebPage(page, hNet, _T("http://www.in.gr/"));
		if (bAnsiDownloaded)
		{
			UNITTEST(!page.empty());
		}

		std::wstring pageU;
		if (DownloadWebPageUnicode(pageU, hNet, _T("http://www.in.gr/")))
		{
			UNITTEST(!pageU.empty());
			// The two downloads can only be compared when the ANSI one succeeded too.
			if (bAnsiDownloaded)
			{
				std::wstring UnicodeAnsiPage;
				INT cp = GetWindowsCodePageA(page.c_str());
				if (cp == 0)
					cp = 1252;	// fall back to Windows-1252 when no code page is detected
				Ansi2Unicode(UnicodeAnsiPage, page, cp);
				UNITTEST(UnicodeAnsiPage == pageU);
			}
		}
		InternetCloseHandle(hNet);
	}

	page = "<html content=\"text/html; charset=windows-452\">This is a &amp; test</html>";
	UNITTEST(GetWindowsCodePageA(page.c_str()) == 452);
	UNITTEST(GetWindowsCodePageA("content=\"text/html; charset=UTF-8\">") == 65001);
	UNITTEST(GetWindowsCodePageA("content=\"text/html; charset=utf-8\">") == 65001);
	UNITTEST(GetWindowsCodePageA("content=\"text/html; charset=iso-8859-7\"") == 28597);
	UNITTEST(GetWindowsCodePageA("content=\"text/html; charset=iso-8859-1\"") == 28591);

	std::wstring pageU;
	Ansi2Unicode(pageU, page);
	UNITTEST(ReplaceHtmlEntities(pageU) == 1);
	UNITTEST(pageU == _T("<html content=\"text/html; charset=windows-452\">This is a & test</html>"));

	std::tstring text;
	if (!pageU.empty())
	{
		// The end pointer must be derived from pageU itself: entity replacement
		// made pageU shorter than the ANSI source, so an offset based on
		// page.size() points behind the end of pageU's buffer.
		// NOTE(review): the address of the last character is passed, as the
		// original expression did; presumably the end is inclusive - confirm
		// against GetTextFromHtmlFragment.
		GetTextFromHtmlFragment(text, pageU.c_str(), pageU.c_str() + pageU.size() - 1);
	}
	UNITTEST(text == _T("This is a & test"));

	return TRUE;
}
// Example no. 3
// 0
// Performs the handshake request against post.audioscrobbler.com.
//
// The request carries an authentication token: the lower-cased MD5 of
// m_MD5password followed by the current Unix timestamp. The response is read
// line by line: line 0 is the status, line 1 the session id, line 2 the
// "now playing" URL and line 3 the submission URL.
//
// sessionID      Receives response line 1; cleared on entry.
// nowPlayingURL  Receives response line 2; cleared on entry.
// submissionURL  Receives response line 3; cleared on entry.
// Returns        TRUE when the status line was "OK" (m_state becomes ST_Idle).
//                FALSE otherwise; m_error holds the reason and m_state is
//                ST_Error, except for missing credentials, where only m_error
//                is set.
BOOL LastFMServices::HandShake(std::basic_string<TCHAR>& sessionID, 
							   std::basic_string<TCHAR>& nowPlayingURL, 
							   std::basic_string<TCHAR>& submissionURL)
{
	sessionID.clear();
	nowPlayingURL.clear();
	submissionURL.clear();
	if (m_username.empty() || m_MD5password.empty())
	{
		//TRACE(_T("@4 LastFMServices::HandShake. Empty Authorization.\r\n"));
		m_error = ERR_BadAuth;
		return FALSE;
	}
	SYSTEMTIME st;
	GetSystemTime(&st);
	UINT ts = SystemTime2UnixTimeStamp(st);
	cMD5 md5;
	const size_t cAuthLen = 100;
	CHAR authStr[cAuthLen];
	_snprintf(authStr, cAuthLen, "%s%u", (LPCSTR)CT2CA(m_MD5password.c_str()), ts);
	// _snprintf leaves the buffer unterminated when the output is truncated.
	authStr[cAuthLen - 1] = 0;


	//http://post.audioscrobbler.com/?hs=true&p=1.2.1&c=<client-id>&v=<client-ver>&u=<user>&t=<timestamp>&a=<auth>
	const size_t cUrlLen = 2000;
	TCHAR url[cUrlLen];
	_sntprintf(url, cUrlLen, 
		_T("http://post.audioscrobbler.com/?hs=true&p=%s&c=%s&v=%s&u=%s&t=%u&a=%.32s"),
		sProtocolVer,
		sClientID,
		sClientVer,
		m_username.c_str(),
		ts,
		(LPCTSTR)CA2CT(_strlwr(md5.CalcMD5FromString(authStr))));
	// Same truncation rule as above applies to _sntprintf.
	url[cUrlLen - 1] = 0;

	TRACE(_T("About to request: '%s'\r\n"), url);
	m_state = ST_Connecting;

	std::string page;
	if (DownloadWebPage(page, m_hNet, url))
	{
		// Start from a failure state: a response without a status line must not
		// be reported as success because of a value left from an earlier call.
		m_error = ERR_Failed;
		m_errorString.clear();

		std::string delimiter = "\n";
		std::string line;
		INT pos = getToken(page, 0, delimiter, line);
		INT count = 0;
		while (pos != -1)
		{
			switch (count)
			{
			case 0:
				m_errorString = line;
				if (line == "OK")
					m_error = ERR_None;
				else if (line == "BANNED")
					m_error = ERR_Banned;
				else if (line == "BADAUTH")
					m_error = ERR_BadAuth;
				else if (line == "BADTIME")
					m_error = ERR_BadTime;
				else 
					m_error = ERR_Failed;
				break;
			case 1:
				sessionID = (LPCTSTR)CA2CT(line.c_str());
				break;
			case 2:
				nowPlayingURL = (LPCTSTR)CA2CT(line.c_str());
				break;
			case 3:
				submissionURL = (LPCTSTR)CA2CT(line.c_str());
				break;
			default:
				break;
			}
			// Any status other than "OK" ends the parsing after the first line.
			if (m_error != ERR_None)
				break;
			count++;
			pos = getToken(page, pos, delimiter, line);
		}
		if (m_error == ERR_None)
		{
			TRACE(_T("@4 LastFMServices::HandShake. success.\r\n"));
			m_state = ST_Idle;
		}
		else
		{
			// The explicit cast is required: a CA2CT object must not be passed
			// through a variable argument list.
			TRACE(_T("@4 LastFMServices::HandShake. fail: '%s'.\r\n"), (LPCTSTR)CA2CT(GetErrorString()));
			m_state = ST_Error;
		}
	}
	else //---Download page failed
	{
		m_error = ERR_CommunicationError;
		m_state = ST_Error;
		TRACE(_T("@4 LastFMServices::HandShake. fail: Communication Error.\r\n"));
	}
	return m_state == ST_Idle;

}