void CMusicInfoScraper::FindAlbuminfo() { CStdString strAlbum=m_strAlbum; CStdString strHTML; m_vecAlbums.erase(m_vecAlbums.begin(), m_vecAlbums.end()); CScraperParser parser; if (!parser.Load(_P("q:\\system\\scrapers\\music\\"+m_info.strPath))) return; if (!m_info.settings.GetPluginRoot() || m_info.settings.GetSettings().IsEmpty()) { m_info.settings.LoadSettingsXML(_P("q:\\system\\scrapers\\music\\"+m_info.strPath)); m_info.settings.SaveFromDefault(); } parser.m_param[0] = strAlbum; parser.m_param[1] = m_strArtist; CUtil::URLEncode(parser.m_param[0]); CUtil::URLEncode(parser.m_param[1]); CScraperUrl scrURL; scrURL.ParseString(parser.Parse("CreateAlbumSearchUrl")); if (!CScraperUrl::Get(scrURL.m_url[0], strHTML, m_http) || strHTML.size() == 0) { CLog::Log(LOGERROR, "%s: Unable to retrieve web site",__FUNCTION__); return; } parser.m_param[0] = strHTML; CStdString strXML = parser.Parse("GetAlbumSearchResults",&m_info.settings); if (strXML.IsEmpty()) { CLog::Log(LOGERROR, "%s: Unable to parse web site",__FUNCTION__); return; } if (strXML.Find("encoding=\"utf-8\"") < 0) g_charsetConverter.unknownToUTF8(strXML); // ok, now parse the xml file TiXmlDocument doc; doc.Parse(strXML.c_str(),0,TIXML_ENCODING_UTF8); if (!doc.RootElement()) { CLog::Log(LOGERROR, "%s: Unable to parse xml",__FUNCTION__); return; } TiXmlHandle docHandle( &doc ); TiXmlElement* album = docHandle.FirstChild( "results" ).FirstChild( "entity" ).Element(); if (!album) return; while (album) { TiXmlNode* title = album->FirstChild("title"); TiXmlElement* link = album->FirstChildElement("url"); TiXmlNode* artist = album->FirstChild("artist"); TiXmlNode* year = album->FirstChild("year"); if (title && title->FirstChild()) { CStdString strTitle = title->FirstChild()->Value(); CStdString strArtist; CStdString strAlbumName; if (artist && artist->FirstChild()) { strArtist = artist->FirstChild()->Value(); strAlbumName.Format("%s - %s",strArtist.c_str(),strTitle.c_str()); } else strAlbumName = strTitle; if (year && year->FirstChild()) strAlbumName.Format("%s (%s)",strAlbumName.c_str(),year->FirstChild()->Value()); CScraperUrl url; if (!link) url.ParseString(scrURL.m_xml); while (link && link->FirstChild()) { url.ParseElement(link); link = link->NextSiblingElement("url"); } CMusicAlbumInfo newAlbum(strTitle, strArtist, strAlbumName, url); m_vecAlbums.push_back(newAlbum); } album = album->NextSiblingElement(); } if (m_vecAlbums.size()>0) m_bSuccessfull=true; return; }
void CMusicInfoScraper::FindArtistInfo() { CStdString strArtist=m_strArtist; CStdString strHTML; m_vecArtists.erase(m_vecArtists.begin(), m_vecArtists.end()); if (!m_scraper->Load()) return; vector<CStdString> extras; extras.push_back(m_strArtist); g_charsetConverter.utf8To(m_scraper->GetParser().GetSearchStringEncoding(), m_strArtist, extras[0]); CURL::Encode(extras[0]); CLog::Log(LOGDEBUG, "%s: Searching for '%s' using %s scraper (file: '%s', content: '%s', version: '%s')", __FUNCTION__, m_strArtist.c_str(), m_scraper->Name().c_str(), m_scraper->Path().c_str(), ADDON::TranslateContent(m_scraper->Content()).c_str(), m_scraper->Version().c_str()); CScraperUrl scrURL; vector<CStdString> url = m_scraper->Run("CreateArtistSearchUrl",scrURL,m_http,&extras); if (url.empty()) return; scrURL.ParseString(url[0]); vector<CStdString> xml = m_scraper->Run("GetArtistSearchResults",scrURL,m_http,&extras); for (vector<CStdString>::iterator it = xml.begin(); it != xml.end(); ++it) { // ok, now parse the xml file TiXmlDocument doc; doc.Parse(it->c_str(),0,TIXML_ENCODING_UTF8); if (!doc.RootElement()) { CLog::Log(LOGERROR, "%s: Unable to parse xml",__FUNCTION__); continue; } TiXmlHandle docHandle( &doc ); TiXmlElement* artist = docHandle.FirstChild( "results" ).FirstChild( "entity" ).Element(); while (artist) { TiXmlNode* title = artist->FirstChild("title"); TiXmlNode* year = artist->FirstChild("year"); TiXmlNode* genre = artist->FirstChild("genre"); TiXmlElement* link = artist->FirstChildElement("url"); if (title && title->FirstChild()) { CStdString strTitle = title->FirstChild()->Value(); CScraperUrl url; if (!link) url.ParseString(scrURL.m_xml); while (link && link->FirstChild()) { url.ParseElement(link); link = link->NextSiblingElement("url"); } CMusicArtistInfo newArtist(strTitle, url); if (genre && genre->FirstChild()) newArtist.GetArtist().strGenre = genre->FirstChild()->Value(); if (year && year->FirstChild()) newArtist.GetArtist().strBorn = year->FirstChild()->Value(); m_vecArtists.push_back(newArtist); } artist = artist->NextSiblingElement(); } } if (m_vecArtists.size()>0) m_bSucceeded=true; return; }
void CMusicInfoScraper::FindAlbumInfo() { CStdString strAlbum=m_strAlbum; CStdString strHTML; m_vecAlbums.erase(m_vecAlbums.begin(), m_vecAlbums.end()); if (!m_scraper->Load() || !m_scraper->GetParser().HasFunction("CreateAlbumSearchUrl")) return; CLog::Log(LOGDEBUG, "%s: Searching for '%s - %s' using %s scraper (path: '%s', content: '%s', version: '%s')", __FUNCTION__, m_strArtist.c_str(), strAlbum.c_str(), m_scraper->Name().c_str(), m_scraper->Path().c_str(), ADDON::TranslateContent(m_scraper->Content()).c_str(), m_scraper->Version().c_str()); vector<CStdString> extras; extras.push_back(strAlbum); extras.push_back(m_strArtist); g_charsetConverter.utf8To(m_scraper->GetParser().GetSearchStringEncoding(), strAlbum, extras[0]); g_charsetConverter.utf8To(m_scraper->GetParser().GetSearchStringEncoding(), m_strArtist, extras[1]); CURL::Encode(extras[0]); CURL::Encode(extras[1]); CScraperUrl scrURL; vector<CStdString> url = m_scraper->Run("CreateAlbumSearchUrl",scrURL,m_http,&extras); if (url.empty()) return; scrURL.ParseString(url[0]); vector<CStdString> xml = m_scraper->Run("GetAlbumSearchResults",scrURL,m_http); for (vector<CStdString>::iterator it = xml.begin(); it != xml.end(); ++it) { // ok, now parse the xml file TiXmlDocument doc; doc.Parse(it->c_str(),0,TIXML_ENCODING_UTF8); TiXmlHandle docHandle( &doc ); TiXmlElement* album = docHandle.FirstChild( "results" ).FirstChild( "entity" ).Element(); while (album) { TiXmlNode* title = album->FirstChild("title"); TiXmlElement* link = album->FirstChildElement("url"); TiXmlNode* artist = album->FirstChild("artist"); TiXmlNode* year = album->FirstChild("year"); TiXmlElement* relevance = album->FirstChildElement("relevance"); if (title && title->FirstChild()) { CStdString strTitle = title->FirstChild()->Value(); CStdString strArtist; CStdString strAlbumName; if (artist && artist->FirstChild()) { strArtist = artist->FirstChild()->Value(); strAlbumName.Format("%s - %s",strArtist.c_str(),strTitle.c_str()); } else strAlbumName = strTitle; if (year && year->FirstChild()) strAlbumName.Format("%s (%s)",strAlbumName.c_str(),year->FirstChild()->Value()); CScraperUrl url; if (!link) url.ParseString(scrURL.m_xml); while (link && link->FirstChild()) { url.ParseElement(link); link = link->NextSiblingElement("url"); } CMusicAlbumInfo newAlbum(strTitle, strArtist, strAlbumName, url); if (relevance && relevance->FirstChild()) { float scale=1; const char* newscale = relevance->Attribute("scale"); if (newscale) scale = (float)atof(newscale); newAlbum.SetRelevance((float)atof(relevance->FirstChild()->Value())/scale); } m_vecAlbums.push_back(newAlbum); } album = album->NextSiblingElement(); } } if (m_vecAlbums.size()>0) m_bSucceeded=true; return; }
int CIMDB::InternalFindMovie(const CStdString &strMovie, IMDB_MOVIELIST& movielist, bool& sortMovieList, const CStdString& strFunction, CScraperUrl* pUrl) { movielist.clear(); CScraperUrl scrURL; CStdString strName = strMovie; CStdString movieTitle, movieTitleAndYear, movieYear; CUtil::CleanString(strName, movieTitle, movieTitleAndYear, movieYear, true); movieTitle.ToLower(); CLog::Log(LOGDEBUG, "%s: Searching for '%s' using %s scraper (file: '%s', content: '%s', language: '%s', date: '%s', framework: '%s')", __FUNCTION__, movieTitle.c_str(), m_info.strTitle.c_str(), m_info.strPath.c_str(), m_info.strContent.c_str(), m_info.strLanguage.c_str(), m_info.strDate.c_str(), m_info.strFramework.c_str()); if (!pUrl) { if (m_parser.HasFunction("CreateSearchUrl")) { GetURL(strMovie, movieTitle, movieYear, scrURL); } else if (m_info.strContent.Equals("musicvideos")) { if (!m_parser.HasFunction("FileNameScrape")) return false; CScraperUrl scrURL("filenamescrape"); scrURL.strTitle = strMovie; movielist.push_back(scrURL); return 1; } if (scrURL.m_xml.IsEmpty()) return 0; } else scrURL = *pUrl; vector<CStdString> strHTML; for (unsigned int i=0;i<scrURL.m_url.size();++i) { CStdString strCurrHTML; if (!CScraperUrl::Get(scrURL.m_url[i],strCurrHTML,m_http) || strCurrHTML.size() == 0) return 0; strHTML.push_back(strCurrHTML); } // now grab our details using the scraper for (unsigned int i=0;i<strHTML.size();++i) m_parser.m_param[i] = strHTML[i]; m_parser.m_param[strHTML.size()] = scrURL.m_url[0].m_url; CStdString strXML = m_parser.Parse(strFunction,&m_info.settings); //CLog::Log(LOGDEBUG,"scraper: %s returned %s",strFunction.c_str(),strXML.c_str()); if (strXML.IsEmpty()) { CLog::Log(LOGERROR, "%s: Unable to parse web site",__FUNCTION__); return 0; } if (!XMLUtils::HasUTF8Declaration(strXML)) g_charsetConverter.unknownToUTF8(strXML); // ok, now parse the xml file TiXmlDocument doc; doc.Parse(strXML.c_str(),0,TIXML_ENCODING_UTF8); if (!doc.RootElement()) { CLog::Log(LOGERROR, "%s: Unable to parse xml",__FUNCTION__); return 0; } if (stricmp(doc.RootElement()->Value(),"error")==0) { TiXmlElement* title = doc.RootElement()->FirstChildElement("title"); CStdString strTitle; if (title && title->FirstChild() && title->FirstChild()->Value()) strTitle = title->FirstChild()->Value(); TiXmlElement* message = doc.RootElement()->FirstChildElement("message"); CStdString strMessage; if (message && message->FirstChild() && message->FirstChild()->Value()) strMessage = message->FirstChild()->Value(); CGUIDialogOK* dialog = (CGUIDialogOK*)g_windowManager.GetWindow(WINDOW_DIALOG_OK); dialog->SetHeading(strTitle); dialog->SetLine(0,strMessage); g_application.getApplicationMessenger().DoModal(dialog,WINDOW_DIALOG_OK); return -1; } TiXmlHandle docHandle( &doc ); TiXmlElement* xurl = doc.RootElement()->FirstChildElement("url"); while (xurl && xurl->FirstChild()) { const char* szFunction = xurl->Attribute("function"); if (szFunction) { CScraperUrl scrURL(xurl); InternalFindMovie(strMovie,movielist,sortMovieList,szFunction,&scrURL); } xurl = xurl->NextSiblingElement("url"); } TiXmlElement *movie = docHandle.FirstChild( "results" ).FirstChild( "entity" ).Element(); if (!movie) return 0; while (movie) { // is our result already sorted correctly when handed over from scraper? if so, do not let xbmc sort it if (sortMovieList) { TiXmlElement* results = docHandle.FirstChild("results").Element(); if (results) { CStdString szSorted = results->Attribute("sorted"); sortMovieList = (szSorted.CompareNoCase("yes") != 0); } } CScraperUrl url; TiXmlNode *title = movie->FirstChild("title"); TiXmlElement *link = movie->FirstChildElement("url"); TiXmlNode *year = movie->FirstChild("year"); TiXmlNode* id = movie->FirstChild("id"); TiXmlNode* language = movie->FirstChild("language"); if (title && title->FirstChild() && link && link->FirstChild()) { url.strTitle = title->FirstChild()->Value(); while (link && link->FirstChild()) { url.ParseElement(link); link = link->NextSiblingElement("url"); } if (id && id->FirstChild()) url.strId = id->FirstChild()->Value(); // calculate the relavance of this hit CStdString compareTitle = url.strTitle; compareTitle.ToLower(); CStdString matchTitle = movieTitle; matchTitle.ToLower(); // see if we need to add year information CStdString compareYear; if(year && year->FirstChild()) compareYear = year->FirstChild()->Value(); if (!movieYear.IsEmpty() && !compareYear.IsEmpty()) { matchTitle.AppendFormat(" (%s)", movieYear.c_str()); compareTitle.AppendFormat(" (%s)", compareYear.c_str()); } url.relevance = fstrcmp(matchTitle.c_str(), compareTitle.c_str(), 0); // reconstruct a title for the user CStdString title = url.strTitle; if (!compareYear.IsEmpty()) title.AppendFormat(" (%s)", compareYear.c_str()); if (language && language->FirstChild()) title.AppendFormat(" (%s)", language->FirstChild()->Value()); url.strTitle = title; movielist.push_back(url); } movie = movie->NextSiblingElement(); } return 1; }