C++ (Cpp) CScraperUrl::ParseElement Exemples

Exemple #1

0

Afficher le fichier

void CMusicInfoScraper::FindAlbuminfo()
{
  CStdString strAlbum=m_strAlbum;
  CStdString strHTML;
  m_vecAlbums.erase(m_vecAlbums.begin(), m_vecAlbums.end());

  CScraperParser parser;
  if (!parser.Load(_P("q:\\system\\scrapers\\music\\"+m_info.strPath)))
    return;

  if (!m_info.settings.GetPluginRoot() || m_info.settings.GetSettings().IsEmpty())
  {
    m_info.settings.LoadSettingsXML(_P("q:\\system\\scrapers\\music\\"+m_info.strPath));
    m_info.settings.SaveFromDefault();
  }

  parser.m_param[0] = strAlbum;
  parser.m_param[1] = m_strArtist;
  CUtil::URLEncode(parser.m_param[0]);
  CUtil::URLEncode(parser.m_param[1]);

  CScraperUrl scrURL;
  scrURL.ParseString(parser.Parse("CreateAlbumSearchUrl"));
  if (!CScraperUrl::Get(scrURL.m_url[0], strHTML, m_http) || strHTML.size() == 0)
  {
    CLog::Log(LOGERROR, "%s: Unable to retrieve web site",__FUNCTION__);
    return;
  }

  parser.m_param[0] = strHTML;
  CStdString strXML = parser.Parse("GetAlbumSearchResults",&m_info.settings);
  if (strXML.IsEmpty())
  {
    CLog::Log(LOGERROR, "%s: Unable to parse web site",__FUNCTION__);
    return;
  }

  if (strXML.Find("encoding=\"utf-8\"") < 0)
    g_charsetConverter.unknownToUTF8(strXML);

  // ok, now parse the xml file
  TiXmlDocument doc;
  doc.Parse(strXML.c_str(),0,TIXML_ENCODING_UTF8);
  if (!doc.RootElement())
  {
    CLog::Log(LOGERROR, "%s: Unable to parse xml",__FUNCTION__);
    return;
  }
  TiXmlHandle docHandle( &doc );
  TiXmlElement* album = docHandle.FirstChild( "results" ).FirstChild( "entity" ).Element();
  if (!album)
    return;

  while (album)
  {
    TiXmlNode* title = album->FirstChild("title");
    TiXmlElement* link = album->FirstChildElement("url");
    TiXmlNode* artist = album->FirstChild("artist");
    TiXmlNode* year = album->FirstChild("year");
    if (title && title->FirstChild())
    {
      CStdString strTitle = title->FirstChild()->Value();
      CStdString strArtist;
      CStdString strAlbumName;

      if (artist && artist->FirstChild())
      {
        strArtist = artist->FirstChild()->Value();
        strAlbumName.Format("%s - %s",strArtist.c_str(),strTitle.c_str());
      }
      else
        strAlbumName = strTitle;

      if (year && year->FirstChild())
        strAlbumName.Format("%s (%s)",strAlbumName.c_str(),year->FirstChild()->Value());

      CScraperUrl url;
      if (!link)
        url.ParseString(scrURL.m_xml);

      while (link && link->FirstChild())
      {
        url.ParseElement(link);
        link = link->NextSiblingElement("url");
      }
      CMusicAlbumInfo newAlbum(strTitle, strArtist, strAlbumName, url);
      m_vecAlbums.push_back(newAlbum);
    }
    album = album->NextSiblingElement();
  }
  
  if (m_vecAlbums.size()>0)
    m_bSuccessfull=true;

  return;
}

Exemple #2

0

Afficher le fichier

Fichier : MusicInfoScraper.cpp Projet : AaronDnz/xbmc

void CMusicInfoScraper::FindArtistInfo()
{
  CStdString strArtist=m_strArtist;
  CStdString strHTML;
  m_vecArtists.erase(m_vecArtists.begin(), m_vecArtists.end());

  if (!m_scraper->Load())
    return;

  vector<CStdString> extras;
  extras.push_back(m_strArtist);
  g_charsetConverter.utf8To(m_scraper->GetParser().GetSearchStringEncoding(), m_strArtist, extras[0]);
  CURL::Encode(extras[0]);
  
  CLog::Log(LOGDEBUG, "%s: Searching for '%s' using %s scraper (file: '%s', content: '%s', version: '%s')",
    __FUNCTION__, m_strArtist.c_str(), m_scraper->Name().c_str(), m_scraper->Path().c_str(),
    ADDON::TranslateContent(m_scraper->Content()).c_str(), m_scraper->Version().c_str());

  CScraperUrl scrURL;
  vector<CStdString> url = m_scraper->Run("CreateArtistSearchUrl",scrURL,m_http,&extras);
  if (url.empty())
    return;
  scrURL.ParseString(url[0]);

  vector<CStdString> xml = m_scraper->Run("GetArtistSearchResults",scrURL,m_http,&extras);

  for (vector<CStdString>::iterator it  = xml.begin();
                                    it != xml.end(); ++it)
  {
    // ok, now parse the xml file
    TiXmlDocument doc;
    doc.Parse(it->c_str(),0,TIXML_ENCODING_UTF8);
    if (!doc.RootElement())
    {
      CLog::Log(LOGERROR, "%s: Unable to parse xml",__FUNCTION__);
      continue;
    }

    TiXmlHandle docHandle( &doc );
    TiXmlElement* artist = docHandle.FirstChild( "results" ).FirstChild( "entity" ).Element();

    while (artist)
    {
      TiXmlNode* title = artist->FirstChild("title");
      TiXmlNode* year = artist->FirstChild("year");
      TiXmlNode* genre = artist->FirstChild("genre");
      TiXmlElement* link = artist->FirstChildElement("url");
      if (title && title->FirstChild())
      {
        CStdString strTitle = title->FirstChild()->Value();
        CScraperUrl url;
        if (!link)
          url.ParseString(scrURL.m_xml);
        while (link && link->FirstChild())
        {
          url.ParseElement(link);
          link = link->NextSiblingElement("url");
        }
        CMusicArtistInfo newArtist(strTitle, url);
        if (genre && genre->FirstChild())
          newArtist.GetArtist().strGenre = genre->FirstChild()->Value();
        if (year && year->FirstChild())
          newArtist.GetArtist().strBorn = year->FirstChild()->Value();
        m_vecArtists.push_back(newArtist);
      }
      artist = artist->NextSiblingElement();
    }
  }

  if (m_vecArtists.size()>0)
    m_bSucceeded=true;

  return;
}

Exemple #3

0

Afficher le fichier

Fichier : MusicInfoScraper.cpp Projet : AaronDnz/xbmc

void CMusicInfoScraper::FindAlbumInfo()
{
  CStdString strAlbum=m_strAlbum;
  CStdString strHTML;
  m_vecAlbums.erase(m_vecAlbums.begin(), m_vecAlbums.end());

  if (!m_scraper->Load() || !m_scraper->GetParser().HasFunction("CreateAlbumSearchUrl"))
    return;

  CLog::Log(LOGDEBUG, "%s: Searching for '%s - %s' using %s scraper (path: '%s', content: '%s', version: '%s')",
    __FUNCTION__, m_strArtist.c_str(), strAlbum.c_str(), m_scraper->Name().c_str(), m_scraper->Path().c_str(),
    ADDON::TranslateContent(m_scraper->Content()).c_str(), m_scraper->Version().c_str());
  
  vector<CStdString> extras;
  extras.push_back(strAlbum);
  extras.push_back(m_strArtist);
  g_charsetConverter.utf8To(m_scraper->GetParser().GetSearchStringEncoding(), strAlbum, extras[0]);
  g_charsetConverter.utf8To(m_scraper->GetParser().GetSearchStringEncoding(), m_strArtist, extras[1]);
  CURL::Encode(extras[0]);
  CURL::Encode(extras[1]);

  CScraperUrl scrURL;
  vector<CStdString> url = m_scraper->Run("CreateAlbumSearchUrl",scrURL,m_http,&extras);
  if (url.empty())
    return;
  scrURL.ParseString(url[0]);
  vector<CStdString> xml = m_scraper->Run("GetAlbumSearchResults",scrURL,m_http);

  for (vector<CStdString>::iterator it  = xml.begin();
                                    it != xml.end(); ++it)
  {
    // ok, now parse the xml file
    TiXmlDocument doc;
    doc.Parse(it->c_str(),0,TIXML_ENCODING_UTF8);
    TiXmlHandle docHandle( &doc );
    TiXmlElement* album = docHandle.FirstChild( "results" ).FirstChild( "entity" ).Element();

    while (album)
    {
      TiXmlNode* title = album->FirstChild("title");
      TiXmlElement* link = album->FirstChildElement("url");
      TiXmlNode* artist = album->FirstChild("artist");
      TiXmlNode* year = album->FirstChild("year");
      TiXmlElement* relevance = album->FirstChildElement("relevance");
      if (title && title->FirstChild())
      {
        CStdString strTitle = title->FirstChild()->Value();
        CStdString strArtist;
        CStdString strAlbumName;

        if (artist && artist->FirstChild())
        {
          strArtist = artist->FirstChild()->Value();
          strAlbumName.Format("%s - %s",strArtist.c_str(),strTitle.c_str());
        }
        else
          strAlbumName = strTitle;

        if (year && year->FirstChild())
          strAlbumName.Format("%s (%s)",strAlbumName.c_str(),year->FirstChild()->Value());

        CScraperUrl url;
        if (!link)
          url.ParseString(scrURL.m_xml);

        while (link && link->FirstChild())
        {
          url.ParseElement(link);
          link = link->NextSiblingElement("url");
        }
        CMusicAlbumInfo newAlbum(strTitle, strArtist, strAlbumName, url);
        if (relevance && relevance->FirstChild())
        {
          float scale=1;
          const char* newscale = relevance->Attribute("scale");
          if (newscale)
            scale = (float)atof(newscale);
          newAlbum.SetRelevance((float)atof(relevance->FirstChild()->Value())/scale);
        }
        m_vecAlbums.push_back(newAlbum);
      }
      album = album->NextSiblingElement();
    }
  }

  if (m_vecAlbums.size()>0)
    m_bSucceeded=true;

  return;
}

Exemple #4

0

Afficher le fichier

Fichier : IMDB.cpp Projet : flyingtime/boxee

int CIMDB::InternalFindMovie(const CStdString &strMovie, IMDB_MOVIELIST& movielist, bool& sortMovieList, const CStdString& strFunction, CScraperUrl* pUrl)
{
  movielist.clear();

  CScraperUrl scrURL;
  
  CStdString strName = strMovie;
  CStdString movieTitle, movieTitleAndYear, movieYear;
  CUtil::CleanString(strName, movieTitle, movieTitleAndYear, movieYear, true);

  movieTitle.ToLower();

  CLog::Log(LOGDEBUG, "%s: Searching for '%s' using %s scraper (file: '%s', content: '%s', language: '%s', date: '%s', framework: '%s')",
    __FUNCTION__, movieTitle.c_str(), m_info.strTitle.c_str(), m_info.strPath.c_str(), m_info.strContent.c_str(), m_info.strLanguage.c_str(), m_info.strDate.c_str(), m_info.strFramework.c_str());

  if (!pUrl)
  {
  if (m_parser.HasFunction("CreateSearchUrl"))
  {
      GetURL(strMovie, movieTitle, movieYear, scrURL);
  }
  else if (m_info.strContent.Equals("musicvideos"))
  {
    if (!m_parser.HasFunction("FileNameScrape"))
      return false;
    
    CScraperUrl scrURL("filenamescrape");
    scrURL.strTitle = strMovie;
    movielist.push_back(scrURL);
      return 1;
  }
    if (scrURL.m_xml.IsEmpty())
      return 0;
  }
  else
    scrURL = *pUrl;  

  vector<CStdString> strHTML;
  for (unsigned int i=0;i<scrURL.m_url.size();++i)
  {
    CStdString strCurrHTML;
    if (!CScraperUrl::Get(scrURL.m_url[i],strCurrHTML,m_http) || strCurrHTML.size() == 0)
      return 0;
    strHTML.push_back(strCurrHTML);
  }
  
  // now grab our details using the scraper
  for (unsigned int i=0;i<strHTML.size();++i)
    m_parser.m_param[i] = strHTML[i];
  m_parser.m_param[strHTML.size()] = scrURL.m_url[0].m_url;
  CStdString strXML = m_parser.Parse(strFunction,&m_info.settings);
  //CLog::Log(LOGDEBUG,"scraper: %s returned %s",strFunction.c_str(),strXML.c_str());
  if (strXML.IsEmpty())
  {
    CLog::Log(LOGERROR, "%s: Unable to parse web site",__FUNCTION__);
    return 0;
  }
  
  if (!XMLUtils::HasUTF8Declaration(strXML))
    g_charsetConverter.unknownToUTF8(strXML);

  // ok, now parse the xml file
  TiXmlDocument doc;
  doc.Parse(strXML.c_str(),0,TIXML_ENCODING_UTF8);
  if (!doc.RootElement())
  {
    CLog::Log(LOGERROR, "%s: Unable to parse xml",__FUNCTION__);
    return 0;
  }
  if (stricmp(doc.RootElement()->Value(),"error")==0)
  {
    TiXmlElement* title = doc.RootElement()->FirstChildElement("title");
    CStdString strTitle;
    if (title && title->FirstChild() && title->FirstChild()->Value())
      strTitle = title->FirstChild()->Value();
    TiXmlElement* message = doc.RootElement()->FirstChildElement("message");
    CStdString strMessage;
    if (message && message->FirstChild() && message->FirstChild()->Value())
      strMessage = message->FirstChild()->Value();
    CGUIDialogOK* dialog = (CGUIDialogOK*)g_windowManager.GetWindow(WINDOW_DIALOG_OK);
    dialog->SetHeading(strTitle);
    dialog->SetLine(0,strMessage);
    g_application.getApplicationMessenger().DoModal(dialog,WINDOW_DIALOG_OK);
    return -1;
  }
 
  TiXmlHandle docHandle( &doc );

  TiXmlElement* xurl = doc.RootElement()->FirstChildElement("url");
  while (xurl && xurl->FirstChild())
  {
    const char* szFunction = xurl->Attribute("function");
    if (szFunction)
    {
      CScraperUrl scrURL(xurl);
      InternalFindMovie(strMovie,movielist,sortMovieList,szFunction,&scrURL);
    }
    xurl = xurl->NextSiblingElement("url");
  }

  TiXmlElement *movie = docHandle.FirstChild( "results" ).FirstChild( "entity" ).Element();
  if (!movie)
    return 0;

  while (movie)
  {
    // is our result already sorted correctly when handed over from scraper? if so, do not let xbmc sort it
    if (sortMovieList)
    {
      TiXmlElement* results = docHandle.FirstChild("results").Element();
      if (results)
      {
        CStdString szSorted = results->Attribute("sorted");
        sortMovieList = (szSorted.CompareNoCase("yes") != 0);
      }
    }

    CScraperUrl url;
    TiXmlNode *title = movie->FirstChild("title");
    TiXmlElement *link = movie->FirstChildElement("url");
    TiXmlNode *year = movie->FirstChild("year");
    TiXmlNode* id = movie->FirstChild("id");
    TiXmlNode* language = movie->FirstChild("language");
    if (title && title->FirstChild() && link && link->FirstChild())
    {
      url.strTitle = title->FirstChild()->Value();
      while (link && link->FirstChild())
      {
        url.ParseElement(link);
        link = link->NextSiblingElement("url");
      }
      if (id && id->FirstChild())
        url.strId = id->FirstChild()->Value();

      // calculate the relavance of this hit
      CStdString compareTitle = url.strTitle;
      compareTitle.ToLower();
      CStdString matchTitle = movieTitle;
      matchTitle.ToLower();
      // see if we need to add year information
      CStdString compareYear;
        if(year && year->FirstChild())
        compareYear = year->FirstChild()->Value();
      if (!movieYear.IsEmpty() && !compareYear.IsEmpty())
          {
        matchTitle.AppendFormat(" (%s)", movieYear.c_str());
        compareTitle.AppendFormat(" (%s)", compareYear.c_str());
          }
      url.relevance = fstrcmp(matchTitle.c_str(), compareTitle.c_str(), 0);
      // reconstruct a title for the user
      CStdString title = url.strTitle;
      if (!compareYear.IsEmpty())
        title.AppendFormat(" (%s)", compareYear.c_str());
      if (language && language->FirstChild())
        title.AppendFormat(" (%s)", language->FirstChild()->Value());
      url.strTitle = title;
        movielist.push_back(url);
    }
    movie = movie->NextSiblingElement();
  }

  return 1;
}