const uint8_t Burningseries::getlinks(std::vector<Global::episodepair> &episodes) { Global::debug("burningseries.cpp"); std::vector<Global::episodepair> pages; uint8_t ret; const std::map<const Config::HostingProviders, const Config::providerpair> &providermap = _cfg.get_providermap(); ret = get_episode_pages(pages); if (ret != 0) { return ret; } for (const Global::episodepair &epair : pages) { std::map<const Config::HostingProviders, const Config::providerpair>::const_iterator itm; for (itm = providermap.begin(); itm != providermap.end(); ++itm) { if (itm->second.first == epair.second) { // Look up provider of found link in providermap const Config::HostingProviders provider = itm->first; std::string content = getpage(epair.first); std::regex reHosterPage( "(<a href| src)=[\"\'](https?://bs.to/out/.*)[\"\']( target=|></iframe>)"); std::regex reTitle(std::string("<h2 id=\"titleGerman\">(.*)") + "[[:space:]]+<small id=\"titleEnglish\" lang=\"en\">(.*)</small>"); std::smatch match; std::string hosterurl; std::string title; if (std::regex_search(content, match, reHosterPage)) { hosterurl = match[2].str(); if (_cfg.get_resolve()) { resolve_redirect(hosterurl); } for (const Config::HostingProviders &provider_ssl : _cfg.get_providers_ssl()) { // Make sure we use SSL where supported if (provider_ssl == provider) { if (hosterurl.find("https") == std::string::npos) { // Replace "http" with "https" hosterurl = "https" + hosterurl.substr(4, std::string::npos); } } } } else { std::cerr << "Error extracting stream" << std::endl; } if (std::regex_search(content, match, reTitle)) { if (match[1].str() != "") // German title = match[1].str(); else if (match[2].str() != "") // English title = match[2].str(); } episodes.push_back(Global::episodepair(hosterurl, title)); } } } return 0; }
/**
 * Parses an IMDb search result page and loads the data of the first hit.
 *
 * The link of the first search result is extracted from `data`, the linked
 * film page is downloaded synchronously via `request.runSync()` and the
 * film columns (title, year, budget, rating, countries, genres, ...) are
 * filled from it using regular expressions. Depending on the settings the
 * cast and crew come either from the separate "fullcredits" page or from
 * the film page itself, and either the big or the small poster is used.
 *
 * Emits Loaded( film, posterUrl ) when a film page was found, otherwise
 * Error( "Movie not found!" ).
 *
 * @param data  Raw content of the search result page.
 * @return URL of the poster; empty if no film or no poster was found.
 */
QUrl ImdbParser::Parse( const QByteArray& data )
{
    DebugPrintFunc( "ImdbParser::Parse", data.size() );

    QString searchPage( data );
    RegExpTools::SimplifyText( searchPage );

    // Link of the first entry in the search results
    QRegExp reRedirect( "class=\"number\">1.</td><td class=\"title\"><a href=\"(.*)\">");
    QString redirectUrl( RegExpTools::ParseItem( searchPage, reRedirect ) );

    QUrl posterUrl;

    if( redirectUrl.isEmpty() )
    {
        emit Error( "Movie not found!" );
        return( posterUrl );
    }

    redirectUrl = "http://www.imdb.com" + redirectUrl;
    QString str( request.runSync( QUrl( redirectUrl ) ) );
    RegExpTools::SimplifyText( str );
    DebugPrint( QString( "Simpified to: %1 bytes" ).arg( str.size() ) );

    // Title
    QRegExp reTitle( "h1 itemprop=\"name\".*>(.*)<span" );
    film.SetColumnData( FilmItem::TitleColumn, RegExpTools::ParseItem( str, reTitle ) );

    // Original title
    QRegExp reOriginalTitle( "div class=\"originalTitle\">(.*)<span" );
    film.SetColumnData( FilmItem::OriginalTitleColumn, RegExpTools::ParseItem( str, reOriginalTitle ) );

    // Tagline
    QRegExp reTagline( "Taglines:</h4>(.*)<" );
    film.SetColumnData( FilmItem::TaglineColumn, RegExpTools::ParseItem( str, reTagline ) );

    // Year
    QRegExp reYear( "Release Date:</h4>.*([0-9]{4})" );
    film.SetColumnData( FilmItem::YearColumn, RegExpTools::ParseItem( str, reYear ).toInt() );

    // Budget (spaces are removed before the numeric conversion)
    QRegExp reBudget( "Budget:</h4> \\$(.*)<span" );
    film.SetColumnData( FilmItem::BudgetColumn, RegExpTools::ParseItem( str, reBudget ).replace( " ", "" ).toDouble() );

    // Rating (a decimal comma is replaced by a point before the conversion)
    QRegExp reRating( "itemprop=\"ratingValue\">(.*)</span>" );
    film.SetColumnData( FilmItem::RatingColumn, RegExpTools::ParseItem( str, reRating ).replace( ",", "." ).toDouble() );

    // Country
    QRegExp reCountryList( "Countr.*</h4>(.*)</div>" );
    QRegExp reCountry( "href=\"/country/.*>(.*)</a>" );
    film.SetColumnData( FilmItem::CountryColumn, RegExpTools::ParseList( str, reCountryList, reCountry ) );

    // Genre
    QRegExp reGenreList( "Genres:</h4>(.*)</div>" );
    QRegExp reGenre( "href=\"/genre/.*>(.*)</a>" );
    film.SetColumnData( FilmItem::GenreColumn, RegExpTools::ParseList( str, reGenreList, reGenre ) );

    // Description
    QRegExp reDescription( "Storyline</h2><.*><p>(.*)<em class=\"nobr\">" );
    film.SetColumnData( FilmItem::DescriptionColumn, RegExpTools::ParseItem( str, reDescription ).replace( "<br><br>", "<br>\n" ) );

    // Pattern for a single person name; shared by both branches below
    QRegExp reName( "itemprop=\"name\">(.*)</span>" );

    if( AlexandraSettings::GetInstance()->GetParsersLoadAdvancedInfo() )
    {
        // Advanced information: taken from the separate credits page.
        // NOTE(review): appending "fullcredits" presumably relies on the
        // captured link ending with '/' -- confirm against the live markup.
        QString credits( request.runSync( QUrl( redirectUrl + "fullcredits" ) ) );
        RegExpTools::SimplifyText( credits );
        DebugPrint( QString( "Simpified to: %1 bytes" ).arg( credits.size() ) );

        // Starring
        QRegExp reStarringList( "Cast <span>(.*)Produced by" );
        film.SetColumnData( FilmItem::StarringColumn, RegExpTools::ParseList( credits, reStarringList, reName, 20 ) );  // 20 first actors

        // The remaining sections use plain name links
        reName = QRegExp( "href=\"/name/.*>(.*)</a>" );

        // Director
        QRegExp reDirectorList( "Directed by(.*)Writing Credits" );
        film.SetColumnData( FilmItem::DirectorColumn, RegExpTools::ParseList( credits, reDirectorList, reName, 10 ) );  // 10 first directors

        // Screenwriter
        QRegExp reScreenwriterList( "Writing Credits(.*)Cast" );
        film.SetColumnData( FilmItem::ScreenwriterColumn, RegExpTools::ParseList( credits, reScreenwriterList, reName, 10 ) );  // 10 first writers

        // Producer
        QRegExp reProducerList( "Produced by(.*)Music by" );
        film.SetColumnData( FilmItem::ProducerColumn, RegExpTools::ParseList( credits, reProducerList, reName, 10 ) );  // 10 first producers

        // Composer
        QRegExp reComposerList( "Music by(.*)Film Editing by" );
        film.SetColumnData( FilmItem::ComposerColumn, RegExpTools::ParseList( credits, reComposerList, reName ) );
    }
    else
    {
        // Basic information: taken from the film page itself

        // Starring
        QRegExp reStarringList( "Cast</h2>(.*)</table>" );
        film.SetColumnData( FilmItem::StarringColumn, RegExpTools::ParseList( str, reStarringList, reName ) );

        // Director
        QRegExp reDirectorList( "Director.*</h4>(.*)</div>" );
        film.SetColumnData( FilmItem::DirectorColumn, RegExpTools::ParseList( str, reDirectorList, reName ) );

        // Screenwriter
        QRegExp reScreenwriterList( "Writer.*</h4>(.*)</div>" );
        film.SetColumnData( FilmItem::ScreenwriterColumn, RegExpTools::ParseList( str, reScreenwriterList, reName ) );
    }

    // Poster
    if( AlexandraSettings::GetInstance()->GetParsersLoadBigPoster() )
    {
        QRegExp rePoster( "div class=\"poster\"><a href=\"(.*)\"" );
        QString posterPageLink( RegExpTools::ParseItem( str, rePoster ) );

        // Without a link there is no poster page; requesting the bare host
        // would only download the site's start page for nothing.
        if( !posterPageLink.isEmpty() )
        {
            QString s = QString( request.runSync( QUrl( "http://www.imdb.com" + posterPageLink ) ) );
            RegExpTools::SimplifyText( s );
            DebugPrint( QString( "Simpified to: %1 bytes" ).arg( s.size() ) );

            rePoster = QRegExp( "id=\"primary-img\".*src=\"(.*)\"" );
            posterUrl = RegExpTools::ParseItem( s, rePoster );
        }
    }

    if( posterUrl.isEmpty() ) // Small poster
    {
        QRegExp rePoster( "div class=\"poster\"><.*><img.*src=\"(.*)\"" );
        posterUrl = RegExpTools::ParseItem( str, rePoster );
    }

    DebugPrint( "Text parsed!" );
    emit Loaded( film, posterUrl );

    return( posterUrl );
}
void ammendResults(const std::string& formatName, core::FilePath& targetFile, int sourceLine, json::Object* pResultJson) { // provide slide navigation for ioslides and beamer if (formatName != "ioslides_presentation" && formatName != "slidy_presentation" && formatName != "beamer_presentation") { return; } // alias for nicer map syntax json::Object& resultJson = *pResultJson; // read the input file std::vector<std::string> lines; Error error = core::readStringVectorFromFile(targetFile, &lines, false); if (error) { LOG_ERROR(error); return; } // scan the input file looking for headers and slide breaks int totalSlides = 0; std::vector<SlideNavigationItem> slideNavigationItems; bool inCode = false; bool inYaml = false; bool haveTitle = false; boost::regex reYaml("^\\-{3}\\s*$"); boost::regex reTitle("^title\\:(.*)$"); boost::regex reCode("^`{3,}.*$"); boost::regex reTitledSlide("^#(#)?([^|\\{]+).*$"); boost::regex reUntitledSlide("^(\\-{3,}|\\*{3,})\\w*$"); for (unsigned i = 0; i<lines.size(); i++) { // alias line const std::string& line = lines.at(i); // toggle code state if (boost::regex_search(line, reCode)) inCode = !inCode; // bail if we are in code if (inCode) continue; // look for a title if we don't have one if (!haveTitle || inYaml) { if (boost::regex_search(line, reYaml)) { if (!inYaml) { inYaml = true; } else if (inYaml) { // bail if there was no title if (!haveTitle) { break; } else { inYaml = false; } } } // titles only valid in yaml if (inYaml) { boost::smatch match; if (boost::regex_search(line, match, reTitle)) { std::string title = match[1]; boost::algorithm::trim(title); string_utils::stripQuotes(&title); if (title.empty()) title = "Untitled Slide"; SlideNavigationItem item(title, 0, totalSlides++, 1); slideNavigationItems.push_back(item); haveTitle = true; } } } // if we already have the title look for slides else { // titled slides boost::smatch match; if (boost::regex_search(line, match, reTitledSlide)) { std::string title = match[2]; 
boost::algorithm::trim(title); if (title.empty()) title = "Untitled Slide"; int indent = std::string(match[1]).empty() ? 0 : 1; SlideNavigationItem item(title, indent, totalSlides++, i+1); slideNavigationItems.push_back(item); } // untitled slides else if (boost::regex_search(line, reUntitledSlide)) { SlideNavigationItem item("Untitled Slide", 1, totalSlides++, i+1); slideNavigationItems.push_back(item); } } } // did we find slides? if (totalSlides > 0) { // determine which slide the cursor is on int previewSlide = 1; for (int i = (slideNavigationItems.size()-1); i>=0; i--) { const SlideNavigationItem& item = slideNavigationItems.at(i); if (sourceLine >= item.line) { previewSlide = item.index + 1; break; } } // return as json resultJson["preview_slide"] = previewSlide; json::Array jsonSlideNavigationItems; std::transform(slideNavigationItems.begin(), slideNavigationItems.end(), std::back_inserter(jsonSlideNavigationItems), itemAsJson); json::Object jsonSlideNavigation; jsonSlideNavigation["total_slides"] = totalSlides; jsonSlideNavigation["anchor_parens"] = formatName == "slidy_presentation"; jsonSlideNavigation["items"] = jsonSlideNavigationItems; resultJson["slide_navigation"] = jsonSlideNavigation; } }