Esempio n. 1
0
  // Start-of-element handler for the youtube feed parser.
  //
  // An "item" tag opens a new video-thumbnail snippet and installs it as the
  // parser context's current snippet. While inside an item, "title", "pubDate",
  // "link" and "description" tags raise the matching state flag.
  // Tag names are compared case-insensitively; 'attributes' is not read.
  void se_parser_youtube::start_element(parser_context *pc,
                                        const xmlChar *name,
                                        const xmlChar **attributes)
  {
    const char *tag = (const char*)name;

    if (strcasecmp(tag, "item") == 0)
      {
        _in_item = true;

        // a fresh snippet, ranked right after the ones created so far.
        _sn = new seeks_snippet(_count + 1);
        ++_count;
        _sn->_engine = feeds("youtube",_url);
        _sn->_doc_type = seeks_doc_type::VIDEO_THUMB;
        pc->_current_snippet = _sn;
        return; // an "item" tag cannot match any of the child tags below.
      }

    // the remaining tags only matter inside an item.
    if (!_in_item)
      return;

    if (strcasecmp(tag, "title") == 0)
      _in_title = true;
    else if (strcasecmp(tag, "pubDate") == 0)
      _in_date = true;
    else if (strcasecmp(tag, "link") == 0)
      _in_link = true;
    else if (strcasecmp(tag, "description") == 0)
      _in_description = true;
  }
Esempio n. 2
0
  // Start-of-element handler for the dokuwiki result page parser.
  //
  // - <div class="search_result"> closes the previous snippet (kept only if its
  //   title, url, summary and cite are all filled in, dropped otherwise), then
  //   opens a new one and turns the results flag on.
  // - <div class="search_snippet">, once results have started, raises the
  //   snippet-text flag.
  // - <a>, once results have started, raises the link flag and records the
  //   "href" (as both link and cite) and the "title" attribute when present.
  void se_parser_doku::start_element(parser_context *pc,
                                     const xmlChar *name,
                                     const xmlChar **attributes)
  {
    const char *tag = (const char*)name;
    const char **attrs = (const char**)attributes;

    if (strcasecmp(tag,"div") == 0)
      {
        const char *div_class = se_parser::get_attribute(attrs,"class");
        if (!div_class)
          return;

        if (_results_flag && strcasecmp(div_class, "search_snippet") == 0)
          {
            _search_snippet = true;
            return;
          }
        if (strcasecmp(div_class,"search_result") != 0)
          return;

        // settle the fate of the snippet that was being built, if any.
        if (pc->_current_snippet)
          {
            // a snippet missing any of these fields is considered a parsing failure.
            const bool incomplete = pc->_current_snippet->_title.empty()
                                    || pc->_current_snippet->_url.empty()
                                    || pc->_current_snippet->_summary.empty()
                                    || pc->_current_snippet->_cite.empty();
            if (incomplete)
              {
                delete pc->_current_snippet;
                pc->_current_snippet = NULL;
                _count--;
              }
            else
              {
                pc->_snippets->push_back(pc->_current_snippet);
              }
          }

        // start the next snippet.
        search_snippet *fresh = new search_snippet(_count+1);
        ++_count;
        fresh->_engine = feeds("dokuwiki",_url);
        pc->_current_snippet = fresh;
        _results_flag = true;
        return;
      }

    // anchors are only of interest once the result list has started.
    if (!_results_flag || strcasecmp(tag,"a") != 0)
      return;

    _link_flag = true;

    const char *href = se_parser::get_attribute(attrs,"href");
    if (href)
      {
        _link = std::string(href);
        _cite = std::string(href);
      }

    const char *anchor_title = se_parser::get_attribute(attrs,"title");
    if (anchor_title)
      _title = std::string(anchor_title);
  }
Esempio n. 3
0
  // Start-of-element handler for the redmine search result parser.
  //
  // - <dl id="search-results"> turns the results flag on.
  // - <dt>, once results have started, creates a new snippet, derives its
  //   document type from the "class" attribute ("changeset" -> REVISION,
  //   anything starting with "issue" -> ISSUE) and appends it to the result list.
  // - <a href=...> sets the current snippet's URL (prefixed with _host) and
  //   raises the title flag.
  // - <span class="description"> / <span class="author"> raise the summary and
  //   date flags respectively.
  void se_parser_redmine::start_element(parser_context *pc,
                                        const xmlChar *name,
                                        const xmlChar **attributes)
  {
    const char *tag = (const char*)name;

    if (strcasecmp(tag,"dl")==0)
      {
        const char *a_id = se_parser::get_attribute((const char**)attributes,"id");
        if (a_id && strcasecmp(a_id,"search-results")==0)
          _results_flag = true;
      }
    else if (_results_flag && strcasecmp(tag,"dt") == 0)
      {
        const char *a_class = se_parser::get_attribute((const char**)attributes,"class");

        // create new snippet.
        search_snippet *sp = new search_snippet(_count + 1);
        _count++;
        sp->_engine = feeds("redmine",_url);
        if (a_class)
          {
            if (strcasecmp(a_class,"changeset")==0)
              sp->_doc_type = REVISION;
            else if (strncasecmp(a_class,"issue",5)==0)
              sp->_doc_type = ISSUE;
          }
        pc->_current_snippet = sp;
        pc->_snippets->push_back(pc->_current_snippet);
      }
    else if (_results_flag && strcasecmp(tag,"a")==0)
      {
        const char *a_href = se_parser::get_attribute((const char**)attributes,"href");
        // The results flag is raised by <dl>, before any <dt> has created a
        // snippet, so an anchor may show up while there is no current snippet:
        // ignore it instead of dereferencing a null pointer.
        if (a_href && pc->_current_snippet)
          {
            pc->_current_snippet->set_url(_host + std::string(a_href));
            _title_flag = true;
          }
      }
    else if (_results_flag && strcasecmp(tag,"span")==0)
      {
        const char *a_class = se_parser::get_attribute((const char**)attributes,"class");
        if (a_class)
          {
            if (strcasecmp(a_class,"description")==0)
              _summary_flag = true;
            else if (strcasecmp(a_class,"author")==0)
              _date_flag = true;
          }
      }
  }
Esempio n. 4
0
  // Start-of-element handler for the twitter feed parser.
  //
  // - <entry> opens a new TWEET snippet and makes it the current snippet.
  // - Inside an entry, <title>, <published> and <uri> raise the matching flag.
  // - Inside an entry, <link href=...> fills the snippet URL the first time it
  //   is seen; any later link is stored in the snippet's _cached field.
  void se_parser_twitter::start_element(parser_context *pc,
                                        const xmlChar *name,
                                        const xmlChar **attributes)
  {
    const char *tag = (const char*)name;

    if (strcasecmp(tag, "entry") == 0)
      {
        _in_entry = true;

        // create new snippet.
        search_snippet *sp = new search_snippet(_count + 1);
        _count++;
        sp->_engine = feeds("twitter",_url);
        sp->_doc_type = TWEET;
        pc->_current_snippet = sp;
      }
    else if (_in_entry && strcasecmp(tag, "title") == 0)
      {
        _in_title = true;
      }
    else if (_in_entry && strcasecmp(tag, "link") == 0)
      {
        const char *a_link = se_parser::get_attribute((const char**)attributes, "href");
        // A <link> without "href" yields a null pointer; building a string from
        // it is undefined behaviour, so such links (and links seen while no
        // snippet is current) are skipped.
        if (a_link && pc->_current_snippet)
          {
            if (pc->_current_snippet->_url.empty())
              pc->_current_snippet->set_url(a_link);
            else pc->_current_snippet->_cached = a_link;
          }
      }
    else if (_in_entry && strcasecmp(tag, "published") == 0)
      {
        _in_published = true;
      }
    else if (_in_entry && strcasecmp(tag, "uri") == 0)
      {
        _in_uri = true;
      }
  }
Esempio n. 5
0
/**
 * Returns the archive held by this folder's first feed.
 *
 * The folder must have at least one feed (asserted), and the first feed is
 * accessed as an ArchiveFeed.
 */
Archive &ArchiveFolder::archive()
{
    DENG2_ASSERT(!feeds().empty());
    auto &&primaryFeed = feeds().front()->as<ArchiveFeed>();
    return primaryFeed.archive();
}
Esempio n. 6
0
/**
 * Flushes the folder, then asks the first feed (accessed as an ArchiveFeed)
 * to rewrite its file.
 */
void ArchiveFolder::flush()
{
    Folder::flush();
    auto &&primaryFeed = feeds().front()->as<ArchiveFeed>();
    primaryFeed.rewriteFile();
}
Esempio n. 7
0
  // Start-of-element handler for the bing image result parser.
  //
  // - <span class="ic"> marks a new image result: the previous snippet is
  //   dropped (and removed from the result list) when its title, url or cached
  //   image is missing, then a new image snippet is created and appended.
  //   The results flag is raised if the result list was empty on entry.
  // - <span class="md_mu"> / <span class="md_de">, once results have started,
  //   raise the link and title flags respectively.
  // - <img class="img_ls_u" src=...>, once results have started, stores the
  //   source as the current snippet's cached image.
  void se_parser_bing_img::start_element(parser_context *pc,
                                         const xmlChar *name,
                                         const xmlChar **attributes)
  {
    const char *tag = (const char*)name;
    const char **attrs = (const char**)attributes;

    if (strcasecmp(tag,"span") == 0)
      {
        const char *span_class = se_parser::get_attribute(attrs,"class");
        if (!span_class)
          return;

        if (strcasecmp(span_class,"ic") == 0)
          {
            if (pc->_snippets->empty())
              _results_flag = true;

            // a previous snippet lacking title, url or cached image is
            // considered a parsing failure and gets discarded.
            if (pc->_current_snippet
                && (pc->_current_snippet->_title.empty()
                    || pc->_current_snippet->_url.empty()
                    || static_cast<img_search_snippet*>(pc->_current_snippet)->_cached.empty()))
              {
                delete pc->_current_snippet;
                pc->_current_snippet = NULL;
                _count--;
                pc->_snippets->pop_back();
              }

            // open the next image snippet and register it right away.
            img_search_snippet *fresh = new img_search_snippet(_count+1);
            fresh->_safe = _safesearch;
            ++_count;
            fresh->_img_engine = feeds("bing_img",_url);
            pc->_current_snippet = fresh;
            pc->_snippets->push_back(fresh);
          }
        else if (_results_flag)
          {
            if (strcasecmp(span_class,"md_mu") == 0)
              _link_flag = true;
            else if (strcasecmp(span_class,"md_de") == 0)
              _title_flag = true;
          }
        return;
      }

    // only thumbnails inside the result list are of interest from here on.
    if (!_results_flag || strcasecmp(tag,"img") != 0)
      return;

    const char *img_class = se_parser::get_attribute(attrs,"class");
    if (!img_class || strcasecmp(img_class,"img_ls_u") != 0)
      return;

    const char *img_src = se_parser::get_attribute(attrs,"src");
    if (img_src)
      {
        img_search_snippet *current = static_cast<img_search_snippet*>(pc->_current_snippet);
        current->_cached = std::string(img_src);
      }
  }
Esempio n. 8
0
  // Start-of-element handler for the bing result page parser.
  //
  // - <h1> raises the _h1_sr_flag that gates most of the handlers below.
  // - <div class="sb_tlst"> opens a new snippet (and raises the results flag if
  //   the result list is still empty).
  // - <div class="sa_cc"> / <div class="sb_pag"> close the current snippet: it is
  //   appended to the result list when title, url and summary are all set, and
  //   deleted otherwise.
  // - <h3>, <a>, <p> and <cite> raise the flags consumed by the other callbacks;
  //   <a> additionally records its "href" in _link.
  // - While the cached flag is up, the next <a> supplies the cached-page link.
  void se_parser_bing::start_element(parser_context *pc,
                                     const xmlChar *name,
                                     const xmlChar **attributes)
  {
    const char *tag = (const char*)name;

    if (strcasecmp(tag,"h1") == 0)
      {
        _h1_sr_flag = true;
      }
    if (strcasecmp(tag,"div") == 0)
      {
        const char *a_class = se_parser::get_attribute((const char**)attributes,"class");

        if (a_class && strcasecmp(a_class,"sb_tlst") == 0)
          {
            if (pc->_snippets->empty())
              _results_flag = true;

            // create new snippet.
            _sn = new seeks_snippet(_count+1);
            _count++;
            _sn->_engine = feeds("bing",_url);
            pc->_current_snippet = _sn;

            _cached_flag = false; // in case previous snippet did not close the cached flag.
          }
        else if (_results_flag && a_class && (strcasecmp(a_class,"sa_cc")==0
                                              || strcasecmp(a_class,"sb_pag")==0))
          {
            // assert previous snippet if any.
            if (pc->_current_snippet)
              {
                if (pc->_current_snippet->_title.empty()  // consider the parsing did fail on the snippet.
                    || pc->_current_snippet->_url.empty()
                    || pc->_current_snippet->_summary.empty())
                  {
                    // _sn normally aliases the current snippet: clear it too so
                    // that no later handler writes through a freed pointer.
                    if (_sn == pc->_current_snippet)
                      _sn = NULL;
                    delete pc->_current_snippet;
                    pc->_current_snippet = NULL;
                    _count--;
                  }
                else pc->_snippets->push_back(pc->_current_snippet);
              }
          }
      }
    /*else if (_results_flag && strcasecmp(tag,"h2") == 0)
      {
        _results_flag = false;
    }*/
    else if (_results_flag && pc->_current_snippet && _h1_sr_flag && strcasecmp(tag,"h3") == 0)
      {
        _h3_flag = true;
      }
    else if (_results_flag && pc->_current_snippet && _h1_sr_flag && _h3_flag && strcasecmp(tag,"a") == 0)
      {
        _link_flag = true;
        const char *a_link = se_parser::get_attribute((const char**)attributes,"href");

        if (a_link)
          _link = std::string(a_link);
      }
    else if (_results_flag && pc->_current_snippet && _h1_sr_flag && strcasecmp(tag,"p") == 0)
      {
        _p_flag = true;
      }
    else if (_results_flag && _h1_sr_flag && strcasecmp(tag,"cite") == 0)
      {
        _cite_flag = true;
      }
    else if (_results_flag && _h1_sr_flag && _cached_flag && strcasecmp(tag,"a") == 0) // may not be very robust...
      {
        _cached_flag = false;
        const char *a_link = se_parser::get_attribute((const char**)attributes,"href");
        // this branch does not require a current snippet, so _sn may have been
        // discarded above: only store the link when a snippet is still alive.
        if (a_link && _sn)
          _sn->_cached = std::string(a_link);
      }
  }
    /**
     * Retrieve the 10 most recent tweet ids in the user's news feed. Each item
     * in the news feed must be posted by users who the user followed or by the
     * user herself. Tweets must be ordered from most recent to least recent.
     *
     * Each stored tweet is a pair whose first member is the tweet id returned to
     * the caller and whose second member is the key used for the recency
     * comparison (presumably a timestamp -- confirm against the posting code).
     */
    vector<int> getNewsFeed(int userId) {
        // Bounded heap of the best candidates seen so far.
        // NOTE(review): assumes the `tweet` comparator keeps the least recent
        // kept entry at top(), so that it is the one evicted first.
        priority_queue<pair<int, int>, vector<pair<int, int>>, tweet> f_queue;

        // Offers one tweet to the heap. Returns false when the heap is full and
        // the tweet does not beat the current top, which lets the caller stop
        // scanning the rest of that author's (older) tweets.
        auto offer = [&f_queue](const pair<int, int> &candidate) -> bool
        {
            if (f_queue.size() >= 10)
            {
                if (!(candidate.second > f_queue.top().second))
                {
                    return false;
                }
                f_queue.pop();
            }
            f_queue.push(candidate);
            return true;
        };

        // The user's own tweets, scanned from the last stored one backwards.
        auto own = userTweets.find(userId);
        if (own != userTweets.end())
        {
            for (int pos = static_cast<int>(own->second.size()) - 1; pos >= 0; --pos)
            {
                if (!offer(own->second[pos]))
                {
                    break;
                }
            }
        }

        // Tweets of every user currently followed (the user herself excluded,
        // since her tweets were handled above).
        auto relations = userRelationship.find(userId);
        if (relations != userRelationship.end())
        {
            for (const auto &relation : relations->second)
            {
                if (!relation.second || relation.first == userId)
                {
                    continue;
                }

                auto theirs = userTweets.find(relation.first);
                if (theirs == userTweets.end())
                {
                    continue;
                }

                for (int pos = static_cast<int>(theirs->second.size()) - 1; pos >= 0; --pos)
                {
                    if (!offer(theirs->second[pos]))
                    {
                        break;
                    }
                }
            }
        }

        // Drain the heap back to front so that the entry popped last ends up
        // at the front of the result.
        vector<int> feeds(f_queue.size(), 0);
        int slot = static_cast<int>(feeds.size());
        for (; !f_queue.empty(); f_queue.pop())
        {
            feeds[--slot] = f_queue.top().first;
        }

        return feeds;
    }
  // Applies one directive of the websearch plugin configuration.
  //
  // cmd      - directive name, forwarded to the HTML configuration table.
  // cmd_hash - hash of the directive name; selects the case below.
  // arg      - raw argument text of the directive.
  // buf      - not used by this handler.
  // linenum  - not used by this handler.
  //
  // Each recognised directive stores its parsed value in the matching member and
  // appends a row to _config_args through configuration_spec::html_table_row().
  // A malformed search-engine directive is logged and skipped without adding a
  // row; unknown hashes are ignored.
  void websearch_configuration::handle_config_cmd(char *cmd, const uint32_t &cmd_hash, char *arg,
      char *buf, const unsigned long &linenum)
  {
    std::vector<std::string> bpvec;
    char tmp[BUFFER_SIZE];
    int vec_count;
    // fields of a search-engine directive: the feed name followed by
    // (url, name, default-flag) triples, i.e. at most 6 complete urls in 20 slots.
    char *vec[20];
    int i;
    feed_parser fed;
    feed_parser def_fed;
    bool def = false;
    switch (cmd_hash)
      {
      case hash_lang :
        _lang = std::string(arg);
        configuration_spec::html_table_row(_config_args,cmd,arg,
                                           "Websearch language");
        break;

      case hash_n :
        _Nr = atoi(arg);
        configuration_spec::html_table_row(_config_args,cmd,arg,
                                           "Number of websearch results per page");
        break;

      case hash_se :
        strlcpy(tmp,arg,sizeof(tmp));
        vec_count = miscutil::ssplit(tmp," \t",vec,SZ(vec),1,1);
        // Expect the feed name plus whole triples. Rejecting vec_count < 1 also
        // covers an empty argument or a split failure (presumably reported as a
        // negative count -- confirm against miscutil::ssplit), both of which would
        // otherwise lead to reading the uninitialised vec[0] below.
        if (vec_count < 1 || (vec_count - 1) % 3 != 0)
          {
            errlog::log_error(LOG_LEVEL_ERROR, "Wrong number of parameters for search-engine "
                              "directive in websearch plugin configuration file");
            break;
          }
        if (_default_engines)
          {
            // the first explicit directive replaces the built-in engine set.
            _se_enabled = feeds();
            _se_options.clear();
            _default_engines = false;
            _se_default = feeds();
          }

        fed = feed_parser(vec[0]);
        def_fed = feed_parser(vec[0]);
        for (i=1; i<vec_count; i+=3)
          {
            fed.add_url(vec[i]);
            std::string fu_name = vec[i+1];
            def = false;
            if (strcmp(vec[i+2],"default")==0)
              def = true;
            feed_url_options fuo(vec[i],fu_name,def);
            // NOTE(review): the key points into the local fuo's string, while the
            // map stores a copy of fuo. Unless the string implementation shares
            // buffers between copies, the key dangles once fuo goes out of scope
            // -- verify against the declaration of _se_options.
            _se_options.insert(std::pair<const char*,feed_url_options>(fuo._url.c_str(),fuo));
            if (def)
              def_fed.add_url(vec[i]);
          }
        _se_enabled.add_feed(fed);
        if (!def_fed.empty())
          _se_default.add_feed(def_fed);

        configuration_spec::html_table_row(_config_args,cmd,arg,
                                           "Enabled search engine");
        break;

      case hash_thumbs:
        _thumbs = static_cast<bool>(atoi(arg));
        configuration_spec::html_table_row(_config_args,cmd,arg,
                                           "Enable search results webpage thumbnails");
        break;

      case hash_qcd :
        _query_context_delay = strtod(arg,NULL);
        configuration_spec::html_table_row(_config_args,cmd,arg,
                                           "Delay in seconds before deletion of cached websearches and results");
        break;

      case hash_js:
        _js = static_cast<bool>(atoi(arg));
        configuration_spec::html_table_row(_config_args,cmd,arg,
                                           "Enable javascript use on the websearch result page");
        break;

      case hash_content_analysis:
        _content_analysis = static_cast<bool>(atoi(arg));
        configuration_spec::html_table_row(_config_args,cmd,arg,
                                           "Enable the background download of webpages pointed to by websearch results and content analysis");
        break;

      case hash_se_transfer_timeout:
        _se_transfer_timeout = atol(arg);
        configuration_spec::html_table_row(_config_args,cmd,arg,
                                           "Sets the transfer timeout in seconds for connections to a search engine");
        break;

      case hash_se_connect_timeout:
        _se_connect_timeout = atol(arg);
        configuration_spec::html_table_row(_config_args,cmd,arg,
                                           "Sets the connection timeout in seconds for connections to a search engine");
        break;

      case hash_ct_transfer_timeout:
        _ct_transfer_timeout = atol(arg);
        configuration_spec::html_table_row(_config_args,cmd,arg,
                                           "Sets the transfer timeout in seconds when fetching content for analysis and caching");
        break;

      case hash_ct_connect_timeout:
        _ct_connect_timeout = atol(arg);
        configuration_spec::html_table_row(_config_args,cmd,arg,
                                           "Sets the connection timeout in seconds when fetching content for analysis and caching");
        break;

      case hash_clustering:
        _clustering = static_cast<bool>(atoi(arg));
        configuration_spec::html_table_row(_config_args,cmd,arg,
                                           "Enables the clustering from the UI");
        break;

      case hash_max_expansions:
        _max_expansions = atoi(arg);
        configuration_spec::html_table_row(_config_args,cmd,arg,
                                           "Sets the maximum number of query expansions");
        break;

      case hash_extended_highlight:
        _extended_highlight = static_cast<bool>(atoi(arg));
        configuration_spec::html_table_row(_config_args,cmd,arg,
                                           "Enables a more discriminative word highlight scheme");
        break;

      case hash_background_proxy:
        // expected form is "address:port"; anything else disables the proxy.
        _background_proxy_addr = std::string(arg);
        miscutil::tokenize(_background_proxy_addr,bpvec,":");
        if (bpvec.size()!=2)
          {
            errlog::log_error(LOG_LEVEL_ERROR, "wrong address:port for background proxy: %s",_background_proxy_addr.c_str());
            _background_proxy_addr = "";
          }
        else
          {
            _background_proxy_addr = bpvec.at(0);
            _background_proxy_port = atoi(bpvec.at(1).c_str());
          }
        configuration_spec::html_table_row(_config_args,cmd,arg,
                                           "Background proxy for fetching URLs");
        break;

      case hash_show_node_ip:
        _show_node_ip = static_cast<bool>(atoi(arg));
        configuration_spec::html_table_row(_config_args,cmd,arg,
                                           "Enable rendering of the node IP address in the info bar");
        break;

      case hash_personalization:
        _personalization = static_cast<bool>(atoi(arg));
        configuration_spec::html_table_row(_config_args,cmd,arg,
                                           "Enable personalized result ranking");
        break;

      case hash_result_message:
        // the message is optional: a directive without argument is ignored.
        if (!arg)
          break;
        _result_message = std::string(arg);
        configuration_spec::html_table_row(_config_args,cmd,arg,
                                           "Message to appear in a panel next to the search results");
        break;

      case hash_dyn_ui:
        _dyn_ui = static_cast<bool>(atoi(arg));
        configuration_spec::html_table_row(_config_args,cmd,arg,
                                           "Enabled the dynamic UI");
        break;

      case hash_ui_theme:
        _ui_theme = std::string(arg);
        configuration_spec::html_table_row(_config_args,cmd,arg,
                                           "User Interface selected theme");
        break;

      case hash_num_reco_queries:
        _num_reco_queries = atoi(arg);
        configuration_spec::html_table_row(_config_args,cmd,arg,
                                           "Max number of recommended queries");
        break;

      case hash_num_recent_queries:
        _num_recent_queries = atoi(arg);
        configuration_spec::html_table_row(_config_args,cmd,arg,
                                           "Max number of recent queries");
        break;

      default:
        break;

      } // end of switch.
  }
Esempio n. 11
0
  // Start-of-element handler for the wikimedia commons image search parser.
  //
  // - <ul class="mw-search-results"> raises _sr_flag.
  // - <table class="searchResultImage">, once _sr_flag is up, closes the previous
  //   snippet (dropped and removed from the result list when its title, url or
  //   cached image is missing), creates and appends a new image snippet, and
  //   raises the results flag.
  // - <img src=...>, once results have started, stores "http:" + src as the
  //   current snippet's cached image.
  // - <a>, once results have started: with a "class" attribute its "href" becomes
  //   the snippet URL (prefixed with the commons host); without one its "title"
  //   attribute, stripped of "File:", becomes the snippet title.
  void se_parser_wcommons::start_element(parser_context *pc,
                                         const xmlChar *name,
                                         const xmlChar **attributes)
  {
    const char *tag = (const char*)name;
    const char **attrs = (const char**)attributes;

    if (strcasecmp(tag,"ul") == 0)
      {
        const char *ul_class = se_parser::get_attribute(attrs,"class");
        if (ul_class && strcasecmp(ul_class,"mw-search-results") == 0)
          _sr_flag = true;
        return;
      }

    if (_sr_flag && strcasecmp(tag,"table") == 0)
      {
        const char *table_class = se_parser::get_attribute(attrs,"class");
        if (!table_class || strcasecmp(table_class,"searchResultImage") != 0)
          return;

        // a previous snippet lacking title, url or cached image is considered
        // a parsing failure and gets discarded.
        if (pc->_current_snippet
            && (pc->_current_snippet->_title.empty()
                || pc->_current_snippet->_url.empty()
                || static_cast<img_search_snippet*>(pc->_current_snippet)->_cached.empty()))
          {
            delete pc->_current_snippet;
            pc->_current_snippet = NULL;
            _count--;
            pc->_snippets->pop_back();
          }

        // open the next image snippet and register it right away.
        img_search_snippet *fresh = new img_search_snippet(_count+1);
        ++_count;
        fresh->_img_engine = feeds("wcommons",_url);
        pc->_current_snippet = fresh;
        pc->_snippets->push_back(fresh);

        _results_flag = true;
        return;
      }

    // images and anchors only matter once a result has been opened.
    if (!_results_flag)
      return;

    if (strcasecmp(tag,"img") == 0)
      {
        const char *img_src = se_parser::get_attribute(attrs,"src");
        if (img_src)
          {
            img_search_snippet *current = static_cast<img_search_snippet*>(pc->_current_snippet);
            current->_cached = "http:" + std::string(img_src);
          }
      }
    else if (strcasecmp(tag,"a") == 0)
      {
        if (se_parser::get_attribute(attrs,"class"))
          {
            const char *href = se_parser::get_attribute(attrs,"href");
            if (href)
              pc->_current_snippet->set_url("http://commons.wikipedia.org" + std::string(href));
          }
        else
          {
            const char *raw_title = se_parser::get_attribute(attrs,"title");
            if (raw_title)
              {
                std::string cleaned = raw_title;
                miscutil::replace_in_string(cleaned,"File:","");
                pc->_current_snippet->_title = cleaned;
              }
          }
      }
  }