Ejemplo n.º 1
0
 /**
  * Submits "negative hit" records for a captured URL and for its host.
  *
  * For a non-empty url, a db_query_record carrying -url_hits is handed to
  * the user db through add_dbr under the key derived from `key`. The same is
  * done for `host` when it is non-empty and differs from `url`.
  * NOTE(review): how add_dbr merges the negated counter into an existing
  * record is not visible from here -- confirm against db_query_record.
  *
  * @param key          DHT key whose to_rstring() form is the db key.
  * @param query        query the URL was captured for.
  * @param url          captured URL, skipped when empty.
  * @param host         host of the URL, skipped when empty or equal to url.
  * @param url_hits     number of hits to take away (passed negated).
  * @param radius       radius of the query fragment.
  * @param plugin_name  name of the plugin owning the record.
  * @throws sp_exception with the db error code when add_dbr fails.
  */
 void query_capture_element::remove_url(const DHTKey &key, const std::string &query,
                                        const std::string &url, const std::string &host,
                                        const short &url_hits, const uint32_t &radius,
                                        const std::string &plugin_name) throw (sp_exception)
 {
   std::string key_str = key.to_rstring();
   if (!url.empty())
     {
       db_query_record dbqr(plugin_name,query,radius,url,1,-url_hits);
       db_err err = seeks_proxy::_user_db->add_dbr(key_str,dbqr);
       if (err != SP_ERR_OK)
         {
           std::string msg = "failed removal of captured url " + url + " for query " + query + " with error "
                             + miscutil::to_string(err);
           throw sp_exception(err,msg);
         }
     }
   if (!host.empty() && host != url)
     {
       db_query_record dbqr(plugin_name,query,radius,host,1,-url_hits);
       db_err err = seeks_proxy::_user_db->add_dbr(key_str,dbqr);
       if (err != SP_ERR_OK)
         {
           // this is a removal, not a storage: say so in the error message.
           std::string msg = "failed removal of captured host " + host + " for query " + query + " with error "
                             + miscutil::to_string(err);
           throw sp_exception(err,msg);
         }
     }
 }
Ejemplo n.º 2
0
  /**
   * Removes a query from every db record keyed by one of its fragments.
   *
   * The query is expanded into hashed fragments by
   * qprocess::generate_query_hashes (up to `radius`, or up to the configured
   * _max_radius when radius is -1), and remove_query is called for each one.
   * All fragments are attempted even when some of them fail.
   *
   * @param query        query to remove.
   * @param plugin_name  name of the plugin owning the records.
   * @param radius       maximum fragment radius, -1 for the configured maximum.
   * @throws sp_exception QC_ERR_REMOVE_QUERY when at least one fragment failed
   *         with an error other than DB_ERR_NO_REC; otherwise DB_ERR_NO_REC
   *         (with an empty message) when at least one fragment had no record.
   */
  void query_capture_element::remove_queries(const std::string &query,
      const std::string &plugin_name,
      const int &radius) throw (sp_exception)
  {
    // generate query fragments.
    hash_multimap<uint32_t,DHTKey,id_hash_uint> features;
    qprocess::generate_query_hashes(query,0,
                                    radius == -1 ? query_capture_configuration::_config->_max_radius : radius,
                                    features);

    // remove queries. "No record" and real failures are tracked separately:
    // sharing one integer between an error code and a counter made the
    // outcome depend on the (unspecified) iteration order of the multimap,
    // and let the counter collide with the DB_ERR_NO_REC value.
    bool missing_record = false;
    int failures = 0;
    hash_multimap<uint32_t,DHTKey,id_hash_uint>::const_iterator hit = features.begin();
    while (hit!=features.end())
      {
        try
          {
            query_capture_element::remove_query((*hit).second,query,(*hit).first,plugin_name);
          }
        catch(sp_exception &e)
          {
            if (e.code()==DB_ERR_NO_REC)
              missing_record = true;
            else ++failures;
          }
        ++hit;
      }

    // real failures are the more severe condition and are reported first.
    if (failures != 0)
      {
        std::string msg = "failed removing some or all query fragments for query " + query;
        throw sp_exception(QC_ERR_REMOVE_QUERY,msg);
      }
    if (missing_record)
      throw sp_exception(DB_ERR_NO_REC,"");
  }
Ejemplo n.º 3
0
 /**
  * Fetches a db record from a remote user db service over HTTP.
  *
  * The request URL is built as host[:port]path/find_dbr?urkey=<key>&pn=<pn>
  * and fetched with curl_mget, using the configured _call_timeout.
  *
  * @param host  remote host.
  * @param port  remote port, -1 to leave it out of the URL.
  * @param path  path prefix on the remote host.
  * @param key   record key, sent as the "urkey" parameter.
  * @param pn    plugin name, sent as "pn" and used for deserialization.
  * @return the deserialized record, or NULL when the call succeeded but
  *         returned no output.
  *         NOTE(review): the caller presumably owns the returned record --
  *         confirm against deserialize_found_record.
  * @throws sp_exception UDBS_ERR_CONNECT on a non-zero call status,
  *         UDBS_ERR_DESERIALIZE when the output cannot be deserialized.
  */
 db_record* udb_client::find_dbr_client(const std::string &host,
                                        const int &port,
                                        const std::string &path,
                                        const std::string &key,
                                        const std::string &pn) throw (sp_exception)
 {
   std::string url = host;
   if (port != -1)
     url += ":" + miscutil::to_string(port);
   url += path + "/find_dbr?";
   url += "urkey=" + key;
   url += "&pn=" + pn;
   curl_mget cmg(1,udb_service_configuration::_config->_call_timeout,0,
                 udb_service_configuration::_config->_call_timeout,0);
   std::vector<std::string> urls;
   urls.reserve(1);
   urls.push_back(url);
   std::vector<int> status;
   cmg.www_mget(urls,1,NULL,"",0,status); // not going through a proxy. TODO: support for external proxy.
   if (status[0] != 0)
     {
       // failed connection.
       delete[] cmg._outputs;
       std::string port_str = (port != -1) ? ":" + miscutil::to_string(port) : "";
       std::string msg = "failed connection or transmission error in response to fetching record "
                         + key + " from " + host + port_str + path;
       // the message embeds caller-supplied text: never use it as the format string.
       errlog::log_error(LOG_LEVEL_ERROR,"%s",msg.c_str());
       throw sp_exception(UDBS_ERR_CONNECT,msg);
     }
   if (!cmg._outputs[0])
     {
       // no result. The status is known to be 0 here, so only the output
       // needs testing; the former 'status[0] && ...' test could never hold
       // and let a NULL output reach the dereference below.
       delete[] cmg._outputs;
       return NULL;
     }
   db_record *dbr = udb_client::deserialize_found_record(*cmg._outputs[0],pn);
   delete cmg._outputs[0];
   delete[] cmg._outputs;
   if (!dbr)
     {
       // transmission or deserialization error.
       std::string port_str = (port != -1) ? ":" + miscutil::to_string(port) : "";
       std::string msg = "transmission or deserialization error fetching record "
                         + key + " from " + host + port_str + path;
       errlog::log_error(LOG_LEVEL_ERROR,"%s",msg.c_str());
       throw sp_exception(UDBS_ERR_DESERIALIZE,msg);
     }
   return dbr;
 }
Ejemplo n.º 4
0
  void query_capture_element::store_queries(const std::string &query,
      const std::string &plugin_name,
      const int &radius) throw (sp_exception)
  {
    // generate query fragments.
    hash_multimap<uint32_t,DHTKey,id_hash_uint> features;
    qprocess::generate_query_hashes(query,0,
                                    radius == -1 ? query_capture_configuration::_config->_max_radius : radius,
                                    features);

    // store query with hash fragment as key.
    int err = 0;
    hash_multimap<uint32_t,DHTKey,id_hash_uint>::const_iterator hit = features.begin();
    while (hit!=features.end())
      {
        try
          {
            query_capture_element::store_query((*hit).second,query,(*hit).first,plugin_name);
          }
        catch(sp_exception &e)
          {
            err++;
          }
        ++hit;
      }
    if (err != 0)
      {
        std::string msg = "failed storing some or all query fragments for query " + query;
        throw sp_exception(QC_ERR_STORE_QUERY,msg);
      }
  }
Ejemplo n.º 5
0
 void query_capture_element::store_url(const DHTKey &key, const std::string &query,
                                       const std::string &url, const std::string &host,
                                       const uint32_t &radius,
                                       const std::string &plugin_name,
                                       const search_snippet *sp) throw (sp_exception)
 {
   std::string key_str = key.to_rstring();
   if (!url.empty())
     {
       db_err err = SP_ERR_OK;
       if (!sp)
         {
           db_query_record dbqr(plugin_name,query,radius,url);
           err = seeks_proxy::_user_db->add_dbr(key_str,dbqr);
         }
       else
         {
           // rec_date.
           struct timeval tv_now;
           gettimeofday(&tv_now, NULL);
           uint32_t rec_date = tv_now.tv_sec;
           uint32_t url_date = sp->_content_date;
           db_query_record dbqr(plugin_name,query,radius,url,
                                1,1,sp->_title,sp->_summary,url_date,rec_date,sp->_lang);
           err = seeks_proxy::_user_db->add_dbr(key_str,dbqr);
         }
       if (err != SP_ERR_OK)
         {
           std::string msg = "failed storage of captured url " + url + " for query " + query + " with error "
                             + miscutil::to_string(err);
           throw sp_exception(err,msg);
         }
     }
   if (!host.empty() && host != url)
     {
       db_query_record dbqr(plugin_name,query,radius,host);
       db_err err = seeks_proxy::_user_db->add_dbr(key_str,dbqr);
       if (err != SP_ERR_OK)
         {
           std::string msg = "failed storage of captured host " + host + " for query " + query + " with error "
                             + miscutil::to_string(err);
           throw sp_exception(err,msg);
         }
     }
 }
Ejemplo n.º 6
0
// Returns the result of f_readlink_internal for the file name held by the
// SplFileInfo behind obj. An empty result is reported by throwing an
// exception object whose message names the file.
String f_hphp_splfileinfo_getlinktarget(CObjRef obj) {
  SplFileInfo *info = get_splfileinfo(obj);
  String target = f_readlink_internal(info->getFileName(), false);
  if (target.size()) {
    return target;
  }
  throw (Object)sp_exception(NEW(c_exception)())->create(Variant(
    "Unable to read link "+info->getFileName()
    +", error: no such file or directory"));
}
Ejemplo n.º 7
0
 /**
  * Stores a query under one hashed fragment key.
  *
  * @param key          DHT key of the fragment; its to_rstring() form is the db key.
  * @param query        query to record.
  * @param radius       radius of the fragment.
  * @param plugin_name  name of the plugin owning the record.
  * @throws sp_exception with the db error code when add_dbr fails (the
  *         failure is logged first).
  */
 void query_capture_element::store_query(const DHTKey &key,
                                         const std::string &query,
                                         const uint32_t &radius,
                                         const std::string &plugin_name) throw (sp_exception)
 {
   std::string key_str = key.to_rstring();
   db_query_record dbqr(plugin_name,query,radius);
   db_err err = seeks_proxy::_user_db->add_dbr(key_str,dbqr);
   if (err != SP_ERR_OK)
     {
       std::string msg = "failed storage of captured query fragment " + key_str + " for query " + query + " with error "
                         + miscutil::to_string(err);
       // the message embeds the user query, which may contain '%': pass it
       // as an argument, never as the printf-style format string.
       errlog::log_error(LOG_LEVEL_ERROR,"%s",msg.c_str());
       throw sp_exception(err,msg);
     }
 }
Ejemplo n.º 8
0
  /**
   * Scores snippets by similarity to a reference snippet, then sorts them.
   *
   * @param qc               query context holding the cached snippets.
   * @param id_str           textual id of the reference snippet (parsed with
   *                         strtod and converted to uint32_t).
   * @param parameters       request parameters; "content_analysis" set to
   *                         "on" (case-insensitive) forces content analysis
   *                         regardless of the configuration.
   * @param ref_sp           out: the reference snippet found in the cache.
   * @param sorted_snippets  in/out: snippets to score; stable-sorted with
   *                         search_snippet::max_seeks_ir on return.
   * @throws sp_exception WB_ERR_NO_REF_SIM when the reference id is not in
   *         the cache; any sp_exception raised by the scoring step
   *         propagates unchanged.
   */
  void sort_rank::score_and_sort_by_similarity(query_context *qc, const char *id_str,
      const hash_map<const char*, const char*, hash<const char*>, eqstr> *parameters,
      search_snippet *&ref_sp,
      std::vector<search_snippet*> &sorted_snippets) throw (sp_exception)
  {
    uint32_t id = (uint32_t)strtod(id_str,NULL);

    ref_sp = qc->get_cached_snippet(id);

    if (!ref_sp) // this should not happen, unless someone is forcing an url onto a Seeks node.
      throw sp_exception(WB_ERR_NO_REF_SIM,"cannot find ref id among cached snippets");

    ref_sp->set_back_similarity_link();

    bool content_analysis = websearch::_wconfig->_content_analysis;
    const char *ca = miscutil::lookup(parameters,"content_analysis");
    if (ca && strcasecmp(ca,"on") == 0)
      content_analysis = true;

    if (content_analysis)
      content_handler::fetch_all_snippets_content_and_features(qc);
    else content_handler::fetch_all_snippets_summary_and_features(qc);

    // run similarity analysis and compute scores. An sp_exception raised
    // here is simply left to propagate: catching it only to 'throw e;'
    // would copy (and possibly slice) the exception object for no benefit.
    // NOTE(review): at(0) throws std::out_of_range on an empty vector,
    // which the exception specification does not allow -- confirm callers
    // never pass an empty sorted_snippets.
    content_handler::feature_based_similarity_scoring(qc,sorted_snippets.size(),
        &sorted_snippets.at(0),ref_sp);

    // sort snippets according to computed scores.
    std::stable_sort(sorted_snippets.begin(),sorted_snippets.end(),search_snippet::max_seeks_ir);
  }
Ejemplo n.º 9
0
 void query_capture_element::remove_query(const DHTKey &key,
     const std::string &query,
     const uint32_t &radius,
     const std::string &plugin_name) throw (sp_exception)
 {
   std::string key_str = key.to_rstring();
   db_record *dbr = seeks_proxy::_user_db->find_dbr(key_str,plugin_name);
   if (!dbr)
     throw sp_exception(DB_ERR_NO_REC,"");
   db_query_record *dbqr = static_cast<db_query_record*>(dbr);
   hash_map<const char*,query_data*,hash<const char*>,eqstr>::iterator hit;
   if ((hit=dbqr->_related_queries.find(query.c_str()))!=dbqr->_related_queries.end())
     {
       // erase the query from the list, then rewrite the
       // record.
       query_data *qdata = (*hit).second;
       dbqr->_related_queries.erase(hit);
       delete qdata;
       seeks_proxy::_user_db->remove_dbr(key_str,plugin_name);
       if (!dbqr->_related_queries.empty())
         seeks_proxy::_user_db->add_dbr(key_str,*dbqr);
     }
   delete dbr;
 }
Ejemplo n.º 10
0
  /**
   * Parses a search engine's XML output with a libxml2 push (SAX) parser.
   *
   * Element, character and CDATA events are routed to the *_wrapper
   * callbacks, which receive a parser_context pointing at this parser and
   * at the snippets vector. The parser context is released on every exit
   * path.
   *
   * @param output        NUL-terminated buffer to parse.
   * @param snippets      vector handed to the callbacks through the context.
   * @param count_offset  initial value of _count.
   * @throws sp_exception WB_ERR_PARSE when an exception escapes the parsing
   *         calls, or when libxml2 reports an error of level 3 (fatal).
   */
  void se_parser::parse_output_xml(char *output, std::vector<search_snippet*> *snippets,
                                   const int &count_offset) throw (sp_exception)
  {
    _count = count_offset;

    xmlParserCtxtPtr ctxt = NULL;
    parser_context pc;
    pc._parser = this;
    pc._snippets = snippets;
    pc._current_snippet = NULL;

    // positional initialization of the 32 xmlSAXHandler slots; only four
    // callbacks are installed.
    xmlSAXHandler saxHandler =
    {
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      start_element_wrapper, // startElement
      end_element_wrapper,   // endElement
      NULL,
      characters_wrapper,    // characters
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      cdata_wrapper,         // cdataBlock
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL
    };

    //mutex_lock(&se_parser::_se_parser_mutex);

    int status = 0;
    try
      {
        ctxt = xmlCreatePushParserCtxt(&saxHandler, &pc, "", 0, "");
        xmlCtxtUseOptions(ctxt,XML_PARSE_NOERROR);

        status = xmlParseChunk(ctxt,output,strlen(output),0);
      }
    catch (const std::exception &e) // by reference: no copy, no slicing.
      {
        errlog::log_error(LOG_LEVEL_PARSER,"Error %s in xml/html parsing of search results.",
                          e.what());
        // do not leak the parser context on the error path.
        if (ctxt)
          xmlFreeParserCtxt(ctxt);
        //mutex_unlock(&se_parser::_se_parser_mutex);
        throw sp_exception(WB_ERR_PARSE,e.what());
      }
    catch (...) // catch everything else to avoid crashes.
      {
        std::string msg = "Unknown error in xml/html parsing of search results";
        errlog::log_error(LOG_LEVEL_PARSER,msg.c_str());
        if (ctxt)
          xmlFreeParserCtxt(ctxt);
        //mutex_unlock(&se_parser::_se_parser_mutex);
        throw sp_exception(WB_ERR_PARSE,msg);
      }

    // inspect the error, if any, while the context that owns it is still alive.
    bool fatal = false;
    const std::string fatal_msg = "libxml2 fatal error";
    if (status != 0) // an error occurred.
      {
        xmlErrorPtr xep = xmlCtxtGetLastError(ctxt);
        if (xep)
          {
            // the message pointer may be NULL: never build a string from it blindly.
            std::string err_msg = xep->message ? std::string(xep->message) : std::string();
            miscutil::replace_in_string(err_msg,"\n","");
            errlog::log_error(LOG_LEVEL_PARSER, "html level parsing error (libxml2): %s",
                              err_msg.c_str());
            // check on error level.
            if (xep->level == 3) // fatal error.
              {
                errlog::log_error(LOG_LEVEL_PARSER,fatal_msg.c_str());
                fatal = true;
              }
            // XXX: too verbose, and confusing to users.
            else if (xep->level == 2) // recoverable error: logged, not thrown.
              {
                std::string msg = "libxml2 recoverable error";
                errlog::log_error(LOG_LEVEL_DEBUG,msg.c_str());
              }
          }
      }

    // single release point: covers success, a missing error structure and
    // every error level alike.
    if (ctxt)
      xmlFreeParserCtxt(ctxt);
    //mutex_unlock(&se_parser::_se_parser_mutex);

    if (fatal)
      throw sp_exception(WB_ERR_PARSE,fatal_msg);
  }
Ejemplo n.º 11
0
  /**
   * Fetches, in one batch call, the records matching a query from a remote
   * user db service.
   *
   * The query is expanded into hashed fragments, serialized together with
   * `expansion` by halo_msg_wrapper::serialize, and POSTed as
   * application/x-protobuf to host[:port]path/find_bqc? with curl_mget.
   *
   * @param host       remote host.
   * @param port       remote port, -1 to leave it out of the URL.
   * @param path       path prefix on the remote host.
   * @param query      query to look up.
   * @param expansion  expansion value forwarded to the serializer.
   * @return the deserialized record, or NULL when the call succeeded but
   *         returned no output.
   *         NOTE(review): the caller presumably owns the returned record --
   *         confirm against deserialize_found_record.
   * @throws sp_exception from the serializer (rethrown as is),
   *         UDBS_ERR_CONNECT on a non-zero call status,
   *         UDBS_ERR_DESERIALIZE when the output cannot be deserialized.
   */
  db_record* udb_client::find_bqc(const std::string &host,
                                  const int &port,
                                  const std::string &path,
                                  const std::string &query,
                                  const uint32_t &expansion) throw (sp_exception)
  {
    static std::string ctype = "Content-Type: application/x-protobuf";

    // create halo of hashes.
    hash_multimap<uint32_t,DHTKey,id_hash_uint> qhashes;
    qprocess::generate_query_hashes(query,0,5,qhashes); // TODO: 5 in configuration (cf).
    std::string msg;
    try
      {
        halo_msg_wrapper::serialize(expansion,qhashes,msg);
      }
    catch(sp_exception &e)
      {
        // log through "%s": the text must not be interpreted as a format.
        errlog::log_error(LOG_LEVEL_ERROR,"%s",e.what().c_str());
        throw; // rethrow the original object instead of a copy.
      }

    std::string url = host;
    if (port != -1)
      url += ":" + miscutil::to_string(port);
    url += path + "/find_bqc?";
    curl_mget cmg(1,udb_service_configuration::_config->_call_timeout,0,
                  udb_service_configuration::_config->_call_timeout,0);
    std::vector<std::string> urls;
    urls.reserve(1);
    urls.push_back(url);
    errlog::log_error(LOG_LEVEL_DEBUG,"call: %s",url.c_str());
    std::vector<int> status;
    cmg.www_mget(urls,1,NULL,"",0,status,
                 NULL,NULL,"POST",&msg,msg.length()*sizeof(char),
                 ctype); // not going through a proxy. TODO: support for external proxy.
    if (status[0] !=0)
      {
        // failed connection. The error text gets its own name so that it
        // no longer shadows the serialized payload above.
        std::string port_str = (port != -1) ? ":" + miscutil::to_string(port) : "";
        std::string err_msg = "failed connection or transmission error, nothing found in find_bqc response to query "
                              + query + " from " + host + port_str + path;
        errlog::log_error(LOG_LEVEL_DEBUG,"%s",err_msg.c_str());
        delete[] cmg._outputs;
        throw sp_exception(UDBS_ERR_CONNECT,err_msg);
      }
    if (!cmg._outputs[0])
      {
        // no result (the status is known to be 0 at this point).
        delete[] cmg._outputs;
        return NULL;
      }
    db_record *dbr = udb_client::deserialize_found_record(*cmg._outputs[0],"query-capture");
    delete cmg._outputs[0];
    delete[] cmg._outputs;
    if (!dbr)
      {
        // transmission or deserialization error.
        std::string port_str = (port != -1) ? ":" + miscutil::to_string(port) : "";
        std::string err_msg = "transmission or deserialization error fetching batch records for query "
                              + query + " from " + host + port_str + path;
        errlog::log_error(LOG_LEVEL_ERROR,"%s",err_msg.c_str());
        throw sp_exception(UDBS_ERR_DESERIALIZE,err_msg);
      }
    return dbr;
  }
Ejemplo n.º 12
0
  void query_capture_element::store_queries(const std::string &q,
      const query_context *qc,
      const std::string &url, const std::string &host,
      const std::string &plugin_name,
      const int &radius) throw (sp_exception)
  {
    std::string query = q;
    if (qc)
      query = qc->_lc_query;

    // generate query fragments.
    hash_multimap<uint32_t,DHTKey,id_hash_uint> features;
    qprocess::generate_query_hashes(query,0,
                                    radius == -1 ? query_capture_configuration::_config->_max_radius : radius,
                                    features);

    // push URL into the user db buckets with query fragments as key.
    // URLs are stored only for queries of radius 0. This scheme allows to save
    // DB space. To recover URLs from a query of radius > 1, a second lookup is necessary,
    // for the recorded query of radius 0 that holds the URL counters.
    int uerr = 0;
    int qerr = 0;
    hash_multimap<uint32_t,DHTKey,id_hash_uint>::const_iterator hit = features.begin();
    while (hit!=features.end())
      {
        if ((*hit).first == 0) // radius == 0.
          {
            try
              {
                if (!query_capture_configuration::_config->_save_url_data)
                  query_capture_element::store_url((*hit).second,query,url,host,(*hit).first,plugin_name);
                else
                  {
                    // grab snippet and title, if available from the websearch plugin cache.
                    search_snippet *sp = NULL;
                    if (qc)
                      {
                        sp = qc->get_cached_snippet(url);
                        query_capture_element::store_url((*hit).second,query,url,host,
                                                         (*hit).first,plugin_name,sp);
                      }
                    else
                      {
                        query_capture_element::store_url((*hit).second,query,url,host,
                                                         (*hit).first,plugin_name,NULL);
                      }
                  }
              }
            catch (sp_exception &e)
              {
                uerr++;
              }
          }
        else  // store query alone.
          {
            try
              {
                query_capture_element::store_query((*hit).second,query,(*hit).first,plugin_name);
              }
            catch (sp_exception &e)
              {
                qerr++;
              }
          }
        ++hit;
      }
    if (uerr && qerr)
      {
        std::string msg = "failed storing URL " + url + " and query fragments for query " + query;
        throw sp_exception(QC_ERR_STORE,msg);
      }
    else if (uerr)
      {
        std::string msg = "failed storing URL " + url + " for query " + query;
        throw sp_exception(QC_ERR_STORE_URL,msg);
      }
    else if (qerr)
      {
        std::string msg = "failed storing some or all query fragments for query " + query;
        throw sp_exception(QC_ERR_STORE_QUERY,msg);
      }
  }