void query_capture_element::remove_url(const DHTKey &key, const std::string &query, const std::string &url, const std::string &host, const short &url_hits, const uint32_t &radius, const std::string &plugin_name) throw (sp_exception) { std::string key_str = key.to_rstring(); if (!url.empty()) { db_query_record dbqr(plugin_name,query,radius,url,1,-url_hits); db_err err = seeks_proxy::_user_db->add_dbr(key_str,dbqr); if (err != SP_ERR_OK) { std::string msg = "failed removal of captured url " + url + " for query " + query + " with error " + miscutil::to_string(err); throw sp_exception(err,msg); } } if (!host.empty() && host != url) { db_query_record dbqr(plugin_name,query,radius,host,1,-url_hits); db_err err = seeks_proxy::_user_db->add_dbr(key_str,dbqr); if (err != SP_ERR_OK) { std::string msg = "failed storage of captured host " + host + " for query " + query + " with error " + miscutil::to_string(err); throw sp_exception(err,msg); } } }
void query_capture_element::remove_queries(const std::string &query, const std::string &plugin_name, const int &radius) throw (sp_exception) { // generate query fragments. hash_multimap<uint32_t,DHTKey,id_hash_uint> features; qprocess::generate_query_hashes(query,0, radius == -1 ? query_capture_configuration::_config->_max_radius : radius, features); // remove queries. int err = 0; hash_multimap<uint32_t,DHTKey,id_hash_uint>::const_iterator hit = features.begin(); while (hit!=features.end()) { try { query_capture_element::remove_query((*hit).second,query,(*hit).first,plugin_name); } catch(sp_exception &e) { if (e.code()==DB_ERR_NO_REC) err = DB_ERR_NO_REC; else err++; } ++hit; } if (err == DB_ERR_NO_REC) throw sp_exception(err,""); if (err != 0 && err != DB_ERR_NO_REC) { std::string msg = "failed removing some or all query fragments for query " + query; throw sp_exception(QC_ERR_REMOVE_QUERY,msg); } }
/**
 * Fetches a single record from a remote user DB service over HTTP.
 *
 * The call URL is host[:port]path/find_dbr?urkey=<key>&pn=<pn>; the port is
 * left out when it is -1. The response body is handed to
 * deserialize_found_record().
 *
 * @param host remote host.
 * @param port remote port, -1 for none.
 * @param path path prefix of the service on the remote host.
 * @param key  record key.
 * @param pn   plugin name the record belongs to.
 * @return the deserialized record, or NULL when the call succeeded but
 *         produced no output.
 * @throws sp_exception UDBS_ERR_CONNECT when the call reports a non-zero
 *         status, UDBS_ERR_DESERIALIZE when the response cannot be turned
 *         into a record.
 */
db_record* udb_client::find_dbr_client(const std::string &host, const int &port,
                                       const std::string &path, const std::string &key,
                                       const std::string &pn) throw (sp_exception)
{
  std::string url = host;
  if (port != -1)
    url += ":" + miscutil::to_string(port);
  url += path + "/find_dbr?";
  url += "urkey=" + key;
  url += "&pn=" + pn;

  curl_mget cmg(1,udb_service_configuration::_config->_call_timeout,0,
                udb_service_configuration::_config->_call_timeout,0);
  std::vector<std::string> urls;
  urls.reserve(1);
  urls.push_back(url);
  std::vector<int> status;
  cmg.www_mget(urls,1,NULL,"",0,status); // not going through a proxy. TODO: support for external proxy.

  if (status[0] != 0)
    {
      // failed connection.
      delete[] cmg._outputs;
      std::string port_str = (port != -1) ? ":" + miscutil::to_string(port) : "";
      std::string msg = "failed connection or transmission error in response to fetching record "
                        + key + " from " + host + port_str + path;
      // the message embeds caller data: never use it as the format string.
      errlog::log_error(LOG_LEVEL_ERROR,"%s",msg.c_str());
      throw sp_exception(UDBS_ERR_CONNECT,msg);
    }
  else if (!cmg._outputs[0])
    {
      // no result. status[0] is 0 here, so the former 'status[0] && ...' test
      // could never hold and the NULL output was dereferenced below.
      delete[] cmg._outputs;
      return NULL;
    }

  db_record *dbr = udb_client::deserialize_found_record(*cmg._outputs[0],pn);
  delete cmg._outputs[0];
  delete[] cmg._outputs;
  if (!dbr)
    {
      // transmission or deserialization error.
      std::string port_str = (port != -1) ? ":" + miscutil::to_string(port) : "";
      std::string msg = "transmission or deserialization error fetching record "
                        + key + " from " + host + port_str + path;
      errlog::log_error(LOG_LEVEL_ERROR,"%s",msg.c_str());
      throw sp_exception(UDBS_ERR_DESERIALIZE,msg);
    }
  return dbr;
}
void query_capture_element::store_queries(const std::string &query, const std::string &plugin_name, const int &radius) throw (sp_exception) { // generate query fragments. hash_multimap<uint32_t,DHTKey,id_hash_uint> features; qprocess::generate_query_hashes(query,0, radius == -1 ? query_capture_configuration::_config->_max_radius : radius, features); // store query with hash fragment as key. int err = 0; hash_multimap<uint32_t,DHTKey,id_hash_uint>::const_iterator hit = features.begin(); while (hit!=features.end()) { try { query_capture_element::store_query((*hit).second,query,(*hit).first,plugin_name); } catch(sp_exception &e) { err++; } ++hit; } if (err != 0) { std::string msg = "failed storing some or all query fragments for query " + query; throw sp_exception(QC_ERR_STORE_QUERY,msg); } }
void query_capture_element::store_url(const DHTKey &key, const std::string &query, const std::string &url, const std::string &host, const uint32_t &radius, const std::string &plugin_name, const search_snippet *sp) throw (sp_exception) { std::string key_str = key.to_rstring(); if (!url.empty()) { db_err err = SP_ERR_OK; if (!sp) { db_query_record dbqr(plugin_name,query,radius,url); err = seeks_proxy::_user_db->add_dbr(key_str,dbqr); } else { // rec_date. struct timeval tv_now; gettimeofday(&tv_now, NULL); uint32_t rec_date = tv_now.tv_sec; uint32_t url_date = sp->_content_date; db_query_record dbqr(plugin_name,query,radius,url, 1,1,sp->_title,sp->_summary,url_date,rec_date,sp->_lang); err = seeks_proxy::_user_db->add_dbr(key_str,dbqr); } if (err != SP_ERR_OK) { std::string msg = "failed storage of captured url " + url + " for query " + query + " with error " + miscutil::to_string(err); throw sp_exception(err,msg); } } if (!host.empty() && host != url) { db_query_record dbqr(plugin_name,query,radius,host); db_err err = seeks_proxy::_user_db->add_dbr(key_str,dbqr); if (err != SP_ERR_OK) { std::string msg = "failed storage of captured host " + host + " for query " + query + " with error " + miscutil::to_string(err); throw sp_exception(err,msg); } } }
/**
 * Resolves the link target of the file wrapped by an SplFileInfo object.
 *
 * The file name is passed to f_readlink_internal(); a non-empty result is
 * returned as is, an empty result is reported by throwing an exception object
 * whose message names the file.
 */
String f_hphp_splfileinfo_getlinktarget(CObjRef obj) {
  SplFileInfo *info = get_splfileinfo(obj);
  String target = f_readlink_internal(info->getFileName(), false);
  if (target.size()) {
    return target;
  }
  throw (Object)sp_exception(NEW(c_exception)())->create(Variant(
    "Unable to read link "+info->getFileName()
    +", error: no such file or directory"));
}
void query_capture_element::store_query(const DHTKey &key, const std::string &query, const uint32_t &radius, const std::string &plugin_name) throw (sp_exception) { std::string key_str = key.to_rstring(); db_query_record dbqr(plugin_name,query,radius); db_err err = seeks_proxy::_user_db->add_dbr(key_str,dbqr); if (err != SP_ERR_OK) { std::string msg = "failed storage of captured query fragment " + key_str + " for query " + query + " with error " + miscutil::to_string(err); errlog::log_error(LOG_LEVEL_ERROR,msg.c_str()); throw sp_exception(err,msg); } }
void sort_rank::score_and_sort_by_similarity(query_context *qc, const char *id_str, const hash_map<const char*, const char*, hash<const char*>, eqstr> *parameters, search_snippet *&ref_sp, std::vector<search_snippet*> &sorted_snippets) throw (sp_exception) { uint32_t id = (uint32_t)strtod(id_str,NULL); ref_sp = qc->get_cached_snippet(id); if (!ref_sp) // this should not happen, unless someone is forcing an url onto a Seeks node. throw sp_exception(WB_ERR_NO_REF_SIM,"cannot find ref id among cached snippets"); ref_sp->set_back_similarity_link(); bool content_analysis = websearch::_wconfig->_content_analysis; const char *ca = miscutil::lookup(parameters,"content_analysis"); if (ca && strcasecmp(ca,"on") == 0) content_analysis = true; if (content_analysis) content_handler::fetch_all_snippets_content_and_features(qc); else content_handler::fetch_all_snippets_summary_and_features(qc); // run similarity analysis and compute scores. try { content_handler::feature_based_similarity_scoring(qc,sorted_snippets.size(), &sorted_snippets.at(0),ref_sp); } catch (sp_exception &e) { throw e; } // sort snippets according to computed scores. std::stable_sort(sorted_snippets.begin(),sorted_snippets.end(),search_snippet::max_seeks_ir); }
void query_capture_element::remove_query(const DHTKey &key, const std::string &query, const uint32_t &radius, const std::string &plugin_name) throw (sp_exception) { std::string key_str = key.to_rstring(); db_record *dbr = seeks_proxy::_user_db->find_dbr(key_str,plugin_name); if (!dbr) throw sp_exception(DB_ERR_NO_REC,""); db_query_record *dbqr = static_cast<db_query_record*>(dbr); hash_map<const char*,query_data*,hash<const char*>,eqstr>::iterator hit; if ((hit=dbqr->_related_queries.find(query.c_str()))!=dbqr->_related_queries.end()) { // erase the query from the list, then rewrite the // record. query_data *qdata = (*hit).second; dbqr->_related_queries.erase(hit); delete qdata; seeks_proxy::_user_db->remove_dbr(key_str,plugin_name); if (!dbqr->_related_queries.empty()) seeks_proxy::_user_db->add_dbr(key_str,*dbqr); } delete dbr; }
void se_parser::parse_output_xml(char *output, std::vector<search_snippet*> *snippets, const int &count_offset) throw (sp_exception) { _count = count_offset; xmlParserCtxtPtr ctxt = NULL; parser_context pc; pc._parser = this; pc._snippets = snippets; pc._current_snippet = NULL; xmlSAXHandler saxHandler = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, start_element_wrapper, end_element_wrapper, NULL, characters_wrapper, NULL, NULL, NULL, NULL, NULL, NULL, NULL, cdata_wrapper, NULL, NULL, NULL, NULL, NULL, NULL }; //mutex_lock(&se_parser::_se_parser_mutex); int status = 0; try { ctxt = xmlCreatePushParserCtxt(&saxHandler, &pc, "", 0, ""); xmlCtxtUseOptions(ctxt,XML_PARSE_NOERROR); status = xmlParseChunk(ctxt,output,strlen(output),0); } catch (std::exception e) { errlog::log_error(LOG_LEVEL_PARSER,"Error %s in xml/html parsing of search results.", e.what()); //mutex_unlock(&se_parser::_se_parser_mutex); throw sp_exception(WB_ERR_PARSE,e.what()); } catch (...) // catch everything else to avoid crashes. { std::string msg = "Unknown error in xml/html parsing of search results"; errlog::log_error(LOG_LEVEL_PARSER,msg.c_str()); //mutex_unlock(&se_parser::_se_parser_mutex); throw sp_exception(WB_ERR_PARSE,msg); } if (status == 0) { if (ctxt) xmlFreeParserCtxt(ctxt); //mutex_unlock(&se_parser::_se_parser_mutex); } else // an error occurred. { xmlErrorPtr xep = xmlCtxtGetLastError(ctxt); if (xep) { std::string err_msg = std::string(xep->message); miscutil::replace_in_string(err_msg,"\n",""); errlog::log_error(LOG_LEVEL_PARSER, "html level parsing error (libxml2): %s", err_msg.c_str()); // check on error level. if (xep->level == 3) // fatal or recoverable error. { std::string msg = "libxml2 fatal error"; errlog::log_error(LOG_LEVEL_PARSER,msg.c_str()); if (ctxt) xmlFreeParserCtxt(ctxt); //mutex_unlock(&se_parser::_se_parser_mutex); throw sp_exception(WB_ERR_PARSE,msg); } // XXX: too verbose, and confusing to users. 
else if (xep->level == 2) { std::string msg = "libxml2 recoverable error"; errlog::log_error(LOG_LEVEL_DEBUG,msg.c_str()); if (ctxt) xmlFreeParserCtxt(ctxt); //mutex_unlock(&se_parser::_se_parser_mutex); //throw sp_exception(WB_ERR_PARSE,msg); } } } }
/**
 * Fetches, from a remote user DB service, the batch of records related to a
 * query.
 *
 * The query is hashed into fragments, the fragments and the expansion value
 * are serialized into a message, and the message is POSTed with a protobuf
 * content type to host[:port]path/find_bqc? (the port is left out when it is
 * -1). The response is deserialized as a "query-capture" record.
 *
 * @param host      remote host.
 * @param port      remote port, -1 for none.
 * @param path      path prefix of the service on the remote host.
 * @param query     query to look up.
 * @param expansion expansion value serialized into the request.
 * @return the deserialized record, or NULL when the call succeeded but
 *         produced no output.
 * @throws sp_exception whatever the serialization step throws,
 *         UDBS_ERR_CONNECT when the call reports a non-zero status,
 *         UDBS_ERR_DESERIALIZE when the response cannot be turned into a
 *         record.
 */
db_record* udb_client::find_bqc(const std::string &host, const int &port,
                                const std::string &path, const std::string &query,
                                const uint32_t &expansion) throw (sp_exception)
{
  static std::string ctype = "Content-Type: application/x-protobuf";

  // create halo of hashes.
  hash_multimap<uint32_t,DHTKey,id_hash_uint> qhashes;
  qprocess::generate_query_hashes(query,0,5,qhashes); // TODO: 5 in configuration (cf).
  std::string msg;
  try
    {
      halo_msg_wrapper::serialize(expansion,qhashes,msg);
    }
  catch (sp_exception &e)
    {
      errlog::log_error(LOG_LEVEL_ERROR,"%s",e.what().c_str());
      throw; // rethrow the original object, not a copy of it.
    }

  std::string url = host;
  if (port != -1)
    url += ":" + miscutil::to_string(port);
  url += path + "/find_bqc?";

  curl_mget cmg(1,udb_service_configuration::_config->_call_timeout,0,
                udb_service_configuration::_config->_call_timeout,0);
  std::vector<std::string> urls;
  urls.reserve(1);
  urls.push_back(url);
  errlog::log_error(LOG_LEVEL_DEBUG,"call: %s",url.c_str());
  std::vector<int> status;
  cmg.www_mget(urls,1,NULL,"",0,status,
               NULL,NULL,"POST",&msg,msg.length()*sizeof(char),
               ctype); // not going through a proxy. TODO: support for external proxy.

  if (status[0] != 0)
    {
      // failed connection.
      std::string port_str = (port != -1) ? ":" + miscutil::to_string(port) : "";
      // named differently from the serialized payload 'msg' above.
      std::string err_msg = "failed connection or transmission error, nothing found in find_bqc response to query "
                            + query + " from " + host + port_str + path;
      // the message embeds the query: never use it as the format string.
      errlog::log_error(LOG_LEVEL_DEBUG,"%s",err_msg.c_str());
      delete[] cmg._outputs;
      throw sp_exception(UDBS_ERR_CONNECT,err_msg);
    }
  else if (!cmg._outputs[0])
    {
      // no result.
      delete[] cmg._outputs;
      return NULL;
    }

  db_record *dbr = udb_client::deserialize_found_record(*cmg._outputs[0],"query-capture");
  delete cmg._outputs[0];
  delete[] cmg._outputs;
  if (!dbr)
    {
      // transmission or deserialization error.
      std::string port_str = (port != -1) ? ":" + miscutil::to_string(port) : "";
      std::string err_msg = "transmission or deserialization error fetching batch records for query "
                            + query + " from " + host + port_str + path;
      errlog::log_error(LOG_LEVEL_ERROR,"%s",err_msg.c_str());
      throw sp_exception(UDBS_ERR_DESERIALIZE,err_msg);
    }
  return dbr;
}
void query_capture_element::store_queries(const std::string &q, const query_context *qc, const std::string &url, const std::string &host, const std::string &plugin_name, const int &radius) throw (sp_exception) { std::string query = q; if (qc) query = qc->_lc_query; // generate query fragments. hash_multimap<uint32_t,DHTKey,id_hash_uint> features; qprocess::generate_query_hashes(query,0, radius == -1 ? query_capture_configuration::_config->_max_radius : radius, features); // push URL into the user db buckets with query fragments as key. // URLs are stored only for queries of radius 0. This scheme allows to save // DB space. To recover URLs from a query of radius > 1, a second lookup is necessary, // for the recorded query of radius 0 that holds the URL counters. int uerr = 0; int qerr = 0; hash_multimap<uint32_t,DHTKey,id_hash_uint>::const_iterator hit = features.begin(); while (hit!=features.end()) { if ((*hit).first == 0) // radius == 0. { try { if (!query_capture_configuration::_config->_save_url_data) query_capture_element::store_url((*hit).second,query,url,host,(*hit).first,plugin_name); else { // grab snippet and title, if available from the websearch plugin cache. search_snippet *sp = NULL; if (qc) { sp = qc->get_cached_snippet(url); query_capture_element::store_url((*hit).second,query,url,host, (*hit).first,plugin_name,sp); } else { query_capture_element::store_url((*hit).second,query,url,host, (*hit).first,plugin_name,NULL); } } } catch (sp_exception &e) { uerr++; } } else // store query alone. 
{ try { query_capture_element::store_query((*hit).second,query,(*hit).first,plugin_name); } catch (sp_exception &e) { qerr++; } } ++hit; } if (uerr && qerr) { std::string msg = "failed storing URL " + url + " and query fragments for query " + query; throw sp_exception(QC_ERR_STORE,msg); } else if (uerr) { std::string msg = "failed storing URL " + url + " for query " + query; throw sp_exception(QC_ERR_STORE_URL,msg); } else if (qerr) { std::string msg = "failed storing some or all query fragments for query " + query; throw sp_exception(QC_ERR_STORE_QUERY,msg); } }