/**
 * Installs a single scoring rule on the query environment.
 *
 * The rule text is assembled as "method:<method>,<parameters>" and passed to
 * environment.setScoringRules() as a one-element list.
 *
 * @param method      text placed directly after the "method:" prefix
 * @param parameters  text appended verbatim after the separating comma
 * @return R_NilValue in every case
 */
SEXP setScoringRules(string method, string parameters) {
  // A disabled earlier variant sent "tfidf", "Okapi" and "BM25" through
  // environment.setBaseline() instead; every method now takes the rule path.
  string ruleText = "method:";
  ruleText += method;
  ruleText += ",";
  ruleText += parameters;

  vector<string> ruleList(1, ruleText);
  environment.setScoringRules(ruleList);

  return R_NilValue;
}
/**
 * Attaches the configured indexes and servers to a query environment and
 * applies any configured scoring rules.
 *
 * @param environment  query environment that receives the indexes, servers
 *                     and scoring rules
 * @param param        parameter tree read for the "index", "server" and
 *                     "rule" entries
 */
static void open_indexes( indri::api::QueryEnvironment& environment, indri::api::Parameters& param ) {
  // Every entry under "index" is handed to addIndex() as a string.
  if( param.exists( "index" ) ) {
    indri::api::Parameters indexList = param["index"];
    for( unsigned int n = 0; n < indexList.size(); n++ ) {
      environment.addIndex( std::string(indexList[n]) );
    }
  }

  // Every entry under "server" is handed to addServer() as a string.
  if( param.exists( "server" ) ) {
    indri::api::Parameters serverList = param["server"];
    for( unsigned int n = 0; n < serverList.size(); n++ ) {
      environment.addServer( std::string(serverList[n]) );
    }
  }

  // Scoring rules are only installed when the helper returns true;
  // presumably that means at least one "rule" entry was found -- confirm
  // against copy_parameters_to_string_vector.
  std::vector<std::string> ruleList;
  const bool haveRules = copy_parameters_to_string_vector( ruleList, param, "rule" );
  if( haveRules ) {
    environment.setScoringRules( ruleList );
  }
}
/**
 * Configures the query environment and the output options from _parameters.
 *
 * In order, this:
 *  - sets the single-background-model flag, stopwords and scoring rules,
 *  - adds every configured "index" and "server" to the environment,
 *  - applies "maxWildcardTerms" when that parameter is present,
 *  - reads the result-count and printing options into member fields,
 *  - selects a baseline and/or creates a query expander when "fbDocs" is
 *    non-zero.
 *
 * @return 0 in every case.
 */
UINT64 initialize() {
  _environment.setSingleBackgroundModel( _parameters.get("singleBackgroundModel", false) );

  std::vector<std::string> stopwords;
  if( copy_parameters_to_string_vector( stopwords, _parameters, "stopper.word" ) )
    _environment.setStopwords(stopwords);

  std::vector<std::string> smoothingRules;
  if( copy_parameters_to_string_vector( smoothingRules, _parameters, "rule" ) )
    _environment.setScoringRules( smoothingRules );

  if( _parameters.exists( "index" ) ) {
    indri::api::Parameters indexes = _parameters["index"];

    for( size_t i=0; i < indexes.size(); i++ ) {
      _environment.addIndex( std::string(indexes[i]) );
    }
  }

  if( _parameters.exists( "server" ) ) {
    indri::api::Parameters servers = _parameters["server"];

    for( size_t i=0; i < servers.size(); i++ ) {
      _environment.addServer( std::string(servers[i]) );
    }
  }

  // Applied exactly once, after all indexes and servers have been added.
  // The previous version repeated this call with the same value at the end
  // of the function; that duplicate has been dropped.
  if( _parameters.exists("maxWildcardTerms") )
    _environment.setMaxWildcardTerms(_parameters.get("maxWildcardTerms", 100));

  _requested = _parameters.get( "count", 1000 );
  // When "fbDocs" is absent the initial request size equals "count".
  _initialRequested = _parameters.get( "fbDocs", _requested );
  _runID = _parameters.get( "runID", "indri" );
  _trecFormat = _parameters.get( "trecFormat" , false );
  // INEX output is switched on by the mere presence of the "inex" entry.
  _inexFormat = _parameters.exists( "inex" );

  _printQuery = _parameters.get( "printQuery", false );
  _printDocuments = _parameters.get( "printDocuments", false );
  _printPassages = _parameters.get( "printPassages", false );
  _printSnippets = _parameters.get( "printSnippets", false );

  if (_parameters.exists("baseline")) {
    // doing a baseline
    std::string baseline = _parameters["baseline"];
    _environment.setBaseline(baseline);
    // need a factory for this...
    if( _parameters.get( "fbDocs", 0 ) != 0 ) {
      // The expander is constructed from _parameters, so the baseline method
      // is written into the "rule" entry before it is created.
      std::string rule = "method:" + baseline;
      _parameters.set("rule", rule);
      _expander = new indri::query::TFIDFExpander( &_environment, _parameters );
    }
  } else if( _parameters.get( "fbDocs", 0 ) != 0 ) {
    _expander = new indri::query::RMExpander( &_environment, _parameters );
  }

  return 0;
}
/**
 * Configures the query environment and the output options from _parameters,
 * reporting a failure on the output queue if a Lemur exception is raised.
 *
 * In order, this:
 *  - sets the single-background-model flag, stopwords and scoring rules,
 *  - adds every configured "index" and "server" to the environment,
 *  - applies "maxWildcardTerms" when that parameter is present,
 *  - reads the result-count and printing options into member fields,
 *  - selects a baseline and/or creates a query expander when "fbDocs" is
 *    non-zero.
 *
 * @return 0 whenever the function returns normally.
 * @throws lemur::api::Exception re-raised through LEMUR_RETHROW when setup
 *         fails while at least one query is queued.
 */
UINT64 initialize() {
  try {
    _environment.setSingleBackgroundModel( _parameters.get("singleBackgroundModel", false) );

    std::vector<std::string> stopwords;
    if( copy_parameters_to_string_vector( stopwords, _parameters, "stopper.word" ) )
      _environment.setStopwords(stopwords);

    std::vector<std::string> smoothingRules;
    if( copy_parameters_to_string_vector( smoothingRules, _parameters, "rule" ) )
      _environment.setScoringRules( smoothingRules );

    if( _parameters.exists( "index" ) ) {
      indri::api::Parameters indexes = _parameters["index"];

      for( size_t i=0; i < indexes.size(); i++ ) {
        _environment.addIndex( std::string(indexes[i]) );
      }
    }

    if( _parameters.exists( "server" ) ) {
      indri::api::Parameters servers = _parameters["server"];

      for( size_t i=0; i < servers.size(); i++ ) {
        _environment.addServer( std::string(servers[i]) );
      }
    }

    // Applied exactly once, after all indexes and servers have been added.
    // The previous version repeated this call with the same value at the end
    // of the try block; that duplicate has been dropped.
    if( _parameters.exists("maxWildcardTerms") )
      _environment.setMaxWildcardTerms(_parameters.get("maxWildcardTerms", 100));

    _requested = _parameters.get( "count", 1000 );
    // When "fbDocs" is absent the initial request size equals "count".
    _initialRequested = _parameters.get( "fbDocs", _requested );
    _runID = _parameters.get( "runID", "indri" );
    _trecFormat = _parameters.get( "trecFormat" , false );
    // INEX output is switched on by the mere presence of the "inex" entry.
    _inexFormat = _parameters.exists( "inex" );

    _printQuery = _parameters.get( "printQuery", false );
    _printDocuments = _parameters.get( "printDocuments", false );
    _printPassages = _parameters.get( "printPassages", false );
    _printSnippets = _parameters.get( "printSnippets", false );

    if (_parameters.exists("baseline")) {
      // doing a baseline
      std::string baseline = _parameters["baseline"];
      _environment.setBaseline(baseline);
      // need a factory for this...
      if( _parameters.get( "fbDocs", 0 ) != 0 ) {
        // The expander is constructed from _parameters, so the baseline
        // method is written into the "rule" entry before it is created.
        std::string rule = "method:" + baseline;
        _parameters.set("rule", rule);
        _expander = new indri::query::TFIDFExpander( &_environment, _parameters );
      }
    } else if( _parameters.get( "fbDocs", 0 ) != 0 ) {
      _expander = new indri::query::RMExpander( &_environment, _parameters );
    }
  } catch ( lemur::api::Exception& e ) {
    // NOTE(review): the rethrow sits inside the loop, so at most one queued
    // query is converted into a failure record before the exception leaves
    // this function. If the queue is empty, the exception is swallowed and 0
    // is returned. The popped `query` is also not released here. Confirm the
    // intended behaviour and the ownership of queued entries before changing
    // any of this.
    while( _queries.size() ) {
      query_t *query = _queries.front();
      _queries.pop();
      _output.push( new query_t( query->index, query->number, "query: " + query->number + " QueryThread::_initialize exception\n" ) );
      _queueEvent.notifyAll();
      LEMUR_RETHROW(e, "QueryThread::_initialize");
    }
  }

  return 0;
}