void bm25_tune (const std::shared_ptr<index::dblru_inverted_index> & idx, std::vector<corpus::document> & allqueries, index::ir_eval & eval, double & k1, double & b, double & k3) { double k1values [12] = {1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 3.0, 4.0, 5.0}; // Different values for the parameter k1 double bvalues [12] = {0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95, 1.0, 1.5, 2.0, 3.0}; // Different values for the parameter b double k3values [7] = {200.0, 300.0, 400.0, 500.0, 600.0, 700.0, 800.0};// Different values for parameter k3 double maxmap = 0; // Stores the current maximum MAP value double k1max = 1.2; double bmax = 0.75; double k3max = 500.0; std::ofstream writeout; writeout.open("Assignment2/tuning.txt"); if (!writeout.is_open()) { std::cout<<"Problem writing the output of tuning to the system. Make sure the program has enough writing privileges. Quiting..."<<std::endl; } writeout << "k1" << "\t" << "b" << "\t" << "k3" << "\t" << "MAP" << "\n"; auto ranker = make_unique<bm25_ranker>(); // creates a pointer to a bm25_ranker instance for (int i=0 ; i<12 ; i++) // Loops over all k1 values { for (int j=0 ; j<12 ; j++) // Loops over all b values { for (int k=0 ; k<7 ; k++) // Loops over all k3 values { ranker->set_param(k1values[i],bvalues[j],k3values[k]); // Sets the parameters of ranker to the current values std::cout << "Tuning for parameters : " << "k1 = " << k1values[i] << "; b = " << bvalues[j] << "; k3 = " << k3values[k] << std::endl; for (std::vector<corpus::document>::iterator query = allqueries.begin(); query != allqueries.end(); ++query) // Iterates over all queries in allqueries { auto ranking = ranker->score(*idx, *query, 50); // Returns a ranked list of the top 1000 documents for the current query eval.avg_p(ranking,(*query).id(),50); // eval.avg_p stores the value of average precision for the current query in the instance eval } writeout << k1values[i] << "\t" << bvalues[j] << "\t" << k3values[k] << "\t" << eval.map() << "\n"; if (eval.map() > maxmap) // Updates maxmap, cmax, lambdamax if the current map, which is equal to eval.map(), is greater than maxmap { // You should only change the values of the following three assignments maxmap = eval.map(); // Change 0 to the correct value DONE k1max = k1values[i]; // Change 0 to the correct value DONE bmax = bvalues[j]; // Change 0 to the correct value DONE k3max = k3values[k]; } eval.reset_stats(); // Deletes all the average precision values stored in eval to allow correct calculation of MAP for the next parameter combination } } } writeout.close(); std::cout<<"Max MAP = "<< maxmap << " achieved by " << "k1 = " << k1max << ", b = " << bmax << ", k3 = " << k3max << std::endl; // Prints to the standard ouput k1 = k1max; // Returns the best c value to the calling function b = bmax; k3 = k3max; // Returns the best lambda value to the calling function }
void check_query(index::ir_eval& eval, const std::vector<std::pair<doc_id, double>>& ranking, query_id qid, double e_f1, double e_p, double e_r, double e_avg_p, double e_ndcg, uint64_t num_docs = std::numeric_limits<uint64_t>::max()) { auto f1 = eval.f1(ranking, qid, num_docs); auto p = eval.precision(ranking, qid, num_docs); auto r = eval.recall(ranking, qid, num_docs); auto avg_p = eval.avg_p(ranking, qid, num_docs); auto ndcg = eval.ndcg(ranking, qid, num_docs); ASSERT_APPROX_EQUAL(f1, e_f1); ASSERT_APPROX_EQUAL(p, e_p); ASSERT_APPROX_EQUAL(r, e_r); ASSERT_APPROX_EQUAL(avg_p, e_avg_p); ASSERT_APPROX_EQUAL(ndcg, e_ndcg); }