Esempio n. 1
0
/**
 * Grid-searches the BM25 parameters (k1, b, k3) and reports the combination
 * that yields the highest mean average precision (MAP) over `allqueries`.
 *
 * For every combination the ranker is re-parameterised, every query is
 * scored against the index, and the resulting MAP is appended as a
 * tab-separated row to "Assignment2/tuning.txt". Progress and the final
 * winner are printed to standard output.
 *
 * If the log file cannot be opened the function reports the problem, hands
 * back the fallback parameters (k1 = 1.2, b = 0.75, k3 = 500) and returns
 * without tuning.
 *
 * @param idx        index the queries are scored against
 * @param allqueries queries to evaluate; each query's id() is forwarded to
 *                   eval.avg_p
 * @param eval       evaluator that accumulates per-query average precision;
 *                   its statistics are reset after every combination
 * @param k1         [out] best k1 found
 * @param b          [out] best b found
 * @param k3         [out] best k3 found
 */
void bm25_tune (const std::shared_ptr<index::dblru_inverted_index> & idx, std::vector<corpus::document> & allqueries, index::ir_eval & eval, double & k1, double & b, double & k3)
{
    // Candidate values for each parameter; the grid is their cross product.
    const double k1values[] = {1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 3.0, 4.0, 5.0};
    const double bvalues[] = {0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95, 1.0, 1.5, 2.0, 3.0};
    const double k3values[] = {200.0, 300.0, 400.0, 500.0, 600.0, 700.0, 800.0};

    // Cutoff passed both to the ranker and to the average-precision call, so
    // the two always agree.
    const int top_k = 50;

    double maxmap = 0; // Best MAP seen so far
    // Fallback parameters, returned when no combination achieves MAP > 0 or
    // when tuning cannot run at all.
    double k1max = 1.2;
    double bmax = 0.75;
    double k3max = 500.0;

    std::ofstream writeout;
    writeout.open("Assignment2/tuning.txt");
    if (!writeout.is_open())
    {
        std::cout<<"Problem writing the output of tuning to the system. Make sure the program has enough writing privileges. Quiting..."<<std::endl;
        // Actually quit, as the message promises, but leave the out-params in
        // a defined state for the caller.
        k1 = k1max;
        b = bmax;
        k3 = k3max;
        return;
    }
    writeout << "k1" << "\t" << "b" << "\t" << "k3" << "\t" << "MAP" << "\n";

    auto ranker = make_unique<bm25_ranker>(); // Single ranker instance, re-parameterised per combination

    for (double k1value : k1values)
    {
        for (double bvalue : bvalues)
        {
            for (double k3value : k3values)
            {
                ranker->set_param(k1value, bvalue, k3value);
                std::cout << "Tuning for parameters : " << "k1 = " << k1value << "; b = " << bvalue << "; k3 = " << k3value << std::endl;

                for (auto & query : allqueries)
                {
                    // Rank the top `top_k` documents for this query, then let
                    // eval record the query's average precision.
                    auto ranking = ranker->score(*idx, query, top_k);
                    eval.avg_p(ranking, query.id(), top_k);
                }

                // Read MAP once per combination and reuse it for the log row
                // and the comparison.
                const auto current_map = eval.map();

                writeout << k1value << "\t" << bvalue << "\t" << k3value << "\t" << current_map << "\n";

                if (current_map > maxmap) // Strictly greater: on ties the earliest combination wins
                {
                    maxmap = current_map;
                    k1max = k1value;
                    bmax = bvalue;
                    k3max = k3value;
                }

                // Drop the stored average-precision values so the next
                // combination's MAP starts from scratch.
                eval.reset_stats();
            }
        }
    }

    writeout.close();
    std::cout<<"Max MAP = "<< maxmap << " achieved by " << "k1 = " << k1max << ", b = " << bmax << ", k3 = " << k3max << std::endl;

    // Hand the winning parameters back to the caller.
    k1 = k1max;
    b = bmax;
    k3 = k3max;
}
Esempio n. 2
0
/**
 * Test helper: evaluates one query's ranking with `eval` and asserts that
 * each metric approximately matches its expected value.
 *
 * All five metrics are computed first and only then asserted, so every
 * eval call has run before the first assertion is checked.
 *
 * @param eval     evaluator used to compute the metrics
 * @param ranking  (doc_id, score) pairs for the query
 * @param qid      id of the query being checked
 * @param e_f1     expected F1
 * @param e_p      expected precision
 * @param e_r      expected recall
 * @param e_avg_p  expected average precision
 * @param e_ndcg   expected nDCG
 * @param num_docs cutoff forwarded unchanged to every metric call; defaults
 *                 to the largest uint64_t (presumably "no cutoff" -- confirm
 *                 against ir_eval)
 */
void check_query(index::ir_eval& eval,
                 const std::vector<std::pair<doc_id, double>>& ranking,
                 query_id qid, double e_f1, double e_p, double e_r,
                 double e_avg_p, double e_ndcg,
                 uint64_t num_docs = std::numeric_limits<uint64_t>::max())
{
    // Compute every metric up front with the same ranking, query and cutoff.
    auto f1 = eval.f1(ranking, qid, num_docs);
    auto p = eval.precision(ranking, qid, num_docs);
    auto r = eval.recall(ranking, qid, num_docs);
    // NOTE(review): avg_p may also record the value inside eval for a later
    // MAP computation -- confirm before reordering or removing this call.
    auto avg_p = eval.avg_p(ranking, qid, num_docs);
    auto ndcg = eval.ndcg(ranking, qid, num_docs);
    // Compare actual vs. expected; the tolerance is whatever the
    // ASSERT_APPROX_EQUAL macro defines (not visible in this file).
    ASSERT_APPROX_EQUAL(f1, e_f1);
    ASSERT_APPROX_EQUAL(p, e_p);
    ASSERT_APPROX_EQUAL(r, e_r);
    ASSERT_APPROX_EQUAL(avg_p, e_avg_p);
    ASSERT_APPROX_EQUAL(ndcg, e_ndcg);
}