void rank_cross_validation(const problem *prob, const parameter *param, int nr_fold, double *result) { int i,q; int *fold_start; int l = prob->l; int *query_set; double *target = Malloc(double,l); int nr_query; int *start = NULL; int *count = NULL; int *perm = Malloc(int,l); int *query_perm; group_queries(prob->query, l, &nr_query, &start, &count, perm); if (nr_query == 1) { if (nr_fold > prob->l / 2) { nr_fold = l / 2; // each fold should include at least 2 instances to form pairs fprintf(stderr,"WARNING: # folds > # data / 2. Will use # folds = # data / 2 instead (Every fold should contain 2 data to form a pair)\n"); } nr_query = nr_fold; // Treat each fold as a query in performance evaluation // to avoid ranking inconsistency between models. start = (int *)realloc(start,nr_query * sizeof(int)); count = (int *)realloc(count,nr_query * sizeof(int)); query_set = Malloc(int,l); for(q=0;q<nr_query;q++) { count[q] = 0; start[q] = 0; } for(i=0;i<l;i++) { int j = rand() % nr_query; query_set[i] = j; count[j]++; } start[0] = 0; for(q=1;q<nr_query;q++) start[q] = start[q-1] + count[q-1]; for(i=0;i<l;i++) { perm[start[query_set[i]]] = i; ++start[query_set[i]]; } start[0] = 0; for(q=1;q<nr_query;q++) start[q] = start[q-1] + count[q-1]; } else {
// Constructor: stores the problem and the regularization parameter C, allocates
// the per-instance work buffers, groups the instances by query and converts the
// labels of every query into consecutive integer levels 1,...,k.
//
// prob - training problem; this constructor reads prob->l, prob->y and
//        prob->query. The pointer itself is stored, not copied.
// C    - stored in this->C.
//
// After construction, for every query q:
//   order_perm[q] holds the (instance id, label) pairs of q as ordered by
//                 qsort() with compare_values,
//   nr_class[q]   is the number of distinct label values seen in q,
//   int_y[i]      is the level (1..nr_class[q]) of instance i within its query.
l2r_l2_ranksvm_fun::l2r_l2_ranksvm_fun(const problem *prob, double C)
{
	const int l = prob->l;
	const double *y = prob->y;

	this->prob = prob;
	this->C = C;

	// Per-instance buffers, one slot for each of the l training instances.
	z = new double[l];
	l_plus = new int[l];
	l_minus = new int[l];
	alpha_plus = new double[l];
	alpha_minus = new double[l];
	int_y = new int[l];

	// Group instances by query: perm lists the instance ids query by query,
	// start[q]/count[q] delimit the slice of perm that belongs to query q.
	perm = Malloc(int,l);
	group_queries(prob->query, l, &nr_query, &start, &count, perm);

	nr_class = new int[nr_query];
	order_perm = new id_and_value *[nr_query];
	for (int query = 0; query < nr_query; ++query)
		order_perm[query] = new id_and_value[count[query]];

	// Check the number of relevance levels in each query and, meanwhile,
	// transform the labels into 1,...,k.
	for (int query = 0; query < nr_query; ++query)
	{
		const int size = count[query];
		const int *members = perm + start[query];
		id_and_value *sorted = order_perm[query];

		int idx = 0;
		while (idx < size)
		{
			const int instance = members[idx];
			sorted[idx].id = instance;
			sorted[idx].value = y[instance];
			++idx;
		}
		qsort(sorted, size, sizeof(id_and_value), compare_values);

		// Walk the sorted labels; every strict increase opens a new level.
		int level = 1;
		int_y[sorted[0].id] = level;
		for (idx = 1; idx < size; ++idx)
		{
			if (sorted[idx-1].value < sorted[idx].value)
				++level;
			int_y[sorted[idx].id] = level;
		}
		nr_class[query] = level;
	}
}
// // Interface functions // model* train(const problem *prob, const parameter *param) { int l = prob->l; int n = prob->n; int w_size = prob->n; model *model_ = Malloc(model,1); model_->nr_feature=n; model_->param = *param; if(param->solver_type == SELECTION_TREE) { model_->w = Malloc(double, w_size); model_->nr_class = 2; int nr_subset; int *start = NULL; int *count = NULL; int *perm = Malloc(int,l); group_queries(prob, &nr_subset ,&start, &count, perm); train_one(prob, param, &model_->w[0],0,0, nr_subset, perm, start, count); free(start); free(count); free(perm); }
void eval_list(double *label, double *target, int *query, int l, double *result_ret) { int q,i,j,k; int nr_query; int *start = NULL; int *count = NULL; int *perm = Malloc(int, l); id_and_value *order_perm; int true_query; long long totalnc = 0, totalnd = 0; long long nc = 0; long long nd = 0; double tmp; double accuracy = 0; int *l_plus; int *int_y; int same_y = 0; double *ideal_dcg; double *dcg; double meanndcg = 0; double ndcg; selectiontree *T; group_queries(query, l, &nr_query, &start, &count, perm); true_query = nr_query; for (q=0;q<nr_query;q++) { //We use selection trees to compute pairwise accuracy nc = 0; nd = 0; l_plus = new int[count[q]]; int_y = new int[count[q]]; order_perm = new id_and_value[count[q]]; int *perm_q = &perm[start[q]]; for (i=0;i<count[q];i++) { order_perm[i].id = i; order_perm[i].value = label[perm_q[i]]; } qsort(order_perm, count[q], sizeof(id_and_value), compare_values); int_y[order_perm[0].id] = 1; same_y = 0; k = 1; for(i=1;i<count[q];i++) { if (order_perm[i-1].value < order_perm[i].value) { same_y = 0; k++; } else same_y++; int_y[order_perm[i].id] = k; nc += (i - same_y); } for (i=0;i<count[q];i++) { order_perm[i].id = i; order_perm[i].value = target[perm_q[i]]; } qsort(order_perm, count[q], sizeof(id_and_value), compare_values); //total pairs T = new selectiontree(k); j = 0; for (i=0;i<count[q];i++) { while (j<count[q] && ( order_perm[j].value < order_perm[i].value)) { T->insert_node(int_y[order_perm[j].id], tmp); j++; } T->larger(int_y[order_perm[i].id], &l_plus[order_perm[i].id], &tmp); } delete T; for (i=0;i<count[q];i++) nd += l_plus[i]; nc -= nd; if (nc != 0 || nd != 0) accuracy += double(nc)/double(nc+nd); else true_query--; totalnc += nc; totalnd += nd; delete[] l_plus; delete[] int_y; delete[] order_perm; } result_ret[0] = (double)totalnc/(double)(totalnc+totalnd); for (q=0;q<nr_query;q++) { //mean ndcg by the formulation of LETOR ideal_dcg = new double[count[q]]; dcg = new double[count[q]]; ndcg = 0; order_perm = new 
id_and_value[count[q]]; int *perm_q = &perm[start[q]]; for (i=0;i<count[q];i++) { order_perm[i].id = perm_q[i]; order_perm[i].value = label[perm_q[i]]; } qsort(order_perm, count[q], sizeof(id_and_value), compare_values); ideal_dcg[0] = pow(2.0,order_perm[count[q]-1].value) - 1; for (i=1;i<count[q];i++) ideal_dcg[i] = ideal_dcg[i-1] + (pow(2.0,order_perm[count[q]-1 - i].value) - 1) * log(2.0) / log(i+1.0); for (i=0;i<count[q];i++) { order_perm[i].id = perm_q[i]; order_perm[i].value = target[perm_q[i]]; } qsort(order_perm, count[q], sizeof(id_and_value), compare_values); dcg[0] = pow(2.0, label[order_perm[count[q] - 1].id]) - 1; for (i=1;i<count[q];i++) dcg[i] = dcg[i-1] + (pow(2.0, label[order_perm[count[q] - 1 - i].id]) - 1) * log(2.0) / log(i + 1.0); if (ideal_dcg[0]>0) for (i=0;i<count[q];i++) ndcg += dcg[i]/ideal_dcg[i]; else ndcg = 0; meanndcg += ndcg/count[q]; delete[] order_perm; delete[] ideal_dcg; delete[] dcg; } meanndcg /= nr_query; result_ret[1] = meanndcg; free(start); free(count); free(perm); }
// Constructor: stores the problem, parameters, scheduler and solution record,
// allocates the work buffers, has nomad_fun() fill the matrix Q and converts
// the labels of every subset into integer levels 1,...,k.
//
// prob      - problem; prob->l, prob->global_l and prob->y are read and the
//             pointer is stored.
// param     - parameters; param->C and param->thread_count are read and the
//             pointer is stored.
// scheduler - stored; scheduler->local_l and scheduler->start_ptr are copied.
// si        - solution record; its rho is reset to 0 and both upper bounds
//             to INF.
//
// After construction, for every subset s, nr_class[s] is the number of levels
// found in s and int_y[i] is the level of instance i within its subset.
l2r_rank_fun::l2r_rank_fun(const rksvm_problem *prob, const rksvm_parameter *param, Scheduler *scheduler, struct SolutionInfo *si)
{
	this->si = si;
	this->param = param;
	si->rho = 0;
	si->upper_bound_p = INF;
	si->upper_bound_n = INF;

	const int l = prob->l;
	this->prob = prob;
	this->C = param->C;
	this->thread_count = param->thread_count;
	this->current_rank = mpi_get_rank();
	this->global_l = prob->global_l;
	z = new double[l];

	// Group the instances into subsets: perm lists the instance ids subset by
	// subset, start[s]/count[s] delimit the slice of perm owned by subset s.
	perm = new int[l];
	group_queries(prob, &nr_subset ,&start, &count, perm);

	pi = new id_and_value* [nr_subset];
#pragma omp parallel for default(shared) if(nr_subset > 50)
	for (int s = 0; s < nr_subset; s++)
	{
		pi[s] = new id_and_value[count[s]];
	}

	const double *y = prob->y;
	int_y = new int[prob->l];
	nr_class = new int[nr_subset];
	l_plus = new int[l];
	l_minus = new int[l];
	gamma_plus = new double[l];
	gamma_minus = new double[l];
	ATAQb = new double[l];
	ATe = new double[l];

	this->scheduler = scheduler;
	this->local_l = scheduler->local_l;
	this->start_ptr = scheduler->start_ptr;

	gz = new double[global_l];
	// Q has l * global_l entries. The product is formed in size_t because an
	// int multiplication overflows once l * global_l exceeds INT_MAX (for
	// example 50000 x 50000).
	Q = new double[static_cast<size_t>(l) * static_cast<size_t>(global_l)];
	// Original note: "here, it shows how to compute Q through TBB library."
	nomad_fun(prob, param, scheduler, Q);

	// Convert the labels of each subset into levels. The loop counters are
	// declared inside the loop, so each thread gets its own copies.
	// NOTE(review): the backward walk only produces more than one level if
	// compare_id_and_value orders values non-increasingly -- confirm.
#pragma omp parallel for default(shared)
	for (int s = 0; s < nr_subset; s++)
	{
		const int size = count[s];
		id_and_value *items = pi[s];

		for (int j = 0; j < size; j++)
		{
			const int instance = perm[j+start[s]];
			items[j].id = instance;
			items[j].value = y[instance];
		}
		qsort(items, size, sizeof(id_and_value), compare_id_and_value);

		// The last element gets level 1; every strict increase met while
		// walking towards the front opens the next level.
		int k = 1;
		int_y[items[size-1].id] = 1;
		for (int j = size-2; j >= 0; j--)
		{
			if (items[j].value > items[j+1].value)
				k++;
			int_y[items[j].id] = k;
		}
		nr_class[s] = k;
	}
}