예제 #1
0
// Cross validation for ranking problems.
// NOTE: only the first part of this function is visible in this excerpt; the
// comments below describe the visible part only.
//   prob    - data set; prob->l (number of instances) and prob->query are read here
//   param   - not referenced in the visible part
//   nr_fold - requested number of folds; may be lowered in the single-query case
//   result  - not written in the visible part
void rank_cross_validation(const problem *prob, const parameter *param, int nr_fold, double *result)
{
	int i,q;
	int *fold_start;
	int l = prob->l;
	int *query_set;
	double *target = Malloc(double,l);
	int nr_query;
	int *start = NULL;
	int *count = NULL;
	int *perm = Malloc(int,l);
	int *query_perm;
	// group_queries fills nr_query, start, count and perm; presumably perm lists
	// instance indices grouped by query, with start[q]/count[q] delimiting each
	// group -- TODO confirm against group_queries.
	group_queries(prob->query, l, &nr_query, &start, &count, perm);
	if (nr_query == 1)
	{
		// Only one query: the folds themselves are used as artificial queries.
		if (nr_fold > prob->l / 2)
		{
			nr_fold = l / 2; // each fold should include at least 2 instances to form pairs
			fprintf(stderr,"WARNING: # folds > # data / 2. Will use # folds = # data / 2 instead (Every fold should contain 2 data to form a pair)\n");
		}
		nr_query = nr_fold;
		// Treat each fold as a query in performance evaluation
		// to avoid ranking inconsistency between models.
		// Resize the per-query arrays to hold one entry per fold.
		start = (int *)realloc(start,nr_query * sizeof(int));
		count = (int *)realloc(count,nr_query * sizeof(int));
		query_set = Malloc(int,l);
		for(q=0;q<nr_query;q++)
		{
			count[q] = 0;
			start[q] = 0;
		}
		// Assign every instance to a fold chosen with rand() and count fold sizes.
		// NOTE(review): rand() % nr_query does not guarantee that every fold
		// receives at least two instances.
		for(i=0;i<l;i++)
		{
			int j = rand() % nr_query;
			query_set[i] = j;
			count[j]++;
		}
		// Prefix sums of count give the first slot of each fold in perm.
		start[0] = 0;
		for(q=1;q<nr_query;q++)
			start[q] = start[q-1] + count[q-1];
		// Bucket the instances: write i into the next free slot of its fold.
		// start[] is used as a write cursor here and is advanced as slots fill.
		for(i=0;i<l;i++)
		{
			perm[start[query_set[i]]] = i;
			++start[query_set[i]];
		}
		// The cursors were advanced above, so rebuild start[] from count[].
		start[0] = 0;
		for(q=1;q<nr_query;q++)
			start[q] = start[q-1] + count[q-1];
	}
	else
	{
예제 #2
0
// Constructor: stores the problem and C, allocates the per-instance work
// arrays, groups the instances by query and, inside every query, replaces the
// real-valued labels by integer relevance levels 1..k (written to int_y).
// nr_class[q] receives the number of distinct levels k found in query q.
l2r_l2_ranksvm_fun::l2r_l2_ranksvm_fun(const problem *prob, double C)
{
	const int n_inst = prob->l;
	double *labels = prob->y;

	this->prob = prob;
	this->C = C;

	// Per-instance buffers.
	z = new double[n_inst];
	l_plus = new int[n_inst];
	l_minus = new int[n_inst];
	alpha_plus = new double[n_inst];
	alpha_minus = new double[n_inst];
	int_y = new int[n_inst];

	// Group instances by query; group_queries fills nr_query, start, count, perm.
	perm = Malloc(int, n_inst);
	group_queries(prob->query, n_inst, &nr_query, &start, &count, perm);

	// One (id, value) array per query, sized to that query.
	nr_class = new int[nr_query];
	order_perm = new id_and_value *[nr_query];
	for (int qi = 0; qi < nr_query; ++qi)
		order_perm[qi] = new id_and_value[count[qi]];

	for (int qi = 0; qi < nr_query; ++qi)
	{
		const int q_size = count[qi];
		const int *members = perm + start[qi];
		id_and_value *sorted = order_perm[qi];

		// Collect (instance id, label) pairs of this query and sort them.
		for (int j = 0; j < q_size; ++j)
		{
			const int idx = members[j];
			sorted[j].id = idx;
			sorted[j].value = labels[idx];
		}
		qsort(sorted, q_size, sizeof(id_and_value), compare_values);

		// Walk the sorted list; the level goes up by one each time the label
		// is strictly larger than the previous one.
		int level = 1;
		int_y[sorted[0].id] = level;
		for (int j = 1; j < q_size; ++j)
		{
			if (sorted[j].value > sorted[j-1].value)
				++level;
			int_y[sorted[j].id] = level;
		}
		nr_class[qi] = level;
	}
}
예제 #3
0
//
// Interface functions
//
// Train a model on `prob` using the settings in `param`.
// NOTE: only the beginning of this function is visible in this excerpt; the
// comments below cover the visible SELECTION_TREE branch only.
// The returned model is allocated with Malloc; its nr_feature is set to
// prob->n and its param field holds a copy of *param.
model* train(const problem *prob, const parameter *param)
{
	int l = prob->l;
	int n = prob->n;
	int w_size = prob->n;
	model *model_ = Malloc(model,1);

	model_->nr_feature=n;
	model_->param = *param;
	
	if(param->solver_type == SELECTION_TREE)
	{
		// A single weight vector with w_size (= prob->n) entries.
		model_->w = Malloc(double, w_size);
		model_->nr_class = 2;
		int nr_subset;
		int *start = NULL;
		int *count = NULL;
		int *perm = Malloc(int,l);
		// group_queries fills nr_subset, start, count and perm; presumably a
		// grouping of the instances into subsets -- TODO confirm.
		group_queries(prob, &nr_subset ,&start, &count, perm);
		train_one(prob, param, &model_->w[0],0,0, nr_subset, perm, start, count);
		// The grouping arrays are only needed during training.
		free(start);
		free(count);
		free(perm);
	}
예제 #4
0
// Evaluate predicted scores against ground-truth relevance labels.
//
//   label      - ground-truth relevance of every instance (length l)
//   target     - predicted score of every instance (length l)
//   query      - per-instance query information handed to group_queries (length l)
//   l          - number of instances
//   result_ret - output array with at least two entries:
//                  [0] pairwise accuracy pooled over all queries,
//                      concordant / (concordant + discordant)
//                  [1] mean NDCG (LETOR formulation), averaged over positions
//                      inside a query and then over queries
//
// When no comparable pair exists, result_ret[0] is set to 0 instead of the
// result of 0/0; when there is no query at all, result_ret[1] is 0.
void eval_list(double *label, double *target, int *query, int l, double *result_ret)
{
	int q,i,j,k;
	int nr_query = 0;
	int *start = NULL;
	int *count = NULL;
	int *perm = Malloc(int, l);
	id_and_value *order_perm;
	long long totalnc = 0, totalnd = 0;
	long long nc;
	long long nd;
	// Scratch value for the selection-tree calls. It is passed by value to
	// insert_node, so it must be initialized before its first use.
	double tmp = 0;
	int *l_plus;
	int *int_y;
	int same_y;
	double *ideal_dcg;
	double *dcg;
	double meanndcg = 0;
	double ndcg;
	const double log_two = log(2.0);
	selectiontree *T;

	group_queries(query, l, &nr_query, &start, &count, perm);

	// ---- Pairwise accuracy ------------------------------------------------
	for (q=0;q<nr_query;q++)
	{
		//We use selection trees to compute pairwise accuracy
		nc = 0;
		nd = 0;
		l_plus = new int[count[q]];
		int_y = new int[count[q]];
		order_perm = new id_and_value[count[q]];
		int *perm_q = &perm[start[q]];

		// Sort the query by true label (ids are positions inside the query).
		for (i=0;i<count[q];i++)
		{
			order_perm[i].id = i;
			order_perm[i].value = label[perm_q[i]];
		}
		qsort(order_perm, count[q], sizeof(id_and_value), compare_values);

		// Map labels to integer levels 1..k and count, for every item, the
		// earlier items with a strictly smaller label. Their sum is the
		// number of comparable pairs, accumulated in nc for now.
		int_y[order_perm[0].id] = 1;
		same_y = 0;
		k = 1;
		for(i=1;i<count[q];i++)
		{
			if (order_perm[i-1].value < order_perm[i].value)
			{
				same_y = 0;
				k++;
			}
			else
				same_y++;
			int_y[order_perm[i].id] = k;
			nc += (i - same_y);
		}

		// Sort the same query by predicted score.
		for (i=0;i<count[q];i++)
		{
			order_perm[i].id = i;
			order_perm[i].value = target[perm_q[i]];
		}
		qsort(order_perm, count[q], sizeof(id_and_value), compare_values);

		// Sweep in score order: before item i is queried, every item with a
		// strictly smaller score has been inserted under its label level.
		// T->larger presumably stores in l_plus[] the number of inserted
		// items with a larger level, i.e. the discordant pairs of item i
		// -- TODO confirm against selectiontree.
		T = new selectiontree(k);
		j = 0;
		for (i=0;i<count[q];i++)
		{
			while (j<count[q] && ( order_perm[j].value < order_perm[i].value))
			{
				T->insert_node(int_y[order_perm[j].id], tmp);
				j++;
			}
			T->larger(int_y[order_perm[i].id], &l_plus[order_perm[i].id], &tmp);
		}
		delete T;

		for (i=0;i<count[q];i++)
			nd += l_plus[i];
		// comparable pairs minus discordant pairs = concordant pairs
		nc -= nd;
		totalnc += nc;
		totalnd += nd;
		delete[] l_plus;
		delete[] int_y;
		delete[] order_perm;
	}
	// Avoid 0/0 when the data contains no comparable pair.
	if (totalnc + totalnd > 0)
		result_ret[0] = (double)totalnc/(double)(totalnc+totalnd);
	else
		result_ret[0] = 0;

	// ---- Mean NDCG ----------------------------------------------------------
	for (q=0;q<nr_query;q++)
	{
		//mean ndcg by the formulation of LETOR
		ideal_dcg = new double[count[q]];
		dcg = new double[count[q]];
		ndcg = 0;
		order_perm = new id_and_value[count[q]];
		int *perm_q = &perm[start[q]];

		// Ideal DCG: items taken from the largest true label downwards.
		for (i=0;i<count[q];i++)
		{
			order_perm[i].id = perm_q[i];
			order_perm[i].value = label[perm_q[i]];
		}
		qsort(order_perm, count[q], sizeof(id_and_value), compare_values);
		ideal_dcg[0] = pow(2.0,order_perm[count[q]-1].value) - 1;
		for (i=1;i<count[q];i++)
			ideal_dcg[i] = ideal_dcg[i-1] + (pow(2.0,order_perm[count[q]-1 - i].value) - 1) * log_two / log(i+1.0);

		// Actual DCG: items taken from the largest predicted score downwards,
		// gains computed from their true labels.
		for (i=0;i<count[q];i++)
		{
			order_perm[i].id = perm_q[i];
			order_perm[i].value = target[perm_q[i]];
		}
		qsort(order_perm, count[q], sizeof(id_and_value), compare_values);
		dcg[0] = pow(2.0, label[order_perm[count[q] - 1].id]) - 1;
		for (i=1;i<count[q];i++)
			dcg[i] = dcg[i-1] + (pow(2.0, label[order_perm[count[q] - 1 - i].id]) - 1) * log_two / log(i + 1.0);

		// A query whose best label has zero gain contributes an NDCG of 0.
		if (ideal_dcg[0]>0)
			for (i=0;i<count[q];i++)
				ndcg += dcg[i]/ideal_dcg[i];
		else
			ndcg = 0;
		meanndcg += ndcg/count[q];
		delete[] order_perm;
		delete[] ideal_dcg;
		delete[] dcg;
	}
	// Avoid dividing by zero when there is no query.
	if (nr_query > 0)
		meanndcg /= nr_query;
	result_ret[1] = meanndcg;

	free(start);
	free(count);
	free(perm);
}
예제 #5
0
// Constructor of the ranking objective.
//
//   prob      - local problem; prob->l, prob->global_l and prob->y are read here
//   param     - solver parameters; C and thread_count are copied
//   scheduler - provides local_l and start_ptr, and is forwarded to nomad_fun
//   si        - solution info; rho and both upper bounds are reset here
//
// The constructor allocates the work arrays, groups the instances into subsets
// with group_queries, has nomad_fun fill the l x global_l matrix Q, and maps
// the labels of every subset to integer levels 1..k (stored in int_y, with the
// number of levels per subset in nr_class).
l2r_rank_fun::l2r_rank_fun(const rksvm_problem *prob, const rksvm_parameter *param, 
		Scheduler *scheduler, struct SolutionInfo *si)
{
	this->si = si;
	this->param = param;
	si->rho = 0;
	si->upper_bound_p = INF;
	si->upper_bound_n = INF;
	int l=prob->l;
	this->prob = prob;
	this->C = param->C;
	this->thread_count = param->thread_count;
	this->current_rank = mpi_get_rank();
	this->global_l = prob->global_l;
	z = new double[l];

	int i,j,k;
	perm = new int[l];
	// group_queries fills nr_subset, start, count and perm.
	group_queries(prob, &nr_subset ,&start, &count, perm);
	pi = new id_and_value* [nr_subset];

	// One (id, value) array per subset. The loop index has its own name so it
	// no longer shadows the function-level i.
#pragma omp parallel for default(shared) if(nr_subset > 50)
	for (int s=0;s<nr_subset;s++)
	{
		pi[s] = new id_and_value[count[s]];
	}

	double *y=prob->y;
	int_y = new int[prob->l];
	nr_class = new int[nr_subset];
	l_plus = new int[l];
	l_minus = new int[l];
	gamma_plus = new double[l];
	gamma_minus = new double[l];
	ATAQb = new double[l];
	ATe = new double[l];

	// Distributed-run state taken from the scheduler.
	this->scheduler = scheduler;
	this->local_l = scheduler->local_l;
	this->start_ptr = scheduler->start_ptr;
	gz = new double[global_l];
	// Form the element count in size_t: l*global_l evaluated in int can
	// overflow before the allocation size is computed.
	Q = new double[static_cast<size_t>(l) * static_cast<size_t>(global_l)];

	// nomad_fun is expected to fill Q (the original note says this is done
	// with the TBB library) -- TODO confirm against nomad_fun.
	nomad_fun(prob, param, scheduler, Q);

	// Label levels per subset. After sorting with compare_id_and_value the
	// array is walked from the last element to the first; the last element
	// gets level 1 and the level increases whenever the value is strictly
	// larger than that of the element after it.
#pragma omp parallel for default(shared) private(i,j,k)	
	for (i=0;i<nr_subset;i++)
	{
		k=1;
		for (j=0;j<count[i];j++)
		{
			pi[i][j].id=perm[j+start[i]];
			pi[i][j].value=y[perm[j+start[i]]];
		}
		qsort(pi[i], count[i], sizeof(id_and_value), compare_id_and_value);
		int_y[pi[i][count[i]-1].id]=1;
		for(j=count[i]-2;j>=0;j--)
		{
			if (pi[i][j].value>pi[i][j+1].value)
				k++;
			int_y[pi[i][j].id]=k;
		}
		nr_class[i]=k;
	}
}