Beispiel #1
0
/*
 * Runs nr_fold-fold cross validation on prob and scores the collected
 * decision values with validation_function().
 *
 * The instance indices are shuffled with rand() and cut into nr_fold
 * contiguous folds. For every fold a model is trained on all other instances
 * and svm_predict_values() is called for each held-out instance. The decision
 * value and the true label (mapped to +1 when y > 0, otherwise -1) are stored
 * at the instance's position in the shuffled order.
 *
 * prob    - training problem; only l, x and y are read.
 * param   - training parameters forwarded to svm_train().
 * nr_fold - number of folds; must be positive.
 *
 * Returns whatever validation_function(dec_values, ty) returns.
 *
 * Terminates the process when nr_fold is not positive, when the trained model
 * is a regression model (NU_SVR / EPSILON_SVR), or when it has more than two
 * classes.
 */
double binary_class_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold)
{
	int i;
	int l = prob->l;
	int *fold_start;
	int *perm;
	int *labels;
	dvec_t dec_values;
	ivec_t ty;

	// A non-positive fold count would divide by zero when the fold
	// boundaries are computed below.
	if(nr_fold <= 0)
	{
		fprintf(stderr,"Error: the number of folds must be positive\n");
		exit(-1);
	}

	fold_start = Malloc(int,nr_fold+1);
	perm = Malloc(int,l);

	// Random permutation of the instance indices (Fisher-Yates shuffle).
	for(i=0;i<l;i++) perm[i]=i;
	for(i=0;i<l;i++)
	{
		int j = i+rand()%(l-i);
		std::swap(perm[i],perm[j]);
	}

	// Fold i covers positions [fold_start[i], fold_start[i+1]) of perm.
	// The product is formed in 64 bits so that i*l cannot overflow int.
	for(i=0;i<=nr_fold;i++)
		fold_start[i]=(int)((long long)i*l/nr_fold);

	// Every position in [0,l) belongs to exactly one fold, so the result
	// vectors can be sized once up front.
	dec_values.resize(l);
	ty.resize(l);

	for(i=0;i<nr_fold;i++)
	{
		int                begin   = fold_start[i];
		int                end     = fold_start[i+1];
		int                j,k;
		struct svm_problem subprob;

		// The training set is everything outside [begin,end).
		subprob.l = l-(end-begin);
		subprob.x = Malloc(struct svm_node*,subprob.l);
		subprob.y = Malloc(double,subprob.l);

		k=0;
		for(j=0;j<begin;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		for(j=end;j<l;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		struct svm_model *submodel = svm_train(&subprob,param);
		int svm_type = svm_get_svm_type(submodel);

		if(svm_type == NU_SVR || svm_type == EPSILON_SVR){
			fprintf(stderr, "wrong svm type");
			exit(1);
		}

		// Reject multi-class models before allocating the label buffer.
		if(svm_get_nr_class(submodel) > 2)
		{
			fprintf(stderr,"Error: the number of class is not equal to 2\n");
			exit(-1);
		}

		labels = Malloc(int, svm_get_nr_class(submodel));
		svm_get_labels(submodel, labels);

		// Score the held-out fold.
		for(j=begin;j<end;j++) {
			svm_predict_values(submodel,prob->x[perm[j]], &dec_values[j]);
			ty[j] = (prob->y[perm[j]] > 0)? 1: -1;
		}

		// Negate the fold's decision values when the model's first label
		// is not positive.
		// NOTE(review): presumably this makes a positive decision value
		// mean "positive class" -- confirm against svm_predict_values.
		if(labels[0] <= 0) {
			for(j=begin;j<end;j++)
				dec_values[j] *= -1;
		}

		svm_free_and_destroy_model(&submodel);
		free(subprob.x);
		free(subprob.y);
		free(labels);
	}

	free(perm);
	free(fold_start);

	return validation_function(dec_values, ty);
}
Beispiel #2
0
/*
 * Predicts every instance read from input with the global model_, writes one
 * predicted label per line to output, and passes the collected decision
 * values and true labels to validation_function().
 *
 * Each input line is parsed as "<label> <index>:<value> ..." with strictly
 * increasing indices. True labels are mapped to +1 when the parsed label is
 * greater than 0 and to -1 otherwise. A decision value is negated when the
 * model's first label is not positive.
 *
 * input  - stream the instances are read from (through readline()).
 * output - stream receiving the predicted labels, formatted with "%g".
 *
 * Malformed lines are reported through exit_input_error() with their
 * 1-based line number. Models with more than two classes terminate the
 * process, because only one decision value is stored per instance.
 *
 * NOTE(review): the global line buffer allocated here is not released by
 * this function -- confirm that the caller owns and frees it.
 */
void binary_class_predict(FILE *input, FILE *output){
	int    total = 0;	// number of instances processed so far
	int    *labels;
	int    max_nr_attr = 64;
	struct feature_node *x;
	dvec_t dec_values;
	ivec_t true_labels;
	int n;

	// predict_values() below is given room for a single decision value.
	if(get_nr_class(model_) > 2)
	{
		fprintf(stderr,"Error: the number of class is not equal to 2\n");
		exit(-1);
	}

	x = Malloc(struct feature_node, max_nr_attr);

	// The bias term, when enabled, occupies the index right after the last
	// feature. The test must match the one that appends the bias node below
	// (bias >= 0); otherwise a bias in [0,1) would reuse the index of the
	// last real feature.
	if(model_->bias >= 0)
		n = get_nr_feature(model_) + 1;
	else
		n = get_nr_feature(model_);

	labels = Malloc(int, get_nr_class(model_));
	get_labels(model_, labels);

	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));
	while(readline(input) != NULL)
	{
		int i = 0;
		double target_label, predict_label;
		char *idx, *val, *label, *endptr;
		int inst_max_index = -1; // strtol gives 0 if wrong format, and precomputed kernel has <index> start from 0

		label = strtok(line," \t");
		if(label == NULL)	// line holds no token at all
			exit_input_error(total+1);
		target_label = strtod(label,&endptr);
		if(endptr == label)
			exit_input_error(total+1);

		while(1)
		{
			if(i>=max_nr_attr - 2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
			}

			idx = strtok(NULL,":");
			val = strtok(NULL," \t");

			if(val == NULL)
				break;
			errno = 0;
			x[i].index = (int) strtol(idx,&endptr,10);
			// Indices must be valid integers in strictly increasing order.
			if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
				exit_input_error(total+1);
			else
				inst_max_index = x[i].index;

			errno = 0;
			x[i].value = strtod(val,&endptr);
			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
				exit_input_error(total+1);

			++i;
		}

		// Append the bias feature when the model uses one.
		if(model_->bias >= 0){
			x[i].index = n;
			x[i].value = model_->bias;
			++i;
		}

		// index -1 terminates the feature list
		x[i].index = -1;

		predict_label = predict(model_,x);
		fprintf(output,"%g\n",predict_label);

		double dec_value;
		predict_values(model_, x, &dec_value);
		true_labels.push_back((target_label > 0)? 1: -1);
		if(labels[0] <= 0) dec_value *= -1;
		dec_values.push_back(dec_value);

		// Keep the line counter current so that input errors report the
		// offending line instead of always line 1.
		++total;
	}

	validation_function(dec_values, true_labels);

	free(labels);
	free(x);
}