double binary_class_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold) { int i; int *fold_start = Malloc(int,nr_fold+1); int l = prob->l; int *perm = Malloc(int,l); int *labels; dvec_t dec_values; ivec_t ty; for(i=0;i<l;i++) perm[i]=i; for(i=0;i<l;i++) { int j = i+rand()%(l-i); std::swap(perm[i],perm[j]); } for(i=0;i<=nr_fold;i++) fold_start[i]=i*l/nr_fold; for(i=0;i<nr_fold;i++) { int begin = fold_start[i]; int end = fold_start[i+1]; int j,k; struct svm_problem subprob; subprob.l = l-(end-begin); subprob.x = Malloc(struct svm_node*,subprob.l); subprob.y = Malloc(double,subprob.l); k=0; for(j=0;j<begin;j++) { subprob.x[k] = prob->x[perm[j]]; subprob.y[k] = prob->y[perm[j]]; ++k; } for(j=end;j<l;j++) { subprob.x[k] = prob->x[perm[j]]; subprob.y[k] = prob->y[perm[j]]; ++k; } struct svm_model *submodel = svm_train(&subprob,param); int svm_type = svm_get_svm_type(submodel); if(svm_type == NU_SVR || svm_type == EPSILON_SVR){ fprintf(stderr, "wrong svm type"); exit(1); } labels = Malloc(int, svm_get_nr_class(submodel)); svm_get_labels(submodel, labels); if(svm_get_nr_class(submodel) > 2) { fprintf(stderr,"Error: the number of class is not equal to 2\n"); exit(-1); } dec_values.resize(end); ty.resize(end); for(j=begin;j<end;j++) { svm_predict_values(submodel,prob->x[perm[j]], &dec_values[j]); ty[j] = (prob->y[perm[j]] > 0)? 1: -1; } if(labels[0] <= 0) { for(j=begin;j<end;j++) dec_values[j] *= -1; } svm_free_and_destroy_model(&submodel); free(subprob.x); free(subprob.y); free(labels); } free(perm); free(fold_start); return validation_function(dec_values, ty); }
void binary_class_predict(FILE *input, FILE *output){ int total = 0; int *labels; int max_nr_attr = 64; struct feature_node *x = Malloc(struct feature_node, max_nr_attr); dvec_t dec_values; ivec_t true_labels; int n; if(model_->bias >= 1) n = get_nr_feature(model_) + 1; else n = get_nr_feature(model_); labels = Malloc(int, get_nr_class(model_)); get_labels(model_, labels); max_line_len = 1024; line = (char *)malloc(max_line_len*sizeof(char)); while(readline(input) != NULL) { int i = 0; double target_label, predict_label; char *idx, *val, *label, *endptr; int inst_max_index = -1; // strtol gives 0 if wrong format, and precomputed kernel has <index> start from 0 label = strtok(line," \t"); target_label = strtod(label,&endptr); if(endptr == label) exit_input_error(total+1); while(1) { if(i>=max_nr_attr - 2) // need one more for index = -1 { max_nr_attr *= 2; x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node)); } idx = strtok(NULL,":"); val = strtok(NULL," \t"); if(val == NULL) break; errno = 0; x[i].index = (int) strtol(idx,&endptr,10); if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index) exit_input_error(total+1); else inst_max_index = x[i].index; errno = 0; x[i].value = strtod(val,&endptr); if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr))) exit_input_error(total+1); ++i; } if(model_->bias >= 0){ x[i].index = n; x[i].value = model_->bias; ++i; } x[i].index = -1; predict_label = predict(model_,x); fprintf(output,"%g\n",predict_label); double dec_value; predict_values(model_, x, &dec_value); true_labels.push_back((target_label > 0)? 1: -1); if(labels[0] <= 0) dec_value *= -1; dec_values.push_back(dec_value); } validation_function(dec_values, true_labels); free(labels); free(x); }