// Revised from function 'svm_predict_probability' for HIK
double hik_predict_probability(const svm_model *model, Array2dC<double>& eval, const int m, const int upper_bound, svm_inode *x, double *prob_estimates)
{
    if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
        model->probA != NULL && model->probB != NULL)
    {
        int i;
        int nr_class = model->nr_class;
        double *dec_values = new double[nr_class*(nr_class-1)/2];
        hik_predict_values(model, eval, m, upper_bound, x, dec_values);

        // Map each pairwise decision value to a probability with Platt's sigmoid,
        // clipped to [min_prob, 1-min_prob] to avoid degenerate 0/1 estimates
        double min_prob = 1e-7;
        double **pairwise_prob = new double*[nr_class];
        for (i = 0; i < nr_class; i++)
            pairwise_prob[i] = new double[nr_class];

        int k = 0;
        for (i = 0; i < nr_class; i++)
            for (int j = i+1; j < nr_class; j++)
            {
                pairwise_prob[i][j] = std::min(std::max(sigmoid_predict(dec_values[k], model->probA[k], model->probB[k]), min_prob), 1-min_prob);
                pairwise_prob[j][i] = 1 - pairwise_prob[i][j];
                k++;
            }

        // Couple the pairwise probabilities into per-class estimates
        multiclass_probability(nr_class, pairwise_prob, prob_estimates);

        // Predict the class with the highest probability estimate
        int prob_max_idx = 0;
        for (i = 1; i < nr_class; i++)
            if (prob_estimates[i] > prob_estimates[prob_max_idx])
                prob_max_idx = i;

        for (i = 0; i < nr_class; i++)
            delete[] pairwise_prob[i];
        delete[] pairwise_prob;
        delete[] dec_values;

        return model->label[prob_max_idx];
    }
    else
        return hik_predict(model, eval, m, upper_bound, x);
}
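// hik_predict_probability() above (and do_predict() below) rely on sigmoid_predict()
// to turn a raw decision value into a calibrated probability. sigmoid_predict() is
// defined elsewhere in the SVM sources; the function below is only a reference
// sketch, assuming the standard LIBSVM Platt-scaling form p = 1/(1+exp(A*f+B)) with
// fitted parameters A (probA) and B (probB). The name sigmoid_predict_sketch and
// the use of <cmath>'s exp() are illustrative assumptions, not part of this project.
static double sigmoid_predict_sketch(double decision_value, double A, double B)
{
    double fApB = decision_value*A + B;
    // evaluate 1/(1+exp(fApB)) in whichever algebraically equivalent form
    // avoids overflowing exp() for large |fApB|
    if (fApB >= 0)
        return exp(-fApB)/(1.0+exp(-fApB));
    else
        return 1.0/(1.0+exp(fApB));
}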
void do_predict(FILE *input, FILE *output, FILE *output2)
{
    int correct = 0;
    int total = 0;
    double error = 0;
    double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;

    int nr_class = get_nr_class(model_);
    double *dec_values = (double *) malloc(nr_class*sizeof(double));
    double prob = 0;
    double *prob_estimates = NULL;
    int j, n;
    int nr_feature = get_nr_feature(model_);
    if (model_->bias >= 0)
        n = nr_feature+1;
    else
        n = nr_feature;

    if (flag_predict_probability)
    {
        int *labels;

        if (!check_probability_model(model_))
        {
            fprintf(stderr, "probability output is only supported for logistic regression\n");
            exit(1);
        }

        labels = (int *) malloc(nr_class*sizeof(int));
        get_labels(model_, labels);
        prob_estimates = (double *) malloc(nr_class*sizeof(double));
        fprintf(output, "labels");
        for (j = 0; j < nr_class; j++)
            fprintf(output, " %d", labels[j]);
        fprintf(output, "\n");
        free(labels);
    }

    max_line_len = 1024;
    line = (char *) malloc(max_line_len*sizeof(char));
    while (readline(input) != NULL)
    {
        int i = 0;
        double target_label, predict_label;
        char *idx, *val, *label, *endptr;
        int inst_max_index = 0; // strtol gives 0 if wrong format

        label = strtok(line, " \t\n");
        if (label == NULL) // empty line
            exit_input_error(total+1);

        target_label = strtod(label, &endptr);
        if (endptr == label || *endptr != '\0')
            exit_input_error(total+1);

        while (1)
        {
            if (i >= max_nr_attr-2) // need one more for index = -1
            {
                max_nr_attr *= 2;
                x = (struct feature_node *) realloc(x, max_nr_attr*sizeof(struct feature_node));
            }

            idx = strtok(NULL, ":");
            val = strtok(NULL, " \t");

            if (val == NULL)
                break;
            errno = 0;
            x[i].index = (int) strtol(idx, &endptr, 10);
            if (endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
                exit_input_error(total+1);
            else
                inst_max_index = x[i].index;

            errno = 0;
            x[i].value = strtod(val, &endptr);
            if (endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
                exit_input_error(total+1);

            // feature indices larger than those in training are not used
            if (x[i].index <= nr_feature)
                ++i;
        }

        if (model_->bias >= 0)
        {
            x[i].index = n;
            x[i].value = model_->bias;
            i++;
        }
        x[i].index = -1;

        if (flag_predict_probability)
        {
            int j;
            predict_label = predict_probability(model_, x, prob_estimates);
            fprintf(output, "%g", predict_label);
            for (j = 0; j < model_->nr_class; j++)
                fprintf(output, " %g", prob_estimates[j]);
            fprintf(output, "\n");
        }
        else
        {
            predict_label = predict_values(model_, x, dec_values);
            fprintf(output, "%g\n", predict_label);
            // transfer the score into probability
            prob = sigmoid_predict(dec_values[0], model_->probA, model_->probB);
            fprintf(output2, "%4g\n", prob);
        }

        if (predict_label == target_label)
            ++correct;
        error += (predict_label-target_label)*(predict_label-target_label);
        sump += predict_label;
        sumt += target_label;
        sumpp += predict_label*predict_label;
        sumtt += target_label*target_label;
        sumpt += predict_label*target_label;
        ++total;
    }

    if (model_->param.solver_type == L2R_L2LOSS_SVR ||
        model_->param.solver_type == L2R_L1LOSS_SVR_DUAL ||
        model_->param.solver_type == L2R_L2LOSS_SVR_DUAL)
    {
        info("Mean squared error = %g (regression)\n", error/total);
        info("Squared correlation coefficient = %g (regression)\n",
            ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
            ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt)));
    }
    else
        info("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100, correct, total);

    if (flag_predict_probability)
        free(prob_estimates);
    free(dec_values);
}
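// A minimal, hypothetical driver for do_predict(), assuming the globals used above
// (model_, x, max_nr_attr, flag_predict_probability, line, ...) are declared in this
// file as in LIBLINEAR's predict.c, and that load_model() reads a model carrying the
// probA/probB fields referenced above. File names are placeholders and error checking
// is omitted; this is a usage sketch, not the project's actual main().
int run_prediction(void)
{
    FILE *input   = fopen("test.txt", "r");         // test data in LIBSVM format
    FILE *output  = fopen("predictions.txt", "w");  // predicted labels (and class probabilities)
    FILE *output2 = fopen("scores.txt", "w");       // sigmoid-calibrated decision scores

    model_ = load_model("model.txt");               // LIBLINEAR model loader
    x = (struct feature_node *) malloc(max_nr_attr*sizeof(struct feature_node));

    do_predict(input, output, output2);

    free_and_destroy_model(&model_);
    free(x);
    free(line);
    fclose(input);
    fclose(output);
    fclose(output2);
    return 0;
}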