void do_predict(FILE *input, FILE *output)
{
	int correct = 0;
	int total = 0;
	double error = 0;
	double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;

	int nr_class=get_nr_class(model_);
	double *prob_estimates=NULL;
	int j, n;
	int nr_feature=get_nr_feature(model_);
	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;

	if(flag_predict_probability)
	{
		int *labels;

		if(!check_probability_model(model_))
		{
			fprintf(stderr, "probability output is only supported for logistic regression\n");
			exit(1);
		}

		labels=(int *) malloc(nr_class*sizeof(int));
		get_labels(model_,labels);
		prob_estimates = (double *) malloc(nr_class*sizeof(double));
		fprintf(output,"labels");
		for(j=0;j<nr_class;j++)
			fprintf(output," %d",labels[j]);
		fprintf(output,"\n");
		free(labels);
	}

	// read one test instance per line (LIBSVM format), predict it, and accumulate statistics
	max_line_len = 100000000;
	line = (char *)malloc(max_line_len*sizeof(char));
	while(readline(input) != NULL)
	{
		int i = 0;
		double target_label, predict_label;
		char *idx, *val, *label, *endptr;
		int inst_max_index = 0; // strtol gives 0 if wrong format

		label = strtok(line," \t\n");
		if(label == NULL) // empty line
			exit_input_error(total+1);

		target_label = strtod(label,&endptr);
		if(endptr == label || *endptr != '\0')
			exit_input_error(total+1);

		// parse the index:value pairs of this instance
		while(1)
		{
			if(i>=max_nr_attr-2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
			}

			idx = strtok(NULL,":");
			val = strtok(NULL," \t");

			if(val == NULL)
				break;
			errno = 0;
			x[i].index = (int) strtol(idx,&endptr,10);
			if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
				exit_input_error(total+1);
			else
				inst_max_index = x[i].index;

			errno = 0;
			x[i].value = strtod(val,&endptr);
			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
				exit_input_error(total+1);

			// feature indices larger than those in training are not used
			if(x[i].index <= nr_feature)
				++i;
		}

		if(model_->bias>=0)
		{
			x[i].index = n;
			x[i].value = model_->bias;
			i++;
		}
		x[i].index = -1;

		if(flag_predict_probability)
		{
			int j;
			predict_label = predict_probability(model_,x,prob_estimates);
			fprintf(output,"%g",predict_label);
			for(j=0;j<model_->nr_class;j++)
				fprintf(output," %g",prob_estimates[j]);
			fprintf(output,"\n");
		}
		else
		{
			predict_label = predict(model_,x);
			fprintf(output,"%g\n",predict_label);
		}

		if(predict_label == target_label)
			++correct;
		error += (predict_label-target_label)*(predict_label-target_label);
		sump += predict_label;
		sumt += target_label;
		sumpp += predict_label*predict_label;
		sumtt += target_label*target_label;
		sumpt += predict_label*target_label;
		++total;
	}

	if(check_regression_model(model_))
	{
		info("Mean squared error = %g (regression)\n",error/total);
		info("Squared correlation coefficient = %g (regression)\n",
			((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
			((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
			);
	}
	else
		info("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
	if(flag_predict_probability)
		free(prob_estimates);
}
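/*
 * A minimal driver sketch (not the library's shipped main()) showing how the
 * command-line do_predict() above can be invoked.  It assumes the usual
 * file-scope globals from LIBLINEAR's predict.c are declared elsewhere in
 * this translation unit: model_, x, line, max_line_len, max_nr_attr and
 * flag_predict_probability.  load_model() and free_and_destroy_model() are
 * the LIBLINEAR API calls declared in linear.h.
 */
int main(int argc, char **argv)
{
	FILE *input, *output;

	if(argc != 4)
	{
		fprintf(stderr, "usage: %s test_file model_file output_file\n", argv[0]);
		return 1;
	}

	input = fopen(argv[1], "r");
	output = fopen(argv[3], "w");
	if(input == NULL || output == NULL)
	{
		fprintf(stderr, "can't open input/output file\n");
		return 1;
	}

	if((model_ = load_model(argv[2])) == NULL)
	{
		fprintf(stderr, "can't open model file %s\n", argv[2]);
		return 1;
	}

	// initial buffer for parsed features; do_predict() grows it as needed
	x = (struct feature_node *) malloc(max_nr_attr*sizeof(struct feature_node));

	do_predict(input, output);

	free_and_destroy_model(&model_);
	free(x);
	free(line);
	fclose(input);
	fclose(output);
	return 0;
}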
/*
 * MEX-interface variant of do_predict(): takes the label vector and test
 * instance matrix from MATLAB arrays (prhs) and returns predictions,
 * accuracy statistics and decision values / probability estimates via plhs.
 */
void do_predict(int nlhs, mxArray *plhs[], const mxArray *prhs[], struct model *model_, const int predict_probability_flag)
{
	int label_vector_row_num, label_vector_col_num;
	int feature_number, testing_instance_number;
	int instance_index;
	double *ptr_label, *ptr_predict_label;
	double *ptr_prob_estimates, *ptr_dec_values, *ptr;
	struct feature_node *x;
	mxArray *pplhs[1]; // instance sparse matrix in row format
	mxArray *tplhs[3]; // temporary storage for plhs[]

	int correct = 0;
	int total = 0;
	double error = 0;
	double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;

	int nr_class=get_nr_class(model_);
	int nr_w;
	double *prob_estimates=NULL;

	if(nr_class==2 && model_->param.solver_type!=MCSVM_CS)
		nr_w=1;
	else
		nr_w=nr_class;

	// prhs[1] = testing instance matrix
	feature_number = get_nr_feature(model_);
	testing_instance_number = (int) mxGetM(prhs[1]);
	if(col_format_flag)
	{
		feature_number = (int) mxGetM(prhs[1]);
		testing_instance_number = (int) mxGetN(prhs[1]);
	}

	label_vector_row_num = (int) mxGetM(prhs[0]);
	label_vector_col_num = (int) mxGetN(prhs[0]);

	if(label_vector_row_num!=testing_instance_number)
	{
		mexPrintf("Length of label vector does not match # of instances.\n");
		fake_answer(nlhs, plhs);
		return;
	}
	if(label_vector_col_num!=1)
	{
		mexPrintf("label (1st argument) should be a vector (# of column is 1).\n");
		fake_answer(nlhs, plhs);
		return;
	}

	ptr_label = mxGetPr(prhs[0]);

	// transpose instance matrix
	if(col_format_flag)
		pplhs[0] = (mxArray *)prhs[1];
	else
	{
		mxArray *pprhs[1];
		pprhs[0] = mxDuplicateArray(prhs[1]);
		if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose"))
		{
			mexPrintf("Error: cannot transpose testing instance matrix\n");
			fake_answer(nlhs, plhs);
			return;
		}
	}

	prob_estimates = Malloc(double, nr_class);

	tplhs[0] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL);
	if(predict_probability_flag)
		tplhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class, mxREAL);
	else
		tplhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_w, mxREAL);

	ptr_predict_label = mxGetPr(tplhs[0]);
	ptr_prob_estimates = mxGetPr(tplhs[2]);
	ptr_dec_values = mxGetPr(tplhs[2]);
	x = Malloc(struct feature_node, feature_number+2);
	for(instance_index=0;instance_index<testing_instance_number;instance_index++)
	{
		int i;
		double target_label, predict_label;

		target_label = ptr_label[instance_index];

		// prhs[1] and prhs[1]^T are sparse
		read_sparse_instance(pplhs[0], instance_index, x, feature_number, model_->bias);

		if(predict_probability_flag)
		{
			predict_label = predict_probability(model_, x, prob_estimates);
			ptr_predict_label[instance_index] = predict_label;
			for(i=0;i<nr_class;i++)
				ptr_prob_estimates[instance_index + i * testing_instance_number] = prob_estimates[i];
		}
		else
		{
			double *dec_values = Malloc(double, nr_class);
			predict_label = predict_values(model_, x, dec_values);
			ptr_predict_label[instance_index] = predict_label;

			for(i=0;i<nr_w;i++)
				ptr_dec_values[instance_index + i * testing_instance_number] = dec_values[i];
			free(dec_values);
		}

		if(predict_label == target_label)
			++correct;
		error += (predict_label-target_label)*(predict_label-target_label);
		sump += predict_label;
		sumt += target_label;
		sumpp += predict_label*predict_label;
		sumtt += target_label*target_label;
		sumpt += predict_label*target_label;
		++total;
	}

	if(check_regression_model(model_))
	{
		info("Mean squared error = %g (regression)\n",error/total);
		info("Squared correlation coefficient = %g (regression)\n",
			((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
			((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
			);
	}
	else
		info("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100,correct,total);

	// return accuracy, mean squared error, squared correlation coefficient
	tplhs[1] = mxCreateDoubleMatrix(3, 1, mxREAL);
	ptr = mxGetPr(tplhs[1]);
	ptr[0] = (double)correct/total*100;
	ptr[1] = error/total;
	ptr[2] = ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
				((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt));

	free(x);
	if(prob_estimates != NULL)
		free(prob_estimates);

	switch(nlhs)
	{
		case 3:
			plhs[2] = tplhs[2];
			plhs[1] = tplhs[1];
		case 1:
		case 0:
			plhs[0] = tplhs[0];
	}
}
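/*
 * A minimal sketch of how the MEX-style do_predict() above is typically
 * driven from a mexFunction.  The model-conversion helper named here
 * (matlab_struct_to_model) is an assumption for illustration only; the real
 * MATLAB interface ships its own converter alongside this file.
 * fake_answer() is the same empty-output helper used in do_predict() above,
 * and free_and_destroy_model() is the LIBLINEAR API call from linear.h.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
	struct model *model_;
	int prob_estimate_flag = 0;

	// prhs[0]: label vector, prhs[1]: instance matrix, prhs[2]: model struct
	if(nrhs < 3)
	{
		fake_answer(nlhs, plhs);
		return;
	}

	model_ = matlab_struct_to_model(prhs[2]);  // hypothetical converter
	if(model_ == NULL)
	{
		fake_answer(nlhs, plhs);
		return;
	}

	do_predict(nlhs, plhs, prhs, model_, prob_estimate_flag);
	free_and_destroy_model(&model_);
}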