void do_predict(FILE *input, FILE *output)
{
	std::vector<double> pred_values;	// store decision values
	std::vector<double> true_values;	// store true values
	int total = 0;

	int nr_class = get_nr_class(model_);
	int *labels = Malloc(int, nr_class);
	get_labels(model_, labels);
	double *prob_estimates = NULL;
	int j, n;
	int nr_feature = get_nr_feature(model_);
	if(model_->bias >= 0)
		n = nr_feature + 1;
	else
		n = nr_feature;

	// multiclass is not supported yet
	assert(nr_class == 2);

	// print out header...
	if(output_option == 2)
	{
		prob_estimates = Malloc(double, nr_class);
		fprintf(output, "labels");
		for(j = 0; j < nr_class; j++)
			fprintf(output, " %d", labels[j]);
		fprintf(output, "\n");
	}
bool QPredictLinearLearner::predict(QPredictDocument &doc)
{
	QPredictFeatureList &feature_list = doc.feature_list;

	// one extra slot for the bias term, one for the index = -1 sentinel
	int num_space = feature_list.size() + 2;
	struct feature_node *x_space = new struct feature_node[num_space];

	int nr_feature = get_nr_feature(m_model);
	int n;
	if (m_model->bias >= 0)
		n = nr_feature + 1;
	else
		n = nr_feature;

	// liblinear expects feature indices in ascending order
	sort(feature_list.begin(), feature_list.end(), QPredictFeature::feature_compare);

	const QPredictFeatureListIter &feature_end_it = feature_list.end();
	int j = 0;
	for (QPredictFeatureListIter feature_it = feature_list.begin();
	     feature_it != feature_end_it; ++feature_it)
	{
		x_space[j].index = feature_it->id;
		x_space[j].value = feature_it->value;
		++j;
	}

	if (m_model->bias >= 0)
	{
		x_space[j].index = n;
		x_space[j].value = m_model->bias;
		++j;
	}

	// sentinel terminating the sparse vector
	x_space[j].index = -1;
	x_space[j].value = -1;

	if (check_probability_model(m_model))
	{
		doc.predict_class_index = static_cast<uint32_t>(
			::predict_probability(m_model, x_space, doc.predict_class_probs));
	}
	else
	{
		doc.predict_class_index = static_cast<uint32_t>(
			::predict(m_model, x_space));
	}

	delete [] x_space;
	return true;
}
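/*
 * A minimal, self-contained sketch of the sparse-vector convention the
 * wrapper above relies on: liblinear's predict() takes an array of
 * struct feature_node terminated by index = -1, with the bias (if set)
 * appended as feature n = nr_feature + 1.  The model path "model.txt"
 * and the hard-coded feature values are illustrative assumptions, not
 * part of the original code.
 */
#include <stdio.h>
#include "linear.h"

int predict_one_example(void)
{
	struct model *m = load_model("model.txt");	/* hypothetical path */
	if (m == NULL)
		return -1;

	/* sparse vector: feature 1 = 0.5, feature 3 = 1.25, indices ascending */
	struct feature_node x[4];
	x[0].index = 1;  x[0].value = 0.5;
	x[1].index = 3;  x[1].value = 1.25;
	int j = 2;
	if (m->bias >= 0)
	{
		x[j].index = get_nr_feature(m) + 1;
		x[j].value = m->bias;
		++j;
	}
	x[j].index = -1;	/* sentinel */

	double label = predict(m, x);
	printf("predicted label: %g\n", label);
	free_and_destroy_model(&m);
	return 0;
}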
void do_predict(FILE *input, FILE *output)
{
	int correct = 0;
	int total = 0;
	double error = 0;
	double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;

	int nr_class = get_nr_class(model_);
	double *prob_estimates = NULL;
	int j, n;
	int nr_feature = get_nr_feature(model_);
	if(model_->bias >= 0)
		n = nr_feature + 1;
	else
		n = nr_feature;

	if(flag_predict_probability)
	{
		int *labels;

		if(!check_probability_model(model_))
		{
			fprintf(stderr, "probability output is only supported for logistic regression\n");
			exit(1);
		}

		labels = (int *) malloc(nr_class*sizeof(int));
		get_labels(model_, labels);
		prob_estimates = (double *) malloc(nr_class*sizeof(double));
		fprintf(output, "labels");
		for(j = 0; j < nr_class; j++)
			fprintf(output, " %d", labels[j]);
		fprintf(output, "\n");
		free(labels);
	}

	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));
	while(readline(input) != NULL)
	{
		int i = 0;
		double target_label, predict_label;
		char *idx, *val, *label, *endptr;
		int inst_max_index = 0; // strtol gives 0 if wrong format

		label = strtok(line, " \t\n");
		if(label == NULL) // empty line
			exit_input_error(total+1);

		target_label = strtod(label, &endptr);
		if(endptr == label || *endptr != '\0')
			exit_input_error(total+1);

		while(1)
		{
			if(i >= max_nr_attr-2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x, max_nr_attr*sizeof(struct feature_node));
			}

			idx = strtok(NULL, ":");
			val = strtok(NULL, " \t");

			if(val == NULL)
				break;

			errno = 0;
			x[i].index = (int) strtol(idx, &endptr, 10);
			if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
				exit_input_error(total+1);
			else
				inst_max_index = x[i].index;

			errno = 0;
			x[i].value = strtod(val, &endptr);
			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
				exit_input_error(total+1);

			// feature indices larger than those in training are not used
			if(x[i].index <= nr_feature)
				++i;
		}

		if(model_->bias >= 0)
		{
			x[i].index = n;
			x[i].value = model_->bias;
			i++;
		}
		x[i].index = -1;

		if(model_->normal)
		{
			double length = 0;
			for(int kk = 0; x[kk].index != -1; kk++)
				length += x[kk].value * x[kk].value;
			length = sqrt(length);
			if(length > 0)	// guard against division by zero on all-zero instances
				for(int kk = 0; x[kk].index != -1; kk++)
					x[kk].value /= length;
		}

		if(flag_predict_probability)
		{
			int j;
			predict_label = predict_probability(model_, x, prob_estimates);
			fprintf(output, "%g", predict_label);
			for(j = 0; j < model_->nr_class; j++)
				fprintf(output, " %g", prob_estimates[j]);
			fprintf(output, "\n");
		}
		else
		{
			predict_label = predict(model_, x);
			fprintf(output, "%g\n", predict_label);
		}

		if(predict_label == target_label)
			++correct;
		error += (predict_label-target_label)*(predict_label-target_label);
		sump += predict_label;
		sumt += target_label;
		sumpp += predict_label*predict_label;
		sumtt += target_label*target_label;
		sumpt += predict_label*target_label;
		++total;
	}

	if(model_->param.solver_type==L2R_L2LOSS_SVR ||
	   model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
	   model_->param.solver_type==L2R_L2LOSS_SVR_DUAL)
	{
		info("Mean squared error = %g (regression)\n", error/total);
		info("Squared correlation coefficient = %g (regression)\n",
			((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
			((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
			);
	}
	else
		info("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100, correct, total);
	if(flag_predict_probability)
		free(prob_estimates);
}
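/*
 * A small self-contained sketch of the regression metrics printed above:
 * mean squared error and the squared Pearson correlation, computed from
 * the same five running sums the loop accumulates.  Function and variable
 * names here are illustrative, not from the original code.
 */
#include <stdio.h>

static void regression_metrics(const double *pred, const double *truth, int total)
{
	double error = 0, sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;
	for(int i = 0; i < total; i++)
	{
		double p = pred[i], t = truth[i];
		error += (p-t)*(p-t);
		sump  += p;   sumt  += t;
		sumpp += p*p; sumtt += t*t; sumpt += p*t;
	}
	double mse = error/total;
	/* r^2 = cov(p,t)^2 / (var(p) var(t)); the 1/total factors cancel */
	double r2 = ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt)) /
	            ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt));
	printf("MSE = %g, squared correlation = %g\n", mse, r2);
}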
void do_predict(mxArray *plhs[], const mxArray *prhs[], struct model *model_, const int predict_probability_flag)
{
	int label_vector_row_num, label_vector_col_num;
	int feature_number, testing_instance_number;
	int instance_index;
	double *ptr_label, *ptr_predict_label;
	double *ptr_prob_estimates, *ptr_dec_values, *ptr;
	struct feature_node *x;
	mxArray *pplhs[1]; // instance sparse matrix in row format

	int correct = 0;
	int total = 0;
	double error = 0;
	double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;

	int nr_class = get_nr_class(model_);
	int nr_w;
	double *prob_estimates = NULL;

	if(nr_class==2 && model_->param.solver_type!=MCSVM_CS)
		nr_w = 1;
	else
		nr_w = nr_class;

	// prhs[1] = testing instance matrix
	feature_number = get_nr_feature(model_);
	testing_instance_number = (int) mxGetM(prhs[1]);
	if(col_format_flag)
	{
		feature_number = (int) mxGetM(prhs[1]);
		testing_instance_number = (int) mxGetN(prhs[1]);
	}

	label_vector_row_num = (int) mxGetM(prhs[0]);
	label_vector_col_num = (int) mxGetN(prhs[0]);

	if(label_vector_row_num != testing_instance_number)
	{
		mexPrintf("Length of label vector does not match # of instances.\n");
		fake_answer(plhs);
		return;
	}
	if(label_vector_col_num != 1)
	{
		mexPrintf("label (1st argument) should be a vector (# of column is 1).\n");
		fake_answer(plhs);
		return;
	}

	ptr_label = mxGetPr(prhs[0]);

	// transpose instance matrix
	if(col_format_flag)
		pplhs[0] = (mxArray *)prhs[1];
	else
	{
		mxArray *pprhs[1];
		pprhs[0] = mxDuplicateArray(prhs[1]);
		if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose"))
		{
			mexPrintf("Error: cannot transpose testing instance matrix\n");
			fake_answer(plhs);
			return;
		}
	}

	prob_estimates = Malloc(double, nr_class);

	plhs[0] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL);
	if(predict_probability_flag)
		plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class, mxREAL);
	else
		plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_w, mxREAL);

	ptr_predict_label = mxGetPr(plhs[0]);
	ptr_prob_estimates = mxGetPr(plhs[2]);
	ptr_dec_values = mxGetPr(plhs[2]);
	x = Malloc(struct feature_node, feature_number+2);

	for(instance_index = 0; instance_index < testing_instance_number; instance_index++)
	{
		int i;
		double target_label, predict_label;

		target_label = ptr_label[instance_index];

		// prhs[1] and prhs[1]^T are sparse
		read_sparse_instance(pplhs[0], instance_index, x, feature_number, model_->bias);

		if(predict_probability_flag)
		{
			predict_label = predict_probability(model_, x, prob_estimates);
			ptr_predict_label[instance_index] = predict_label;
			for(i = 0; i < nr_class; i++)
				ptr_prob_estimates[instance_index + i * testing_instance_number] = prob_estimates[i];
		}
		else
		{
			double *dec_values = Malloc(double, nr_class);
			predict_label = predict_values(model_, x, dec_values);
			ptr_predict_label[instance_index] = predict_label;
			for(i = 0; i < nr_w; i++)
				ptr_dec_values[instance_index + i * testing_instance_number] = dec_values[i];
			free(dec_values);
		}

		if(predict_label == target_label)
			++correct;
		error += (predict_label-target_label)*(predict_label-target_label);
		sump += predict_label;
		sumt += target_label;
		sumpp += predict_label*predict_label;
		sumtt += target_label*target_label;
		sumpt += predict_label*target_label;
		++total;
	}

	if(model_->param.solver_type==L2R_L2LOSS_SVR ||
	   model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
	   model_->param.solver_type==L2R_L2LOSS_SVR_DUAL)
	{
		mexPrintf("Mean squared error = %g (regression)\n", error/total);
		mexPrintf("Squared correlation coefficient = %g (regression)\n",
			((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
			((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
			);
	}
	// else
	//	mexPrintf("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100, correct, total);

	// return accuracy, mean squared error, squared correlation coefficient
	plhs[1] = mxCreateDoubleMatrix(3, 1, mxREAL);
	ptr = mxGetPr(plhs[1]);
	ptr[0] = (double)correct/total*100;
	ptr[1] = error/total;
	ptr[2] = ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
		 ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt));

	free(x);
	if(prob_estimates != NULL)
		free(prob_estimates);
}
int QPredictLinearLearner::num_of_features()
{
	assert(m_model != NULL);
	return get_nr_feature(m_model);
}
void do_predict(FILE *input, FILE *output, struct model* model_)
{
	int correct = 0;
	int total = 0;

	int nr_class = get_nr_class(model_);
	double *prob_estimates = NULL;
	int j, n;
	int nr_feature = get_nr_feature(model_);
	if(model_->bias >= 0)
		n = nr_feature + 1;
	else
		n = nr_feature;

	if(flag_predict_probability)
	{
		int *labels;

		if(!check_probability_model(model_))
		{
			fprintf(stderr, "probability output is only supported for logistic regression\n");
			exit(1);
		}

		labels = (int *) malloc(nr_class*sizeof(int));
		get_labels(model_, labels);
		prob_estimates = (double *) malloc(nr_class*sizeof(double));
		fprintf(output, "labels");
		for(j = 0; j < nr_class; j++)
			fprintf(output, " %d", labels[j]);
		fprintf(output, "\n");
		free(labels);
	}

	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));
	while(readline(input) != NULL)
	{
		int i = 0;
		int target_label, predict_label;
		char *idx, *val, *label, *endptr;
		int inst_max_index = 0; // strtol gives 0 if wrong format

		label = strtok(line, " \t");
		if(label == NULL) // empty line
			exit_input_error(total+1);

		target_label = (int) strtol(label, &endptr, 10);
		if(endptr == label)
			exit_input_error(total+1);

		while(1)
		{
			if(i >= max_nr_attr-2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x, max_nr_attr*sizeof(struct feature_node));
			}

			idx = strtok(NULL, ":");
			val = strtok(NULL, " \t");

			if(val == NULL)
				break;

			errno = 0;
			x[i].index = (int) strtol(idx, &endptr, 10);
			if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
				exit_input_error(total+1);
			else
				inst_max_index = x[i].index;

			errno = 0;
			x[i].value = strtod(val, &endptr);
			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
				exit_input_error(total+1);

			// feature indices larger than those in training are not used
			if(x[i].index <= nr_feature)
				++i;
		}

		if(model_->bias >= 0)
		{
			x[i].index = n;
			x[i].value = model_->bias;
			i++;
		}
		x[i].index = -1;

		if(flag_predict_probability)
		{
			int j;
			predict_label = predict_probability(model_, x, prob_estimates);
			fprintf(output, "%d", predict_label);
			for(j = 0; j < model_->nr_class; j++)
				fprintf(output, " %g", prob_estimates[j]);
			fprintf(output, "\n");
		}
		else
		{
			predict_label = predict(model_, x);
			fprintf(output, "%d\n", predict_label);
		}

		if(predict_label == target_label)
			++correct;
		++total;
	}
	printf("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100, correct, total);
	if(flag_predict_probability)
		free(prob_estimates);
}
void do_predict(mxArray *plhs[], const mxArray *prhs[], struct model *model_, const int predict_probability_flag)
{
	int label_vector_row_num, label_vector_col_num;
	int feature_number, testing_instance_number;
	int instance_index;
	double *ptr_instance, *ptr_label, *ptr_predict_label;
	double *ptr_prob_estimates, *ptr_dec_values, *ptr;
	struct feature_node *x;
	mxArray *pplhs[1]; // instance sparse matrix in row format

	int correct = 0;
	int total = 0;

	int nr_class = get_nr_class(model_);
	int nr_w;
	double *prob_estimates = NULL;

	if(nr_class==2 && model_->param.solver_type!=MCSVM_CS)
		nr_w = 1;
	else
		nr_w = nr_class;

	// prhs[1] = testing instance matrix
	feature_number = get_nr_feature(model_);
	testing_instance_number = (int) mxGetM(prhs[1]);
	if(col_format_flag)
	{
		feature_number = (int) mxGetM(prhs[1]);
		testing_instance_number = (int) mxGetN(prhs[1]);
	}

	label_vector_row_num = (int) mxGetM(prhs[0]);
	label_vector_col_num = (int) mxGetN(prhs[0]);

	if(label_vector_row_num != testing_instance_number)
	{
		mexPrintf("Length of label vector does not match # of instances.\n");
		fake_answer(plhs);
		return;
	}
	if(label_vector_col_num != 1)
	{
		mexPrintf("label (1st argument) should be a vector (# of column is 1).\n");
		fake_answer(plhs);
		return;
	}

	ptr_instance = mxGetPr(prhs[1]);
	ptr_label = mxGetPr(prhs[0]);

	// transpose instance matrix
	if(mxIsSparse(prhs[1]))
	{
		if(col_format_flag)
			pplhs[0] = (mxArray *)prhs[1];
		else
		{
			mxArray *pprhs[1];
			pprhs[0] = mxDuplicateArray(prhs[1]);
			if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose"))
			{
				mexPrintf("Error: cannot transpose testing instance matrix\n");
				fake_answer(plhs);
				return;
			}
		}
	}
	else
	{
		// dense input is unsupported; bail out instead of using pplhs[0] uninitialized
		mexPrintf("Testing_instance_matrix must be sparse\n");
		fake_answer(plhs);
		return;
	}

	prob_estimates = Malloc(double, nr_class);

	plhs[0] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL);
	if(predict_probability_flag)
		plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class, mxREAL);
	else
		plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_w, mxREAL);

	ptr_predict_label = mxGetPr(plhs[0]);
	ptr_prob_estimates = mxGetPr(plhs[2]);
	ptr_dec_values = mxGetPr(plhs[2]);
	x = Malloc(struct feature_node, feature_number+2);

	for(instance_index = 0; instance_index < testing_instance_number; instance_index++)
	{
		int i;
		double target, v;

		target = ptr_label[instance_index];

		// prhs[1] and prhs[1]^T are sparse
		read_sparse_instance(pplhs[0], instance_index, x, feature_number, model_->bias);

		if(predict_probability_flag)
		{
			v = predict_probability(model_, x, prob_estimates);
			ptr_predict_label[instance_index] = v;
			for(i = 0; i < nr_class; i++)
				ptr_prob_estimates[instance_index + i * testing_instance_number] = prob_estimates[i];
		}
		else
		{
			double *dec_values = Malloc(double, nr_class);
			v = predict(model_, x);
			ptr_predict_label[instance_index] = v;
			predict_values(model_, x, dec_values);
			for(i = 0; i < nr_w; i++)
				ptr_dec_values[instance_index + i * testing_instance_number] = dec_values[i];
			free(dec_values);
		}

		if(v == target)
			++correct;
		++total;
	}

	mexPrintf("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100, correct, total);

	// return accuracy
	plhs[1] = mxCreateDoubleMatrix(1, 1, mxREAL);
	ptr = mxGetPr(plhs[1]);
	ptr[0] = (double) correct/total*100;

	free(x);
	if(prob_estimates != NULL)
		free(prob_estimates);
}
void binary_class_predict(FILE *input, FILE *output)
{
	int total = 0;
	int *labels;
	int max_nr_attr = 64;
	struct feature_node *x = Malloc(struct feature_node, max_nr_attr);
	dvec_t dec_values;
	ivec_t true_labels;

	int n;
	if(model_->bias >= 0)
		n = get_nr_feature(model_) + 1;
	else
		n = get_nr_feature(model_);

	labels = Malloc(int, get_nr_class(model_));
	get_labels(model_, labels);

	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));
	while(readline(input) != NULL)
	{
		int i = 0;
		double target_label, predict_label;
		char *idx, *val, *label, *endptr;
		int inst_max_index = -1; // strtol gives 0 if wrong format, and precomputed kernel has <index> start from 0

		label = strtok(line, " \t");
		target_label = strtod(label, &endptr);
		if(endptr == label)
			exit_input_error(total+1);

		while(1)
		{
			if(i >= max_nr_attr-2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x, max_nr_attr*sizeof(struct feature_node));
			}

			idx = strtok(NULL, ":");
			val = strtok(NULL, " \t");

			if(val == NULL)
				break;

			errno = 0;
			x[i].index = (int) strtol(idx, &endptr, 10);
			if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
				exit_input_error(total+1);
			else
				inst_max_index = x[i].index;

			errno = 0;
			x[i].value = strtod(val, &endptr);
			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
				exit_input_error(total+1);

			++i;
		}

		if(model_->bias >= 0)
		{
			x[i].index = n;
			x[i].value = model_->bias;
			++i;
		}
		x[i].index = -1;

		predict_label = predict(model_, x);
		fprintf(output, "%g\n", predict_label);

		double dec_value;
		predict_values(model_, x, &dec_value);
		true_labels.push_back((target_label > 0) ? 1 : -1);
		if(labels[0] <= 0)
			dec_value *= -1;	// orient decision value so the positive class scores high
		dec_values.push_back(dec_value);
		++total;
	}

	validation_function(dec_values, true_labels);

	free(labels);
	free(x);
}
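/*
 * One plausible validation_function for the decision values collected above:
 * AUC by sorting.  A sketch under the assumptions that dvec_t/ivec_t are
 * std::vector<double>/std::vector<int> and that true labels are +1/-1;
 * the original validation_function is defined elsewhere and may compute
 * a different metric.
 */
#include <algorithm>
#include <cstdio>
#include <vector>

typedef std::vector<double> dvec_t;
typedef std::vector<int> ivec_t;

static double auc(const dvec_t &dec_values, const ivec_t &true_labels)
{
	std::vector<size_t> order(dec_values.size());
	for(size_t i = 0; i < order.size(); ++i)
		order[i] = i;
	std::sort(order.begin(), order.end(),
		[&](size_t a, size_t b){ return dec_values[a] > dec_values[b]; });

	size_t tp = 0, fp = 0;
	double area = 0;
	for(size_t k = 0; k < order.size(); ++k)
	{
		if(true_labels[order[k]] > 0)
			++tp;		// one more positive ranked above everything below
		else
		{
			area += tp;	// this negative is out-ranked by tp positives
			++fp;
		}
	}
	if(tp == 0 || fp == 0)
		return 0;
	return area / ((double)tp * fp);	// correctly ordered pairs / all pairs
}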
void do_predict(FILE *input, FILE *output)
{
	int total = 0;
	int n;
	int nr_feature = get_nr_feature(model_);
	double *dvec_t;
	double *ivec_t;
	int *query;

	n = nr_feature;

	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));

	// first pass: count instances so the arrays can be sized
	while(readline(input) != NULL)
		total++;
	rewind(input);

	dvec_t = new double[total];	// predicted values
	ivec_t = new double[total];	// target values
	query = new int[total];

	total = 0;
	while(readline(input) != NULL)
	{
		int i = 0;
		double target_label, predict_label;
		char *idx, *val, *label, *endptr;
		int inst_max_index = 0; // strtol gives 0 if wrong format

		query[total] = 0;

		label = strtok(line, " \t\n");
		if(label == NULL) // empty line
			exit_input_error(total+1);

		target_label = strtod(label, &endptr);
		if(endptr == label || *endptr != '\0')
			exit_input_error(total+1);
		ivec_t[total] = target_label;

		while(1)
		{
			if(i >= max_nr_attr-2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x, max_nr_attr*sizeof(struct feature_node));
			}

			idx = strtok(NULL, ":");
			val = strtok(NULL, " \t");

			if(val == NULL)
				break;

			if(strcmp(idx, "qid") == 0)
			{
				errno = 0;
				query[total] = (int) strtol(val, &endptr, 10);
				if(endptr == val || errno != 0 || *endptr != '\0')
					exit_input_error(i+1);
				continue;
			}

			errno = 0;
			x[i].index = (int) strtol(idx, &endptr, 10);
			if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
				exit_input_error(total+1);
			else
				inst_max_index = x[i].index;

			errno = 0;
			x[i].value = strtod(val, &endptr);
			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
				exit_input_error(total+1);

			// feature indices larger than those in training are not used
			if(x[i].index <= nr_feature)
				++i;
		}
		x[i].index = -1;

		predict_label = predict(model_, x);
		fprintf(output, "%.10f\n", predict_label);
		dvec_t[total++] = predict_label;
	}

	double result[3];
	eval_list(ivec_t, dvec_t, query, total, result);
	info("Pairwise Accuracy = %g%%\n", result[0]*100);
	info("MeanNDCG (LETOR) = %g\n", result[1]);
	info("NDCG (YAHOO) = %g\n", result[2]);

	delete[] dvec_t;
	delete[] ivec_t;
	delete[] query;
}
void do_predict(FILE *input, FILE *output)
{
	int total = 0;

	int nr_class = get_nr_class(model_);
	double *prob_estimates = NULL;
	int n;
	int nr_feature = get_nr_feature(model_);
	if(model_->bias >= 0)
		n = nr_feature + 1;
	else
		n = nr_feature;

	if(!check_probability_model(model_))
	{
		fprintf(stderr, "probability output is only supported for logistic regression\n");
		exit(1);
	}
	prob_estimates = (double *) malloc(nr_class*sizeof(double));

	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));

	int clicks = 0;
	int shows = 0;
	while(readline(input) != NULL)
	{
		int i = 0;
		double target_ctr, predict_ctr;
		char *idx, *val, *endptr;
		int inst_max_index = 0; // strtol gives 0 if wrong format

		char *p = strtok(line, " \t\n"); // clicks
		if(p == NULL) // empty line
			exit_input_error(total+1);
		clicks = atoi(p);
		p = strtok(NULL, " \t"); // shows
		shows = atoi(p);
		p = strtok(NULL, " \t"); // qid:1

		if(shows <= 0 || clicks > shows)
			continue;

		target_ctr = (double)clicks / shows;

		while(1)
		{
			if(i >= max_nr_attr-2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x, max_nr_attr*sizeof(struct feature_node));
			}

			idx = strtok(NULL, ":");
			val = strtok(NULL, " \t");

			if(val == NULL)
				break;

			errno = 0;
			x[i].index = (int) strtol(idx, &endptr, 10);
			if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
				exit_input_error(total+1);
			else
				inst_max_index = x[i].index;

			errno = 0;
			x[i].value = strtod(val, &endptr);
			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
				exit_input_error(total+1);

			// feature indices larger than those in training are not used
			if(x[i].index <= nr_feature)
				++i;
		}

		if(model_->bias >= 0)
		{
			x[i].index = n;
			x[i].value = model_->bias;
			i++;
		}
		x[i].index = -1;

		predict_probability(model_, x, prob_estimates);
		fprintf(output, "%d %d ", clicks, shows);
		predict_ctr = prob_estimates[0];
		fprintf(output, " %g\n", predict_ctr);
		++total;	// count processed instances so the summary below is meaningful
	}
	info("total:%d\n", total);
	free(prob_estimates);
}
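/*
 * predict_ctr = prob_estimates[0] above assumes the model's first label is
 * the positive (click) class.  A defensive sketch: look up the column for a
 * given label via get_labels() before reading the probability.  Illustrative
 * only; the encoding "label 1 = click" is an assumption.
 */
#include <stdlib.h>
#include "linear.h"

static double prob_of_label(const struct model *m, const double *prob_estimates, int wanted_label)
{
	int nr_class = get_nr_class(m);
	int *labels = (int *) malloc(nr_class*sizeof(int));
	get_labels(m, labels);

	double p = 0;
	for(int k = 0; k < nr_class; k++)
		if(labels[k] == wanted_label)
			p = prob_estimates[k];	// column k holds P(class == labels[k])
	free(labels);
	return p;
}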
void do_predict(FILE *input, FILE *output)
{
	int correct = 0;
	int total = 0;
	double error = 0;
	double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;

	int nr_class = get_nr_class(model_[0]);
	double *prob_estimates = NULL;
	int j, n;
	int nr_feature = get_nr_feature(model_[0]);
	if(model_[0]->bias >= 0)
		n = nr_feature + 1;
	else
		n = nr_feature;

	if(flag_predict_probability)
	{
		int *labels;

		if(!check_probability_model(model_[0]))
		{
			fprintf(stderr, "probability output is only supported for logistic regression\n");
			exit(1);
		}

		labels = (int *) malloc(nr_class*sizeof(int));
		get_labels(model_[0], labels);
		prob_estimates = (double *) malloc(nr_class*sizeof(double));
		fprintf(output, "labels");
		for(j = 0; j < nr_class; j++)
			fprintf(output, " %d", labels[j]);
		fprintf(output, "\n");
		free(labels);
	}

	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));
	while(readline(input) != NULL)
	{
		int i = 0;
		double target_label, predict_label;
		char *idx, *val, *label, *endptr;
		int inst_max_index = 0; // strtol gives 0 if wrong format

		label = strtok(line, " \t\n");
		if(label == NULL) // empty line
			exit_input_error(total+1);

		// class letter is mapped to a binary target instead of strtod()
		switch(label[0])
		{
			case 'A': target_label = 0; break;
			case 'B':
			case 'C':
			case 'D': target_label = 1; break;
			default: exit_input_error(total+1);
		}

		for(int pid = 0; pid < sum_pro; pid++)
		{
			while(1)
			{
				if(i >= max_nr_attr-2)	// need one more for index = -1
				{
					max_nr_attr *= 2;
					x = (struct feature_node *) realloc(x, max_nr_attr*sizeof(struct feature_node));
				}

				idx = strtok(NULL, ":");
				val = strtok(NULL, " \t");

				if(val == NULL)
					break;

				errno = 0;
				x[i].index = (int) strtol(idx, &endptr, 10);
				if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
					exit_input_error(total+1);
				else
					inst_max_index = x[i].index;

				errno = 0;
				x[i].value = strtod(val, &endptr);
				if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
					exit_input_error(total+1);

				// feature indices larger than those in training are not used
				if(x[i].index <= nr_feature)
					++i;
			}

			if(model_[pid]->bias >= 0)
			{
				x[i].index = n;
				x[i].value = model_[pid]->bias;
				i++;
			}
			x[i].index = -1;

			if(flag_predict_probability)
			{
				int j;
				predict_label = predict_probability(model_[pid], x, prob_estimates);
				fprintf(output, "%g", predict_label);
				for(j = 0; j < model_[pid]->nr_class; j++)
					fprintf(output, " %g", prob_estimates[j]);
				fprintf(output, "\n");
			}
			else
			{
				p_label[pid] = predict(model_[pid], x);
				fprintf(output, "%g", p_label[pid]);
			}
			fprintf(output, "\n");
		}

		int count = 0;
		predict_label = 0;

		// The per-block voting scheme and the accuracy/MSE bookkeeping are
		// disabled in this variant:
		//
		// for(int l = 0; l < BLOCK; l++)
		// {
		//	for(int m = 0; m < BLOCK * N; m++)
		//		if(p_label[l * BLOCK + m] == 1)
		//			p_label[l]++;
		//	if(p_label[l] < 4)
		//		count++;
		// }
		// predict_label = (count > 0) ? 0 : 1;
		// fprintf(output, "%g\n", predict_label);
		//
		// if(predict_label == target_label)
		//	++correct;
		// error += (predict_label-target_label)*(predict_label-target_label);
		// sump += predict_label;
		// sumt += target_label;
		// sumpp += predict_label*predict_label;
		// sumtt += target_label*target_label;
		// sumpt += predict_label*target_label;
		// ++total;
	}

	// if(check_regression_model(model_[0]))
	// {
	//	info("Mean squared error = %g (regression)\n", error/total);
	//	info("Squared correlation coefficient = %g (regression)\n",
	//		((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
	//		((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
	//		);
	// }
	// else
	//	info("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100, correct, total);
	// if(flag_predict_probability)
	//	free(prob_estimates);
}
void do_predict(FILE *input, FILE *output, struct model* model_)
{
	int correct = 0;
	int total = 0;

	int nr_class = get_nr_class(model_);
	double *prob_estimates = NULL;
	int j, n;
	int nr_feature = get_nr_feature(model_);
	if(model_->bias >= 0)
		n = nr_feature + 1;
	else
		n = nr_feature;

	if(flag_predict_probability)
	{
		int *labels;

		if(model_->param.solver_type != L2_LR)
		{
			fprintf(stderr, "probability output is only supported for logistic regression\n");
			return;
		}

		labels = (int *) malloc(nr_class*sizeof(int));
		get_labels(model_, labels);
		prob_estimates = (double *) malloc(nr_class*sizeof(double));
		fprintf(output, "labels");
		for(j = 0; j < nr_class; j++)
			fprintf(output, " %d", labels[j]);
		fprintf(output, "\n");
		free(labels);
	}

	while(1)
	{
		int i = 0;
		int c;
		double target;
		int target_label, predict_label;

		if(fscanf(input, "%lf", &target) == EOF)
			break;
		target_label = (int)target;

		while(1)
		{
			if(i >= max_nr_attr-2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x, max_nr_attr*sizeof(struct feature_node));
			}

			do {
				c = getc(input);
				if(c=='\n' || c==EOF) goto out2;
			} while(isspace(c));
			ungetc(c, input);

			if(fscanf(input, "%d:%lf", &x[i].index, &x[i].value) < 2)
			{
				fprintf(stderr, "Wrong input format at line %d\n", total+1);
				exit(1);
			}

			// feature indices larger than those in training are not used
			if(x[i].index <= nr_feature)
				++i;
		}

out2:
		if(model_->bias >= 0)
		{
			x[i].index = n;
			x[i].value = model_->bias;
			i++;
		}
		x[i].index = -1;

		if(flag_predict_probability)
		{
			int j;
			predict_label = predict_probability(model_, x, prob_estimates);
			fprintf(output, "%d ", predict_label);
			for(j = 0; j < model_->nr_class; j++)
				fprintf(output, "%g ", prob_estimates[j]);
			fprintf(output, "\n");
		}
		else
		{
			predict_label = predict(model_, x);
			fprintf(output, "%d\n", predict_label);
		}

		if(predict_label == target_label)
			++correct;
		++total;
	}

	printf("Accuracy = %g%% (%d/%d)\n", (double)correct/total*100, correct, total);
	if(flag_predict_probability)
		free(prob_estimates);
}