// return classification error and the normalized difference between predicted and true sentiment std::pair<double, double> do_predict(const struct problem *test_prob, struct model* model_) { double acc = 0; double clse=0; int total = 0; double *prob_estimates=NULL; int *labels=NULL; int nr_class=get_nr_class(model_); if(flag_predict_probability) { if(!check_probability_model(model_)) { fprintf(stderr, "probability output is only supported for logistic regression\n"); exit(1); } labels=(int *) malloc(nr_class*sizeof(int)); get_labels(model_,labels); prob_estimates = (double *) malloc(nr_class*sizeof(double)); } int l = test_prob->l; int i = 0; for(i=0; i<l; i++) { int predict_label = 0; int target_label=test_prob->y[i]; feature_node *xi = test_prob->x[i]; if(flag_predict_probability) { int j; predict_label = predict_probability(model_,xi,prob_estimates); double predict_score=0; for(j=0;j<model_->nr_class;j++) predict_score+=prob_estimates[j]*labels[j]; //double acc_max= fabs(target_label-3)+2; //acc+=(acc_max-sqrt((predict_score - target_label)*(predict_score - target_label)))/acc_max; acc += (predict_score - target_label) * (predict_score - target_label); if (predict_label!=target_label) clse++; } else { predict_label = predict(model_,xi); //double acc_max= fabs(target_label-3)+2; //acc+=(acc_max-sqrt((predict_label - target_label)*(predict_label - target_label)))/acc_max; acc += (predict_label - target_label) * (predict_label - target_label); if (predict_label!=target_label) clse++; } ++total; } if(flag_predict_probability) { free(prob_estimates); free(labels); } //printf("Error = %g%% (%d/%d)\n",(double) (total-correct)/total*100,total-correct,total); return std::make_pair(clse/total,acc/total) ; }
// Build a liblinear sparse vector from the document's features and run the
// model on it, storing the predicted class index (and, for probability
// models, the per-class probabilities) back into `doc`. Always returns true.
bool QPredictLinearLearner::predict(QPredictDocument &doc)
{
	QPredictFeatureList &feature_list = doc.feature_list;

	// One node per feature, plus one for the optional bias term and one
	// for the index == -1 terminator liblinear expects.
	const int capacity = static_cast<int>(feature_list.size()) + 2;
	struct feature_node *nodes = new struct feature_node[capacity];

	const int nr_feature = get_nr_feature(m_model);
	const int bias_index = (m_model->bias >= 0) ? nr_feature + 1 : nr_feature;

	// liblinear requires strictly ascending feature indices.
	sort(feature_list.begin(), feature_list.end(), QPredictFeature::feature_compare);

	int pos = 0;
	const QPredictFeatureListIter &end_it = feature_list.end();
	for (QPredictFeatureListIter it = feature_list.begin(); it != end_it; ++it, ++pos)
	{
		nodes[pos].index = it->id;
		nodes[pos].value = it->value;
	}

	if (m_model->bias >= 0)
	{
		nodes[pos].index = bias_index;
		nodes[pos].value = m_model->bias;
		++pos;
	}

	// Sentinel node terminating the sparse vector.
	nodes[pos].index = -1;
	nodes[pos].value = -1;

	if (check_probability_model(m_model))
	{
		doc.predict_class_index = static_cast<uint32_t>(
			::predict_probability(m_model, nodes, doc.predict_class_probs));
	}
	else
	{
		doc.predict_class_index = static_cast<uint32_t>(::predict(m_model, nodes));
	}

	delete [] nodes;
	return true;
}
double LVlinear_predict_probability(lvError *lvErr, const LVlinear_model *model_in, const LVArray_Hdl<LVlinear_node> x_in, LVArray_Hdl<double> prob_estimates_out){ try{ // Input validation: Uninitialized model if (model_in == nullptr || model_in->w == nullptr || (*model_in->w)->dimSize == 0) throw LVException(__FILE__, __LINE__, "Uninitialized model passed to liblinear_predict_probability."); // Input validation: Empty feature vector if (x_in == nullptr || (*x_in)->dimSize == 0) throw LVException(__FILE__, __LINE__, "Empty feature vector passed to liblinear_predict_probability."); // Input validation: Final index -1? if ((*x_in)->elt[(*x_in)->dimSize - 1].index != -1) throw LVException(__FILE__, __LINE__, "The index of the last element of the feature vector needs to be -1 (liblinear_predict_probability)."); // Convert LVsvm_model to svm_model auto mdl = std::make_unique<model>(); LVConvertModel(*model_in, *mdl); // Check probability model int valid_probability = check_probability_model(mdl.get()); if (!valid_probability) throw LVException(__FILE__, __LINE__, "The selected solver type does not support probability output."); // Allocate room for probability estimates LVResizeNumericArrayHandle(prob_estimates_out, mdl->nr_class); (*prob_estimates_out)->dimSize = mdl->nr_class; double highest_prob_label = predict_probability(mdl.get(), reinterpret_cast<feature_node*>((*x_in)->elt), (*prob_estimates_out)->elt); return highest_prob_label; } catch (LVException &ex) { ex.returnError(lvErr); (*prob_estimates_out)->dimSize = 0; return std::nan(""); } catch (std::exception &ex) { LVException::returnStdException(lvErr, __FILE__, __LINE__, ex); (*prob_estimates_out)->dimSize = 0; return std::nan(""); } catch (...) { LVException ex(__FILE__, __LINE__, "Unknown exception has occurred"); ex.returnError(lvErr); (*prob_estimates_out)->dimSize = 0; return std::nan(""); } }
double LVlinear_predict_probability(lvError *lvErr, const LVlinear_model *model_in, const LVArray_Hdl<LVlinear_node> x_in, LVArray_Hdl<double> prob_estimates_out){ try{ // Convert LVsvm_model to svm_model std::unique_ptr<model> model(new model); LVConvertModel(model_in, model.get()); // Check probability model int valid_probability = check_probability_model(model.get()); if (!valid_probability) throw LVException(__FILE__, __LINE__, "The model does not support probability output."); // Allocate room for probability estimates LVResizeNumericArrayHandle(prob_estimates_out, model->nr_class); (*prob_estimates_out)->dimSize = model->nr_class; double highest_prob_label = predict_probability(model.get(), reinterpret_cast<feature_node*>((*x_in)->elt), (*prob_estimates_out)->elt); return highest_prob_label; } catch (LVException &ex) { ex.returnError(lvErr); (*prob_estimates_out)->dimSize = 0; return std::nan(""); } catch (std::exception &ex) { LVException::returnStdException(lvErr, __FILE__, __LINE__, ex); (*prob_estimates_out)->dimSize = 0; return std::nan(""); } catch (...) { LVException ex(__FILE__, __LINE__, "Unknown exception has occurred"); ex.returnError(lvErr); (*prob_estimates_out)->dimSize = 0; return std::nan(""); } }
void do_predict(FILE *input, FILE *output) { int correct = 0; int total = 0; double error = 0; double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0; int nr_class=get_nr_class(model_); double *prob_estimates=NULL; int j, n; int nr_feature=get_nr_feature(model_); if(model_->bias>=0) n=nr_feature+1; else n=nr_feature; if(flag_predict_probability) { int *labels; if(!check_probability_model(model_)) { fprintf(stderr, "probability output is only supported for logistic regression\n"); exit(1); } labels=(int *) malloc(nr_class*sizeof(int)); get_labels(model_,labels); prob_estimates = (double *) malloc(nr_class*sizeof(double)); fprintf(output,"labels"); for(j=0;j<nr_class;j++) fprintf(output," %d",labels[j]); fprintf(output,"\n"); free(labels); } max_line_len = 1024; line = (char *)malloc(max_line_len*sizeof(char)); while(readline(input) != NULL) { int i = 0; double target_label, predict_label; char *idx, *val, *label, *endptr; int inst_max_index = 0; // strtol gives 0 if wrong format label = strtok(line," \t\n"); if(label == NULL) // empty line exit_input_error(total+1); target_label = strtod(label,&endptr); if(endptr == label || *endptr != '\0') exit_input_error(total+1); while(1) { if(i>=max_nr_attr-2) // need one more for index = -1 { max_nr_attr *= 2; x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node)); } idx = strtok(NULL,":"); val = strtok(NULL," \t"); if(val == NULL) break; errno = 0; x[i].index = (int) strtol(idx,&endptr,10); if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index) exit_input_error(total+1); else inst_max_index = x[i].index; errno = 0; x[i].value = strtod(val,&endptr); if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr))) exit_input_error(total+1); // feature indices larger than those in training are not used if(x[i].index <= nr_feature) ++i; } if(model_->bias>=0) { x[i].index = n; x[i].value = model_->bias; i++; } x[i].index = -1; if(model_->normal){ double length = 0; 
for(int kk = 0; x[kk].index != -1; kk++) length += x[kk].value * x[kk].value; length = sqrt(length); for(int kk = 0; x[kk].index != -1; kk++) x[kk].value /= length; } if(flag_predict_probability) { int j; predict_label = predict_probability(model_,x,prob_estimates); fprintf(output,"%g",predict_label); for(j=0;j<model_->nr_class;j++) fprintf(output," %g",prob_estimates[j]); fprintf(output,"\n"); } else { predict_label = predict(model_,x); fprintf(output,"%g\n",predict_label); } if(predict_label == target_label) ++correct; error += (predict_label-target_label)*(predict_label-target_label); sump += predict_label; sumt += target_label; sumpp += predict_label*predict_label; sumtt += target_label*target_label; sumpt += predict_label*target_label; ++total; } if(model_->param.solver_type==L2R_L2LOSS_SVR || model_->param.solver_type==L2R_L1LOSS_SVR_DUAL || model_->param.solver_type==L2R_L2LOSS_SVR_DUAL) { info("Mean squared error = %g (regression)\n",error/total); info("Squared correlation coefficient = %g (regression)\n", ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/ ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt)) ); } else info("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total); if(flag_predict_probability) free(prob_estimates); }
// MATLAB MEX gateway for liblinear prediction:
//   predict(label_vector, instance_matrix, model [, 'liblinear options' [, 'col']])
// NOTE(review): this definition is truncated in this chunk — it ends inside
// the else-branch of the mxIsStruct(prhs[2]) check; the remainder is outside
// the visible source.
void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[] )
{
	int prob_estimate_flag = 0;
	struct model *model_;
	char cmd[CMD_LEN];
	col_format_flag = 0;

	// Expect between 3 and 5 right-hand-side arguments.
	if(nrhs > 5 || nrhs < 3)
	{
		exit_with_help();
		fake_answer(plhs);
		return;
	}
	// Optional 5th argument "col" selects column-format instances.
	if(nrhs == 5)
	{
		mxGetString(prhs[4], cmd, mxGetN(prhs[4])+1);
		if(strcmp(cmd, "col") == 0)
		{
			col_format_flag = 1;
		}
	}

	if(!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1]))
	{
		mexPrintf("Error: label vector and instance matrix must be double\n");
		fake_answer(plhs);
		return;
	}

	if(mxIsStruct(prhs[2]))
	{
		const char *error_msg;

		// parse options
		if(nrhs>=4)
		{
			int i, argc = 1;
			char *argv[CMD_LEN/2];

			// put options in argv[]
			mxGetString(prhs[3], cmd, mxGetN(prhs[3]) + 1);
			if((argv[argc] = strtok(cmd, " ")) != NULL)
				while((argv[++argc] = strtok(NULL, " ")) != NULL)
					;

			// Walk "-x value" pairs; only -b (probability output) is known.
			for(i=1;i<argc;i++)
			{
				if(argv[i][0] != '-') break;
				if(++i>=argc)
				{
					exit_with_help();
					fake_answer(plhs);
					return;
				}
				switch(argv[i-1][1])
				{
					case 'b':
						prob_estimate_flag = atoi(argv[i]);
						break;
					default:
						mexPrintf("unknown option\n");
						exit_with_help();
						fake_answer(plhs);
						return;
				}
			}
		}

		// Rebuild a liblinear model struct from the MATLAB model struct.
		model_ = Malloc(struct model, 1);
		error_msg = matlab_matrix_to_model(model_, prhs[2]);
		if(error_msg)
		{
			mexPrintf("Error: can't read model: %s\n", error_msg);
			free_and_destroy_model(&model_);
			fake_answer(plhs);
			return;
		}

		// -b is only honored for probability-capable models; otherwise it is
		// silently switched off with a warning rather than aborting.
		if(prob_estimate_flag)
		{
			if(!check_probability_model(model_))
			{
				mexPrintf("probability output is only supported for logistic regression\n");
				prob_estimate_flag=0;
			}
		}

		if(mxIsSparse(prhs[1]))
			do_predict(plhs, prhs, model_, prob_estimate_flag);
		else
		{
			mexPrintf("Testing_instance_matrix must be sparse; "
				"use sparse(Testing_instance_matrix) first\n");
			fake_answer(plhs);
		}

		// destroy model_
		free_and_destroy_model(&model_);
	}
	else
	{
void do_predict(FILE *input, FILE *output, struct model* model_) { int correct = 0; int total = 0; int nr_class=get_nr_class(model_); double *prob_estimates=NULL; int j, n; int nr_feature=get_nr_feature(model_); if(model_->bias>=0) n=nr_feature+1; else n=nr_feature; if(flag_predict_probability) { int *labels; if(!check_probability_model(model_)) { fprintf(stderr, "probability output is only supported for logistic regression\n"); exit(1); } labels=(int *) malloc(nr_class*sizeof(int)); get_labels(model_,labels); prob_estimates = (double *) malloc(nr_class*sizeof(double)); fprintf(output,"labels"); for(j=0;j<nr_class;j++) fprintf(output," %d",labels[j]); fprintf(output,"\n"); free(labels); } max_line_len = 1024; line = (char *)malloc(max_line_len*sizeof(char)); while(readline(input) != NULL) { int i = 0; int target_label, predict_label; char *idx, *val, *label, *endptr; int inst_max_index = 0; // strtol gives 0 if wrong format label = strtok(line," \t"); target_label = (int) strtol(label,&endptr,10); if(endptr == label) exit_input_error(total+1); while(1) { if(i>=max_nr_attr-2) // need one more for index = -1 { max_nr_attr *= 2; x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node)); } idx = strtok(NULL,":"); val = strtok(NULL," \t"); if(val == NULL) break; errno = 0; x[i].index = (int) strtol(idx,&endptr,10); if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index) exit_input_error(total+1); else inst_max_index = x[i].index; errno = 0; x[i].value = strtod(val,&endptr); if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr))) exit_input_error(total+1); // feature indices larger than those in training are not used if(x[i].index <= nr_feature) ++i; } if(model_->bias>=0) { x[i].index = n; x[i].value = model_->bias; i++; } x[i].index = -1; if(flag_predict_probability) { int j; predict_label = predict_probability(model_,x,prob_estimates); fprintf(output,"%d",predict_label); for(j=0;j<model_->nr_class;j++) 
fprintf(output," %g",prob_estimates[j]); fprintf(output,"\n"); } else { predict_label = predict(model_,x); fprintf(output,"%d\n",predict_label); } if(predict_label == target_label) ++correct; ++total; } printf("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total); if(flag_predict_probability) free(prob_estimates); }
void do_predict(FILE *input, FILE *output) { int total = 0; int nr_class=get_nr_class(model_); double *prob_estimates=NULL; int n; int nr_feature=get_nr_feature(model_); if(model_->bias>=0) n=nr_feature+1; else n=nr_feature; if(!check_probability_model(model_)) { fprintf(stderr, "probability output is only supported for logistic regression\n"); exit(1); } prob_estimates = (double *) malloc(nr_class*sizeof(double)); max_line_len = 1024; line = (char *)malloc(max_line_len*sizeof(char)); int clicks = 0; int shows = 0; while(readline(input) != NULL) { int i = 0; double target_ctr, predict_ctr; char *idx, *val, *endptr; int inst_max_index = 0; // strtol gives 0 if wrong format char *p = strtok(line," \t\n"); //clicks if(p == NULL) // empty line exit_input_error(total+1); clicks = atoi(p); p = strtok(NULL," \t"); // shows shows = atoi(p); p = strtok(NULL," \t"); // qid:1 if (shows <=0 || clicks > shows) { continue; } target_ctr = (double)clicks / shows; while(1) { if(i>=max_nr_attr-2) // need one more for index = -1 { max_nr_attr *= 2; x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node)); } idx = strtok(NULL,":"); val = strtok(NULL," \t"); if(val == NULL) break; errno = 0; x[i].index = (int) strtol(idx,&endptr,10); if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index) exit_input_error(total+1); else inst_max_index = x[i].index; errno = 0; x[i].value = strtod(val,&endptr); if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr))) exit_input_error(total+1); // feature indices larger than those in training are not used if(x[i].index <= nr_feature) ++i; } if(model_->bias>=0) { x[i].index = n; x[i].value = model_->bias; i++; } x[i].index = -1; predict_probability(model_,x,prob_estimates); fprintf(output,"%d %d ", clicks, shows); predict_ctr = prob_estimates[0]; fprintf(output," %g\n", predict_ctr); } info("total:%d\n",total); free(prob_estimates); }
void do_predict(FILE *input, FILE *output) { int correct = 0; int total = 0; double error = 0; double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0; int nr_class=get_nr_class(model_[0]); double *prob_estimates=NULL; int j, n; int nr_feature=get_nr_feature(model_[0]); if(model_[0]->bias>=0) n=nr_feature+1; else n=nr_feature; if(flag_predict_probability) { int *labels; if(!check_probability_model(model_[0])) { fprintf(stderr, "probability output is only supported for logistic regression\n"); exit(1); } labels=(int *) malloc(nr_class*sizeof(int)); get_labels(model_[0],labels); prob_estimates = (double *) malloc(nr_class*sizeof(double)); fprintf(output,"labels"); for(j=0;j<nr_class;j++) fprintf(output," %d",labels[j]); fprintf(output,"\n"); free(labels); } max_line_len = 1024; line = (char *)malloc(max_line_len*sizeof(char)); while(readline(input) != NULL) { int i = 0; double target_label, predict_label; char *idx, *val, *label, *endptr; int inst_max_index = 0; // strtol gives 0 if wrong format label = strtok(line," \t\n"); if(label == NULL) // empty line exit_input_error(total+1); // target_label = strtod(label,&endptr); switch (label[0]) { case 'A': target_label = 0; break; case 'B': target_label = 1; break; case 'C': target_label = 1; break; case 'D': target_label = 1; break; } // if(endptr == label || *endptr != '\0') // exit_input_error(total+1); for (int pid = 0; pid < sum_pro; pid++) { while(1) { if(i>=max_nr_attr-2) // need one more for index = -1 { max_nr_attr *= 2; x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node)); } idx = strtok(NULL,":"); val = strtok(NULL," \t"); if(val == NULL) break; errno = 0; x[i].index = (int) strtol(idx,&endptr,10); if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index) exit_input_error(total+1); else inst_max_index = x[i].index; errno = 0; x[i].value = strtod(val,&endptr); if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr))) 
exit_input_error(total+1); // feature indices larger than those in training are not used if(x[i].index <= nr_feature) ++i; } if(model_[pid]->bias>=0) { x[i].index = n; x[i].value = model_[pid]->bias; i++; } x[i].index = -1; if(flag_predict_probability) { int j; predict_label = predict_probability(model_[pid],x,prob_estimates); fprintf(output,"%g",predict_label); for(j=0;j<model_[pid]->nr_class;j++) fprintf(output," %g",prob_estimates[j]); fprintf(output,"\n"); } else { p_label[pid] = predict(model_[pid],x); fprintf(output,"%g", p_label[pid]); // printf("pid%dhas done\n",pid ); } fprintf(output, "\n" ); } int count = 0; predict_label = 0; // for ( int l = 0; l < BLOCK ; l++) { // for (int m = 0;m < BLOCK * N; m++) { // // printf("%f\t", p_label[l * BLOCK + m]); // if ( p_label[l * BLOCK + m] == 1) { // // p_label[l] = 1; // // break; // p_label[l]++; // // count++;* 4 // } // } // if (p_label[l] < 4) { // count++; // } // // if ( p_label[l] == 1) { // // predict_label = 1; // // } // // else { // // predict_label = 0; // // } // // if (count >0) { // // predict_label = 1; // // } // // else { // // predict_label = 0; // // } // } // if (count > 0 ) { // predict_label = 0; // } // else { // predict_label = 1; // } // /printf("\n"); // fprintf(output,"%g\n",predict_label); // if(predict_label == target_label) // ++correct; // error += (predict_label-target_label)*(predict_label-target_label); // sump += predict_label; // sumt += target_label; // sumpp += predict_label*predict_label; // sumtt += target_label*target_label; // sumpt += predict_label*target_label; // ++total; } // if(check_regression_model(model_[0])) // { // info("Mean squared error = %g (regression)\n",error/total); // info("Squared correlation coefficient = %g (regression)\n", // ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/ // ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt)) // ); // } // else // info("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total); // 
if(flag_predict_probability) // free(prob_estimates); }