/**
 * Predict the class of a sample and additionally report a confidence score.
 *
 * @param sample  the sample to classify
 * @return pair of (predicted class index, confidence); the class is -1 when
 *         no class probability is strictly positive.
 */
pair<int, float> MondrianForest::predict_class_confident(Sample& sample) {
    /* Confidence bookkeeping, filled in by predict_probability() */
    mondrian_confidence m_conf;
    /* Forest-averaged per-class probabilities */
    arma::fvec class_probs = predict_probability(sample, m_conf);

    /* Arg-max over the probability vector */
    int best_class = -1;
    float best_prob = 0.;
    const int n_classes = int(class_probs.size());
    for (int c = 0; c < n_classes; ++c) {
        if (class_probs[c] > best_prob) {
            best_prob = class_probs[c];
            best_class = c;
        }
    }

    /* Confidence is derived from the probability vector and distance stats */
    float confidence = confidence_prediction(class_probs, m_conf);
    return pair<int, float>(best_class, confidence);
}
// return classification error and the normalized difference between predicted and true sentiment std::pair<double, double> do_predict(const struct problem *test_prob, struct model* model_) { double acc = 0; double clse=0; int total = 0; double *prob_estimates=NULL; int *labels=NULL; int nr_class=get_nr_class(model_); if(flag_predict_probability) { if(!check_probability_model(model_)) { fprintf(stderr, "probability output is only supported for logistic regression\n"); exit(1); } labels=(int *) malloc(nr_class*sizeof(int)); get_labels(model_,labels); prob_estimates = (double *) malloc(nr_class*sizeof(double)); } int l = test_prob->l; int i = 0; for(i=0; i<l; i++) { int predict_label = 0; int target_label=test_prob->y[i]; feature_node *xi = test_prob->x[i]; if(flag_predict_probability) { int j; predict_label = predict_probability(model_,xi,prob_estimates); double predict_score=0; for(j=0;j<model_->nr_class;j++) predict_score+=prob_estimates[j]*labels[j]; //double acc_max= fabs(target_label-3)+2; //acc+=(acc_max-sqrt((predict_score - target_label)*(predict_score - target_label)))/acc_max; acc += (predict_score - target_label) * (predict_score - target_label); if (predict_label!=target_label) clse++; } else { predict_label = predict(model_,xi); //double acc_max= fabs(target_label-3)+2; //acc+=(acc_max-sqrt((predict_label - target_label)*(predict_label - target_label)))/acc_max; acc += (predict_label - target_label) * (predict_label - target_label); if (predict_label!=target_label) clse++; } ++total; } if(flag_predict_probability) { free(prob_estimates); free(labels); } //printf("Error = %g%% (%d/%d)\n",(double) (total-correct)/total*100,total-correct,total); return std::make_pair(clse/total,acc/total) ; }
// LabVIEW wrapper around liblinear's predict_probability().
// Validates the model handle and feature vector, converts the LabVIEW model
// into a liblinear `model`, fills prob_estimates_out with one probability per
// class, and returns the label with the highest probability. On any failure
// the error cluster lvErr is populated, the probability array is emptied and
// NaN is returned.
double LVlinear_predict_probability(lvError *lvErr, const LVlinear_model *model_in, const LVArray_Hdl<LVlinear_node> x_in, LVArray_Hdl<double> prob_estimates_out){
	try{
		// Input validation: Uninitialized model
		if (model_in == nullptr || model_in->w == nullptr || (*model_in->w)->dimSize == 0)
			throw LVException(__FILE__, __LINE__, "Uninitialized model passed to liblinear_predict_probability.");

		// Input validation: Empty feature vector
		if (x_in == nullptr || (*x_in)->dimSize == 0)
			throw LVException(__FILE__, __LINE__, "Empty feature vector passed to liblinear_predict_probability.");

		// Input validation: Final index -1? (liblinear's sparse-vector terminator)
		if ((*x_in)->elt[(*x_in)->dimSize - 1].index != -1)
			throw LVException(__FILE__, __LINE__, "The index of the last element of the feature vector needs to be -1 (liblinear_predict_probability).");

		// Convert LVsvm_model to svm_model
		auto mdl = std::make_unique<model>();
		LVConvertModel(*model_in, *mdl);

		// Check probability model (only some solver types support probabilities)
		int valid_probability = check_probability_model(mdl.get());
		if (!valid_probability)
			throw LVException(__FILE__, __LINE__, "The selected solver type does not support probability output.");

		// Allocate room for probability estimates (one entry per class)
		LVResizeNumericArrayHandle(prob_estimates_out, mdl->nr_class);
		(*prob_estimates_out)->dimSize = mdl->nr_class;

		double highest_prob_label = predict_probability(mdl.get(), reinterpret_cast<feature_node*>((*x_in)->elt), (*prob_estimates_out)->elt);

		return highest_prob_label;
	}
	catch (LVException &ex) {
		ex.returnError(lvErr);
		// NOTE(review): assumes prob_estimates_out is a valid, non-null handle
		// even on the error path -- confirm callers never pass a null handle.
		(*prob_estimates_out)->dimSize = 0;
		return std::nan("");
	}
	catch (std::exception &ex) {
		LVException::returnStdException(lvErr, __FILE__, __LINE__, ex);
		(*prob_estimates_out)->dimSize = 0;
		return std::nan("");
	}
	catch (...) {
		LVException ex(__FILE__, __LINE__, "Unknown exception has occurred");
		ex.returnError(lvErr);
		(*prob_estimates_out)->dimSize = 0;
		return std::nan("");
	}
}
/*
 * Run probability prediction over a dense instance matrix.
 *
 * predict       - dense row-major double matrix (cast from char*)
 * model_        - trained liblinear model
 * predict_dims  - predict_dims[0] is the number of rows
 * dec_values    - output buffer receiving nr_class probabilities per row
 *
 * Returns 0 on success, -1 if the dense-to-sparse conversion fails.
 */
int copy_prob_predict(char *predict, struct model *model_, npy_intp *predict_dims, char *dec_values)
{
	struct feature_node **rows;
	double *out = (double *) dec_values;
	int row;
	int n_rows = predict_dims[0];
	int n_classes = model_->nr_class;

	/* Convert the dense matrix into per-row sparse feature lists. */
	rows = dense_to_sparse((double *) predict, predict_dims, model_->bias);
	if (rows == NULL)
		return -1;

	/* Each row's probabilities land in its own slice of the output buffer. */
	for (row = 0; row < n_rows; ++row) {
		predict_probability(model_, rows[row], out + row * n_classes);
		free(rows[row]);
	}
	free(rows);
	return 0;
}
/*
 * Predict the class label of a sample with the Mondrian forest.
 *
 * Returns -2 when every class is equally probable (no decision possible),
 * otherwise the index of the most probable class (-1 when no probability
 * is strictly positive).
 */
int MondrianForest::predict_class(Sample& sample) {
    mondrian_confidence m_conf;
    /* Forest-averaged per-class probabilities */
    const arma::fvec probabilities = predict_probability(sample, m_conf);

    /* A flat distribution carries no information -> signal with -2 */
    if (equal_elements(probabilities)) {
        return -2;
    }

    /* Arg-max over the probability vector */
    int best = -1;
    float best_prob = 0.;
    const int n_classes = int(probabilities.size());
    for (int c = 0; c < n_classes; ++c) {
        if (probabilities[c] > best_prob) {
            best_prob = probabilities[c];
            best = c;
        }
    }
    return best;
}
/*
 * Predict class-membership probabilities for a dense row-major matrix.
 *
 * model_    - trained liblinear model
 * x         - n_rows x n_cols dense matrix
 * n_classes - number of classes (size of each output row)
 *
 * Returns a heap-allocated n_rows*n_classes array (caller frees),
 * or NULL on allocation/conversion failure.
 */
double* call_predict_proba(const struct model *model_, double* x, int n_rows, int n_cols, int n_classes)
{
	int i, j;
	struct feature_node** fn_x;
	double* result;
	double* proba;

	result = calloc(n_rows * n_classes, sizeof(double));
	proba = calloc(n_classes, sizeof(double));
	fn_x = build_feature_node(x, n_rows, n_cols, -1);
	/* Robustness: fail cleanly instead of dereferencing NULL on OOM. */
	if (result == NULL || proba == NULL || fn_x == NULL) {
		free(result);
		free(proba);
		return NULL;
	}

	for (i = 0; i < n_rows; ++i) {
		predict_probability(model_, fn_x[i], proba);
		for (j = 0; j < n_classes; ++j)
			result[i*n_classes+j] = proba[j];
		/* BUG FIX: the per-row feature arrays were leaked previously.
		 * Assumes build_feature_node returns caller-owned rows, matching the
		 * dense_to_sparse/csr_to_sparse wrappers in this file -- confirm. */
		free(fn_x[i]);
	}
	free(fn_x); /* BUG FIX: the row-pointer array was leaked previously */
	free(proba);
	return result;
}
double LVlinear_predict_probability(lvError *lvErr, const LVlinear_model *model_in, const LVArray_Hdl<LVlinear_node> x_in, LVArray_Hdl<double> prob_estimates_out){ try{ // Convert LVsvm_model to svm_model std::unique_ptr<model> model(new model); LVConvertModel(model_in, model.get()); // Check probability model int valid_probability = check_probability_model(model.get()); if (!valid_probability) throw LVException(__FILE__, __LINE__, "The model does not support probability output."); // Allocate room for probability estimates LVResizeNumericArrayHandle(prob_estimates_out, model->nr_class); (*prob_estimates_out)->dimSize = model->nr_class; double highest_prob_label = predict_probability(model.get(), reinterpret_cast<feature_node*>((*x_in)->elt), (*prob_estimates_out)->elt); return highest_prob_label; } catch (LVException &ex) { ex.returnError(lvErr); (*prob_estimates_out)->dimSize = 0; return std::nan(""); } catch (std::exception &ex) { LVException::returnStdException(lvErr, __FILE__, __LINE__, ex); (*prob_estimates_out)->dimSize = 0; return std::nan(""); } catch (...) { LVException ex(__FILE__, __LINE__, "Unknown exception has occurred"); ex.returnError(lvErr); (*prob_estimates_out)->dimSize = 0; return std::nan(""); } }
/*
 * Run probability prediction over a CSR sparse matrix.
 *
 * The CSR triple (data, index, indptr) is converted into liblinear's
 * per-row feature_node lists; each row's nr_class probabilities are written
 * into consecutive slices of dec_values.
 *
 * Returns 0 on success, -1 if the CSR conversion fails.
 */
int csr_copy_predict_proba(npy_intp n_features, npy_intp *data_size, char *data, npy_intp *index_size, char *index, npy_intp *indptr_shape, char *indptr, struct model *model_, char *dec_values)
{
	struct feature_node **rows;
	double *out = (double *) dec_values;
	int n_rows = (int)(indptr_shape[0] - 1); /* CSR: indptr has n_rows+1 entries */
	int r;

	rows = csr_to_sparse((double *) data, index_size, (int *) index,
	                     indptr_shape, (int *) indptr, model_->bias, n_features);
	if (rows == NULL)
		return -1;

	for (r = 0; r < n_rows; ++r) {
		predict_probability(model_, rows[r], out);
		out += model_->nr_class; /* next row's probability slice */
		free(rows[r]);
	}
	free(rows);
	return 0;
}
void do_predict(FILE *input, FILE *output) { int correct = 0; int total = 0; double error = 0; double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0; int nr_class=get_nr_class(model_); double *prob_estimates=NULL; int j, n; int nr_feature=get_nr_feature(model_); if(model_->bias>=0) n=nr_feature+1; else n=nr_feature; if(flag_predict_probability) { int *labels; if(!check_probability_model(model_)) { fprintf(stderr, "probability output is only supported for logistic regression\n"); exit(1); } labels=(int *) malloc(nr_class*sizeof(int)); get_labels(model_,labels); prob_estimates = (double *) malloc(nr_class*sizeof(double)); fprintf(output,"labels"); for(j=0;j<nr_class;j++) fprintf(output," %d",labels[j]); fprintf(output,"\n"); free(labels); } max_line_len = 1024; line = (char *)malloc(max_line_len*sizeof(char)); while(readline(input) != NULL) { int i = 0; double target_label, predict_label; char *idx, *val, *label, *endptr; int inst_max_index = 0; // strtol gives 0 if wrong format label = strtok(line," \t\n"); if(label == NULL) // empty line exit_input_error(total+1); target_label = strtod(label,&endptr); if(endptr == label || *endptr != '\0') exit_input_error(total+1); while(1) { if(i>=max_nr_attr-2) // need one more for index = -1 { max_nr_attr *= 2; x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node)); } idx = strtok(NULL,":"); val = strtok(NULL," \t"); if(val == NULL) break; errno = 0; x[i].index = (int) strtol(idx,&endptr,10); if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index) exit_input_error(total+1); else inst_max_index = x[i].index; errno = 0; x[i].value = strtod(val,&endptr); if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr))) exit_input_error(total+1); // feature indices larger than those in training are not used if(x[i].index <= nr_feature) ++i; } if(model_->bias>=0) { x[i].index = n; x[i].value = model_->bias; i++; } x[i].index = -1; if(model_->normal){ double length = 0; 
for(int kk = 0; x[kk].index != -1; kk++) length += x[kk].value * x[kk].value; length = sqrt(length); for(int kk = 0; x[kk].index != -1; kk++) x[kk].value /= length; } if(flag_predict_probability) { int j; predict_label = predict_probability(model_,x,prob_estimates); fprintf(output,"%g",predict_label); for(j=0;j<model_->nr_class;j++) fprintf(output," %g",prob_estimates[j]); fprintf(output,"\n"); } else { predict_label = predict(model_,x); fprintf(output,"%g\n",predict_label); } if(predict_label == target_label) ++correct; error += (predict_label-target_label)*(predict_label-target_label); sump += predict_label; sumt += target_label; sumpp += predict_label*predict_label; sumtt += target_label*target_label; sumpt += predict_label*target_label; ++total; } if(model_->param.solver_type==L2R_L2LOSS_SVR || model_->param.solver_type==L2R_L1LOSS_SVR_DUAL || model_->param.solver_type==L2R_L2LOSS_SVR_DUAL) { info("Mean squared error = %g (regression)\n",error/total); info("Squared correlation coefficient = %g (regression)\n", ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/ ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt)) ); } else info("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total); if(flag_predict_probability) free(prob_estimates); }
/*
 * MEX entry helper: predict every instance of a sparse MATLAB test matrix.
 *
 * plhs[0] <- predicted labels (testing_instance_number x 1)
 * plhs[1] <- [accuracy%; MSE; squared correlation coefficient]
 * plhs[2] <- per-class probabilities (if predict_probability_flag) or
 *            decision values (nr_w columns) otherwise
 * prhs[0] =  true label vector, prhs[1] = instance matrix (sparse)
 */
void do_predict(mxArray *plhs[], const mxArray *prhs[], struct model *model_, const int predict_probability_flag)
{
	int label_vector_row_num, label_vector_col_num;
	int feature_number, testing_instance_number;
	int instance_index;
	double *ptr_label, *ptr_predict_label;
	double *ptr_prob_estimates, *ptr_dec_values, *ptr;
	struct feature_node *x;
	mxArray *pplhs[1]; // instance sparse matrix in row format

	int correct = 0;
	int total = 0;
	double error = 0;
	double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;

	int nr_class=get_nr_class(model_);
	int nr_w; /* number of decision-value columns */
	double *prob_estimates=NULL;

	/* Two-class (non-Crammer-Singer) models expose a single decision value. */
	if(nr_class==2 && model_->param.solver_type!=MCSVM_CS)
		nr_w=1;
	else
		nr_w=nr_class;

	// prhs[1] = testing instance matrix
	feature_number = get_nr_feature(model_);
	testing_instance_number = (int) mxGetM(prhs[1]);
	if(col_format_flag)
	{
		feature_number = (int) mxGetM(prhs[1]);
		testing_instance_number = (int) mxGetN(prhs[1]);
	}

	label_vector_row_num = (int) mxGetM(prhs[0]);
	label_vector_col_num = (int) mxGetN(prhs[0]);

	if(label_vector_row_num!=testing_instance_number)
	{
		mexPrintf("Length of label vector does not match # of instances.\n");
		fake_answer(plhs);
		return;
	}
	if(label_vector_col_num!=1)
	{
		mexPrintf("label (1st argument) should be a vector (# of column is 1).\n");
		fake_answer(plhs);
		return;
	}

	ptr_label = mxGetPr(prhs[0]);

	// transpose instance matrix so instances can be read row-wise
	if(col_format_flag)
		pplhs[0] = (mxArray *)prhs[1];
	else
	{
		mxArray *pprhs[1];
		pprhs[0] = mxDuplicateArray(prhs[1]);
		if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose"))
		{
			mexPrintf("Error: cannot transpose testing instance matrix\n");
			fake_answer(plhs);
			return;
		}
	}

	prob_estimates = Malloc(double, nr_class);

	plhs[0] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL);
	if(predict_probability_flag)
		plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class, mxREAL);
	else
		plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_w, mxREAL);

	/* Both aliases point at plhs[2]; only one is used per mode. */
	ptr_predict_label = mxGetPr(plhs[0]);
	ptr_prob_estimates = mxGetPr(plhs[2]);
	ptr_dec_values = mxGetPr(plhs[2]);
	x = Malloc(struct feature_node, feature_number+2);
	for(instance_index=0;instance_index<testing_instance_number;instance_index++)
	{
		int i;
		double target_label, predict_label;

		target_label = ptr_label[instance_index];

		// prhs[1] and prhs[1]^T are sparse
		read_sparse_instance(pplhs[0], instance_index, x, feature_number, model_->bias);

		if(predict_probability_flag)
		{
			predict_label = predict_probability(model_, x, prob_estimates);
			ptr_predict_label[instance_index] = predict_label;
			/* MATLAB matrices are column-major: stride by instance count. */
			for(i=0;i<nr_class;i++)
				ptr_prob_estimates[instance_index + i * testing_instance_number] = prob_estimates[i];
		}
		else
		{
			double *dec_values = Malloc(double, nr_class);
			predict_label = predict_values(model_, x, dec_values);
			ptr_predict_label[instance_index] = predict_label;
			for(i=0;i<nr_w;i++)
				ptr_dec_values[instance_index + i * testing_instance_number] = dec_values[i];
			free(dec_values);
		}

		if(predict_label == target_label)
			++correct;
		/* Running sums for MSE and the squared correlation coefficient. */
		error += (predict_label-target_label)*(predict_label-target_label);
		sump += predict_label;
		sumt += target_label;
		sumpp += predict_label*predict_label;
		sumtt += target_label*target_label;
		sumpt += predict_label*target_label;
		++total;
	}

	if(model_->param.solver_type==L2R_L2LOSS_SVR ||
	   model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
	   model_->param.solver_type==L2R_L2LOSS_SVR_DUAL)
	{
		mexPrintf("Mean squared error = %g (regression)\n",error/total);
		mexPrintf("Squared correlation coefficient = %g (regression)\n",
			((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
			((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
			);
	}
	//else
	//mexPrintf("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100,correct,total);

	// return accuracy, mean squared error, squared correlation coefficient
	plhs[1] = mxCreateDoubleMatrix(3, 1, mxREAL);
	ptr = mxGetPr(plhs[1]);
	ptr[0] = (double)correct/total*100;
	ptr[1] = error/total;
	ptr[2] = ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
				((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt));

	free(x);
	if(prob_estimates != NULL)
		free(prob_estimates);
}
void *predictModelWholeGenome(void *arg) { thread_data_t *data = (thread_data_t *) arg; printf("data->trainedModel is %s\n", data->trainedModel); printf("data->coverageFileList is %s\n", data->coverageFileList); printf("data->trainFile %s\n", data->trainFile); printf("data->paramFile %s\n", data->paramFile); printf("data->chr is %d\n", data->chr); char *trainedModel = data->trainedModel; char *coverageFileList = data->coverageFileList; // char *trainFile = data->trainFile; char *paramFile = data->paramFile; int chr = data->chr; // utility var int i,j,k; // trainedModel struct model *mymodel; if( (mymodel = load_model(trainedModel)) == 0) { printf("cannot load model from file %s\n", trainedModel); return EXIT_SUCCESS; } // coverageFileList int totalCoverageFiles; FILE *coverageFileListFp = NULL; if( (coverageFileListFp = fopen(coverageFileList, "r") ) == NULL) { printf("Cannot open file %s\n", coverageFileList); return EXIT_SUCCESS; } char **coverageFiles = (char **)calloc(MAX_BAM_FILES,sizeof(char *)); for(i = 0; i < MAX_BAM_FILES; i++) { coverageFiles[i] = (char *)calloc(MAX_DIR_LEN, sizeof(char)); } i = 0; while (!feof(coverageFileListFp)) { if (i >= MAX_BAM_FILES) { printf("Error: the number of input coverages files exceeds the limit %d\n", i); return EXIT_SUCCESS; } if( ( fscanf(coverageFileListFp, "%s\n", coverageFiles[i]) ) != 1) { printf("Error: reading %dth from %s\n", i, coverageFileList); return EXIT_SUCCESS; } i++; } totalCoverageFiles = i; fclose(coverageFileListFp); // open coverage Files FILE *coverageFps[totalCoverageFiles]; for(i = 0; i < totalCoverageFiles; i++) { if( (coverageFps[i] = fopen(coverageFiles[i], "rb")) == NULL ) { printf("Error opening coverage file %s\n", coverageFiles[i]); return EXIT_SUCCESS; } } // paramFile struct extractFeatureParam *param = (struct extractFeatureParam *)calloc(1, sizeof(struct extractFeatureParam)); parseParam(paramFile, param); // predict model: by default: predict probability int nr_class = 
get_nr_class(mymodel); double *prob_estimates = (double *)calloc(nr_class, sizeof(double)); // predResult for storing results int totalBins = 0; int cumBins[NUM_SEQ]; for (i = 0; i < NUM_SEQ; i++) { totalBins += (int)(chrlen[i] / param->resolution) + 1; cumBins[i] = totalBins; } // allocate memory for result based on thread data chr // as we are using one thread for each chr float *predResult = (float *)calloc( (int)(chrlen[chr] / param->resolution) + 1, sizeof(float)); // read in feature for each bin and do prediction for(j = 0; j < (int)(chrlen[chr] / param->resolution) + 1; j++) { if(j % 100000 == 0) { printf("Predicting chr%d:%dth bin\n", chr,j); fflush(stdout); } int max_nr_feature = 100; struct feature_node *myX = (struct feature_node *)calloc(max_nr_feature, sizeof(struct feature_node)); int idx = 0; for(k = 0; k < totalCoverageFiles; k++) { float *buffer = (float *)calloc( param->windowSize/param->resolution,sizeof(float)); int offset = j; offset += -(int)((float)(param->windowSize / 2) / (float)param->resolution + 0.5); if(offset < 0 || offset + (int)((float)(param->windowSize) / (float)param->resolution + 0.5) > (int)(chrlen[i] / param->resolution) + 1) { // printf("offset is %d\n", offset); free(buffer); continue; } if(chr != 0) offset += cumBins[chr-1]; // printf("offset is %d\n", offset); fseek(coverageFps[k], offset*sizeof(float), SEEK_SET); fread(buffer, sizeof(float), param->windowSize/param->resolution, coverageFps[k]); int l; // printf("buffer[%d] is:",l); for(l = 0; l < param->windowSize/param->resolution; l++) { // if(j == 289540) printf("%f,",buffer[l]); if(buffer[l] != 0) { myX[idx].index = k*(param->windowSize/param->resolution) + l + 1; myX[idx].value = buffer[l]; idx++; } if(idx >= max_nr_feature -2) { // feature_node is not long enough max_nr_feature *= 2; myX = (struct feature_node *)realloc(myX, max_nr_feature*sizeof(struct feature_node)); } } free(buffer); } // end of loop through coverageFiles // printf("\n"); myX[idx].index = -1; // a 
flag for end of features if(idx == 0) { // printf("idx is %d\n",idx); predResult[j] = 0.0; free(myX); continue; } // printf("nr_feature is %d\n", idx); predict_probability(mymodel, myX, prob_estimates); // printf("num of feature is %d\n", get_nr_feature(mymodel)); // printf("num of class is %d\n", get_nr_class(mymodel)); int *mylabel = (int *)calloc(10, sizeof(int)); // added, in order to get the correct label get_labels(mymodel, mylabel); if(mylabel[0] == 1) { predResult[j] = prob_estimates[0]; } else { predResult[j] = prob_estimates[1]; } free(myX); free(mylabel); } for(i = 0; i < totalCoverageFiles; i++) { fclose(coverageFps[i]); } // free pointers for(i = 0; i < MAX_BAM_FILES; i++) { free(coverageFiles[i]); } free(coverageFiles); free(param); free(prob_estimates); // give address of pointer to this function, so that the function can free the pointer. free_and_destroy_model(&mymodel); pthread_exit((void *) predResult); }
/*
 * MEX entry helper: predict every instance of a sparse MATLAB test matrix.
 *
 * plhs[0] <- predicted labels (testing_instance_number x 1)
 * plhs[1] <- accuracy% (1x1)
 * plhs[2] <- per-class probabilities (if predict_probability_flag) or
 *            decision values (nr_classifier columns) otherwise
 * prhs[0] =  true label vector, prhs[1] = instance matrix (must be sparse)
 */
void do_predict(mxArray *plhs[], const mxArray *prhs[], struct model *model_, const int predict_probability_flag)
{
	int label_vector_row_num, label_vector_col_num;
	int feature_number, testing_instance_number;
	int instance_index;
	double *ptr_instance, *ptr_label, *ptr_predict_label;
	double *ptr_prob_estimates, *ptr_dec_values, *ptr;
	struct feature_node *x;
	mxArray *pplhs[1]; // instance sparse matrix in row format

	int correct = 0;
	int total = 0;

	int nr_class=get_nr_class(model_);
	int nr_classifier; /* number of decision-value columns */
	double *prob_estimates=NULL;

	/* Two-class models expose a single decision value. */
	if(nr_class==2)
		nr_classifier=1;
	else
		nr_classifier=nr_class;

	// prhs[1] = testing instance matrix
	feature_number = mxGetN(prhs[1]);
	testing_instance_number = mxGetM(prhs[1]);
	if(col_format_flag)
	{
		feature_number = mxGetM(prhs[1]);
		testing_instance_number = mxGetN(prhs[1]);
	}

	label_vector_row_num = mxGetM(prhs[0]);
	label_vector_col_num = mxGetN(prhs[0]);

	if(label_vector_row_num!=testing_instance_number)
	{
		mexPrintf("Length of label vector does not match # of instances.\n");
		fake_answer(plhs);
		return;
	}
	if(label_vector_col_num!=1)
	{
		mexPrintf("label (1st argument) should be a vector (# of column is 1).\n");
		fake_answer(plhs);
		return;
	}

	ptr_instance = mxGetPr(prhs[1]);
	ptr_label = mxGetPr(prhs[0]);

	// transpose instance matrix so instances can be read row-wise
	if(mxIsSparse(prhs[1]))
	{
		if(col_format_flag)
		{
			pplhs[0] = (mxArray *)prhs[1];
		}
		else
		{
			mxArray *pprhs[1];
			pprhs[0] = mxDuplicateArray(prhs[1]);
			if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose"))
			{
				mexPrintf("Error: cannot transpose testing instance matrix\n");
				fake_answer(plhs);
				return;
			}
		}
	}
	else
		mexPrintf("Testing_instance_matrix must be sparse\n");

	prob_estimates = Malloc(double, nr_class);

	plhs[0] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL);
	if(predict_probability_flag)
		plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class, mxREAL);
	else
		plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_classifier, mxREAL);

	/* Both aliases point at plhs[2]; only one is used per mode. */
	ptr_predict_label = mxGetPr(plhs[0]);
	ptr_prob_estimates = mxGetPr(plhs[2]);
	ptr_dec_values = mxGetPr(plhs[2]);
	x = Malloc(struct feature_node, feature_number+2);
	for(instance_index=0;instance_index<testing_instance_number;instance_index++)
	{
		int i;
		double target,v;

		target = ptr_label[instance_index];

		// prhs[1] and prhs[1]^T are sparse
		read_sparse_instance(pplhs[0], instance_index, x, feature_number, model_->bias);

		if(predict_probability_flag)
		{
			v = predict_probability(model_, x, prob_estimates);
			ptr_predict_label[instance_index] = v;
			/* MATLAB matrices are column-major: stride by instance count. */
			for(i=0;i<nr_class;i++)
				ptr_prob_estimates[instance_index + i * testing_instance_number] = prob_estimates[i];
		}
		else
		{
			double *dec_values = Malloc(double, nr_class);
			v = predict(model_, x);
			ptr_predict_label[instance_index] = v;

			predict_values(model_, x, dec_values);
			for(i=0;i<nr_classifier;i++)
				ptr_dec_values[instance_index + i * testing_instance_number] = dec_values[i];
			/* BUG FIX: dec_values was allocated every iteration but never
			 * freed, leaking nr_class doubles per instance. */
			free(dec_values);
		}

		if(v == target)
			++correct;
		++total;
	}
	mexPrintf("Accuracy = %g%% (%d/%d)\n", (double)correct/total*100,correct,total);

	// return accuracy
	plhs[1] = mxCreateDoubleMatrix(1, 1, mxREAL);
	ptr = mxGetPr(plhs[1]);
	ptr[0] = (double)correct/total*100;

	free(x);
	if(prob_estimates != NULL)
		free(prob_estimates);
}
void do_predict(FILE *input, FILE *output, struct model* model_) { int correct = 0; int total = 0; int nr_class=get_nr_class(model_); double *prob_estimates=NULL; int j, n; int nr_feature=get_nr_feature(model_); if(model_->bias>=0) n=nr_feature+1; else n=nr_feature; if(flag_predict_probability) { int *labels; if(!check_probability_model(model_)) { fprintf(stderr, "probability output is only supported for logistic regression\n"); exit(1); } labels=(int *) malloc(nr_class*sizeof(int)); get_labels(model_,labels); prob_estimates = (double *) malloc(nr_class*sizeof(double)); fprintf(output,"labels"); for(j=0;j<nr_class;j++) fprintf(output," %d",labels[j]); fprintf(output,"\n"); free(labels); } max_line_len = 1024; line = (char *)malloc(max_line_len*sizeof(char)); while(readline(input) != NULL) { int i = 0; int target_label, predict_label; char *idx, *val, *label, *endptr; int inst_max_index = 0; // strtol gives 0 if wrong format label = strtok(line," \t"); target_label = (int) strtol(label,&endptr,10); if(endptr == label) exit_input_error(total+1); while(1) { if(i>=max_nr_attr-2) // need one more for index = -1 { max_nr_attr *= 2; x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node)); } idx = strtok(NULL,":"); val = strtok(NULL," \t"); if(val == NULL) break; errno = 0; x[i].index = (int) strtol(idx,&endptr,10); if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index) exit_input_error(total+1); else inst_max_index = x[i].index; errno = 0; x[i].value = strtod(val,&endptr); if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr))) exit_input_error(total+1); // feature indices larger than those in training are not used if(x[i].index <= nr_feature) ++i; } if(model_->bias>=0) { x[i].index = n; x[i].value = model_->bias; i++; } x[i].index = -1; if(flag_predict_probability) { int j; predict_label = predict_probability(model_,x,prob_estimates); fprintf(output,"%d",predict_label); for(j=0;j<model_->nr_class;j++) 
fprintf(output," %g",prob_estimates[j]); fprintf(output,"\n"); } else { predict_label = predict(model_,x); fprintf(output,"%d\n",predict_label); } if(predict_label == target_label) ++correct; ++total; } printf("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total); if(flag_predict_probability) free(prob_estimates); }
void do_predict(FILE *input, FILE *output) { int total = 0; int nr_class=get_nr_class(model_); double *prob_estimates=NULL; int n; int nr_feature=get_nr_feature(model_); if(model_->bias>=0) n=nr_feature+1; else n=nr_feature; if(!check_probability_model(model_)) { fprintf(stderr, "probability output is only supported for logistic regression\n"); exit(1); } prob_estimates = (double *) malloc(nr_class*sizeof(double)); max_line_len = 1024; line = (char *)malloc(max_line_len*sizeof(char)); int clicks = 0; int shows = 0; while(readline(input) != NULL) { int i = 0; double target_ctr, predict_ctr; char *idx, *val, *endptr; int inst_max_index = 0; // strtol gives 0 if wrong format char *p = strtok(line," \t\n"); //clicks if(p == NULL) // empty line exit_input_error(total+1); clicks = atoi(p); p = strtok(NULL," \t"); // shows shows = atoi(p); p = strtok(NULL," \t"); // qid:1 if (shows <=0 || clicks > shows) { continue; } target_ctr = (double)clicks / shows; while(1) { if(i>=max_nr_attr-2) // need one more for index = -1 { max_nr_attr *= 2; x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node)); } idx = strtok(NULL,":"); val = strtok(NULL," \t"); if(val == NULL) break; errno = 0; x[i].index = (int) strtol(idx,&endptr,10); if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index) exit_input_error(total+1); else inst_max_index = x[i].index; errno = 0; x[i].value = strtod(val,&endptr); if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr))) exit_input_error(total+1); // feature indices larger than those in training are not used if(x[i].index <= nr_feature) ++i; } if(model_->bias>=0) { x[i].index = n; x[i].value = model_->bias; i++; } x[i].index = -1; predict_probability(model_,x,prob_estimates); fprintf(output,"%d %d ", clicks, shows); predict_ctr = prob_estimates[0]; fprintf(output," %g\n", predict_ctr); } info("total:%d\n",total); free(prob_estimates); }
/*
 * Multi-model variant of do_predict(): reads one libsvm-format line per
 * instance, maps the leading letter grade (A..D) to a binary target, then
 * runs every model in model_[0..sum_pro-1]. Most of the aggregation and
 * accuracy logic below is commented out; the function currently only writes
 * per-model predictions to `output`.
 */
void do_predict(FILE *input, FILE *output)
{
	int correct = 0;
	int total = 0;
	double error = 0;
	double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;

	int nr_class=get_nr_class(model_[0]);
	double *prob_estimates=NULL;
	int j, n;
	int nr_feature=get_nr_feature(model_[0]);
	/* Bias, when enabled, occupies one extra (last) feature slot. */
	if(model_[0]->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;

	if(flag_predict_probability)
	{
		int *labels;

		if(!check_probability_model(model_[0]))
		{
			fprintf(stderr, "probability output is only supported for logistic regression\n");
			exit(1);
		}

		/* Header: the model's label order so probability columns can be interpreted. */
		labels=(int *) malloc(nr_class*sizeof(int));
		get_labels(model_[0],labels);
		prob_estimates = (double *) malloc(nr_class*sizeof(double));
		fprintf(output,"labels");
		for(j=0;j<nr_class;j++)
			fprintf(output," %d",labels[j]);
		fprintf(output,"\n");
		free(labels);
	}

	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));
	while(readline(input) != NULL)
	{
		int i = 0;
		double target_label, predict_label;
		char *idx, *val, *label, *endptr;
		int inst_max_index = 0; // strtol gives 0 if wrong format

		label = strtok(line," \t\n");
		if(label == NULL) // empty line
			exit_input_error(total+1);

		// target_label = strtod(label,&endptr);
		/* Letter grade -> binary target (A = 0, everything else = 1).
		 * NOTE(review): target_label stays uninitialized when label[0] is not
		 * one of 'A'..'D' -- there is no default case; confirm the inputs. */
		switch (label[0]) {
		case 'A':
			target_label = 0;
			break;
		case 'B':
			target_label = 1;
			break;
		case 'C':
			target_label = 1;
			break;
		case 'D':
			target_label = 1;
			break;
		}
		// if(endptr == label || *endptr != '\0')
		// 	exit_input_error(total+1);

		/* Run each of the sum_pro models. NOTE(review): strtok() keeps
		 * consuming the same line buffer, so only the first iteration parses
		 * features; i, x, and inst_max_index carry over across models. */
		for (int pid = 0; pid < sum_pro; pid++) {
			while(1)
			{
				if(i>=max_nr_attr-2)	// need one more for index = -1
				{
					max_nr_attr *= 2;
					x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
				}

				idx = strtok(NULL,":");
				val = strtok(NULL," \t");
				if(val == NULL)
					break;

				errno = 0;
				x[i].index = (int) strtol(idx,&endptr,10);
				if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
					exit_input_error(total+1);
				else
					inst_max_index = x[i].index;

				errno = 0;
				x[i].value = strtod(val,&endptr);
				if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
					exit_input_error(total+1);

				// feature indices larger than those in training are not used
				if(x[i].index <= nr_feature)
					++i;
			}

			if(model_[pid]->bias>=0)
			{
				x[i].index = n;
				x[i].value = model_[pid]->bias;
				i++;
			}
			x[i].index = -1;	/* sparse-vector terminator */

			if(flag_predict_probability)
			{
				int j;
				predict_label = predict_probability(model_[pid],x,prob_estimates);
				fprintf(output,"%g",predict_label);
				for(j=0;j<model_[pid]->nr_class;j++)
					fprintf(output," %g",prob_estimates[j]);
				fprintf(output,"\n");
			}
			else
			{
				p_label[pid] = predict(model_[pid],x);
				fprintf(output,"%g", p_label[pid]);
				// printf("pid%dhas done\n",pid );
			}
			fprintf(output, "\n" );
		}

		int count = 0;
		predict_label = 0;
		/* Disabled majority-vote aggregation over the per-model labels: */
		// for ( int l = 0; l < BLOCK ; l++) {
		// 	for (int m = 0;m < BLOCK * N; m++) {
		// 		// printf("%f\t", p_label[l * BLOCK + m]);
		// 		if ( p_label[l * BLOCK + m] == 1) {
		// 			// p_label[l] = 1;
		// 			// break;
		// 			p_label[l]++;
		// 			// count++;* 4
		// 		}
		// 	}
		// 	if (p_label[l] < 4) {
		// 		count++;
		// 	}
		// 	// if ( p_label[l] == 1) {
		// 	// 	predict_label = 1;
		// 	// }
		// 	// else {
		// 	// 	predict_label = 0;
		// 	// }
		// 	// if (count >0) {
		// 	// 	predict_label = 1;
		// 	// }
		// 	// else {
		// 	// 	predict_label = 0;
		// 	// }
		// }
		// if (count > 0 ) {
		// 	predict_label = 0;
		// }
		// else {
		// 	predict_label = 1;
		// }
		// /printf("\n");
		// fprintf(output,"%g\n",predict_label);
		// if(predict_label == target_label)
		// 	++correct;
		// error += (predict_label-target_label)*(predict_label-target_label);
		// sump += predict_label;
		// sumt += target_label;
		// sumpp += predict_label*predict_label;
		// sumtt += target_label*target_label;
		// sumpt += predict_label*target_label;
		// ++total;
	}
	/* Disabled summary reporting: */
	// if(check_regression_model(model_[0]))
	// {
	// 	info("Mean squared error = %g (regression)\n",error/total);
	// 	info("Squared correlation coefficient = %g (regression)\n",
	// 		((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
	// 		((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
	// 		);
	// }
	// else
	// 	info("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
	/* NOTE(review): the final free is commented out, so prob_estimates leaks
	 * when flag_predict_probability is set. */
	// if(flag_predict_probability)
	// 	free(prob_estimates);
}
/*
 * Older-style predictor: reads "label idx:val ..." instances from `input`
 * character-by-character with fscanf/getc, predicts each with the given
 * model, and writes the predicted label (optionally followed by per-class
 * probabilities) to `output`. Prints the final accuracy to stdout.
 */
void do_predict(FILE *input, FILE *output, struct model* model_)
{
	int correct = 0;
	int total = 0;

	int nr_class=get_nr_class(model_);
	double *prob_estimates=NULL;
	int j, n;
	int nr_feature=get_nr_feature(model_);
	/* Bias, when enabled, occupies one extra (last) feature slot. */
	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;

	if(flag_predict_probability)
	{
		int *labels;

		if(model_->param.solver_type!=L2_LR)
		{
			fprintf(stderr, "probability output is only supported for logistic regression\n");
			return;
		}

		/* Header: the model's label order so probability columns can be interpreted. */
		labels=(int *) malloc(nr_class*sizeof(int));
		get_labels(model_,labels);
		prob_estimates = (double *) malloc(nr_class*sizeof(double));
		fprintf(output,"labels");
		for(j=0;j<nr_class;j++)
			fprintf(output," %d",labels[j]);
		fprintf(output,"\n");
		free(labels);
	}
	while(1)
	{
		int i = 0;
		int c;
		double target;
		int target_label, predict_label;

		/* Leading number on the line is the true label. */
		if (fscanf(input,"%lf",&target)==EOF)
			break;
		target_label=(int)target;

		/* Parse "index:value" pairs until end of line (via the goto below),
		 * growing the x buffer as needed. */
		while(1)
		{
			if(i>=max_nr_attr-2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
			}

			/* Skip whitespace; newline/EOF ends this instance. */
			do {
				c = getc(input);
				if(c=='\n' || c==EOF) goto out2;
			} while(isspace(c));
			ungetc(c,input);
			if (fscanf(input,"%d:%lf",&x[i].index,&x[i].value) < 2)
			{
				fprintf(stderr,"Wrong input format at line %d\n", total+1);
				exit(1);
			}
			// feature indices larger than those in training are not used
			if(x[i].index<=nr_feature)
				++i;
		}

out2:
		if(model_->bias>=0)
		{
			x[i].index = n;
			x[i].value = model_->bias;
			i++;
		}
		x[i].index = -1;	/* sparse-vector terminator */

		if(flag_predict_probability)
		{
			int j;
			predict_label = predict_probability(model_,x,prob_estimates);
			fprintf(output,"%d ",predict_label);
			for(j=0;j<model_->nr_class;j++)
				fprintf(output,"%g ",prob_estimates[j]);
			fprintf(output,"\n");
		}
		else
		{
			predict_label = predict(model_,x);
			fprintf(output,"%d\n",predict_label);
		}

		if(predict_label == target_label)
			++correct;
		++total;
	}
	printf("Accuracy = %g%% (%d/%d)\n", (double)correct/total*100,correct,total);
	if(flag_predict_probability)
		free(prob_estimates);
}