// Start of a prediction routine that reads from `input` and writes to
// `output` using the file-level model_.
// NOTE(review): only the set-up part of the body is present here; the
// prediction loop and the closing brace are not visible in this chunk.
void do_predict(FILE *input, FILE *output)
{
	std::vector<double> pred_values; //store decision values
	std::vector<double> true_values; //store true values

	int total = 0;
	int nr_class = get_nr_class(model_);
	// Class labels in the order reported by get_labels().
	int * labels = Malloc(int, nr_class);
	get_labels(model_, labels);
	double * prob_estimates = NULL;
	int j, n;
	int nr_feature = get_nr_feature(model_);
	// n is nr_feature plus one extra slot when the model has a bias term (bias >= 0).
	if(model_->bias >=0)
		n = nr_feature+1;
	else
		n = nr_feature;

	// not yet support multiclass
	assert(nr_class==2);

	//print out header...
	// Only for output_option == 2: allocate the per-class estimate buffer and
	// write a "labels <l0> <l1> ..." header line.
	if(output_option ==2)
	{
		prob_estimates = Malloc(double, nr_class);
		fprintf(output,"labels");
		for(j=0;j<nr_class;j++)
			fprintf(output," %d",labels[j]);
		fprintf(output,"\n");
	}
// return classification error and the normalized difference between predicted and true sentiment std::pair<double, double> do_predict(const struct problem *test_prob, struct model* model_) { double acc = 0; double clse=0; int total = 0; double *prob_estimates=NULL; int *labels=NULL; int nr_class=get_nr_class(model_); if(flag_predict_probability) { if(!check_probability_model(model_)) { fprintf(stderr, "probability output is only supported for logistic regression\n"); exit(1); } labels=(int *) malloc(nr_class*sizeof(int)); get_labels(model_,labels); prob_estimates = (double *) malloc(nr_class*sizeof(double)); } int l = test_prob->l; int i = 0; for(i=0; i<l; i++) { int predict_label = 0; int target_label=test_prob->y[i]; feature_node *xi = test_prob->x[i]; if(flag_predict_probability) { int j; predict_label = predict_probability(model_,xi,prob_estimates); double predict_score=0; for(j=0;j<model_->nr_class;j++) predict_score+=prob_estimates[j]*labels[j]; //double acc_max= fabs(target_label-3)+2; //acc+=(acc_max-sqrt((predict_score - target_label)*(predict_score - target_label)))/acc_max; acc += (predict_score - target_label) * (predict_score - target_label); if (predict_label!=target_label) clse++; } else { predict_label = predict(model_,xi); //double acc_max= fabs(target_label-3)+2; //acc+=(acc_max-sqrt((predict_label - target_label)*(predict_label - target_label)))/acc_max; acc += (predict_label - target_label) * (predict_label - target_label); if (predict_label!=target_label) clse++; } ++total; } if(flag_predict_probability) { free(prob_estimates); free(labels); } //printf("Error = %g%% (%d/%d)\n",(double) (total-correct)/total*100,total-correct,total); return std::make_pair(clse/total,acc/total) ; }
// Returns the first decision value of the trained linear model for a dense
// feature vector.
//
// features: dense values; entry i becomes sparse feature index i+1, zeros are
//           skipped.
// Returns 0.0 (after printing a message) when no model has been trained.
//
// Fixes over the previous version:
//  - the sparse buffer was sized with the bias truncated to int, so a model
//    without bias (bias < 0) got a buffer two nodes too small;
//  - the sparse buffer was never released;
//  - predict_values() was handed a single double although it is given one
//    slot per class here to stay in bounds for multi-class models.
double SVMLinear::predictModel(vector<double> features)
{
	if(modelLinearSVM==NULL)
	{
		fprintf(stdout,"Error, Train Model First \n");
		return 0.0;
	}

	const int nr_class = get_nr_class(modelLinearSVM);
	const double bias = modelLinearSVM->bias;

	// Worst case: every feature is non-zero, plus bias node, plus terminator.
	std::vector<feature_node> x;
	x.reserve(features.size() + 2);

	for (size_t i = 0; i < features.size(); i++)
	{
		if(features[i] != 0.0)
		{
			feature_node node;
			node.index = static_cast<int>(i) + 1;
			node.value = features[i];
			x.push_back(node);
		}
	}

	// A bias term exists only when bias >= 0; it occupies index nr_feature+1.
	// NOTE(review): the node now carries the model's bias value (previously a
	// constant 1); identical for the usual bias of 1 - confirm against training.
	if(bias >= 0)
	{
		feature_node bias_node;
		bias_node.index = modelLinearSVM->nr_feature + 1;
		bias_node.value = bias;
		x.push_back(bias_node);
	}

	// index == -1 marks the end of the sparse vector.
	feature_node terminator;
	terminator.index = -1;
	terminator.value = 0;
	x.push_back(terminator);

	std::vector<double> dec_values(nr_class > 0 ? nr_class : 1, 0.0);
	predict_values(modelLinearSVM, x.data(), dec_values.data());
	return dec_values[0];
}
// Reads instances of the form "<label> <index>:<value> ..." from `input`,
// predicts each one with the file-level model_, and writes one prediction per
// line to `output` (followed by the per-class probabilities when
// flag_predict_probability is set, preceded by a "labels ..." header).
// Finally reports accuracy, or mean squared error and squared correlation
// coefficient for the SVR solver types, through info().
// Malformed lines terminate via exit_input_error() with the 1-based line number.
//
// Changes from the previous version:
//  - normalisation (model_->normal) no longer divides by a zero norm, which
//    turned an all-zero instance into NaNs;
//  - isspace() receives an unsigned char value, as the C library requires.
void do_predict(FILE *input, FILE *output)
{
	int correct = 0;
	int total = 0;
	double error = 0;
	double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;

	int nr_class=get_nr_class(model_);
	double *prob_estimates=NULL;
	int j, n;
	int nr_feature=get_nr_feature(model_);
	// n is the index used for the bias node when the model has one.
	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;

	if(flag_predict_probability)
	{
		int *labels;

		if(!check_probability_model(model_))
		{
			fprintf(stderr, "probability output is only supported for logistic regression\n");
			exit(1);
		}

		labels=(int *) malloc(nr_class*sizeof(int));
		get_labels(model_,labels);
		prob_estimates = (double *) malloc(nr_class*sizeof(double));
		fprintf(output,"labels");
		for(j=0;j<nr_class;j++)
			fprintf(output," %d",labels[j]);
		fprintf(output,"\n");
		free(labels);
	}

	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));
	while(readline(input) != NULL)
	{
		int i = 0;
		double target_label, predict_label;
		char *idx, *val, *label, *endptr;
		int inst_max_index = 0; // strtol gives 0 if wrong format

		label = strtok(line," \t\n");
		if(label == NULL) // empty line
			exit_input_error(total+1);

		target_label = strtod(label,&endptr);
		if(endptr == label || *endptr != '\0')
			exit_input_error(total+1);

		while(1)
		{
			if(i>=max_nr_attr-2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
			}

			idx = strtok(NULL,":");
			val = strtok(NULL," \t");

			if(val == NULL)
				break;
			errno = 0;
			x[i].index = (int) strtol(idx,&endptr,10);
			// Indices must be valid integers in strictly increasing order.
			if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
				exit_input_error(total+1);
			else
				inst_max_index = x[i].index;

			errno = 0;
			x[i].value = strtod(val,&endptr);
			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace((unsigned char)*endptr)))
				exit_input_error(total+1);

			// feature indices larger than those in training are not used
			if(x[i].index <= nr_feature)
				++i;
		}

		if(model_->bias>=0)
		{
			x[i].index = n;
			x[i].value = model_->bias;
			i++;
		}
		x[i].index = -1;

		if(model_->normal)
		{
			// Scale the instance (bias node included) to unit Euclidean length.
			double length = 0;
			for(int kk = 0; x[kk].index != -1; kk++)
				length += x[kk].value * x[kk].value;
			length = sqrt(length);
			// An all-zero instance has no direction; leave it untouched
			// instead of producing 0/0.
			if(length > 0)
			{
				for(int kk = 0; x[kk].index != -1; kk++)
					x[kk].value /= length;
			}
		}

		if(flag_predict_probability)
		{
			predict_label = predict_probability(model_,x,prob_estimates);
			fprintf(output,"%g",predict_label);
			for(j=0;j<model_->nr_class;j++)
				fprintf(output," %g",prob_estimates[j]);
			fprintf(output,"\n");
		}
		else
		{
			predict_label = predict(model_,x);
			fprintf(output,"%g\n",predict_label);
		}

		if(predict_label == target_label)
			++correct;
		// Running sums for the regression statistics reported below.
		error += (predict_label-target_label)*(predict_label-target_label);
		sump += predict_label;
		sumt += target_label;
		sumpp += predict_label*predict_label;
		sumtt += target_label*target_label;
		sumpt += predict_label*target_label;
		++total;
	}
	if(model_->param.solver_type==L2R_L2LOSS_SVR ||
	   model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
	   model_->param.solver_type==L2R_L2LOSS_SVR_DUAL)
	{
		info("Mean squared error = %g (regression)\n",error/total);
		info("Squared correlation coefficient = %g (regression)\n",
			((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
			((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
			);
	}
	else
		info("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
	if(flag_predict_probability)
		free(prob_estimates);
}
// MEX prediction driver.
//   prhs[0]: column vector of true labels, one per test instance
//   prhs[1]: sparse test matrix (instances in rows unless col_format_flag)
//   plhs[0]: predicted labels
//   plhs[1]: [accuracy in percent; mean squared error; squared correlation]
//   plhs[2]: probability estimates (predict_probability_flag) or decision values
// On a label-vector shape mismatch or a failed transpose, a message is
// printed and fake_answer(plhs) supplies the outputs.
void do_predict(mxArray *plhs[], const mxArray *prhs[], struct model *model_, const int predict_probability_flag)
{
	int n_label_rows, n_label_cols;
	int n_features, n_instances;
	int row;
	double *true_labels, *out_labels, *out_scores, *out_stats;
	struct feature_node *x;
	mxArray *instances_t[1]; // test instances with one instance per column

	int n_correct = 0;
	int n_seen = 0;
	double sq_err = 0;
	double sum_p = 0, sum_t = 0, sum_pp = 0, sum_tt = 0, sum_pt = 0;
	double mse, cross, scc;

	int nr_class = get_nr_class(model_);
	// Two-class models (other than MCSVM_CS) expose a single decision value.
	int nr_w = (nr_class==2 && model_->param.solver_type!=MCSVM_CS) ? 1 : nr_class;
	double *prob_estimates = NULL;

	// prhs[1] = testing instance matrix
	if(col_format_flag)
	{
		n_features = (int) mxGetM(prhs[1]);
		n_instances = (int) mxGetN(prhs[1]);
	}
	else
	{
		n_features = get_nr_feature(model_);
		n_instances = (int) mxGetM(prhs[1]);
	}

	n_label_rows = (int) mxGetM(prhs[0]);
	n_label_cols = (int) mxGetN(prhs[0]);

	if(n_label_rows != n_instances)
	{
		mexPrintf("Length of label vector does not match # of instances.\n");
		fake_answer(plhs);
		return;
	}
	if(n_label_cols != 1)
	{
		mexPrintf("label (1st argument) should be a vector (# of column is 1).\n");
		fake_answer(plhs);
		return;
	}

	true_labels = mxGetPr(prhs[0]);

	// Column-format input is used as is; otherwise a transposed copy is made.
	if(!col_format_flag)
	{
		mxArray *to_transpose[1];
		to_transpose[0] = mxDuplicateArray(prhs[1]);
		if(mexCallMATLAB(1, instances_t, 1, to_transpose, "transpose"))
		{
			mexPrintf("Error: cannot transpose testing instance matrix\n");
			fake_answer(plhs);
			return;
		}
	}
	else
		instances_t[0] = (mxArray *)prhs[1];

	prob_estimates = Malloc(double, nr_class);

	plhs[0] = mxCreateDoubleMatrix(n_instances, 1, mxREAL);
	plhs[2] = mxCreateDoubleMatrix(n_instances, predict_probability_flag ? nr_class : nr_w, mxREAL);

	out_labels = mxGetPr(plhs[0]);
	out_scores = mxGetPr(plhs[2]);
	x = Malloc(struct feature_node, n_features+2);

	for(row = 0; row < n_instances; row++)
	{
		int c;
		double truth = true_labels[row];
		double guess;

		// prhs[1] and prhs[1]^T are sparse
		read_sparse_instance(instances_t[0], row, x, n_features, model_->bias);

		if(!predict_probability_flag)
		{
			double *dec_values = Malloc(double, nr_class);
			guess = predict_values(model_, x, dec_values);
			out_labels[row] = guess;
			// plhs[2] is column-major: value c of instance `row`.
			for(c = 0; c < nr_w; c++)
				out_scores[row + c * n_instances] = dec_values[c];
			free(dec_values);
		}
		else
		{
			guess = predict_probability(model_, x, prob_estimates);
			out_labels[row] = guess;
			for(c = 0; c < nr_class; c++)
				out_scores[row + c * n_instances] = prob_estimates[c];
		}

		if(guess == truth)
			++n_correct;
		sq_err += (guess-truth)*(guess-truth);
		sum_p += guess;
		sum_t += truth;
		sum_pp += guess*guess;
		sum_tt += truth*truth;
		sum_pt += guess*truth;
		++n_seen;
	}

	mse = sq_err/n_seen;
	cross = n_seen*sum_pt-sum_p*sum_t;
	scc = (cross*cross)/((n_seen*sum_pp-sum_p*sum_p)*(n_seen*sum_tt-sum_t*sum_t));

	// Only the SVR solvers print their statistics; all three numbers are returned regardless.
	if(model_->param.solver_type==L2R_L2LOSS_SVR ||
	   model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
	   model_->param.solver_type==L2R_L2LOSS_SVR_DUAL)
	{
		mexPrintf("Mean squared error = %g (regression)\n", mse);
		mexPrintf("Squared correlation coefficient = %g (regression)\n", scc);
	}

	// return accuracy, mean squared error, squared correlation coefficient
	plhs[1] = mxCreateDoubleMatrix(3, 1, mxREAL);
	out_stats = mxGetPr(plhs[1]);
	out_stats[0] = (double)n_correct/n_seen*100;
	out_stats[1] = mse;
	out_stats[2] = scc;

	free(x);
	if(prob_estimates != NULL)
		free(prob_estimates);
}
// Releases the MAX_BAM_FILES name buffers and the table that holds them.
static void freeCoverageFileNames(char **names)
{
	int n;
	if(names == NULL)
		return;
	for(n = 0; n < MAX_BAM_FILES; n++)
		free(names[n]);
	free(names);
}

// Thread entry point: predicts, for every bin of chromosome data->chr, the
// probability of the class labelled 1, using the model in data->trainedModel
// and the coverage files listed in data->coverageFileList.
//
// arg: thread_data_t* (trainedModel, coverageFileList, trainFile, paramFile, chr).
// On success the thread exits through pthread_exit() with a calloc'ed float
// array holding one value per bin; bins with no non-zero feature, or whose
// window does not fit inside the chromosome, stay 0.
// On a set-up failure a message is printed and NULL is returned.
//
// Changes from the previous version:
//  - the window bounds test used chrlen[i] with a stale loop index instead of
//    chrlen[chr];
//  - the label buffer was fixed at 10 ints regardless of the class count and
//    re-queried for every bin;
//  - error paths now release the model, open files and the name table;
//  - the per-bin feature and read buffers are allocated once and reused.
void *predictModelWholeGenome(void *arg)
{
	thread_data_t *data = (thread_data_t *) arg;
	printf("data->trainedModel is %s\n", data->trainedModel);
	printf("data->coverageFileList is %s\n", data->coverageFileList);
	printf("data->trainFile %s\n", data->trainFile);
	printf("data->paramFile %s\n", data->paramFile);
	printf("data->chr is %d\n", data->chr);

	char *trainedModel = data->trainedModel;
	char *coverageFileList = data->coverageFileList;
	char *paramFile = data->paramFile;
	int chr = data->chr;

	// utility var
	int i,j,k;

	// trainedModel
	struct model *mymodel;
	if( (mymodel = load_model(trainedModel)) == 0) {
		printf("cannot load model from file %s\n", trainedModel);
		return NULL;
	}

	// coverageFileList
	int totalCoverageFiles;
	FILE *coverageFileListFp = NULL;
	if( (coverageFileListFp = fopen(coverageFileList, "r") ) == NULL) {
		printf("Cannot open file %s\n", coverageFileList);
		free_and_destroy_model(&mymodel);
		return NULL;
	}
	char **coverageFiles = (char **)calloc(MAX_BAM_FILES,sizeof(char *));
	for(i = 0; i < MAX_BAM_FILES; i++) {
		coverageFiles[i] = (char *)calloc(MAX_DIR_LEN, sizeof(char));
	}

	i = 0;
	while (!feof(coverageFileListFp)) {
		if (i >= MAX_BAM_FILES) {
			printf("Error: the number of input coverages files exceeds the limit %d\n", i);
			fclose(coverageFileListFp);
			freeCoverageFileNames(coverageFiles);
			free_and_destroy_model(&mymodel);
			return NULL;
		}
		if( ( fscanf(coverageFileListFp, "%s\n", coverageFiles[i]) ) != 1) {
			printf("Error: reading %dth from %s\n", i, coverageFileList);
			fclose(coverageFileListFp);
			freeCoverageFileNames(coverageFiles);
			free_and_destroy_model(&mymodel);
			return NULL;
		}
		i++;
	}
	totalCoverageFiles = i;
	fclose(coverageFileListFp);

	// open coverage Files
	FILE **coverageFps = (FILE **)calloc(totalCoverageFiles, sizeof(FILE *));
	for(i = 0; i < totalCoverageFiles; i++) {
		if( (coverageFps[i] = fopen(coverageFiles[i], "rb")) == NULL ) {
			printf("Error opening coverage file %s\n", coverageFiles[i]);
			while(--i >= 0)
				fclose(coverageFps[i]);
			free(coverageFps);
			freeCoverageFileNames(coverageFiles);
			free_and_destroy_model(&mymodel);
			return NULL;
		}
	}

	// paramFile
	struct extractFeatureParam *param = (struct extractFeatureParam *)calloc(1, sizeof(struct extractFeatureParam));
	parseParam(paramFile, param);

	// predict model: by default: predict probability
	int nr_class = get_nr_class(mymodel);
	// At least two slots so that reading the second estimate stays in bounds.
	double *prob_estimates = (double *)calloc(nr_class < 2 ? 2 : nr_class, sizeof(double));

	// The estimate to report is the one belonging to label 1: slot 0 when the
	// model lists label 1 first, slot 1 otherwise. The label order does not
	// change between bins, so it is resolved once.
	int *mylabel = (int *)calloc(nr_class < 1 ? 1 : nr_class, sizeof(int));
	get_labels(mymodel, mylabel);
	const int positiveIdx = (mylabel[0] == 1) ? 0 : 1;
	free(mylabel);

	// cumulative bin counts per sequence, used to locate chr inside each coverage file
	int totalBins = 0;
	int cumBins[NUM_SEQ];
	for (i = 0; i < NUM_SEQ; i++) {
		totalBins += (int)(chrlen[i] / param->resolution) + 1;
		cumBins[i] = totalBins;
	}

	// Loop-invariant geometry of the chromosome and of the feature window.
	// NOTE(review): assumes windowSize and resolution are integers - confirm.
	const int nBins = (int)(chrlen[chr] / param->resolution) + 1;
	const int windowBins = param->windowSize/param->resolution;
	const int halfWindowBins = (int)((float)(param->windowSize / 2) / (float)param->resolution + 0.5);
	const int windowBinsRounded = (int)((float)(param->windowSize) / (float)param->resolution + 0.5);
	const int chrBinOffset = (chr != 0) ? cumBins[chr-1] : 0;

	// allocate memory for result based on thread data chr
	// as we are using one thread for each chr
	float *predResult = (float *)calloc(nBins, sizeof(float));

	// Buffers reused for every bin.
	float *buffer = (float *)calloc(windowBins, sizeof(float));
	int max_nr_feature = 100;
	struct feature_node *myX = (struct feature_node *)calloc(max_nr_feature, sizeof(struct feature_node));

	// read in feature for each bin and do prediction
	for(j = 0; j < nBins; j++) {
		if(j % 100000 == 0) {
			printf("Predicting chr%d:%dth bin\n", chr,j);
			fflush(stdout);
		}

		int idx = 0;
		const int windowStart = j - halfWindowBins;
		// The window must lie completely inside this chromosome.
		const int windowFits = !(windowStart < 0 || windowStart + windowBinsRounded > nBins);

		if(windowFits) {
			for(k = 0; k < totalCoverageFiles; k++) {
				int l;
				size_t got;
				fseek(coverageFps[k], (long)(windowStart + chrBinOffset)*(long)sizeof(float), SEEK_SET);
				got = fread(buffer, sizeof(float), windowBins, coverageFps[k]);
				// Values not delivered by a short read count as zero coverage.
				for(l = (int)got; l < windowBins; l++)
					buffer[l] = 0;

				for(l = 0; l < windowBins; l++) {
					if(buffer[l] != 0) {
						myX[idx].index = k*windowBins + l + 1;
						myX[idx].value = buffer[l];
						idx++;
					}
					if(idx >= max_nr_feature -2) { // feature_node is not long enough
						max_nr_feature *= 2;
						myX = (struct feature_node *)realloc(myX, max_nr_feature*sizeof(struct feature_node));
					}
				}
			} // end of loop through coverageFiles
		}

		if(idx == 0) {
			predResult[j] = 0.0;
			continue;
		}

		myX[idx].index = -1; // a flag for end of features
		predict_probability(mymodel, myX, prob_estimates);
		predResult[j] = prob_estimates[positiveIdx];
	}

	free(myX);
	free(buffer);

	for(i = 0; i < totalCoverageFiles; i++) {
		fclose(coverageFps[i]);
	}
	free(coverageFps);

	// free pointers
	freeCoverageFileNames(coverageFiles);
	free(param);
	free(prob_estimates);
	// give address of pointer to this function, so that the function can free the pointer.
	free_and_destroy_model(&mymodel);
	pthread_exit((void *) predResult);
}
// MEX prediction driver.
//   prhs[0]: column vector of true labels, one per test instance
//   prhs[1]: sparse test matrix (instances in rows unless col_format_flag)
//   plhs[0]: predicted labels
//   plhs[1]: accuracy in percent
//   plhs[2]: probability estimates (predict_probability_flag) or decision values
// On invalid input a message is printed and fake_answer(plhs) supplies the outputs.
//
// Changes from the previous version:
//  - a non-sparse test matrix is now rejected; previously only a message was
//    printed and the loop went on to read an uninitialised matrix pointer;
//  - the decision-value buffer is allocated once and freed (it used to be
//    allocated for every instance and never released).
void do_predict(mxArray *plhs[], const mxArray *prhs[], struct model *model_, const int predict_probability_flag)
{
	int label_vector_row_num, label_vector_col_num;
	int feature_number, testing_instance_number;
	int instance_index;
	double *ptr_label, *ptr_predict_label;
	double *ptr_prob_estimates, *ptr_dec_values, *ptr;
	struct feature_node *x;
	mxArray *pplhs[1]; // instance sparse matrix in row format

	int correct = 0;
	int total = 0;

	int nr_class=get_nr_class(model_);
	int nr_classifier;
	double *prob_estimates=NULL;
	double *dec_values=NULL;

	// Two-class models expose a single decision value.
	if(nr_class==2)
		nr_classifier=1;
	else
		nr_classifier=nr_class;

	// prhs[1] = testing instance matrix
	feature_number = (int) mxGetN(prhs[1]);
	testing_instance_number = (int) mxGetM(prhs[1]);
	if(col_format_flag)
	{
		feature_number = (int) mxGetM(prhs[1]);
		testing_instance_number = (int) mxGetN(prhs[1]);
	}

	label_vector_row_num = (int) mxGetM(prhs[0]);
	label_vector_col_num = (int) mxGetN(prhs[0]);

	if(label_vector_row_num!=testing_instance_number)
	{
		mexPrintf("Length of label vector does not match # of instances.\n");
		fake_answer(plhs);
		return;
	}
	if(label_vector_col_num!=1)
	{
		mexPrintf("label (1st argument) should be a vector (# of column is 1).\n");
		fake_answer(plhs);
		return;
	}

	ptr_label = mxGetPr(prhs[0]);

	// Only sparse input can be read below; stop here otherwise.
	if(!mxIsSparse(prhs[1]))
	{
		mexPrintf("Testing_instance_matrix must be sparse\n");
		fake_answer(plhs);
		return;
	}

	// transpose instance matrix
	if(col_format_flag)
	{
		pplhs[0] = (mxArray *)prhs[1];
	}
	else
	{
		mxArray *pprhs[1];
		pprhs[0] = mxDuplicateArray(prhs[1]);
		if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose"))
		{
			mexPrintf("Error: cannot transpose testing instance matrix\n");
			fake_answer(plhs);
			return;
		}
	}

	prob_estimates = Malloc(double, nr_class);
	dec_values = Malloc(double, nr_class);

	plhs[0] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL);
	if(predict_probability_flag)
		plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class, mxREAL);
	else
		plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_classifier, mxREAL);

	ptr_predict_label = mxGetPr(plhs[0]);
	// Both views address plhs[2]; which one is filled depends on the flag.
	ptr_prob_estimates = mxGetPr(plhs[2]);
	ptr_dec_values = mxGetPr(plhs[2]);
	x = Malloc(struct feature_node, feature_number+2);
	for(instance_index=0;instance_index<testing_instance_number;instance_index++)
	{
		int i;
		double target,v;

		target = ptr_label[instance_index];

		// prhs[1] and prhs[1]^T are sparse
		read_sparse_instance(pplhs[0], instance_index, x, feature_number, model_->bias);

		if(predict_probability_flag)
		{
			v = predict_probability(model_, x, prob_estimates);
			ptr_predict_label[instance_index] = v;
			// plhs[2] is column-major: class i of this instance.
			for(i=0;i<nr_class;i++)
				ptr_prob_estimates[instance_index + i * testing_instance_number] = prob_estimates[i];
		}
		else
		{
			v = predict(model_, x);
			ptr_predict_label[instance_index] = v;

			predict_values(model_, x, dec_values);
			for(i=0;i<nr_classifier;i++)
				ptr_dec_values[instance_index + i * testing_instance_number] = dec_values[i];
		}

		if(v == target)
			++correct;
		++total;
	}
	mexPrintf("Accuracy = %g%% (%d/%d)\n", (double)correct/total*100,correct,total);

	// return accuracy
	plhs[1] = mxCreateDoubleMatrix(1, 1, mxREAL);
	ptr = mxGetPr(plhs[1]);
	ptr[0] = (double)correct/total*100;

	free(x);
	free(dec_values);
	if(prob_estimates != NULL)
		free(prob_estimates);
}
// Reads instances of the form "<label> <index>:<value> ..." from `input`,
// predicts each one with model_, and writes one integer label per line to
// `output` (followed by the per-class probabilities when
// flag_predict_probability is set, preceded by a "labels ..." header).
// Prints the accuracy to stdout at the end.
// Malformed lines terminate via exit_input_error() with the 1-based line number.
//
// Changes from the previous version:
//  - a line without any label token is reported as an input error instead of
//    passing NULL to strtol();
//  - isspace() receives an unsigned char value, as the C library requires.
void do_predict(FILE *input, FILE *output, struct model* model_)
{
	int correct = 0;
	int total = 0;

	int nr_class=get_nr_class(model_);
	double *prob_estimates=NULL;
	int j, n;
	int nr_feature=get_nr_feature(model_);
	// n is the index used for the bias node when the model has one.
	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;

	if(flag_predict_probability)
	{
		int *labels;

		if(!check_probability_model(model_))
		{
			fprintf(stderr, "probability output is only supported for logistic regression\n");
			exit(1);
		}

		labels=(int *) malloc(nr_class*sizeof(int));
		get_labels(model_,labels);
		prob_estimates = (double *) malloc(nr_class*sizeof(double));
		fprintf(output,"labels");
		for(j=0;j<nr_class;j++)
			fprintf(output," %d",labels[j]);
		fprintf(output,"\n");
		free(labels);
	}

	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));
	while(readline(input) != NULL)
	{
		int i = 0;
		int target_label, predict_label;
		char *idx, *val, *label, *endptr;
		int inst_max_index = 0; // strtol gives 0 if wrong format

		label = strtok(line," \t");
		if(label == NULL) // nothing but separators on this line
			exit_input_error(total+1);
		target_label = (int) strtol(label,&endptr,10);
		if(endptr == label)
			exit_input_error(total+1);

		while(1)
		{
			if(i>=max_nr_attr-2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
			}

			idx = strtok(NULL,":");
			val = strtok(NULL," \t");

			if(val == NULL)
				break;
			errno = 0;
			x[i].index = (int) strtol(idx,&endptr,10);
			// Indices must be valid integers in strictly increasing order.
			if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
				exit_input_error(total+1);
			else
				inst_max_index = x[i].index;

			errno = 0;
			x[i].value = strtod(val,&endptr);
			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace((unsigned char)*endptr)))
				exit_input_error(total+1);

			// feature indices larger than those in training are not used
			if(x[i].index <= nr_feature)
				++i;
		}

		if(model_->bias>=0)
		{
			x[i].index = n;
			x[i].value = model_->bias;
			i++;
		}
		x[i].index = -1;

		if(flag_predict_probability)
		{
			predict_label = predict_probability(model_,x,prob_estimates);
			fprintf(output,"%d",predict_label);
			for(j=0;j<model_->nr_class;j++)
				fprintf(output," %g",prob_estimates[j]);
			fprintf(output,"\n");
		}
		else
		{
			predict_label = predict(model_,x);
			fprintf(output,"%d\n",predict_label);
		}

		if(predict_label == target_label)
			++correct;
		++total;
	}
	printf("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
	if(flag_predict_probability)
		free(prob_estimates);
}
// Cross-validation for two-class problems: for each of nr_fold folds, trains
// on the remaining instances and stores the decision value and the +1/-1
// target of every held-out instance.
// NOTE(review): the part of the function that turns dec_values/ty into the
// returned score, and the closing brace, are not visible in this chunk.
double binary_class_cross_validation(const problem *prob, const parameter *param, int nr_fold)
{
	dvec_t dec_values;
	ivec_t ty;
	int *labels;

	if (nr_fold > 1)
	{
		int i;
		int *fold_start = Malloc(int,nr_fold+1);
		int l = prob->l;
		int *perm = Malloc(int,l);

		// Random permutation of the instance indices.
		for(i=0;i<l;i++)
			perm[i]=i;
		for(i=0;i<l;i++)
		{
			int j = i+rand()%(l-i);
			std::swap(perm[i],perm[j]);
		}
		// Fold i covers permuted positions [fold_start[i], fold_start[i+1]).
		for(i=0;i<=nr_fold;i++)
			fold_start[i]=i*l/nr_fold;

		for(i=0;i<nr_fold;i++)
		{
			int begin = fold_start[i];
			int end = fold_start[i+1];
			int j,k;
			struct problem subprob;

			// Training set: everything outside [begin, end). The instance
			// pointers are shared with prob, only the tables are new.
			subprob.l = l-(end-begin);
			subprob.x = Malloc(struct feature_node*,subprob.l);
			subprob.y = Malloc(int,subprob.l);

			k=0;
			for(j=0;j<begin;j++)
			{
				subprob.x[k] = prob->x[perm[j]];
				subprob.y[k] = prob->y[perm[j]];
				++k;
			}
			for(j=end;j<l;j++)
			{
				subprob.x[k] = prob->x[perm[j]];
				subprob.y[k] = prob->y[perm[j]];
				++k;
			}
			struct model *submodel = train(&subprob,param);

			//int svm_type = get_svm_type(submodel);
			//if(svm_type == NU_SVR || svm_type == EPSILON_SVR){
			//	fprintf(stderr, "wrong svm type");
			//	exit(1);
			//}

			labels = Malloc(int, get_nr_class(submodel));
			get_labels(submodel, labels);

			// More than two classes is a fatal error.
			if(get_nr_class(submodel) > 2)
			{
				fprintf(stderr,"Error: the number of class is not equal to 2\n");
				exit(-1);
			}

			// Results are stored at the permuted position j, so the vectors
			// only need to reach up to the end of the current fold.
			dec_values.resize(end);
			ty.resize(end);

			for(j=begin;j<end;j++)
			{
				predict_values(submodel,prob->x[perm[j]], &dec_values[j]);
				ty[j] = (prob->y[perm[j]] > 0)? 1: -1;
			}

			// When the model's first label is not positive, flip the sign of
			// this fold's decision values.
			if(labels[0] <= 0)
			{
				for(j=begin;j<end;j++)
					dec_values[j] *= -1;
			}

			free_and_destroy_model(&submodel);
			free(subprob.x);
			free(subprob.y);
			free(labels);
		}

		free(perm);
		free(fold_start);
	}
// Reads lines of the form "<clicks> <shows> <token> <index>:<value> ..." from
// `input` and, for every line with shows > 0 and clicks <= shows, writes
// "<clicks> <shows>  <p>" to `output`, where p is the first probability
// estimate of the file-level model_. Other lines are skipped silently.
// Requires a model that supports probability output; exits otherwise.
// Reports the number of lines read through info().
//
// Changes from the previous version:
//  - a line without a <shows> field is reported as an input error instead of
//    passing NULL to atoi();
//  - `total` now counts the lines read; it was never incremented, so every
//    input error was reported for line 1 and the final count was always 0;
//  - isspace() receives an unsigned char value, as the C library requires.
void do_predict(FILE *input, FILE *output)
{
	int total = 0;

	int nr_class=get_nr_class(model_);
	double *prob_estimates=NULL;
	int n;
	int nr_feature=get_nr_feature(model_);
	// n is the index used for the bias node when the model has one.
	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;

	if(!check_probability_model(model_))
	{
		fprintf(stderr, "probability output is only supported for logistic regression\n");
		exit(1);
	}

	prob_estimates = (double *) malloc(nr_class*sizeof(double));

	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));
	int clicks = 0;
	int shows = 0;
	while(readline(input) != NULL)
	{
		int i = 0;
		double predict_ctr;
		char *idx, *val, *endptr;
		char *p;
		int inst_max_index = 0; // strtol gives 0 if wrong format

		++total; // 1-based number of the line being processed

		p = strtok(line," \t\n"); // clicks
		if(p == NULL) // empty line
			exit_input_error(total);
		clicks = atoi(p);

		p = strtok(NULL," \t"); // shows
		if(p == NULL) // line ends after the click count
			exit_input_error(total);
		shows = atoi(p);

		p = strtok(NULL," \t"); // third token (e.g. qid:1) is read and ignored

		// Lines without a usable click/show pair produce no output.
		if (shows <=0 || clicks > shows) {
			continue;
		}

		while(1)
		{
			if(i>=max_nr_attr-2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
			}

			idx = strtok(NULL,":");
			val = strtok(NULL," \t");

			if(val == NULL)
				break;
			errno = 0;
			x[i].index = (int) strtol(idx,&endptr,10);
			// Indices must be valid integers in strictly increasing order.
			if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
				exit_input_error(total);
			else
				inst_max_index = x[i].index;

			errno = 0;
			x[i].value = strtod(val,&endptr);
			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace((unsigned char)*endptr)))
				exit_input_error(total);

			// feature indices larger than those in training are not used
			if(x[i].index <= nr_feature)
				++i;
		}

		if(model_->bias>=0)
		{
			x[i].index = n;
			x[i].value = model_->bias;
			i++;
		}
		x[i].index = -1;

		predict_probability(model_,x,prob_estimates);
		fprintf(output,"%d %d ", clicks, shows);
		// NOTE(review): takes the first estimate as the click probability;
		// which label that slot belongs to depends on the model's label order.
		predict_ctr = prob_estimates[0];
		fprintf(output," %g\n", predict_ctr);
	}
	info("total:%d\n",total);
	free(prob_estimates);
}
// Reads instances of the form "<letter> <index>:<value> ..." from `input` and
// evaluates each one with all sum_pro models in model_[]. For every model one
// output record is written: the predicted label (stored in p_label[pid]), or
// in probability mode the label followed by the class probabilities. Each
// record is followed by a newline, which leaves an empty line after every
// record in probability mode, as before.
//
// Combining the per-model labels into one prediction and reporting
// accuracy/regression statistics is currently disabled, so the target label
// derived from the leading letter (A -> 0, B/C/D -> 1) is not used yet.
//
// Changes from the previous version:
//  - the sparse vector is cut back to the parsed features before each model's
//    bias node is appended; previously the write position kept advancing, so
//    model k saw k+1 bias nodes;
//  - the probability buffer is sized for the largest class count of all
//    models and is freed at the end;
//  - `total` is incremented per line so input errors report the right line;
//  - isspace() receives an unsigned char value, as the C library requires.
// NOTE(review): the feature-count limit and the bias index are taken from
// model_[0]; this assumes all models share one feature space - confirm.
void do_predict(FILE *input, FILE *output)
{
	int total = 0;

	int nr_class=get_nr_class(model_[0]);
	double *prob_estimates=NULL;
	int j, n;
	int nr_feature=get_nr_feature(model_[0]);
	if(model_[0]->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;

	if(flag_predict_probability)
	{
		int *labels;
		int max_nr_class = nr_class;

		if(!check_probability_model(model_[0]))
		{
			fprintf(stderr, "probability output is only supported for logistic regression\n");
			exit(1);
		}

		// predict_probability() fills one slot per class of the model it is
		// called with, so reserve room for the largest model.
		for (int pid = 0; pid < sum_pro; pid++)
		{
			int c = get_nr_class(model_[pid]);
			if(c > max_nr_class)
				max_nr_class = c;
		}

		labels=(int *) malloc(nr_class*sizeof(int));
		get_labels(model_[0],labels);
		prob_estimates = (double *) malloc(max_nr_class*sizeof(double));
		fprintf(output,"labels");
		for(j=0;j<nr_class;j++)
			fprintf(output," %d",labels[j]);
		fprintf(output,"\n");
		free(labels);
	}

	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));
	while(readline(input) != NULL)
	{
		int i = 0;
		int nr_parsed;
		double target_label = 0, predict_label;
		char *idx, *val, *label, *endptr;
		int inst_max_index = 0; // strtol gives 0 if wrong format

		label = strtok(line," \t\n");
		if(label == NULL) // empty line
			exit_input_error(total+1);

		// Letter label -> binary target; unknown letters keep the default 0.
		switch (label[0])
		{
			case 'A':
				target_label = 0;
				break;
			case 'B':
			case 'C':
			case 'D':
				target_label = 1;
				break;
		}
		(void) target_label; // not consumed while aggregation is disabled

		// Parse the features once; every model reuses them.
		while(1)
		{
			if(i>=max_nr_attr-2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
			}

			idx = strtok(NULL,":");
			val = strtok(NULL," \t");

			if(val == NULL)
				break;
			errno = 0;
			x[i].index = (int) strtol(idx,&endptr,10);
			// Indices must be valid integers in strictly increasing order.
			if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
				exit_input_error(total+1);
			else
				inst_max_index = x[i].index;

			errno = 0;
			x[i].value = strtod(val,&endptr);
			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace((unsigned char)*endptr)))
				exit_input_error(total+1);

			// feature indices larger than those in training are not used
			if(x[i].index <= nr_feature)
				++i;
		}
		nr_parsed = i;

		for (int pid = 0; pid < sum_pro; pid++)
		{
			// Start again right after the parsed features so that only this
			// model's bias node (if any) precedes the terminator.
			i = nr_parsed;
			if(model_[pid]->bias>=0)
			{
				x[i].index = n;
				x[i].value = model_[pid]->bias;
				i++;
			}
			x[i].index = -1;

			if(flag_predict_probability)
			{
				predict_label = predict_probability(model_[pid],x,prob_estimates);
				fprintf(output,"%g",predict_label);
				for(j=0;j<model_[pid]->nr_class;j++)
					fprintf(output," %g",prob_estimates[j]);
				fprintf(output,"\n");
			}
			else
			{
				p_label[pid] = predict(model_[pid],x);
				fprintf(output,"%g", p_label[pid]);
			}
			fprintf(output, "\n" );
		}

		++total;
	}

	if(flag_predict_probability)
		free(prob_estimates);
}
// Reads instances of the form "<label> <index>:<value> ..." (one per line)
// from `input` with fscanf, predicts each one with model_, and writes one
// integer label per line to `output` (followed by the per-class probabilities
// when flag_predict_probability is set, preceded by a "labels ..." header).
// Prints the accuracy to stdout at the end.
// Probability output for a solver other than L2_LR prints a message and
// returns without predicting. A malformed line terminates the program.
//
// Changes from the previous version:
//  - a label that cannot be parsed as a number is reported as a format error;
//    previously only EOF was checked and the target stayed uninitialised;
//  - the separator-skipping loop no longer needs a goto.
void do_predict(FILE *input, FILE *output, struct model* model_)
{
	int correct = 0;
	int total = 0;

	int nr_class=get_nr_class(model_);
	double *prob_estimates=NULL;
	int j, n;
	int nr_feature=get_nr_feature(model_);
	// n is the index used for the bias node when the model has one.
	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;

	if(flag_predict_probability)
	{
		int *labels;

		if(model_->param.solver_type!=L2_LR)
		{
			fprintf(stderr, "probability output is only supported for logistic regression\n");
			return;
		}

		labels=(int *) malloc(nr_class*sizeof(int));
		get_labels(model_,labels);
		prob_estimates = (double *) malloc(nr_class*sizeof(double));
		fprintf(output,"labels");
		for(j=0;j<nr_class;j++)
			fprintf(output," %d",labels[j]);
		fprintf(output,"\n");
		free(labels);
	}

	while(1)
	{
		int i = 0;
		int c;
		int status;
		double target;
		int target_label, predict_label;

		status = fscanf(input,"%lf",&target);
		if(status == EOF)
			break;
		if(status != 1)
		{
			fprintf(stderr,"Wrong input format at line %d\n", total+1);
			exit(1);
		}
		target_label=(int)target;

		while(1)
		{
			if(i>=max_nr_attr-2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
			}

			// Skip separators; a newline or EOF ends this instance.
			do {
				c = getc(input);
			} while(c != '\n' && c != EOF && isspace(c));
			if(c=='\n' || c==EOF)
				break;
			ungetc(c,input);

			if (fscanf(input,"%d:%lf",&x[i].index,&x[i].value) < 2)
			{
				fprintf(stderr,"Wrong input format at line %d\n", total+1);
				exit(1);
			}
			// feature indices larger than those in training are not used
			if(x[i].index<=nr_feature)
				++i;
		}

		if(model_->bias>=0)
		{
			x[i].index = n;
			x[i].value = model_->bias;
			i++;
		}
		x[i].index = -1;

		if(flag_predict_probability)
		{
			predict_label = predict_probability(model_,x,prob_estimates);
			fprintf(output,"%d ",predict_label);
			for(j=0;j<model_->nr_class;j++)
				fprintf(output,"%g ",prob_estimates[j]);
			fprintf(output,"\n");
		}
		else
		{
			predict_label = predict(model_,x);
			fprintf(output,"%d\n",predict_label);
		}

		if(predict_label == target_label)
			++correct;
		++total;
	}
	printf("Accuracy = %g%% (%d/%d)\n", (double)correct/total*100,correct,total);
	if(flag_predict_probability)
		free(prob_estimates);
}