/*
 * Query the strand SVM with the features (deltaSCI, deltaConsMFE, deltaZ,
 * n_seq, id).
 *
 * The five features are stored as svm_node entries with indices 1..5, scaled
 * in place by scale_strand_decision_node() and passed to strand_model.
 *
 *   prob      out: first probability estimate from svm_predict_probability()
 *   decValue  out: decision value from svm_predict_values()
 */
void predict_strand_SCI_ConsMFE_Z(double *prob, double *decValue, double deltaSCI, double deltaConsMFE, double deltaZ, int n_seq, double id)
{
  struct svm_node node[6];
  double value = 0;
  /* svm_predict_probability() writes one estimate per class, so it must be
     handed an array, not the address of a single double.  Two slots matches
     the buffer size classify() uses -- assumes strand_model is a two-class
     model; TODO confirm. */
  double estimates[2];

  node[0].index = 1; node[0].value = deltaSCI;
  node[1].index = 2; node[1].value = deltaConsMFE;
  node[2].index = 3; node[2].value = deltaZ;
  node[3].index = 4; node[3].value = (double)n_seq;
  node[4].index = 5; node[4].value = id;
  node[5].index = -1;               /* terminator */

  scale_strand_decision_node(node);

  svm_predict_values(strand_model, node, &value);
  *decValue = value;

  /* ATTENTION: if the model parameters are not set correctly (the SVM type
     needs to be C_SVC or nu_SVC), the estimates are not written.  Slot 0 is
     pre-seeded with the decision value so that *prob then carries the same
     value it did when a single shared variable was used. */
  estimates[0] = value;
  estimates[1] = 0.0;
  svm_predict_probability(strand_model, node, estimates);
  *prob = estimates[0];
}
/*
 * Predict every testing instance with `model` and fill the MEX outputs.
 *
 *   prhs[0]  label vector (exactly one column, one row per instance)
 *   prhs[1]  testing instance matrix (dense or sparse, one row per instance)
 *   plhs[0]  predicted labels, one per instance
 *   plhs[1]  3x1 vector: accuracy in percent, mean squared error,
 *            squared correlation coefficient
 *   plhs[2]  probability estimates when predict_probability is non-zero,
 *            decision values otherwise
 *
 * If the label vector has the wrong shape, or the MATLAB "full"/"transpose"
 * call fails, a message is printed, fake_answer(plhs) is called and the
 * function returns without predicting.
 */
void predict(mxArray *plhs[], const mxArray *prhs[], struct svm_model *model, const int predict_probability)
{
	int label_vector_row_num, label_vector_col_num;
	int feature_number, testing_instance_number;
	int instance_index;
	double *ptr_instance, *ptr_label, *ptr_predict_label;
	double *ptr_prob_estimates, *ptr_dec_values, *ptr;
	struct svm_node *x;
	mxArray *pplhs[1]; // transposed instance sparse matrix

	int correct = 0;
	int total = 0;
	double error = 0;
	double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;
	double cov, sq_corr;

	int svm_type=svm_get_svm_type(model);
	int nr_class=svm_get_nr_class(model);
	int nr_pairs = nr_class*(nr_class-1)/2;	// decision values per instance (multi-class)
	int single_dec_value = (svm_type == ONE_CLASS || svm_type == EPSILON_SVR || svm_type == NU_SVR);
	int use_transposed_sparse;	// rows are read from pplhs[0] instead of ptr_instance
	double *prob_estimates=NULL;
	double *dec_values=NULL;

	// prhs[1] = testing instance matrix
	feature_number = (int)mxGetN(prhs[1]);
	testing_instance_number = (int)mxGetM(prhs[1]);
	label_vector_row_num = (int)mxGetM(prhs[0]);
	label_vector_col_num = (int)mxGetN(prhs[0]);

	if(label_vector_row_num!=testing_instance_number)
	{
		mexPrintf("Length of label vector does not match # of instances.\n");
		fake_answer(plhs);
		return;
	}
	if(label_vector_col_num!=1)
	{
		mexPrintf("label (1st argument) should be a vector (# of column is 1).\n");
		fake_answer(plhs);
		return;
	}

	ptr_instance = mxGetPr(prhs[1]);
	ptr_label    = mxGetPr(prhs[0]);

	// Both conditions are loop-invariant, so they are evaluated once here.
	use_transposed_sparse = mxIsSparse(prhs[1]) && model->param.kernel_type != PRECOMPUTED;

	// transpose instance matrix
	if(mxIsSparse(prhs[1]))
	{
		if(model->param.kernel_type == PRECOMPUTED)
		{
			// precomputed kernel requires dense matrix, so we make one
			mxArray *rhs[1], *lhs[1];
			rhs[0] = mxDuplicateArray(prhs[1]);
			if(mexCallMATLAB(1, lhs, 1, rhs, "full"))
			{
				mexPrintf("Error: cannot full testing instance matrix\n");
				fake_answer(plhs);
				return;
			}
			ptr_instance = mxGetPr(lhs[0]);
			mxDestroyArray(rhs[0]);
		}
		else
		{
			mxArray *pprhs[1];
			pprhs[0] = mxDuplicateArray(prhs[1]);
			if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose"))
			{
				mexPrintf("Error: cannot transpose testing instance matrix\n");
				fake_answer(plhs);
				return;
			}
		}
	}

	if(predict_probability)
	{
		if(svm_type==NU_SVR || svm_type==EPSILON_SVR)
			mexPrintf("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g\n",svm_get_svr_probability(model));
		else
			prob_estimates = (double *) malloc(nr_class*sizeof(double));
	}
	else if(!single_dec_value)
	{
		// One scratch buffer for all instances instead of a malloc/free pair per row.
		dec_values = (double *) malloc(sizeof(double) * nr_class*(nr_class-1)/2);
	}

	plhs[0] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL);
	if(predict_probability)
	{
		// prob estimates are in plhs[2]
		if(svm_type==C_SVC || svm_type==NU_SVC)
			plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class, mxREAL);
		else
			plhs[2] = mxCreateDoubleMatrix(0, 0, mxREAL);
	}
	else
	{
		// decision values are in plhs[2]
		if(single_dec_value)
			plhs[2] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL);
		else
			plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_pairs, mxREAL);
	}

	ptr_predict_label = mxGetPr(plhs[0]);
	ptr_prob_estimates = mxGetPr(plhs[2]);
	ptr_dec_values = mxGetPr(plhs[2]);
	x = (struct svm_node*)malloc((feature_number+1)*sizeof(struct svm_node) );
	for(instance_index=0;instance_index<testing_instance_number;instance_index++)
	{
		int i;
		double target_label, predict_label;

		target_label = ptr_label[instance_index];

		if(use_transposed_sparse) // prhs[1]^T is still sparse
			read_sparse_instance(pplhs[0], instance_index, x);
		else
		{
			// dense input is column-major: feature i of this row is at i*rows + row
			for(i=0;i<feature_number;i++)
			{
				x[i].index = i+1;
				x[i].value = ptr_instance[testing_instance_number*i+instance_index];
			}
			x[feature_number].index = -1;
		}

		if(predict_probability)
		{
			if(svm_type==C_SVC || svm_type==NU_SVC)
			{
				predict_label = svm_predict_probability(model, x, prob_estimates);
				ptr_predict_label[instance_index] = predict_label;
				for(i=0;i<nr_class;i++)
					ptr_prob_estimates[instance_index + i * testing_instance_number] = prob_estimates[i];
			}
			else
			{
				predict_label = svm_predict(model,x);
				ptr_predict_label[instance_index] = predict_label;
			}
		}
		else
		{
			predict_label = svm_predict(model,x);
			ptr_predict_label[instance_index] = predict_label;

			if(single_dec_value)
			{
				double res;
				svm_predict_values(model, x, &res);
				ptr_dec_values[instance_index] = res;
			}
			else
			{
				svm_predict_values(model, x, dec_values);
				for(i=0;i<nr_pairs;i++)
					ptr_dec_values[instance_index + i * testing_instance_number] = dec_values[i];
			}
		}

		if(predict_label == target_label)
			++correct;
		error += (predict_label-target_label)*(predict_label-target_label);
		sump += predict_label;
		sumt += target_label;
		sumpp += predict_label*predict_label;
		sumtt += target_label*target_label;
		sumpt += predict_label*target_label;
		++total;
	}

	// squared correlation coefficient, used for both the printout and plhs[1]
	cov = total*sumpt-sump*sumt;
	sq_corr = (cov*cov)/
		((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt));

	// Only regression results are printed; classification accuracy is
	// reported through plhs[1] alone.
	if(svm_type==NU_SVR || svm_type==EPSILON_SVR)
	{
		mexPrintf("Mean squared error = %g (regression)\n",error/total);
		mexPrintf("Squared correlation coefficient = %g (regression)\n", sq_corr);
	}

	// return accuracy, mean squared error, squared correlation coefficient
	plhs[1] = mxCreateDoubleMatrix(3, 1, mxREAL);
	ptr = mxGetPr(plhs[1]);
	ptr[0] = (double)correct/total*100;
	ptr[1] = error/total;
	ptr[2] = sq_corr;

	free(x);
	free(dec_values);
	free(prob_estimates);
}
void svmpredict (int *decisionvalues, int *probability, double *v, int *r, int *c, int *rowindex, int *colindex, double *coefs, double *rho, int *compprob, double *probA, double *probB, int *nclasses, int *totnSV, int *labels, int *nSV, int *sparsemodel, int *svm_type, int *kernel_type, int *degree, double *gamma, double *coef0, double *x, int *xr, int *xrowindex, int *xcolindex, int *sparsex, double *ret, double *dec, double *prob) { struct svm_model m; struct svm_node ** train; int i; /* set up model */ m.l = *totnSV; m.nr_class = *nclasses; m.sv_coef = (double **) malloc (m.nr_class * sizeof(double*)); for (i = 0; i < m.nr_class - 1; i++) { m.sv_coef[i] = (double *) malloc (m.l * sizeof (double)); memcpy (m.sv_coef[i], coefs + i*m.l, m.l * sizeof (double)); } if (*sparsemodel > 0) m.SV = transsparse(v, *r, rowindex, colindex); else m.SV = sparsify(v, *r, *c); m.rho = rho; m.probA = probA; m.probB = probB; m.label = labels; m.nSV = nSV; /* set up parameter */ m.param.svm_type = *svm_type; m.param.kernel_type = *kernel_type; m.param.degree = *degree; m.param.gamma = *gamma; m.param.coef0 = *coef0; m.param.probability = *compprob; m.free_sv = 1; /* create sparse training matrix */ if (*sparsex > 0) train = transsparse(x, *xr, xrowindex, xcolindex); else train = sparsify(x, *xr, *c); /* call svm-predict-function for each x-row, possibly using probability estimator, if requested */ if (*probability && svm_check_probability_model(&m)) { for (i = 0; i < *xr; i++) ret[i] = svm_predict_probability(&m, train[i], prob + i * *nclasses); } else { for (i = 0; i < *xr; i++) ret[i] = svm_predict(&m, train[i]); } /* optionally, compute decision values */ if (*decisionvalues) for (i = 0; i < *xr; i++) svm_predict_values(&m, train[i], dec + i * *nclasses * (*nclasses - 1) / 2); /* clean up memory */ for (i = 0; i < *xr; i++) free (train[i]); free (train); for (i = 0; i < *r; i++) free (m.SV[i]); free (m.SV); for (i = 0; i < m.nr_class - 1; i++) free(m.sv_coef[i]); free(m.sv_coef); 
}
/* Round x to `decimals` decimal places by printing it with "%.<decimals>f"
   and parsing the text back.  The buffer is large enough for any finite
   double with up to five decimals, and snprintf bounds the write. */
static double classify_round(double x, int decimals)
{
  char buf[320];

  snprintf(buf, sizeof buf, "%.*f", decimals, x);
  return strtod(buf, NULL);
}

/* Query the model with an already scaled node and store the first decision
   value and the first probability estimate. */
static void classify_predict(double *prob, double *decValue,
                             struct svm_model *decision_model,
                             struct svm_node *node)
{
  double value[2] = {0.0, 0.0};

  svm_predict_values(decision_model, node, value);
  *decValue = value[0];
  svm_predict_probability(decision_model, node, value);
  *prob = value[0];
}

/* Shared body of the two dinucleotide models (types 2 and 3), which differ
   only in the limits the rounded z-score and SCI are clamped to. */
static void classify_dinucleotide(double *prob, double *decValue,
                                  struct svm_model *decision_model,
                                  double z, double sci, double entropy,
                                  int decision_model_type,
                                  double z_min, double z_max, double sci_max)
{
  struct svm_node node[4];
  int i;

  /* For training we used the z-score and the SCI rounded to two decimal
     places. To make results comparable we do it also here. */
  double z_tmp = classify_round(z, 2);
  double sci_tmp = classify_round(sci, 2);

  /* In some rare cases it might happen that z-score and SCI are out of the
     training range. In these cases they are just set to the maximum or
     minimum. */
  if (z_tmp > z_max) z_tmp = z_max;
  if (z_tmp < z_min) z_tmp = z_min;
  if (sci_tmp > sci_max) sci_tmp = sci_max;

  /* construct SVM node and scale parameters */
  node[0].index = 1; node[0].value = z_tmp;
  node[1].index = 2; node[1].value = sci_tmp;
  node[2].index = 3; node[2].value = entropy;
  node[3].index = -1;

  scale_decision_node(node, decision_model_type);

  /* For training we used scaled variables rounded to five decimal places.
     To make results comparable we do it also here. */
  for (i = 0; i < 3; i++)
    node[i].value = classify_round(node[i].value, 5);

  /* Now predict decision value and probability */
  classify_predict(prob, decValue, decision_model, node);
}

/*
 * Classify an alignment with the decision SVM.
 *
 *   prob                 out: first probability estimate
 *   decValue             out: first decision value
 *   decision_model       model to query
 *   id, n_seq            used only by model type 1
 *   z, sci               z-score and SCI (all model types)
 *   entropy              used only by model types 2 and 3
 *   decision_model_type  1: normal model as used in RNAz 1.0
 *                        2: dinucleotide model for sequence based alignments
 *                        3: dinucleotide model for structural alignments
 *                           generated by locarnate
 *
 * For any other decision_model_type nothing is computed and the outputs are
 * left untouched.
 */
PRIVATE void classify(double* prob, double* decValue, struct svm_model* decision_model, double id,int n_seq, double z,double sci, double entropy, int decision_model_type){

  switch (decision_model_type) {

  case 1: {
    /* normal model as used in RNAz 1.0 */
    struct svm_node node[5];

    node[0].index = 1; node[0].value = z;
    node[1].index = 2; node[1].value = sci;
    node[2].index = 3; node[2].value = id;
    node[3].index = 4; node[3].value = n_seq;
    node[4].index = -1;

    scale_decision_node(node, decision_model_type);
    classify_predict(prob, decValue, decision_model, node);
    break;
  }

  case 2:
    /* dinucleotide model for sequence based alignments */
    classify_dinucleotide(prob, decValue, decision_model, z, sci, entropy,
                          decision_model_type, -8.15, 2.01, 1.29);
    break;

  case 3:
    /* dinucleotide model for structural alignments generated by locarnate */
    classify_dinucleotide(prob, decValue, decision_model, z, sci, entropy,
                          decision_model_type, -8.13, 2.01, 1.31);
    break;

  default:
    break;
  }
}
/*
 * Cross-validation for a two-class problem, collecting one decision value
 * per held-out instance.
 *
 * When nr_fold > 1:
 *   - the instance indices 0..prob->l-1 are shuffled with rand() and cut
 *     into nr_fold contiguous folds (fold i covers positions
 *     [i*l/nr_fold, (i+1)*l/nr_fold) of the permutation);
 *   - for each fold a sub-problem made of all instances outside the fold is
 *     trained with svm_train();
 *   - the process exits if the trained model is NU_SVR / EPSILON_SVR or has
 *     more than two classes;
 *   - for every position j inside the fold, dec_values[j] receives the
 *     decision value of instance perm[j] and ty[j] is +1 when its target is
 *     > 0, otherwise -1;
 *   - the fold's decision values are negated when the model's first label
 *     is <= 0, so that their sign refers to the positive class;
 *   - the sub-model, the sub-problem arrays and the label array are freed
 *     after each fold.
 *
 * NOTE(review): the definition visible here ends after the nr_fold > 1
 * branch, with no closing brace for the function and no return statement --
 * the remainder of the function appears to be missing from this excerpt.
 */
double binary_class_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold) { dvec_t dec_values; ivec_t ty; int *labels; if (nr_fold > 1) { int i; int *fold_start = Malloc(int,nr_fold+1); int l = prob->l; int *perm = Malloc(int,l); for(i=0;i<l;i++) perm[i]=i; for(i=0;i<l;i++) { int j = i+rand()%(l-i); std::swap(perm[i],perm[j]); } for(i=0;i<=nr_fold;i++) fold_start[i]=i*l/nr_fold; for(i=0;i<nr_fold;i++) { int begin = fold_start[i]; int end = fold_start[i+1]; int j,k; struct svm_problem subprob; subprob.l = l-(end-begin); subprob.x = Malloc(struct svm_node*,subprob.l); subprob.y = Malloc(double,subprob.l); k=0; for(j=0;j<begin;j++) { subprob.x[k] = prob->x[perm[j]]; subprob.y[k] = prob->y[perm[j]]; ++k; } for(j=end;j<l;j++) { subprob.x[k] = prob->x[perm[j]]; subprob.y[k] = prob->y[perm[j]]; ++k; } struct svm_model *submodel = svm_train(&subprob,param); int svm_type = svm_get_svm_type(submodel); if(svm_type == NU_SVR || svm_type == EPSILON_SVR){ fprintf(stderr, "wrong svm type"); exit(1); } labels = Malloc(int, svm_get_nr_class(submodel)); svm_get_labels(submodel, labels); if(svm_get_nr_class(submodel) > 2) { fprintf(stderr,"Error: the number of class is not equal to 2\n"); exit(-1); } dec_values.resize(end); ty.resize(end); for(j=begin;j<end;j++) { svm_predict_values(submodel,prob->x[perm[j]], &dec_values[j]); ty[j] = (prob->y[perm[j]] > 0)? 1: -1; } if(labels[0] <= 0) { for(j=begin;j<end;j++) dec_values[j] *= -1; } svm_free_and_destroy_model(&submodel); free(subprob.x); free(subprob.y); free(labels); } free(perm); free(fold_start); }
void binary_class_predict(FILE *input, FILE *output){ int total = 0; int *labels; int max_nr_attr = 64; struct svm_node *x = Malloc(struct svm_node, max_nr_attr); dvec_t dec_values; ivec_t true_labels; int svm_type=svm_get_svm_type(model); if (svm_type==NU_SVR || svm_type==EPSILON_SVR){ fprintf(stderr, "wrong svm type."); exit(1); } labels = Malloc(int, svm_get_nr_class(model)); svm_get_labels(model, labels); max_line_len = 1024; line = (char *)malloc(max_line_len*sizeof(char)); while(readline(input) != NULL) { int i = 0; double target_label, predict_label; char *idx, *val, *label, *endptr; int inst_max_index = -1; // strtol gives 0 if wrong format, and precomputed kernel has <index> start from 0 label = strtok(line," \t"); target_label = strtod(label,&endptr); if(endptr == label) exit_input_error(total+1); while(1) { if(i>=max_nr_attr - 2) // need one more for index = -1 { max_nr_attr *= 2; x = (struct svm_node *) realloc(x,max_nr_attr*sizeof(struct svm_node)); } idx = strtok(NULL,":"); val = strtok(NULL," \t"); if(val == NULL) break; errno = 0; x[i].index = (int) strtol(idx,&endptr,10); if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index) exit_input_error(total+1); else inst_max_index = x[i].index; errno = 0; x[i].value = strtod(val,&endptr); if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr))) exit_input_error(total+1); ++i; } x[i].index = -1; predict_label = svm_predict(model,x); fprintf(output,"%g\n",predict_label); double dec_value; svm_predict_values(model, x, &dec_value); true_labels.push_back((target_label > 0)? 1: -1); if(labels[0] <= 0) dec_value *= -1; dec_values.push_back(dec_value); } // validation_function(dec_values, true_labels); accuracy(dec_values, true_labels); bac(dec_values, true_labels); free(labels); free(x); }
/*
 * Predict one query vector and return the outcome as a JavaScript object.
 *
 * The returned object carries
 *   "id"          the caller-supplied id, unchanged
 *   "dec_val"     decision value from svm_predict_values()
 *   "predict"     value returned by svm_predict_values()
 *   "probability" (only when this->predict_probability is set) an object
 *                 mapping each class label to its probability estimate
 *
 *   m   model to query
 *   p   query vector
 *   nr  number of classes; sizes the label and probability buffers
 *       (assumed to equal the class count of this->model -- TODO confirm)
 *
 * When m or p is NULL the problem is reported on std::cerr and an empty
 * object is returned.
 */
v8::Local<v8::Object> SVMPredict::predict_single (
    const struct svm_model* m,
    const struct svm_node* p,
    v8::Local<v8::Value> id,
    int nr)
{
#if defined(PROCESS_DEBUG)
  log("is called.");
#endif
  v8::Local<v8::Object> Result = v8::Object::New(isolate);

  if (m==NULL) {
    std::cerr<<"The model variable is NULL\n";
  }
  if (p == NULL) {
    std::cerr<<"The query vector variable is NULL\n";
  }
  if (m == NULL || p == NULL) {
    // nothing can be predicted; do not hand NULL to libsvm
    return Result;
  }

  double dec_values = std::numeric_limits<double>::infinity();
  double pred_result = std::numeric_limits<double>::infinity();

#if defined(PROCESS_DEBUG)
  log(" Ready to call local function predict");
#endif
  pred_result = svm_predict_values(m, p, &dec_values);
  Result->Set(v8::String::NewFromUtf8(isolate, "id"),id);
  Result->Set(v8::String::NewFromUtf8(isolate, "dec_val"),ToV8<double>(dec_values));
  Result->Set(v8::String::NewFromUtf8(isolate, "predict"),ToV8<double>(pred_result));

  if (this->predict_probability) {
    // The scratch buffers are needed only for probability output and are
    // released before returning.
    double *probab_estimate = (double *) malloc(nr*sizeof(double));
    int *labels = (int *) malloc(nr*sizeof(int));

    if (probab_estimate == NULL || labels == NULL) {
      std::cerr<<"Out of memory while predicting probabilities\n";
    } else {
      svm_get_labels(this->model,labels);
#if defined(PROCESS_DEBUG)
      log(" Ready to call local function predict_prob");
#endif
      this->predict_prob(m, p, &pred_result, probab_estimate);

      v8::Local<v8::Object> prob = v8::Object::New(isolate);
      for(int i=0;i<nr;i++){
        std::string cl = _toString<int>(labels[i],std::dec);
        prob->Set(v8::String::NewFromUtf8(isolate, cl.c_str()),ToV8<double>(probab_estimate[i]));
      }
      Result->Set(v8::String::NewFromUtf8(isolate, "probability"),prob);
    }

    free(labels);
    free(probab_estimate);
  }
  return Result;
}
int main(int argc, char **argv) { model = svm_load_model(argv[1]); double * decision_value = new double[model->l]; int * predict_label = new int[model->l]; int * predict_state = new int[model->l];//1:TP 2:FP 3:TN 4:FN double decval = 0; int num_TP = 0; int num_FP = 0; int num_TN = 0; int num_FN = 0; printf("Reclassify the Support Vectors\n"); for(int i = 0; i < model->l; ++i) { predict_label[i] = svm_predict_values(model, model->SV[i], &decval); decision_value[i] = decval; if(model->sv_coef[0][i] > 0 && predict_label[i] == 1){ predict_state[i] = 1; num_TP ++; } else if(model->sv_coef[0][i] <= 0 && predict_label[i] == 1){ predict_state[i] = 2; num_FP ++; } else if(model->sv_coef[0][i] <= 0 && predict_label[i] == -1){ predict_state[i] = 3; num_TN ++; } else if(model->sv_coef[0][i] > 0 && predict_label[i] == -1){ predict_state[i] = 4; num_FN ++; } } printf("\n TP:%d FP:%d TN:%d FN:%d\n",num_TP,num_FP,num_TN,num_FN); int num_feature = 1250; double *mean_1 = new double[num_feature]; double *mean_2 = new double[num_feature]; double *mean_P = new double[num_feature]; double *mean_N = new double[num_feature]; int *num_total = new int[num_feature]; int *num_P = new int[num_feature]; int *num_N = new int[num_feature]; double *var = new double[num_feature]; double *sigma = new double[num_feature]; double *zscore = new double[num_feature]; for(int i = 0; i < num_feature; ++i) { mean_1[i] = 0; mean_2[i] = 0; mean_P[i] = 0; mean_N[i] = 0; num_total[i] = 0; num_P[i] = 0; num_N[i] = 0; var[i] = 0; sigma [i] = 0; zscore[i] = 0; for(int j = 0; j < model->l; ++j) { int flag = 0; if(predict_state[j] == 1 || predict_state[j] == 3){ for(int k = 0; model->SV[j][k].index != -1; k ++) { if(model->SV[j][k].index == (i + 1)) { flag = 1; num_total[i] ++; mean_1[i] += model->SV[j][k].value; mean_2[i] += model->SV[j][k].value * model->SV[j][k].value; if(model->sv_coef[0][j] > 0){ num_P[i]++; mean_P[i] += model->SV[j][k].value; } else{ num_N[i]++; mean_N[i] += model->SV[j][k].value; } } } 
if(flag == 0){ num_total[i] ++; if(model->sv_coef[0][j] > 0) num_P[i]++; else num_N[i]++; } } } } for(int i = 0; i < num_feature; ++i) { printf("%d: total:%d ",i+1,num_total[i]); if(num_total[i] != 0){ mean_1[i] /= num_total[i]; mean_2[i] /= num_total[i]; printf("mean_1:%f mean_2:%f ",mean_1[i],mean_2[i]); var[i] = mean_2[i] - mean_1[i] * mean_1[i]; sigma[i] = sqrt(var[i]); printf("var:%f sigma:%f ",var[i],sigma[i]); if(num_P[i] != 0){ mean_P[i] /= num_P[i]; printf("mean_P:%f ",mean_P[i]); } if(num_N[i] != 0){ mean_N[i] /= num_N[i]; printf("mean_N:%f ",mean_N[i]); } if(num_P[i] != 0 && num_N[i] != 0 && var[i] != 0 && sigma[i]!= 0){ zscore[i] = (mean_P[i] - mean_N[i])/sigma[i]; printf("zscore:%f ",zscore[i]); } } printf("\n"); } double *zscore_fabs = new double[num_feature]; int num_2 = 0; int num_3 = 0; int num_4 = 0; int num_5 = 0; int num_7 = 0; int num_10 = 0; int j = 0; for(int i = 0; i < num_feature; ++i){ zscore_fabs[i] = fabs(zscore[i]); if(zscore_fabs[i] >= 0.2) num_2 ++; if(zscore_fabs[i] >= 0.3) num_3 ++; if(zscore_fabs[i] >= 0.4) num_4 ++; if(zscore_fabs[i] >= 0.5) num_5 ++; if(zscore_fabs[i] >= 0.7) num_7 ++; if(zscore_fabs[i] >= 1.0) num_10 ++; } printf("Number of features whose zscore is\n"); printf(">=0.2:%d\n>=0.3:%d\n>=0.4:%d\n>=0.5:%d\n>=0.7:%d\n>=1.0:%d\n",num_2,num_3,num_4,num_5,num_7,num_10); double *zscore_fabs_select = new double[num_10]; int *idx_select = new int[num_10]; j = 0; for(int i = 0; i < num_feature; ++i){ if(zscore_fabs[i] >= 1.0){ idx_select[j] = i; zscore_fabs_select[j] = zscore_fabs[i]; ++j; } } //for(int i = 0; i < num_10; ++i) // printf("index:%d zscore:%f total:%d P:%d N:%d mean_1:%f mean_2:%f var:%f sigma:%f mean_P:%f mean_N:%f\n",idx_select[i],zscore[idx_select[i]],num_total[idx_select[i]],num_P[idx_select[i]],num_N[idx_select[i]],mean_1[idx_select[i]],mean_2[idx_select[i]],var[idx_select[i]],sigma[idx_select[i]],mean_P[idx_select[i]],mean_N[idx_select[i]]); printf("\n"); quicksort(zscore_fabs_select, idx_select, 0, num_10 
- 1); printf("Features whose zscore is bigger than 1.0:\n"); for(int i = num_10 - 1; i >= 0; --i) printf("index:%d zscore:%f mean_1:%f mean_2:%f var:%f sigma:%f mean_P:%f mean_N:%f\n",idx_select[i] + 1,zscore[idx_select[i]],mean_1[idx_select[i]],mean_2[idx_select[i]],var[idx_select[i]],sigma[idx_select[i]],mean_P[idx_select[i]],mean_N[idx_select[i]]); double *threshold = new double[num_10]; double *TPR = new double[num_10]; double *FPR = new double[num_10]; double *ratio = new double[num_10]; printf("Rank the features by TPR/FPR ratio:\n"); for(int i = 0; i < num_10; ++i){ double threshold_1 = -1; int TP = 0; int FP = 0; double ratio_1 = 0; int index = idx_select[i]; //printf("index:%d zscore:%f ",index,zscore[index]); if(zscore[index] > 0) { for(double threshold_2 = mean_N[index]; threshold_2 <= mean_P[index] ; threshold_2 += 0.001){ int TP_tmp = 0; int FP_tmp = 0; //ratio = 0; //threshold = -1; for(int j = 0; j < model->l ; ++j){ int flag = 0; if(predict_state[j] == 1){ for(int k = 0; model->SV[j][k].index != -1; k ++){ if(model->SV[j][k].index == (index + 1)){ flag = 1; if(model->SV[j][k].value > threshold_2) TP_tmp++; } } if(flag == 0){ if( 0 > threshold_2) TP_tmp ++; } } else if(predict_state[j] == 3){ for(int k = 0; model->SV[j][k].index != -1; k ++){ if(model->SV[j][k].index == (index + 1)){ flag = 1; if(model->SV[j][k].value > threshold_2) FP_tmp ++; } } if(flag == 0){ if( 0 > threshold_2) FP_tmp ++; } } } //printf("threshold2:%f TP_tmp:%d FP_tmp:%d ratio_tmp:%f ",threshold_2,TP_tmp,FP_tmp,TP_tmp/FP_tmp); if(ratio_1 < (TP_tmp * 1.0 / FP_tmp)){ TP = TP_tmp; FP = FP_tmp; ratio_1 = TP * 1.0 / FP; threshold_1 = threshold_2; } //printf("threshold1:%f TP:%d FP:%d ratio:%f \n",threshold_1,TP,FP,ratio_1); }//for }//if else { for(double threshold_2 = mean_N[index]; threshold_2 >= mean_P[index] ; threshold_2 -= 0.001){ int TP_tmp = 0; int FP_tmp = 0; //ratio = 0; //threshold = -1; for(int j = 0; j < model->l ; ++j){ int flag = 0; if(predict_state[j] == 1){ for(int k 
= 0; model->SV[j][k].index != -1; k ++){ if(model->SV[j][k].index == (index + 1)){ flag = 1; if(model->SV[j][k].value < threshold_2) TP_tmp++; } } if(flag == 0){ if( 0 < threshold_2) TP_tmp ++; } } else if(predict_state[j] == 3){ for(int k = 0; model->SV[j][k].index != -1; k ++){ if(model->SV[j][k].index == (index + 1)){ flag = 1; if(model->SV[j][k].value < threshold_2) FP_tmp ++; } } if(flag == 0){ if( 0 < threshold_2) FP_tmp ++; } } } //printf("threshold2:%f TP_tmp:%d FP_tmp:%d ratio_tmp:%f ",threshold_2,TP_tmp,FP_tmp,TP_tmp * 1.0 /FP_tmp); if(ratio_1 < (TP_tmp * 1.0 / FP_tmp)){ TP = TP_tmp; FP = FP_tmp; ratio_1 = TP * 1.0 / FP; threshold_1 = threshold_2; } //printf("threshold1:%f TP:%d FP:%d ratio:%f \n",threshold_1,TP,FP,ratio_1); }//for }//else //printf("TP:%d FP:%d ",TP,FP); threshold[i] = threshold_1; TPR[i] = TP * 1.0 / num_TP; FPR[i] = FP * 1.0 / num_TN; ratio[i] = ratio_1 * num_TN / num_TP; //printf("threshold:%f TPR:%f FPR:%f ratio:%f \n",threshold[i],TPR[i],FPR[i],ratio[i]); }//for quicksort_2(ratio, idx_select, threshold, TPR, FPR, 0, num_10 - 1); for(int i = num_10 - 1; i >= 0; --i){ printf(" | %d | %f | %f | %f | %f | %f |\n",idx_select[i] + 1,zscore[idx_select[i]],threshold[i],TPR[i],FPR[i],ratio[i]); printf(" +---------+------------+-----------+----------+----------+-----------+\n"); } int **state_P = new int*[num_10]; for(int i = 0; i < num_10; ++i){ state_P[i] = new int[num_TP]; memset(state_P[i], 0, sizeof(** state_P) * (num_TP)); } int **state_N = new int*[num_10]; for(int i = 0; i < num_10; ++i){ state_N[i] = new int[num_TN]; memset(state_N[i], 0, sizeof(** state_N) * (num_TN)); } for(int i = 0; i < num_10 ; ++i){ int count_P = 0; int count_N = 0; int index = idx_select[i]; if(zscore[index] > 0) { for(int j = 0; j < model->l ; ++j){ int flag = 0; if(predict_state[j] == 1){ for(int k = 0; model->SV[j][k].index != -1; k ++){ if(model->SV[j][k].index == (index + 1)){ flag = 1; if(model->SV[j][k].value > threshold[i]) state_P[i][count_P] = 1; } } 
if(flag == 0){ if( 0 > threshold[i]) state_P[i][count_P] = 1; } count_P ++; } else if(predict_state[j] == 3){ for(int k = 0; model->SV[j][k].index != -1; k ++){ if(model->SV[j][k].index == (index + 1)){ flag = 1; if(model->SV[j][k].value > threshold[i]) state_N[i][count_N] = 1; } } if(flag == 0){ if( 0 > threshold[i]) state_N[i][count_N] = 1; } count_N ++; } } }//if else { for(int j = 0; j < model->l ; ++j){ int flag = 0; if(predict_state[j] == 1){ for(int k = 0; model->SV[j][k].index != -1; k ++){ if(model->SV[j][k].index == (index + 1)){ flag = 1; if(model->SV[j][k].value < threshold[i]) state_P[i][count_P] = 1; } } if(flag == 0){ if( 0 < threshold[1]) state_P[i][count_P] = 1; } count_P ++; } else if(predict_state[j] == 3){ for(int k = 0; model->SV[j][k].index != -1; k ++){ if(model->SV[j][k].index == (index + 1)){ flag = 1; if(model->SV[j][k].value < threshold[i]) state_N[i][count_N] = 1; } } if(flag == 0){ if( 0 < threshold[i]) state_N[i][count_N] = 1; } count_N ++; } } }//else }//for printf("\n"); /* for(int i = 0; i < num_10; ++i){ printf("index:%d ",idx_select[i]); double sum_TP = 0; double sum_FP = 0; for(int j = 0; j < num_TP; ++j) sum_TP += state_P[i][j]; printf("TPR_1:%f TPR:%f ",sum_TP * 1.0 / num_TP, TPR[i]); for(int j = 0; j < num_TN; ++j) sum_FP += state_N[i][j]; printf("FPR_1:%f FPR:%f ",sum_FP * 1.0 / num_TN, FPR[i]); } */ int *sum_state_P = new int[num_TP]; memset(sum_state_P, 0, sizeof(*sum_state_P) * (num_TP)); for(int i = 0; i < num_TP; ++i){ for(int j = 0; j < num_10; ++j){ //if(ratio[j] >= 10) sum_state_P[i] += state_P[j][i]; } } int sum_TP = 0; for(int i = 0; i < num_TP; ++i){ if(sum_state_P[i] >= 1) sum_state_P[i] = 1; sum_TP += sum_state_P[i]; } int *sum_state_N = new int[num_TN]; memset(sum_state_N, 0, sizeof(*sum_state_N) * (num_TN)); for(int i = 0; i < num_TN; ++i){ for(int j = 0; j < num_10; ++j){ //if(ratio[j] >= 10) sum_state_N[i] += state_N[j][i]; } } int sum_FP = 0; for(int i = 0; i < num_TN; ++i){ if(sum_state_N[i] >= 1) 
sum_state_N[i] = 1; sum_FP += sum_state_N[i]; } printf("total TP:%d total FP:%d \n", sum_TP, sum_FP); int *rule_state_P = new int[num_TP]; memset(rule_state_P, 0, sizeof(*rule_state_P) * (num_TP)); int *rule_state_N = new int[num_TN]; memset(rule_state_N, 0, sizeof(*rule_state_N) * (num_TN)); int *rule_state = new int[num_10]; memset(rule_state, 0, sizeof(*rule_state) * (num_10)); rule_state[21] = 1; rule_state[20] = 1; rule_state[19] = 1; rule_state[18] = 1; rule_state[16] = 1; rule_state[15] = 1; rule_state[14] = 1; rule_state[10] = 1; int sum_TP_pre = 0; int sum_TP_new = 0; int sum_FP_pre = 0; int sum_FP_new = 0; for(int i = num_10 - 1; i >= 0; --i){ if(rule_state[i] == 1){ sum_TP_new = 0; for(int j = 0; j < num_TP; ++j){ rule_state_P[j] += state_P[i][j]; } for(int j = 0; j < num_TP; ++j){ if(rule_state_P[j] >= 1) rule_state_P[j] = 1; sum_TP_new += rule_state_P[j]; } sum_FP_new = 0; for(int j = 0; j < num_TN; ++j){ rule_state_N[j] += state_N[i][j]; } for(int j = 0; j < num_TN; ++j){ if(rule_state_N[j] >= 1) rule_state_N[j] = 1; sum_FP_new += rule_state_N[j]; } //printf("index:%d new TP:%d new FP:%d \n",idx_select[i],sum_TP_new - sum_TP_pre, sum_FP_new - sum_FP_pre); sum_TP_pre = sum_TP_new; sum_FP_pre = sum_FP_new; } } int rule_TP = sum_TP_new; int rule_FP = sum_FP_new; double rule_TPR = rule_TP * 1.0 / num_TP; double rule_FPR = rule_FP * 1.0 / num_TN; double rule_ratio = rule_TPR / rule_FPR ; struct rule{ int index; double threshold; int state; }; int num_rule = 0; for(int i = 0; i < num_10; i ++) num_rule += rule_state[i]; struct rule *rule_set = new struct rule[num_rule]; int count_rule = 0; for(int i = 0; i < num_10; i ++){ if(rule_state[i] == 1){ rule_set[count_rule].index = idx_select[i] + 1; rule_set[count_rule].threshold = threshold[i]; rule_set[count_rule].state = zscore[idx_select[i]] > 0?1:-1; count_rule++; } } printf("Rule Set:\n"); for(int i = 0; i < num_rule; i ++){ printf("%d %f %d \n",rule_set[i].index,rule_set[i].threshold,rule_set[i].state); } 
printf("TP:%d FP:%d TPR:%f FPR:%f ratio:%f \n",rule_TP,rule_FP,rule_TPR,rule_FPR,rule_ratio); }//end of main