/*
 * read_model: load an SVM-light model from a text model file.
 *
 * modelfile - path to the model file written by svm_learn.
 *
 * Returns a freshly my_malloc()ed MODEL whose support vectors are
 * parsed back into DOC structures (supvec[1..sv_num-1]; index 0 is
 * unused, matching SVM-light's 1-based SV convention).
 *
 * Exits the process on open failure, version mismatch, or parse error.
 * NOTE(review): fscanf()/fgets() return values are not checked, so a
 * truncated model file can leave fields uninitialized — consider
 * verifying each read count.
 */
MODEL *read_model(char *modelfile)
{
  FILE *modelfl;
  long i,queryid,slackid;
  double costfactor;
  long max_sv,max_words,ll,wpos;
  char *line,*comment;
  WORD *words;
  char version_buffer[100];
  MODEL *model;

  if(verbosity>=1) {
    printf("Reading model..."); fflush(stdout);
  }

  /* Pre-scan the file to size the line and word buffers. */
  nol_ll(modelfile,&max_sv,&max_words,&ll); /* scan size of model file */
  max_words+=2;
  ll+=2;

  words = (WORD *)my_malloc(sizeof(WORD)*(max_words+10));
  line = (char *)my_malloc(sizeof(char)*ll);
  model = (MODEL *)my_malloc(sizeof(MODEL));

  if ((modelfl = fopen (modelfile, "r")) == NULL)
  { perror (modelfile); exit (1); }

  /* The first line carries the version string; it must match exactly. */
  fscanf(modelfl,"SVM-light Version %s\n",version_buffer);
  if(strcmp(version_buffer,VERSION)) {
    perror ("Version of model-file does not match version of svm_classify!");
    exit (1);
  }

  /* Fixed header: kernel parameters, then corpus statistics, then b.
     "%*[^\n]\n" skips the trailing human-readable comment on each line. */
  fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.kernel_type);
  fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.poly_degree);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.rbf_gamma);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_lin);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_const);
  fscanf(modelfl,"%[^#]%*[^\n]\n", model->kernel_parm.custom);
  fscanf(modelfl,"%ld%*[^\n]\n", &model->totwords);
  fscanf(modelfl,"%ld%*[^\n]\n", &model->totdoc);
  fscanf(modelfl,"%ld%*[^\n]\n", &model->sv_num);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->b);

  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*model->sv_num);
  model->alpha = (double *)my_malloc(sizeof(double)*model->sv_num);
  model->index=NULL;       /* left NULL; not filled in here */
  model->lin_weights=NULL; /* left NULL; not filled in here */

  /* One support vector per remaining line: alpha followed by the
     sparse feature vector (same format as a training example). */
  for(i=1;i<model->sv_num;i++) {
    fgets(line,(int)ll,modelfl);
    if(!parse_document(line,words,&(model->alpha[i]),&queryid,&slackid,
		       &costfactor,&wpos,max_words,&comment)) {
      printf("\nParsing error while reading model file in SV %ld!\n%s",
	     i,line);
      exit(1);
    }
    model->supvec[i] = create_example(-1,
				      0,0,
				      0.0,
				      create_svector(words,comment,1.0));
  }
  fclose(modelfl);
  my_free(line);
  my_free(words);
  if(verbosity>=1) {
    fprintf(stdout, "OK. (%d support vectors read)\n",(int)(model->sv_num-1));
  }
  return(model);
}
/*
 * read_documents: read all training examples from docfile (SVM-light
 * sparse format, one example per line) into memory.
 *
 * docfile  - input file name.
 * docs     - out: array of parsed examples, my_malloc()ed here.
 * label    - out: target value per example, my_malloc()ed here.
 * totwords - out: highest feature number seen in the file.
 * totdoc   - out: number of examples actually read.
 *
 * Lines starting with '#' are skipped as comments.  Exits the process
 * on open, parse, or feature-range errors.
 */
void read_documents(char *docfile, DOC ***docs, double **label,
		    long int *totwords, long int *totdoc)
{
  char *line,*comment;
  WORD *words;
  long dnum=0,wpos,dpos=0,dneg=0,dunlab=0,queryid,slackid,max_docs;
  long max_words_doc, ll;
  double doc_label,costfactor;
  FILE *docfl;

  if(verbosity>=1) {
    printf("Scanning examples..."); fflush(stdout);
  }
  /* Pre-scan to size the buffers: number of lines, max features per
     line, and the longest line. */
  nol_ll(docfile,&max_docs,&max_words_doc,&ll); /* scan size of input file */
  max_words_doc+=2;
  ll+=2;
  max_docs+=2;
  if(verbosity>=1) {
    printf("done\n"); fflush(stdout);
  }

  (*docs) = (DOC **)my_malloc(sizeof(DOC *)*max_docs);     /* feature vectors */
  (*label) = (double *)my_malloc(sizeof(double)*max_docs); /* target values */
  line = (char *)my_malloc(sizeof(char)*ll);

  if ((docfl = fopen (docfile, "r")) == NULL)
  { perror (docfile); exit (1); }

  words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10));
  if(verbosity>=1) {
    printf("Reading examples into memory..."); fflush(stdout);
  }
  dnum=0; (*totwords)=0;
  while((!feof(docfl)) && fgets(line,(int)ll,docfl)) {
    if(line[0] == '#') continue;  /* line contains comments */
    if(!parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,
		       &wpos,max_words_doc,&comment)) {
      printf("\nParsing error in line %ld!\n%s",dnum,line);
      exit(1);
    }
    (*label)[dnum]=doc_label;
    /* printf("docnum=%ld: Class=%f ",dnum,doc_label); */
    /* Tally class distribution: positive / negative / unlabeled. */
    if(doc_label > 0) dpos++;
    if (doc_label < 0) dneg++;
    if (doc_label == 0) dunlab++;
    /* words[] is terminated by wnum==0, so the last real feature sits
       at wpos-2; track the highest feature number seen so far. */
    if((wpos>1) && ((words[wpos-2]).wnum>(*totwords)))
      (*totwords)=(words[wpos-2]).wnum;
    if((*totwords) > MAXFEATNUM) {
      printf("\nMaximum feature number exceeds limit defined in MAXFEATNUM!\n");
      printf("LINE: %s\n",line);
      exit(1);
    }
    (*docs)[dnum] = create_example(dnum,queryid,slackid,costfactor,
				   create_svector(words,comment,1.0));
    /* printf("\nNorm=%f\n",((*docs)[dnum]->fvec)->twonorm_sq); */
    dnum++;
    if(verbosity>=1) {
      if((dnum % 100) == 0) {
	printf("%ld..",dnum); fflush(stdout);
      }
    }
  }

  fclose(docfl);
  my_free(line);
  my_free(words);
  if(verbosity>=1) {
    fprintf(stdout, "OK. (%ld examples read)\n", dnum);
  }
  (*totdoc)=dnum;
}
/*
 * main: entry point of svm_learn.  Reads the training examples, runs
 * the learner selected by learn_parm.type (classification, regression,
 * ranking, or the perceptron variants), and writes the resulting model
 * to modelfile.
 *
 * Uses file-scope globals docfile, modelfile and verbosity, which are
 * filled in by read_input_parameters().
 *
 * NOTE(review): the read_documents() call below takes eight arguments
 * (including max_words_doc, ll and &kernel_parm); this matches a
 * tree-kernel variant of read_documents(), not the five-argument
 * version defined earlier in this file — verify against the intended
 * prototype.
 */
int main (int argc, char* argv[])
{
  DOC *docs;   /* training examples */
  long max_docs,max_words_doc;
  long totwords,totdoc,ll,i;
  long kernel_cache_size;
  double *target;
  KERNEL_CACHE kernel_cache;
  LEARN_PARM learn_parm;
  KERNEL_PARM kernel_parm;
  MODEL model;

  read_input_parameters(argc,argv,docfile,modelfile,&verbosity,
			&kernel_cache_size,&learn_parm,&kernel_parm);

  if(verbosity>=1) {
    printf("Scanning examples..."); fflush(stdout);
  }
  /* Pre-scan the input file to size the buffers. */
  nol_ll(docfile,&max_docs,&max_words_doc,&ll); /* scan size of input file */
  max_words_doc+=10;
  ll+=10;
  max_docs+=2;
  if(verbosity>=1) {
    printf("done\n"); fflush(stdout);
  }

  docs = (DOC *)my_malloc(sizeof(DOC)*max_docs);         /* feature vectors */
  target = (double *)my_malloc(sizeof(double)*max_docs); /* target values */

  //printf("\nMax docs: %ld, approximated number of feature occurences %ld, maximal length of a line %ld\n\n",max_docs,max_words_doc,ll);

  read_documents(docfile,docs,target,max_words_doc,ll,&totwords,&totdoc,&kernel_parm);
  printf("\nNumber of examples: %ld, linear space size: %ld\n\n",totdoc,totwords);

  //if(kernel_parm.kernel_type==5) totwords=totdoc; // The number of features is proportional to the number of parse-trees, i.e. totdoc
  // or should we still use totwords to approximate svm_maxqpsize for the Tree Kernel (see hideo.c) ???????

  if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */
    if(learn_parm.type == CLASSIFICATION) {
      svm_learn_classification(docs,target,totdoc,totwords,&learn_parm,
			       &kernel_parm,NULL,&model);
    }
    else if(learn_parm.type == REGRESSION) {
      svm_learn_regression(docs,target,totdoc,totwords,&learn_parm,
			   &kernel_parm,NULL,&model);
    }
    else if(learn_parm.type == RANKING) {
      svm_learn_ranking(docs,target,totdoc,totwords,&learn_parm,
			&kernel_parm,NULL,&model);
    }
  }
  else {
    /* Non-linear kernels go through a kernel cache. */
    if(learn_parm.type == CLASSIFICATION) {
      /* Always get a new kernel cache. It is not possible to use the
	 same cache for two different training runs */
      kernel_cache_init(&kernel_cache,totdoc,kernel_cache_size);
      svm_learn_classification(docs,target,totdoc,totwords,&learn_parm,
			       &kernel_parm,&kernel_cache,&model);
      /* Free the memory used for the cache. */
      kernel_cache_cleanup(&kernel_cache);
    }
    else if(learn_parm.type == REGRESSION) {
      /* Always get a new kernel cache. It is not possible to use the
	 same cache for two different training runs */
      kernel_cache_init(&kernel_cache,2*totdoc,kernel_cache_size);
      svm_learn_regression(docs,target,totdoc,totwords,&learn_parm,
			   &kernel_parm,&kernel_cache,&model);
      /* Free the memory used for the cache. */
      kernel_cache_cleanup(&kernel_cache);
    }
    else if(learn_parm.type == RANKING) {
      printf("Learning rankings is not implemented for non-linear kernels in this version!\n");
      exit(1);
    }
    else if(learn_parm.type == PERCEPTRON) {
      perceptron_learn_classification(docs,target,totdoc,totwords,&learn_parm,
				      &kernel_parm,&kernel_cache,&model,modelfile);
    }
    else if(learn_parm.type == PERCEPTRON_BATCH) {
      batch_perceptron_learn_classification(docs,target,totdoc,totwords,&learn_parm,
					    &kernel_parm,kernel_cache_size,&model);
    }
  }

  /* Warning: The model contains references to the original data 'docs'.
     If you want to free the original data, and only keep the model, you
     have to make a deep copy of 'model'. */
  write_model(modelfile,&model);

  free(model.supvec);
  free(model.alpha);
  free(model.index);
  for(i=0;i<totdoc;i++){
    freeExample(&docs[i]);
  }
  free(docs);
  free(target);

  return(0);
}
int main_classify (int argc, char* argv[]) { DOC *doc; /* test example */ WORDSVM *words; long max_docs,max_words_doc,lld; long totdoc=0,queryid,slackid; long correct=0,incorrect=0,no_accuracy=0; long res_a=0,res_b=0,res_c=0,res_d=0,wnum,pred_format; long j; double t1,runtime=0; double dist,doc_label,costfactor; char *line,*comment; FILE *predfl,*docfl; MODEL *model; read_input_parameters(argc,argv,docfile,modelfile,predictionsfile, &verbosity,&pred_format); nol_ll(docfile,&max_docs,&max_words_doc,&lld); /* scan size of input file */ max_words_doc+=2; lld+=2; line = (char *)my_malloc(sizeof(char)*lld); words = (WORDSVM *)my_malloc(sizeof(WORDSVM)*(max_words_doc+10)); model=read_model(modelfile); if(model->kernel_parm.kernel_type == 0) { /* linear kernel */ /* compute weight vector */ add_weight_vector_to_linear_model(model); } if(verbosity>=2) { printf("Classifying test examples.."); fflush(stdout); } if ((docfl = fopen (docfile, "r")) == NULL) { perror (docfile); exit (1); } if ((predfl = fopen (predictionsfile, "w")) == NULL) { perror (predictionsfile); exit (1); } while((!feof(docfl)) && fgets(line,(int)lld,docfl)) { if(line[0] == '#') continue; /* line contains comments */ parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,&wnum, max_words_doc,&comment); totdoc++; if(model->kernel_parm.kernel_type == 0) { /* linear kernel */ for(j=0;(words[j]).wnum != 0;j++) { /* Check if feature numbers */ if((words[j]).wnum>model->totwords) /* are not larger than in */ (words[j]).wnum=0; /* model. Remove feature if */ } /* necessary. 
*/ doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0)); t1=get_runtime(); dist=classify_example_linear(model,doc); runtime+=(get_runtime()-t1); free_example(doc,1); } else { /* non-linear kernel */ doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0)); t1=get_runtime(); dist=classify_example(model,doc); runtime+=(get_runtime()-t1); free_example(doc,1); } if(dist>0) { if(pred_format==0) { /* old weired output format */ fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist); } if(doc_label>0) correct++; else incorrect++; if(doc_label>0) res_a++; else res_b++; } else { if(pred_format==0) { /* old weired output format */ fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist); } if(doc_label<0) correct++; else incorrect++; if(doc_label>0) res_c++; else res_d++; } if(pred_format==1) { /* output the value of decision function */ fprintf(predfl,"%.8g\n",dist); } if((int)(0.01+(doc_label*doc_label)) != 1) { no_accuracy=1; } /* test data is not binary labeled */ if(verbosity>=2) { if(totdoc % 100 == 0) { printf("%ld..",totdoc); fflush(stdout); } } } free(line); free(words); free_model(model,1); if(verbosity>=2) { printf("done\n"); /* Note by Gary Boone Date: 29 April 2000 */ /* o Timing is inaccurate. The timer has 0.01 second resolution. */ /* Because classification of a single vector takes less than */ /* 0.01 secs, the timer was underflowing. */ printf("Runtime (without IO) in cpu-seconds: %.2f\n", (float)(runtime/100.0)); } if((!no_accuracy) && (verbosity>=1)) { printf("Accuracy on test set: %.2f%% (%ld correct, %ld incorrect, %ld total)\n",(float)(correct)*100.0/totdoc,correct,incorrect,totdoc); printf("Precision/recall on test set: %.2f%%/%.2f%%\n",(float)(res_a)*100.0/(res_a+res_b),(float)(res_a)*100.0/(res_a+res_c)); } return(0); }
void SVMLightRunner::libraryReadDocuments ( char *docfile, DOC ***docs, double **label, long int *totwords, long int *totdoc, bool use_gmumr, SVMConfiguration &config ) { LOG( config.log, LogLevel::DEBUG_LEVEL, __debug_prefix__ + ".libraryReadDocuments() Started." ); char *line,*comment; WORD *words; long dnum=0,wpos,dpos=0,dneg=0,dunlab=0,queryid,slackid,max_docs; long max_words_doc, ll; double doc_label,costfactor; FILE *docfl; if(verbosity>=1) { C_PRINTF("Scanning examples..."); C_FFLUSH(stdout); } // GMUM.R changes { if (!use_gmumr) { nol_ll(docfile,&max_docs,&max_words_doc,&ll); /* scan size of input file */ } else { max_docs = config.target.n_rows; max_words_doc = config.getDataDim(); // ll used only for file reading } // GMUM.R changes } max_words_doc+=2; ll+=2; max_docs+=2; if(verbosity>=1) { C_PRINTF("done\n"); C_FFLUSH(stdout); } (*docs) = (DOC **)my_malloc(sizeof(DOC *)*max_docs); /* feature vectors */ (*label) = (double *)my_malloc(sizeof(double)*max_docs); /* target values */ // GMUM.R changes { if (!use_gmumr) { line = (char *)my_malloc(sizeof(char)*ll); if ((docfl = fopen (docfile, "r")) == NULL) { perror (docfile); EXIT (1); } } // GMUM.R changes } words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10)); if(verbosity>=1) { C_PRINTF("Reading examples into memory..."); C_FFLUSH(stdout); } dnum=0; (*totwords)=0; // GMUM.R changes { bool newline; if (!use_gmumr) { newline = (!feof(docfl)) && fgets(line,(int)ll,docfl); } else { newline = false; if (dnum < config.target.n_rows) { newline = true; std::string str = SVMConfigurationToSVMLightLearnInputLine(config, dnum); line = new char[str.size() + 1]; std::copy(str.begin(), str.end(), line); line[str.size()] = '\0'; } } while(newline) { if (use_gmumr) { std::string stringline = ""; } // GMUM.R changes } if(line[0] == '#') continue; /* line contains comments */ if(!parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor, &wpos,max_words_doc,&comment)) { C_PRINTF("\nParsing error in line 
%ld!\n%s",dnum,line); EXIT(1); } (*label)[dnum]=doc_label; /* C_PRINTF("docnum=%ld: Class=%f ",dnum,doc_label); */ if(doc_label > 0) dpos++; if (doc_label < 0) dneg++; if (doc_label == 0) { if(config.use_transductive_learning){ dunlab++; }else{ C_PRINTF("Please for transductive learning pass use_transductive_learning\n"); EXIT(1); } } if((wpos>1) && ((words[wpos-2]).wnum>(*totwords))) (*totwords)=(words[wpos-2]).wnum; if((*totwords) > MAXFEATNUM) { C_PRINTF("\nMaximum feature number exceeds limit defined in MAXFEATNUM!\n"); EXIT(1); } (*docs)[dnum] = create_example(dnum,queryid,slackid,costfactor, create_svector(words,comment,1.0)); /* C_PRINTF("\nNorm=%f\n",((*docs)[dnum]->fvec)->twonorm_sq); */ dnum++; if(verbosity>=1) { if((dnum % 100) == 0) { C_PRINTF("%ld..",dnum); C_FFLUSH(stdout); } } // GMUM.R changes { if (!use_gmumr) { newline = (!feof(docfl)) && fgets(line,(int)ll,docfl); } else { newline = false; if (dnum < config.target.n_rows) { newline = true; std::string str = SVMConfigurationToSVMLightLearnInputLine(config, dnum); line = new char[str.size() + 1]; std::copy(str.begin(), str.end(), line); line[str.size()] = '\0'; } } // GMUM.R changes } } if (!use_gmumr) { fclose(docfl); free(line); }; free(words); if(verbosity>=1) { C_FPRINTF(stdout, "OK. (%ld examples read)\n", dnum); } (*totdoc)=dnum; }
/**
 * libraryReadModel: build an SVM-light MODEL either by parsing a model
 * file (use_gmumr == false; stock SVM-light behaviour) or by converting
 * the in-memory gmum.R SVMConfiguration (use_gmumr == true).
 *
 * Returns a my_malloc()ed MODEL with supvec[1..sv_num-1] filled in
 * (index 0 unused, per SVM-light's 1-based SV convention).  EXITs on
 * I/O, version, or parse errors.
 *
 * FIX: the gmum.R branch previously did
 *     char *model_kernel_parm_custom = model->kernel_parm.custom;
 *     model_kernel_parm_custom = kernel_parm_custom;
 * which only reassigned a local pointer and left
 * model->kernel_parm.custom uninitialized; the placeholder string is
 * now copied into the model's own buffer.
 */
MODEL * SVMLightRunner::libraryReadModel(
    char *modelfile, bool use_gmumr, SVMConfiguration &config
) {
    LOG(
        config.log,
        LogLevel::DEBUG_LEVEL,
        __debug_prefix__ + ".libraryReadModel() Started."
    );
    FILE *modelfl;
    long i,queryid,slackid;
    double costfactor;
    long max_sv,max_words,ll,wpos;
    char *line,*comment;
    WORD *words;
    char version_buffer[100];
    MODEL *model;

    if(verbosity>=1) {
        C_PRINTF("Reading model..."); C_FFLUSH(stdout);
    }

    // GMUM.R changes {
    model = (MODEL *)my_malloc(sizeof(MODEL));
    if (!use_gmumr) {
        /* Pre-scan the file to size the line and word buffers. */
        nol_ll(modelfile,&max_sv,&max_words,&ll); /* scan size of model file */
        max_words+=2;
        ll+=2;

        words = (WORD *)my_malloc(sizeof(WORD)*(max_words+10));
        line = (char *)my_malloc(sizeof(char)*ll);

        if ((modelfl = fopen (modelfile, "r")) == NULL)
        { perror (modelfile); EXIT (1); }

        fscanf(modelfl,"SVM-light Version %s\n",version_buffer);
        if(strcmp(version_buffer,VERSION)) {
            perror ("Version of model-file does not match version of svm_classify!");
            EXIT (1);
        }
        /* Fixed header: kernel parameters, corpus statistics, then b.
           "%*[^\n]\n" skips the trailing comment on each line. */
        fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.kernel_type);
        fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.poly_degree);
        fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.rbf_gamma);
        fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_lin);
        fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_const);
        fscanf(modelfl,"%[^#]%*[^\n]\n", model->kernel_parm.custom);
        fscanf(modelfl,"%ld%*[^\n]\n", &model->totwords);
        fscanf(modelfl,"%ld%*[^\n]\n", &model->totdoc);
        fscanf(modelfl,"%ld%*[^\n]\n", &model->sv_num);
        fscanf(modelfl,"%lf%*[^\n]\n", &model->b);
    } else { // use_gmumr
        max_words = config.getDataDim();
        words = (WORD *)my_malloc(sizeof(WORD)*(max_words+10));

        LOG(
            config.log,
            LogLevel::DEBUG_LEVEL,
            __debug_prefix__ + ".libraryReadModel() Converting config to model..."
        );

        /* 0=linear, 1=poly, 2=rbf, 3=sigmoid, 4=custom -- same as GMUM.R! */
        model->kernel_parm.kernel_type = static_cast<long int>(config.kernel_type);
        // -d int -> parameter d in polynomial kernel
        model->kernel_parm.poly_degree = config.degree;
        // -g float -> parameter gamma in rbf kernel
        model->kernel_parm.rbf_gamma = config.gamma;
        // -s float -> parameter s in sigmoid/poly kernel
        model->kernel_parm.coef_lin = config.gamma;
        // -r float -> parameter c in sigmoid/poly kernel
        model->kernel_parm.coef_const = config.coef0;
        /* -u string -> parameter of user defined kernel.
         * FIX: copy the placeholder into the model's buffer (the old
         * pointer-assignment left the field uninitialized). */
        strcpy(model->kernel_parm.custom, "empty");
        // highest feature index
        model->totwords = config.getDataDim();
        // number of training documents
        model->totdoc = config.target.n_rows;
        // number of support vectors plus 1 (!)
        model->sv_num = config.l + 1;
        /* Threshold b (has opposite sign than SVMClient::predict())
         * In svm_common.c:57 in double classify_example_linear():
         *     return(sum-model->b);
         */
        model->b = - config.b;

        LOG(
            config.log,
            LogLevel::DEBUG_LEVEL,
            __debug_prefix__ + ".libraryReadModel() Converting config done."
        );
    }
    // GMUM.R changes }

    model->supvec = (DOC **)my_malloc(sizeof(DOC *)*model->sv_num);
    model->alpha = (double *)my_malloc(sizeof(double)*model->sv_num);
    model->index=NULL;       /* left NULL; not filled in here */
    model->lin_weights=NULL; /* left NULL; not filled in here */

    // GMUM.R changes {
    if (!use_gmumr) {
        /* One support vector per remaining line: alpha followed by the
           sparse feature vector. */
        for(i=1;i<model->sv_num;i++) {
            fgets(line,(int)ll,modelfl);
            if(!parse_document(line,words,&(model->alpha[i]),&queryid,&slackid,
                               &costfactor,&wpos,max_words,&comment)) {
                C_PRINTF("\nParsing error while reading model file in SV %ld!\n%s",
                         i,line);
                EXIT(1);
            }
            model->supvec[i] = create_example(-1, 0,0, 0.0,
                                              create_svector(words,comment,1.0));
        }
        fclose(modelfl);
        free(line);
    } else {
        /* Reconstruct each support vector line from the configuration. */
        for(i = 1; i < model->sv_num; ++i) {
            line = SVMConfigurationToSVMLightModelSVLine(config, i-1);
            if(!parse_document(line,words,&(model->alpha[i]),&queryid,&slackid,
                               &costfactor,&wpos,max_words,&comment)) {
                C_PRINTF("\nParsing error while reading model file in SV %ld!\n%s",
                         i,line);
                EXIT(1);
            }
            model->supvec[i] = create_example(-1, 0,0, 0.0,
                                              create_svector(words,comment,1.0));
            free(line);
        }
    }
    // GMUM.R changes }
    free(words);

    if(verbosity>=1) {
        C_FPRINTF(stdout, "OK. (%d support vectors read)\n",(int)(model->sv_num-1));
    }
    LOG(
        config.log,
        LogLevel::DEBUG_LEVEL,
        __debug_prefix__ + ".libraryReadModel() Done."
    );
    return(model);
}
int SVMLightRunner::librarySVMClassifyMain( int argc, char **argv, bool use_gmumr, SVMConfiguration &config ) { LOG( config.log, LogLevel::DEBUG_LEVEL, __debug_prefix__ + ".librarySVMClassifyMain() Started." ); DOC *doc; /* test example */ WORD *words; long max_docs,max_words_doc,lld; long totdoc=0,queryid,slackid; long correct=0,incorrect=0,no_accuracy=0; long res_a=0,res_b=0,res_c=0,res_d=0,wnum,pred_format; long j; double t1,runtime=0; double dist,doc_label,costfactor; char *line,*comment; FILE *predfl,*docfl; MODEL *model; // GMUM.R changes { librarySVMClassifyReadInputParameters( argc, argv, docfile, modelfile, predictionsfile, &verbosity, &pred_format, use_gmumr, config); if (!use_gmumr) { nol_ll(docfile,&max_docs,&max_words_doc,&lld); /* scan size of input file */ lld+=2; line = (char *)my_malloc(sizeof(char)*lld); } else { max_docs = config.target.n_rows; max_words_doc = config.getDataDim(); config.result = arma::zeros<arma::vec>(max_docs); // Prevent writing to the file pred_format = -1; // lld used only for file reading } max_words_doc+=2; words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10)); // GMUM.R changes } model=libraryReadModel(modelfile, use_gmumr, config); // GMUM.R changes } if(model->kernel_parm.kernel_type == 0) { /* linear kernel */ /* compute weight vector */ add_weight_vector_to_linear_model(model); } if(verbosity>=2) { C_PRINTF("Classifying test examples.."); C_FFLUSH(stdout); } // GMUM.R changes { bool newline; if (!use_gmumr) { if ((predfl = fopen (predictionsfile, "w")) == NULL) { perror (predictionsfile); EXIT (1); } if ((docfl = fopen (docfile, "r")) == NULL) { perror (docfile); EXIT (1); } newline = (!feof(docfl)) && fgets(line,(int)lld,docfl); } else { newline = false; if (totdoc < config.getDataExamplesNumber()) { newline = true; std::string str = SVMConfigurationToSVMLightLearnInputLine(config, totdoc); line = new char[str.size() + 1]; std::copy(str.begin(), str.end(), line); line[str.size()] = '\0'; } } while(newline) { if 
(use_gmumr) { std::string stringline = ""; } // GMUM.R changes } if(line[0] == '#') continue; /* line contains comments */ parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,&wnum, max_words_doc,&comment); totdoc++; if(model->kernel_parm.kernel_type == 0) { /* linear kernel */ for(j=0;(words[j]).wnum != 0;j++) { /* Check if feature numbers */ if((words[j]).wnum>model->totwords) /* are not larger than in */ (words[j]).wnum=0; /* model. Remove feature if */ } /* necessary. */ doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0)); t1=get_runtime(); dist=classify_example_linear(model,doc); runtime+=(get_runtime()-t1); free_example(doc,1); } else { /* non-linear kernel */ doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0)); t1=get_runtime(); dist=classify_example(model,doc); runtime+=(get_runtime()-t1); free_example(doc,1); } if(dist>0) { if(pred_format==0) { /* old weired output format */ C_FPRINTF(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist); } if(doc_label>0) correct++; else incorrect++; if(doc_label>0) res_a++; else res_b++; } else { if(pred_format==0) { /* old weired output format */ C_FPRINTF(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist); } if(doc_label<0) correct++; else incorrect++; if(doc_label>0) res_c++; else res_d++; } if(pred_format==1) { /* output the value of decision function */ C_FPRINTF(predfl,"%.8g\n",dist); } if((int)(0.01+(doc_label*doc_label)) != 1) { no_accuracy=1; } /* test data is not binary labeled */ if(verbosity>=2) { if(totdoc % 100 == 0) { C_PRINTF("%ld..",totdoc); C_FFLUSH(stdout); } } // GMUM.R changes { if (!use_gmumr) { newline = (!feof(docfl)) && fgets(line,(int)lld,docfl); } else { newline = false; // Store prediction result in config config.result[totdoc-1] = dist; // Read next line if (totdoc < config.getDataExamplesNumber()) { newline = true; std::string str = SVMConfigurationToSVMLightLearnInputLine(config, totdoc); line = new char[str.size() + 1]; std::copy(str.begin(), str.end(), line); 
line[str.size()] = '\0'; } } } if (!use_gmumr) { fclose(predfl); fclose(docfl); free(line); } // GMUM.R changes } free(words); free_model(model,1); if(verbosity>=2) { C_PRINTF("done\n"); /* Note by Gary Boone Date: 29 April 2000 */ /* o Timing is inaccurate. The timer has 0.01 second resolution. */ /* Because classification of a single vector takes less than */ /* 0.01 secs, the timer was underflowing. */ C_PRINTF("Runtime (without IO) in cpu-seconds: %.2f\n", (float)(runtime/100.0)); } if((!no_accuracy) && (verbosity>=1)) { C_PRINTF("Accuracy on test set: %.2f%% (%ld correct, %ld incorrect, %ld total)\n",(float)(correct)*100.0/totdoc,correct,incorrect,totdoc); C_PRINTF("Precision/recall on test set: %.2f%%/%.2f%%\n",(float)(res_a)*100.0/(res_a+res_b),(float)(res_a)*100.0/(res_a+res_c)); } return(0); }
struct Set *load_svmlight_name(struct PARAMS *P, char *fname){ FILE *fp; char *line; long int i,num_lines,num_attribs,ll,total; struct Set *S; ldiv_t tmp; int v; char *c_id; if(P->partition==1){ P->part=(long int**)malloc(sizeof(long int)*P->numViews); for(v=0;v<P->numViews;v++)P->part[v]=(long int*)malloc(sizeof(long int)*MAX_ATTRIBS_PER_VIEW); for(v=0;v<P->numViews;v++)for(i=0;i<MAX_ATTRIBS_PER_VIEW;i++)P->part[v][i]=0; //read partition file nol_ll("partition.coem",&num_lines,&num_attribs,&ll); line=(char*)malloc(sizeof(char)*(2+ll)); printf("reading partition.coem...views ");fflush(stdout); num_lines--; if(P->numViews!=num_lines)halt("are not corresponding!\n"); fp=fopen("partition.coem","r"); for(v=0;v<num_lines;v++){ printf("[%d]",v); fgets(line,(int)ll+2,fp); c_id=strtok(line," ,"); i=0; do{ if(atol(c_id)!=0){ P->part[v][i]=atol(c_id); //printf("(%ld)",P->part[v][i]); i++; } }while ((c_id=strtok(NULL,", "))!=0); } fclose(fp); free(line); printf("...done\n"); } nol_ll(fname,&num_lines,&num_attribs,&ll); line=(char*)malloc(sizeof(char)*(ll+2)); S=(struct Set*)malloc(sizeof(struct Set)); S->maxId = 0; S->example=(struct Example**)malloc(sizeof(struct Example*)*num_lines); if (!(fp=fopen(fname,"r"))) halt("cannot open fv file"); total=0; for(i=0;i<=num_lines;i++){ if(fgets(line,(int)ll+2,fp)) { if (*line!='\n'){ S->example[total]=parse_svmlight(line, P); if(S->example[total]->View[0]->dim > S->maxId) S->maxId = S->example[total]->View[0]->dim; S->example[total]->nr=total; total++; } tmp=ldiv(total,100); /* if (tmp.rem==0){ printf("..."); printf("%ld",total); fflush(stdout); } */ } } //if(tmp.rem!=0)printf("...%ld",total); //printf(" examples read.\n"); fflush(stdout); S->N=total; free(line); fclose(fp); S->num_labeled = total; S->num_unlabeled=0; S->num_positive=0; S->num_negative=0; if(P->partition==1){ //free mem for(v=0;v<P->numViews;v++)free(P->part[v]); free(P->part); } return S; }
/*
 * read_struct_model: reads the structural model sm from 'file'.  Used
 * only by the prediction module, not by the learning module.
 *
 * The file starts with an SVM-multiclass version line, three
 * struct-specific header fields (num_classes, num_features,
 * loss_function), the standard SVM-light kernel/model header, and one
 * support vector per remaining line.
 *
 * NOTE(review): the first three fscanf()s use "%d"; this is only
 * correct if num_classes / num_features / loss_function are declared
 * int in STRUCT_LEARN_PARM — verify the field types.
 * NOTE(review): fscanf()/fgets() return values are unchecked, so a
 * truncated file can leave fields uninitialized.
 */
STRUCTMODEL read_struct_model(char *file, STRUCT_LEARN_PARM *sparm)
{
  /* Reads structural model sm from file file. This function is used
     only in the prediction module, not in the learning module. */
  FILE *modelfl;
  STRUCTMODEL sm;
  long i,queryid,slackid;
  double costfactor;
  long max_sv,max_words,ll,wpos;
  char *line,*comment;
  TOKEN *words;
  char version_buffer[100];
  MODEL *model;

  /* Pre-scan the file to size the line and token buffers. */
  nol_ll(file,&max_sv,&max_words,&ll); /* scan size of model file */
  max_words+=2;
  ll+=2;

  words = (TOKEN *)my_malloc(sizeof(TOKEN)*(max_words+10));
  line = (char *)my_malloc(sizeof(char)*ll);
  model = (MODEL *)my_malloc(sizeof(MODEL));

  if ((modelfl = fopen (file, "r")) == NULL)
  { perror (file); exit (1); }

  /* Version line must match this build exactly. */
  fscanf(modelfl,"SVM-multiclass Version %s\n",version_buffer);
  if(strcmp(version_buffer,INST_VERSION)) {
    perror ("Version of model-file does not match version of svm_struct_classify!");
    exit (1);
  }

  /* Struct-specific header fields come first... */
  fscanf(modelfl,"%d%*[^\n]\n", &sparm->num_classes);
  fscanf(modelfl,"%d%*[^\n]\n", &sparm->num_features);
  fscanf(modelfl,"%d%*[^\n]\n", &sparm->loss_function);
  /* ...followed by the standard SVM-light model header.
     "%*[^\n]\n" skips the trailing comment on each line. */
  fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.kernel_type);
  fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.poly_degree);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.rbf_gamma);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_lin);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_const);
  fscanf(modelfl,"%[^#]%*[^\n]\n", model->kernel_parm.custom);
  fscanf(modelfl,"%ld%*[^\n]\n", &model->totwords);
  fscanf(modelfl,"%ld%*[^\n]\n", &model->totdoc);
  fscanf(modelfl,"%ld%*[^\n]\n", &model->sv_num);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->b);

  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*model->sv_num);
  model->alpha = (double *)my_malloc(sizeof(double)*model->sv_num);
  model->index=NULL;       /* left NULL; not filled in here */
  model->lin_weights=NULL; /* left NULL; not filled in here */

  /* One support vector per line: alpha followed by the sparse vector.
     This parse_document() variant takes an extra trailing flag. */
  for(i=1;i<model->sv_num;i++) {
    fgets(line,(int)ll,modelfl);
    if(!parse_document(line,words,&(model->alpha[i]),&queryid,&slackid,
		       &costfactor,&wpos,max_words,&comment, true)) {
      printf("\nParsing error while reading model file in SV %ld!\n%s",
	     i,line);
      exit(1);
    }
    model->supvec[i] = create_example(-1,0,0,0.0,
				      create_svector(words,comment,1.0));
    /* stash the parsed query id on the support vector's feature vector */
    model->supvec[i]->fvec->kernel_id=queryid;
  }
  fclose(modelfl);
  free(line);
  free(words);
  if(verbosity>=1) {
    fprintf(stdout, " (%d support vectors read) ",(int)(model->sv_num-1));
  }
  sm.svm_model=model;
  sm.sizePsi=model->totwords; /* sizePsi mirrors the model's feature count */
  sm.w=NULL;
  return(sm);
}