Exemple #1
0
MODEL *read_model(char *modelfile)
{
  FILE *modelfl;
  long i,queryid,slackid;
  double costfactor;
  long max_sv,max_words,ll,wpos;
  char *line,*comment;
  WORD *words;
  char version_buffer[100];
  MODEL *model;

  if(verbosity>=1) {
    printf("Reading model..."); fflush(stdout);
  }

  nol_ll(modelfile,&max_sv,&max_words,&ll); /* scan size of model file */
  max_words+=2;
  ll+=2;

  words = (WORD *)my_malloc(sizeof(WORD)*(max_words+10));
  line = (char *)my_malloc(sizeof(char)*ll);
  model = (MODEL *)my_malloc(sizeof(MODEL));

  if ((modelfl = fopen (modelfile, "r")) == NULL)
  { perror (modelfile); exit (1); }

  fscanf(modelfl,"SVM-light Version %s\n",version_buffer);
  if(strcmp(version_buffer,VERSION)) {
    perror ("Version of model-file does not match version of svm_classify!"); 
    exit (1); 
  }
  fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.kernel_type);  
  fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.poly_degree);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.rbf_gamma);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_lin);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_const);
  fscanf(modelfl,"%[^#]%*[^\n]\n", model->kernel_parm.custom);

  fscanf(modelfl,"%ld%*[^\n]\n", &model->totwords);
  fscanf(modelfl,"%ld%*[^\n]\n", &model->totdoc);
  fscanf(modelfl,"%ld%*[^\n]\n", &model->sv_num);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->b);

  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*model->sv_num);
  model->alpha = (double *)my_malloc(sizeof(double)*model->sv_num);
  model->index=NULL;
  model->lin_weights=NULL;

  for(i=1;i<model->sv_num;i++) {
    fgets(line,(int)ll,modelfl);
    if(!parse_document(line,words,&(model->alpha[i]),&queryid,&slackid,
		       &costfactor,&wpos,max_words,&comment)) {
      printf("\nParsing error while reading model file in SV %ld!\n%s",
	     i,line);
      exit(1);
    }
    model->supvec[i] = create_example(-1,
				      0,0,
				      0.0,
				      create_svector(words,comment,1.0));
  }
  fclose(modelfl);
  my_free(line);
  my_free(words);
  if(verbosity>=1) {
    fprintf(stdout, "OK. (%d support vectors read)\n",(int)(model->sv_num-1));
  }
  return(model);
}
Exemple #2
0
void read_documents(char *docfile, DOC ***docs, double **label, 
		    long int *totwords, long int *totdoc)
{
  char *line,*comment;
  WORD *words;
  long dnum=0,wpos,dpos=0,dneg=0,dunlab=0,queryid,slackid,max_docs;
  long max_words_doc, ll;
  double doc_label,costfactor;
  FILE *docfl;

  if(verbosity>=1) {
    printf("Scanning examples..."); fflush(stdout);
  }
  nol_ll(docfile,&max_docs,&max_words_doc,&ll); /* scan size of input file */
  max_words_doc+=2;
  ll+=2;
  max_docs+=2;
  if(verbosity>=1) {
    printf("done\n"); fflush(stdout);
  }

  (*docs) = (DOC **)my_malloc(sizeof(DOC *)*max_docs);    /* feature vectors */
  (*label) = (double *)my_malloc(sizeof(double)*max_docs); /* target values */
  line = (char *)my_malloc(sizeof(char)*ll);

  if ((docfl = fopen (docfile, "r")) == NULL)
  { perror (docfile); exit (1); }

  words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10));
  if(verbosity>=1) {
    printf("Reading examples into memory..."); fflush(stdout);
  }
  dnum=0;
  (*totwords)=0;
  while((!feof(docfl)) && fgets(line,(int)ll,docfl)) {
    if(line[0] == '#') continue;  /* line contains comments */
    if(!parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,
		       &wpos,max_words_doc,&comment)) {
      printf("\nParsing error in line %ld!\n%s",dnum,line);
      exit(1);
    }
    (*label)[dnum]=doc_label;
    /* printf("docnum=%ld: Class=%f ",dnum,doc_label); */
    if(doc_label > 0) dpos++;
    if (doc_label < 0) dneg++;
    if (doc_label == 0) dunlab++;
    if((wpos>1) && ((words[wpos-2]).wnum>(*totwords))) 
      (*totwords)=(words[wpos-2]).wnum;
    if((*totwords) > MAXFEATNUM) {
      printf("\nMaximum feature number exceeds limit defined in MAXFEATNUM!\n");
      printf("LINE: %s\n",line);
      exit(1);
    }
    (*docs)[dnum] = create_example(dnum,queryid,slackid,costfactor,
				   create_svector(words,comment,1.0));
    /* printf("\nNorm=%f\n",((*docs)[dnum]->fvec)->twonorm_sq);  */
    dnum++;  
    if(verbosity>=1) {
      if((dnum % 100) == 0) {
	printf("%ld..",dnum); fflush(stdout);
      }
    }
  } 

  fclose(docfl);
  my_free(line);
  my_free(words);
  if(verbosity>=1) {
    fprintf(stdout, "OK. (%ld examples read)\n", dnum);
  }
  (*totdoc)=dnum;
}
Exemple #3
0
int main (int argc, char* argv[])
{  
  DOC *docs;  /* training examples */
  long max_docs,max_words_doc;
  long totwords,totdoc,ll,i;
  long kernel_cache_size;
  double *target;
  KERNEL_CACHE kernel_cache;
  LEARN_PARM learn_parm;
  KERNEL_PARM kernel_parm;
  MODEL model;

  read_input_parameters(argc,argv,docfile,modelfile,&verbosity,
			&kernel_cache_size,&learn_parm,&kernel_parm);

  if(verbosity>=1) {
    printf("Scanning examples..."); fflush(stdout);
  }
  nol_ll(docfile,&max_docs,&max_words_doc,&ll); /* scan size of input file */
  max_words_doc+=10;
  ll+=10;
  max_docs+=2;
  if(verbosity>=1) {
    printf("done\n"); fflush(stdout);
  }

  docs = (DOC *)my_malloc(sizeof(DOC)*max_docs);         /* feature vectors */
  target = (double *)my_malloc(sizeof(double)*max_docs); /* target values */
 //printf("\nMax docs: %ld, approximated number of feature occurences %ld, maximal length of a line %ld\n\n",max_docs,max_words_doc,ll);
  read_documents(docfile,docs,target,max_words_doc,ll,&totwords,&totdoc,&kernel_parm);
  printf("\nNumber of examples: %ld, linear space size: %ld\n\n",totdoc,totwords);
 
 //if(kernel_parm.kernel_type==5) totwords=totdoc; // The number of features is proportional to the number of parse-trees, i.e. totdoc 
  				                 // or should we still use totwords to approximate svm_maxqpsize for the Tree Kernel (see hideo.c) ???????

  if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */
    if(learn_parm.type == CLASSIFICATION) {
      svm_learn_classification(docs,target,totdoc,totwords,&learn_parm,
			       &kernel_parm,NULL,&model);
    }
    else if(learn_parm.type == REGRESSION) {
      svm_learn_regression(docs,target,totdoc,totwords,&learn_parm,
			   &kernel_parm,NULL,&model);
    }
    else if(learn_parm.type == RANKING) {
      svm_learn_ranking(docs,target,totdoc,totwords,&learn_parm,
			&kernel_parm,NULL,&model);
    }
  }
  else {
    if(learn_parm.type == CLASSIFICATION) {
      /* Always get a new kernel cache. It is not possible to use the
         same cache for two different training runs */
      kernel_cache_init(&kernel_cache,totdoc,kernel_cache_size);
      svm_learn_classification(docs,target,totdoc,totwords,&learn_parm,
			       &kernel_parm,&kernel_cache,&model);
      /* Free the memory used for the cache. */
      kernel_cache_cleanup(&kernel_cache);
    }
    else if(learn_parm.type == REGRESSION) {
      /* Always get a new kernel cache. It is not possible to use the
         same cache for two different training runs */
      kernel_cache_init(&kernel_cache,2*totdoc,kernel_cache_size);
      svm_learn_regression(docs,target,totdoc,totwords,&learn_parm,
			   &kernel_parm,&kernel_cache,&model);
      /* Free the memory used for the cache. */
      kernel_cache_cleanup(&kernel_cache);
    }
    else if(learn_parm.type == RANKING) {
      printf("Learning rankings is not implemented for non-linear kernels in this version!\n");
      exit(1);
    }
    else if(learn_parm.type == PERCEPTRON) {
			perceptron_learn_classification(docs,target,totdoc,totwords,&learn_parm,
			&kernel_parm,&kernel_cache,&model,modelfile);
    }
	    else if(learn_parm.type == PERCEPTRON_BATCH) {
			batch_perceptron_learn_classification(docs,target,totdoc,totwords,&learn_parm,
			&kernel_parm,kernel_cache_size,&model);
    }

  }

  /* Warning: The model contains references to the original data 'docs'.
     If you want to free the original data, and only keep the model, you 
     have to make a deep copy of 'model'. */
  write_model(modelfile,&model);

  free(model.supvec);
  free(model.alpha);
  free(model.index);
  
   for(i=0;i<totdoc;i++){
     freeExample(&docs[i]);
   }
  
  free(docs);
  free(target);

  return(0);
}
Exemple #4
0
int main_classify (int argc, char* argv[])
{
  DOC *doc;   /* test example */
  WORDSVM *words;
  long max_docs,max_words_doc,lld;
  long totdoc=0,queryid,slackid;
  long correct=0,incorrect=0,no_accuracy=0;
  long res_a=0,res_b=0,res_c=0,res_d=0,wnum,pred_format;
  long j;
  double t1,runtime=0;
  double dist,doc_label,costfactor;
  char *line,*comment; 
  FILE *predfl,*docfl;
  MODEL *model; 

  read_input_parameters(argc,argv,docfile,modelfile,predictionsfile,
			&verbosity,&pred_format);

  nol_ll(docfile,&max_docs,&max_words_doc,&lld); /* scan size of input file */
  max_words_doc+=2;
  lld+=2;

  line = (char *)my_malloc(sizeof(char)*lld);
  words = (WORDSVM *)my_malloc(sizeof(WORDSVM)*(max_words_doc+10));

  model=read_model(modelfile);

  if(model->kernel_parm.kernel_type == 0) { /* linear kernel */
    /* compute weight vector */
    add_weight_vector_to_linear_model(model);
  }
  
  if(verbosity>=2) {
    printf("Classifying test examples.."); fflush(stdout);
  }

  if ((docfl = fopen (docfile, "r")) == NULL)
  { perror (docfile); exit (1); }
  if ((predfl = fopen (predictionsfile, "w")) == NULL)
  { perror (predictionsfile); exit (1); }

  while((!feof(docfl)) && fgets(line,(int)lld,docfl)) {
    if(line[0] == '#') continue;  /* line contains comments */
    parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,&wnum,
		   max_words_doc,&comment);
    totdoc++;
    if(model->kernel_parm.kernel_type == 0) {   /* linear kernel */
      for(j=0;(words[j]).wnum != 0;j++) {  /* Check if feature numbers   */
	if((words[j]).wnum>model->totwords) /* are not larger than in     */
	  (words[j]).wnum=0;               /* model. Remove feature if   */
      }                                        /* necessary.                 */
      doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
      t1=get_runtime();
      dist=classify_example_linear(model,doc);
      runtime+=(get_runtime()-t1);
      free_example(doc,1);
    }
    else {                             /* non-linear kernel */
      doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
      t1=get_runtime();
      dist=classify_example(model,doc);
      runtime+=(get_runtime()-t1);
      free_example(doc,1);
    }
    if(dist>0) {
      if(pred_format==0) { /* old weired output format */
	fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
      }
      if(doc_label>0) correct++; else incorrect++;
      if(doc_label>0) res_a++; else res_b++;
    }
    else {
      if(pred_format==0) { /* old weired output format */
	fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
      }
      if(doc_label<0) correct++; else incorrect++;
      if(doc_label>0) res_c++; else res_d++;
    }
    if(pred_format==1) { /* output the value of decision function */
      fprintf(predfl,"%.8g\n",dist);
    }
    if((int)(0.01+(doc_label*doc_label)) != 1) 
      { no_accuracy=1; } /* test data is not binary labeled */
    if(verbosity>=2) {
      if(totdoc % 100 == 0) {
	printf("%ld..",totdoc); fflush(stdout);
      }
    }
  }  
  free(line);
  free(words);
  free_model(model,1);

  if(verbosity>=2) {
    printf("done\n");

/*   Note by Gary Boone                     Date: 29 April 2000        */
/*      o Timing is inaccurate. The timer has 0.01 second resolution.  */
/*        Because classification of a single vector takes less than    */
/*        0.01 secs, the timer was underflowing.                       */
    printf("Runtime (without IO) in cpu-seconds: %.2f\n",
	   (float)(runtime/100.0));
    
  }
  if((!no_accuracy) && (verbosity>=1)) {
    printf("Accuracy on test set: %.2f%% (%ld correct, %ld incorrect, %ld total)\n",(float)(correct)*100.0/totdoc,correct,incorrect,totdoc);
    printf("Precision/recall on test set: %.2f%%/%.2f%%\n",(float)(res_a)*100.0/(res_a+res_b),(float)(res_a)*100.0/(res_a+res_c));
  }

  return(0);
}
Exemple #5
0
void SVMLightRunner::libraryReadDocuments (
    char *docfile, DOC ***docs, double **label, long int *totwords,
    long int *totdoc, bool use_gmumr, SVMConfiguration &config
) {
    LOG(
        config.log,
        LogLevel::DEBUG_LEVEL,
        __debug_prefix__ + ".libraryReadDocuments() Started."
    );

    char *line,*comment;
    WORD *words;
    long dnum=0,wpos,dpos=0,dneg=0,dunlab=0,queryid,slackid,max_docs;
    long max_words_doc, ll;
    double doc_label,costfactor;
    FILE *docfl;

    if(verbosity>=1) {
      C_PRINTF("Scanning examples..."); C_FFLUSH(stdout);
    }
    // GMUM.R changes {
    if (!use_gmumr) {
        nol_ll(docfile,&max_docs,&max_words_doc,&ll); /* scan size of input file */
    } else {
        max_docs = config.target.n_rows;
        max_words_doc = config.getDataDim();
        // ll used only for file reading
    }
    // GMUM.R changes }
    max_words_doc+=2;
    ll+=2;
    max_docs+=2;
    if(verbosity>=1) {
      C_PRINTF("done\n"); C_FFLUSH(stdout);
    }

    (*docs) = (DOC **)my_malloc(sizeof(DOC *)*max_docs);    /* feature vectors */
    (*label) = (double *)my_malloc(sizeof(double)*max_docs); /* target values */
    // GMUM.R changes {
    if (!use_gmumr) {
        line = (char *)my_malloc(sizeof(char)*ll);

        if ((docfl = fopen (docfile, "r")) == NULL)
        { perror (docfile); EXIT (1); }
    }
    // GMUM.R changes }

    words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10));
    if(verbosity>=1) {
      C_PRINTF("Reading examples into memory..."); C_FFLUSH(stdout);
    }
    dnum=0;
    (*totwords)=0;
    // GMUM.R changes {
    bool newline;
    if (!use_gmumr) {
        newline = (!feof(docfl)) && fgets(line,(int)ll,docfl);
    } else {
        newline = false;
        if (dnum < config.target.n_rows) {
            newline = true;
            std::string str = SVMConfigurationToSVMLightLearnInputLine(config, dnum);
            line = new char[str.size() + 1];
            std::copy(str.begin(), str.end(), line);
            line[str.size()] = '\0';
        }
    }
    while(newline) {
      if (use_gmumr) {
            std::string stringline = "";
      }
      // GMUM.R changes }
      if(line[0] == '#') continue;  /* line contains comments */
      if(!parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,
                 &wpos,max_words_doc,&comment)) {
        C_PRINTF("\nParsing error in line %ld!\n%s",dnum,line);
        EXIT(1);
      }
      (*label)[dnum]=doc_label;
      /* C_PRINTF("docnum=%ld: Class=%f ",dnum,doc_label); */
      if(doc_label > 0) dpos++;
      if (doc_label < 0) dneg++;
      if (doc_label == 0) {
    	  if(config.use_transductive_learning){
    		  dunlab++;
    	  }else{
    		  C_PRINTF("Please for transductive learning pass use_transductive_learning\n");
    		  EXIT(1);
    	  }
      }
      if((wpos>1) && ((words[wpos-2]).wnum>(*totwords))) 
        (*totwords)=(words[wpos-2]).wnum;
      if((*totwords) > MAXFEATNUM) {
        C_PRINTF("\nMaximum feature number exceeds limit defined in MAXFEATNUM!\n");
        EXIT(1);
      }
      (*docs)[dnum] = create_example(dnum,queryid,slackid,costfactor,
                     create_svector(words,comment,1.0));
      /* C_PRINTF("\nNorm=%f\n",((*docs)[dnum]->fvec)->twonorm_sq);  */
      dnum++;  
      if(verbosity>=1) {
        if((dnum % 100) == 0) {
        	C_PRINTF("%ld..",dnum); C_FFLUSH(stdout);
        }
      }
      // GMUM.R changes {
      if (!use_gmumr) {
          newline = (!feof(docfl)) && fgets(line,(int)ll,docfl);
      } else {
          newline = false;
          if (dnum < config.target.n_rows) {
              newline = true;
              std::string str = SVMConfigurationToSVMLightLearnInputLine(config, dnum);
              line = new char[str.size() + 1];
              std::copy(str.begin(), str.end(), line);
              line[str.size()] = '\0';
          }
      }
      // GMUM.R changes }
    } 

    if (!use_gmumr) {
        fclose(docfl);
        free(line);
    };
    free(words);
    if(verbosity>=1) {
      C_FPRINTF(stdout, "OK. (%ld examples read)\n", dnum);
    }
    (*totdoc)=dnum;
}
Exemple #6
0
MODEL * SVMLightRunner::libraryReadModel(
    char *modelfile, bool use_gmumr, SVMConfiguration &config
) {
    LOG(
        config.log,
        LogLevel::DEBUG_LEVEL,
        __debug_prefix__ + ".libraryReadModel() Started."
    );
    FILE *modelfl;
    long i,queryid,slackid;
    double costfactor;
    long max_sv,max_words,ll,wpos;
    char *line,*comment;
    WORD *words;
    char version_buffer[100];
    MODEL *model;

    if(verbosity>=1) {
      C_PRINTF("Reading model..."); C_FFLUSH(stdout);
    }

    // GMUM.R changes {
    model = (MODEL *)my_malloc(sizeof(MODEL));

    if (!use_gmumr) {
        nol_ll(modelfile,&max_sv,&max_words,&ll); /* scan size of model file */
        max_words+=2;
        ll+=2;

        words = (WORD *)my_malloc(sizeof(WORD)*(max_words+10));
        line = (char *)my_malloc(sizeof(char)*ll);

        if ((modelfl = fopen (modelfile, "r")) == NULL)
        { perror (modelfile); EXIT (1); }

        fscanf(modelfl,"SVM-light Version %s\n",version_buffer);
        if(strcmp(version_buffer,VERSION)) {
          perror ("Version of model-file does not match version of svm_classify!"); 
          EXIT (1); 
        }
        fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.kernel_type);  
        fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.poly_degree);
        fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.rbf_gamma);
        fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_lin);
        fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_const);
        fscanf(modelfl,"%[^#]%*[^\n]\n", model->kernel_parm.custom);

        fscanf(modelfl,"%ld%*[^\n]\n", &model->totwords);
        fscanf(modelfl,"%ld%*[^\n]\n", &model->totdoc);
        fscanf(modelfl,"%ld%*[^\n]\n", &model->sv_num);
        fscanf(modelfl,"%lf%*[^\n]\n", &model->b);
    } else { // use_gmumr
        max_words = config.getDataDim();
        words = (WORD *)my_malloc(sizeof(WORD)*(max_words+10));

        LOG(
            config.log,
            LogLevel::DEBUG_LEVEL,
            __debug_prefix__ + ".libraryReadModel() Converting config to model..."
        );

        /* 0=linear, 1=poly, 2=rbf, 3=sigmoid, 4=custom -- same as GMUM.R! */
        model->kernel_parm.kernel_type = static_cast<long int>(config.kernel_type);
        // -d int      -> parameter d in polynomial kernel
        model->kernel_parm.poly_degree = config.degree;
        // -g float    -> parameter gamma in rbf kernel
        model->kernel_parm.rbf_gamma = config.gamma;
        // -s float    -> parameter s in sigmoid/poly kernel
        model->kernel_parm.coef_lin = config.gamma;
        // -r float    -> parameter c in sigmoid/poly kernel
        model->kernel_parm.coef_const = config.coef0;
        // -u string   -> parameter of user defined kernel
        char kernel_parm_custom[50] = "empty";
        char * model_kernel_parm_custom = model->kernel_parm.custom;
        model_kernel_parm_custom = kernel_parm_custom;
        // highest feature index
        model->totwords = config.getDataDim();
        // number of training documents
        model->totdoc = config.target.n_rows;
        // number of support vectors plus 1 (!)
        model->sv_num = config.l + 1;
        /* Threshold b (has opposite sign than SVMClient::predict())
         * In svm_common.c:57 in double classify_example_linear():
         *     return(sum-model->b);
         */
        model->b = - config.b;

        LOG(
            config.log,
            LogLevel::DEBUG_LEVEL,
            __debug_prefix__ + ".libraryReadModel() Converting config done."
        );
    }
    // GMUM.R changes }

    model->supvec = (DOC **)my_malloc(sizeof(DOC *)*model->sv_num);
    model->alpha = (double *)my_malloc(sizeof(double)*model->sv_num);
    model->index=NULL;
    model->lin_weights=NULL;

    // GMUM.R changes {
    if (!use_gmumr) {
        for(i=1;i<model->sv_num;i++) {
          fgets(line,(int)ll,modelfl);
          if(!parse_document(line,words,&(model->alpha[i]),&queryid,&slackid,
                     &costfactor,&wpos,max_words,&comment)) {
            C_PRINTF("\nParsing error while reading model file in SV %ld!\n%s",
               i,line);
            EXIT(1);
          }
          model->supvec[i] = create_example(-1,
                            0,0,
                            0.0,
                            create_svector(words,comment,1.0));
        }
        fclose(modelfl);
        free(line);
    } else {
        for(i = 1; i < model->sv_num; ++i) {
            line = SVMConfigurationToSVMLightModelSVLine(config, i-1);
            if(!parse_document(line,words,&(model->alpha[i]),&queryid,&slackid,
                       &costfactor,&wpos,max_words,&comment)) {
              C_PRINTF("\nParsing error while reading model file in SV %ld!\n%s",
                 i,line);
              EXIT(1);
            }
            model->supvec[i] = create_example(-1,
                              0,0,
                              0.0,
                              create_svector(words,comment,1.0));
            free(line);
        }
    }
    // GMUM.R changes }
    free(words);
    if(verbosity>=1) {
      C_FPRINTF(stdout, "OK. (%d support vectors read)\n",(int)(model->sv_num-1));
    }

    LOG(
        config.log,
        LogLevel::DEBUG_LEVEL,
        __debug_prefix__ + ".libraryReadModel() Done."
    );

    return(model);
}
Exemple #7
0
int SVMLightRunner::librarySVMClassifyMain(
    int argc, char **argv, bool use_gmumr, SVMConfiguration &config
) {
    LOG(
        config.log,
        LogLevel::DEBUG_LEVEL,
        __debug_prefix__ + ".librarySVMClassifyMain() Started."
    );
    DOC *doc;   /* test example */
    WORD *words;
    long max_docs,max_words_doc,lld;
    long totdoc=0,queryid,slackid;
    long correct=0,incorrect=0,no_accuracy=0;
    long res_a=0,res_b=0,res_c=0,res_d=0,wnum,pred_format;
    long j;
    double t1,runtime=0;
    double dist,doc_label,costfactor;
    char *line,*comment; 
    FILE *predfl,*docfl;
    MODEL *model; 

    // GMUM.R changes {
    librarySVMClassifyReadInputParameters(
        argc, argv, docfile, modelfile, predictionsfile, &verbosity,
        &pred_format, use_gmumr, config);

    if (!use_gmumr) {
        nol_ll(docfile,&max_docs,&max_words_doc,&lld); /* scan size of input file */
        lld+=2;

        line = (char *)my_malloc(sizeof(char)*lld);
    } else {
        max_docs = config.target.n_rows;
        max_words_doc = config.getDataDim();
        config.result = arma::zeros<arma::vec>(max_docs);
        // Prevent writing to the file
        pred_format = -1;
        // lld used only for file reading
    }
    max_words_doc+=2;
    words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10));
    // GMUM.R changes }

    model=libraryReadModel(modelfile, use_gmumr, config);
    // GMUM.R changes }

    if(model->kernel_parm.kernel_type == 0) { /* linear kernel */
      /* compute weight vector */
      add_weight_vector_to_linear_model(model);
    }
    
    if(verbosity>=2) {
      C_PRINTF("Classifying test examples.."); C_FFLUSH(stdout);
    }

    // GMUM.R changes {
    bool newline;
    if (!use_gmumr) {
        if ((predfl = fopen (predictionsfile, "w")) == NULL)
        { perror (predictionsfile); EXIT (1); }
        if ((docfl = fopen (docfile, "r")) == NULL)
        { perror (docfile); EXIT (1); }

        newline = (!feof(docfl)) && fgets(line,(int)lld,docfl);
    } else {
        newline = false;
        if (totdoc < config.getDataExamplesNumber()) {
            newline = true;
            std::string str = SVMConfigurationToSVMLightLearnInputLine(config, totdoc);
            line = new char[str.size() + 1];
            std::copy(str.begin(), str.end(), line);
            line[str.size()] = '\0';
        }
    }
    while(newline) {
      if (use_gmumr) {
            std::string stringline = "";
      }
      // GMUM.R changes }
      if(line[0] == '#') continue;  /* line contains comments */
      parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,&wnum,
             max_words_doc,&comment);
      totdoc++;
      if(model->kernel_parm.kernel_type == 0) {   /* linear kernel */
        for(j=0;(words[j]).wnum != 0;j++) {  /* Check if feature numbers   */
      if((words[j]).wnum>model->totwords) /* are not larger than in     */
        (words[j]).wnum=0;               /* model. Remove feature if   */
        }                                        /* necessary.                 */
        doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
        t1=get_runtime();
        dist=classify_example_linear(model,doc);
        runtime+=(get_runtime()-t1);
        free_example(doc,1);
      }
      else {                             /* non-linear kernel */
        doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
        t1=get_runtime();
        dist=classify_example(model,doc);
        runtime+=(get_runtime()-t1);
        free_example(doc,1);
      }
      if(dist>0) {
        if(pred_format==0) { /* old weired output format */
      C_FPRINTF(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
        }
        if(doc_label>0) correct++; else incorrect++;
        if(doc_label>0) res_a++; else res_b++;
      }
      else {
        if(pred_format==0) { /* old weired output format */
      C_FPRINTF(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
        }
        if(doc_label<0) correct++; else incorrect++;
        if(doc_label>0) res_c++; else res_d++;
      }
      if(pred_format==1) { /* output the value of decision function */
        C_FPRINTF(predfl,"%.8g\n",dist);
      }
      if((int)(0.01+(doc_label*doc_label)) != 1)
        { no_accuracy=1; } /* test data is not binary labeled */
      if(verbosity>=2) {
        if(totdoc % 100 == 0) {
      C_PRINTF("%ld..",totdoc); C_FFLUSH(stdout);
        }
      }
      // GMUM.R changes {
      if (!use_gmumr) {
          newline = (!feof(docfl)) && fgets(line,(int)lld,docfl);
      } else {
          newline = false;
          // Store prediction result in config
          config.result[totdoc-1] = dist;
          // Read next line
          if (totdoc < config.getDataExamplesNumber()) {
              newline = true;
              std::string str = SVMConfigurationToSVMLightLearnInputLine(config, totdoc);
              line = new char[str.size() + 1];
              std::copy(str.begin(), str.end(), line);
              line[str.size()] = '\0';
          }
      }
    }
    if (!use_gmumr) {
        fclose(predfl);
        fclose(docfl);
        free(line);
    }
    // GMUM.R changes }
    free(words);
    free_model(model,1);

    if(verbosity>=2) {
      C_PRINTF("done\n");

  /*   Note by Gary Boone                     Date: 29 April 2000        */
  /*      o Timing is inaccurate. The timer has 0.01 second resolution.  */
  /*        Because classification of a single vector takes less than    */
  /*        0.01 secs, the timer was underflowing.                       */
      C_PRINTF("Runtime (without IO) in cpu-seconds: %.2f\n",
         (float)(runtime/100.0));

    }
    if((!no_accuracy) && (verbosity>=1)) {
      C_PRINTF("Accuracy on test set: %.2f%% (%ld correct, %ld incorrect, %ld total)\n",(float)(correct)*100.0/totdoc,correct,incorrect,totdoc);
      C_PRINTF("Precision/recall on test set: %.2f%%/%.2f%%\n",(float)(res_a)*100.0/(res_a+res_b),(float)(res_a)*100.0/(res_a+res_c));
    }

    return(0);
}
Exemple #8
0
struct Set *load_svmlight_name(struct PARAMS *P, char *fname){

	FILE *fp;
	char *line;
	long int i,num_lines,num_attribs,ll,total;
	struct Set *S;
	ldiv_t tmp;
	int v;
	char *c_id;


	if(P->partition==1){
		P->part=(long int**)malloc(sizeof(long int)*P->numViews);
		for(v=0;v<P->numViews;v++)P->part[v]=(long int*)malloc(sizeof(long int)*MAX_ATTRIBS_PER_VIEW);
		for(v=0;v<P->numViews;v++)for(i=0;i<MAX_ATTRIBS_PER_VIEW;i++)P->part[v][i]=0;
		//read partition file
		nol_ll("partition.coem",&num_lines,&num_attribs,&ll);
		line=(char*)malloc(sizeof(char)*(2+ll));
		printf("reading partition.coem...views ");fflush(stdout);
		num_lines--;
		if(P->numViews!=num_lines)halt("are not corresponding!\n");
		fp=fopen("partition.coem","r");
		for(v=0;v<num_lines;v++){
			printf("[%d]",v);
			fgets(line,(int)ll+2,fp);
			c_id=strtok(line," ,");
			i=0;
			do{
				if(atol(c_id)!=0){
					P->part[v][i]=atol(c_id);
					//printf("(%ld)",P->part[v][i]);
					i++;
				}
			}while ((c_id=strtok(NULL,", "))!=0);
		}
		fclose(fp);
		free(line);
		printf("...done\n");
	}



	nol_ll(fname,&num_lines,&num_attribs,&ll);
	line=(char*)malloc(sizeof(char)*(ll+2));
	S=(struct Set*)malloc(sizeof(struct Set));
	S->maxId = 0;
	S->example=(struct Example**)malloc(sizeof(struct Example*)*num_lines);

	if (!(fp=fopen(fname,"r"))) halt("cannot open fv file");

	total=0;
	for(i=0;i<=num_lines;i++){
		if(fgets(line,(int)ll+2,fp)) {
			if (*line!='\n'){
				S->example[total]=parse_svmlight(line, P);
				if(S->example[total]->View[0]->dim > S->maxId)
					S->maxId = S->example[total]->View[0]->dim;
				S->example[total]->nr=total;
				total++;
			}
			tmp=ldiv(total,100);

			/*
			if (tmp.rem==0){
				printf("...");
				printf("%ld",total);
				fflush(stdout);
			}
			*/
		}
	}
	//if(tmp.rem!=0)printf("...%ld",total);
	//printf(" examples read.\n");
	fflush(stdout);
	S->N=total;
	free(line);
	fclose(fp);

	S->num_labeled = total;
	S->num_unlabeled=0;
	S->num_positive=0;
	S->num_negative=0;

	if(P->partition==1){
		//free mem
		for(v=0;v<P->numViews;v++)free(P->part[v]);
		free(P->part);
	}

	return S;
}
Exemple #9
0
STRUCTMODEL read_struct_model(char *file, STRUCT_LEARN_PARM *sparm)
{
  /* Reads structural model sm from file file. This function is used
     only in the prediction module, not in the learning module. */
  FILE *modelfl;
  STRUCTMODEL sm;
  long i,queryid,slackid;
  double costfactor;
  long max_sv,max_words,ll,wpos;
  char *line,*comment;
  TOKEN *words;
  char version_buffer[100];
  MODEL *model;

  nol_ll(file,&max_sv,&max_words,&ll); /* scan size of model file */
  max_words+=2;
  ll+=2;

  words = (TOKEN *)my_malloc(sizeof(TOKEN)*(max_words+10));
  line = (char *)my_malloc(sizeof(char)*ll);
  model = (MODEL *)my_malloc(sizeof(MODEL));

  if ((modelfl = fopen (file, "r")) == NULL)
  { perror (file); exit (1); }

  fscanf(modelfl,"SVM-multiclass Version %s\n",version_buffer);
  if(strcmp(version_buffer,INST_VERSION)) {
    perror ("Version of model-file does not match version of svm_struct_classify!"); 
    exit (1); 
  }
  fscanf(modelfl,"%d%*[^\n]\n", &sparm->num_classes);  
  fscanf(modelfl,"%d%*[^\n]\n", &sparm->num_features);  
  fscanf(modelfl,"%d%*[^\n]\n", &sparm->loss_function);  
  fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.kernel_type);  
  fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.poly_degree);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.rbf_gamma);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_lin);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_const);
  fscanf(modelfl,"%[^#]%*[^\n]\n", model->kernel_parm.custom);

  fscanf(modelfl,"%ld%*[^\n]\n", &model->totwords);
  fscanf(modelfl,"%ld%*[^\n]\n", &model->totdoc);
  fscanf(modelfl,"%ld%*[^\n]\n", &model->sv_num);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->b);

  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*model->sv_num);
  model->alpha = (double *)my_malloc(sizeof(double)*model->sv_num);
  model->index=NULL;
  model->lin_weights=NULL;

  for(i=1;i<model->sv_num;i++) {
    fgets(line,(int)ll,modelfl);
    if(!parse_document(line,words,&(model->alpha[i]),&queryid,&slackid,
		       &costfactor,&wpos,max_words,&comment, true)) {
      printf("\nParsing error while reading model file in SV %ld!\n%s",
	     i,line);
      exit(1);
    }
    model->supvec[i] = create_example(-1,0,0,0.0,
				      create_svector(words,comment,1.0));
    model->supvec[i]->fvec->kernel_id=queryid;
  }
  fclose(modelfl);
  free(line);
  free(words);
  if(verbosity>=1) {
    fprintf(stdout, " (%d support vectors read) ",(int)(model->sv_num-1));
  }
  sm.svm_model=model;
  sm.sizePsi=model->totwords;
  sm.w=NULL;
  return(sm);
}