예제 #1
0
int si_compile(Octstr *si_doc, Octstr *charset, Octstr **si_binary)
{
    simple_binary_t *sibxml;
    int ret;
    xmlDocPtr pDoc;
    size_t size;
    char *si_c_text;

    *si_binary = octstr_create(""); 
    sibxml = simple_binary_create();

    octstr_strip_blanks(si_doc);
    set_charset(si_doc, charset);
    size = octstr_len(si_doc);
    si_c_text = octstr_get_cstr(si_doc);
    pDoc = xmlParseMemory(si_c_text, size);

    ret = 0;
    if (pDoc) {
        ret = parse_document(pDoc, charset, &sibxml);
        simple_binary_output(*si_binary, sibxml);
        xmlFreeDoc(pDoc);
    } else {
        xmlFreeDoc(pDoc);
        octstr_destroy(*si_binary);
        simple_binary_destroy(sibxml);
        error(0, "SI: No document to parse. Probably an error in SI source");
        return -1;
    }

    simple_binary_destroy(sibxml);

    return ret;
}
예제 #2
0
SdfNode* sdf_parse_file(const char* filename) {
	// Open the file
	FILE * file = fopen(filename, "r");

	// Check to see if we actually opened the file OK
	if (file == NULL) {
		printf("Unable to open file \"%s\"", filename);
		return NULL;
	}

	// Prepare the parser status object
	ParserData data;
	data.file = file;
	data.filename = filename;
	data.buffer_size = INITIAL_BUFFER_SIZE;
	data.line = malloc(data.buffer_size * sizeof (char));
	data.linenumber = 0;
	data.eof = false;
	next_line(&data);

	// Build the AST
	SdfNode* root = parse_document(&data);

	// Clean up before return
	fclose(file);
	free(data.line);

	// Return
	return root;
}
예제 #3
0
파일: ota_compiler.c 프로젝트: armic/erpts
int ota_compile(Octstr *ota_doc, Octstr *charset, Octstr **ota_binary)
{
    simple_binary_t *otabxml;
    int ret;
    xmlDocPtr pDoc;
    size_t size;
    char *ota_c_text;

    *ota_binary = octstr_create(""); 
    otabxml = simple_binary_create();

    octstr_strip_blanks(ota_doc);
    octstr_shrink_blanks(ota_doc);
    set_charset(ota_doc, charset);
    size = octstr_len(ota_doc);
    ota_c_text = octstr_get_cstr(ota_doc);
    pDoc = xmlParseMemory(ota_c_text, size);

    ret = 0;
    if (pDoc) {
        ret = parse_document(pDoc, charset, &otabxml);
        simple_binary_output(*ota_binary, otabxml);
        xmlFreeDoc(pDoc);
    } else {
        xmlFreeDoc(pDoc);
        octstr_destroy(*ota_binary);
        simple_binary_destroy(otabxml);
        error(0, "OTA: No document to parse. Probably an error in OTA source");
        return -1;
    }

    simple_binary_destroy(otabxml);

    return ret;
}
예제 #4
0
void read_documents(char *docfile, DOC ***docs, double **label, 
		    long int *totwords, long int *totdoc)
{
  char *line,*comment;
  WORD *words;
  long dnum=0,wpos,dpos=0,dneg=0,dunlab=0,queryid,slackid,max_docs;
  long max_words_doc, ll;
  double doc_label,costfactor;
  FILE *docfl;

  if(verbosity>=1) {
    printf("Scanning examples..."); fflush(stdout);
  }
  nol_ll(docfile,&max_docs,&max_words_doc,&ll); /* scan size of input file */
  max_words_doc+=2;
  ll+=2;
  max_docs+=2;
  if(verbosity>=1) {
    printf("done\n"); fflush(stdout);
  }

  (*docs) = (DOC **)my_malloc(sizeof(DOC *)*max_docs);    /* feature vectors */
  (*label) = (double *)my_malloc(sizeof(double)*max_docs); /* target values */
  line = (char *)my_malloc(sizeof(char)*ll);

  if ((docfl = fopen (docfile, "r")) == NULL)
  { perror (docfile); exit (1); }

  words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10));
  if(verbosity>=1) {
    printf("Reading examples into memory..."); fflush(stdout);
  }
  dnum=0;
  (*totwords)=0;
  while((!feof(docfl)) && fgets(line,(int)ll,docfl)) {
    if(line[0] == '#') continue;  /* line contains comments */
    if(!parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,
		       &wpos,max_words_doc,&comment)) {
      printf("\nParsing error in line %ld!\n%s",dnum,line);
      exit(1);
    }
    (*label)[dnum]=doc_label;
    /* printf("docnum=%ld: Class=%f ",dnum,doc_label); */
    if(doc_label > 0) dpos++;
    if (doc_label < 0) dneg++;
    if (doc_label == 0) dunlab++;
    if((wpos>1) && ((words[wpos-2]).wnum>(*totwords))) 
      (*totwords)=(words[wpos-2]).wnum;
    if((*totwords) > MAXFEATNUM) {
      printf("\nMaximum feature number exceeds limit defined in MAXFEATNUM!\n");
      printf("LINE: %s\n",line);
      exit(1);
    }
    (*docs)[dnum] = create_example(dnum,queryid,slackid,costfactor,
				   create_svector(words,comment,1.0));
    /* printf("\nNorm=%f\n",((*docs)[dnum]->fvec)->twonorm_sq);  */
    dnum++;  
    if(verbosity>=1) {
      if((dnum % 100) == 0) {
	printf("%ld..",dnum); fflush(stdout);
      }
    }
  } 

  fclose(docfl);
  my_free(line);
  my_free(words);
  if(verbosity>=1) {
    fprintf(stdout, "OK. (%ld examples read)\n", dnum);
  }
  (*totdoc)=dnum;
}
예제 #5
0
MODEL *read_model(char *modelfile)
{
  FILE *modelfl;
  long i,queryid,slackid;
  double costfactor;
  long max_sv,max_words,ll,wpos;
  char *line,*comment;
  WORD *words;
  char version_buffer[100];
  MODEL *model;

  if(verbosity>=1) {
    printf("Reading model..."); fflush(stdout);
  }

  nol_ll(modelfile,&max_sv,&max_words,&ll); /* scan size of model file */
  max_words+=2;
  ll+=2;

  words = (WORD *)my_malloc(sizeof(WORD)*(max_words+10));
  line = (char *)my_malloc(sizeof(char)*ll);
  model = (MODEL *)my_malloc(sizeof(MODEL));

  if ((modelfl = fopen (modelfile, "r")) == NULL)
  { perror (modelfile); exit (1); }

  fscanf(modelfl,"SVM-light Version %s\n",version_buffer);
  if(strcmp(version_buffer,VERSION)) {
    perror ("Version of model-file does not match version of svm_classify!"); 
    exit (1); 
  }
  fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.kernel_type);  
  fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.poly_degree);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.rbf_gamma);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_lin);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_const);
  fscanf(modelfl,"%[^#]%*[^\n]\n", model->kernel_parm.custom);

  fscanf(modelfl,"%ld%*[^\n]\n", &model->totwords);
  fscanf(modelfl,"%ld%*[^\n]\n", &model->totdoc);
  fscanf(modelfl,"%ld%*[^\n]\n", &model->sv_num);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->b);

  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*model->sv_num);
  model->alpha = (double *)my_malloc(sizeof(double)*model->sv_num);
  model->index=NULL;
  model->lin_weights=NULL;

  for(i=1;i<model->sv_num;i++) {
    fgets(line,(int)ll,modelfl);
    if(!parse_document(line,words,&(model->alpha[i]),&queryid,&slackid,
		       &costfactor,&wpos,max_words,&comment)) {
      printf("\nParsing error while reading model file in SV %ld!\n%s",
	     i,line);
      exit(1);
    }
    model->supvec[i] = create_example(-1,
				      0,0,
				      0.0,
				      create_svector(words,comment,1.0));
  }
  fclose(modelfl);
  my_free(line);
  my_free(words);
  if(verbosity>=1) {
    fprintf(stdout, "OK. (%d support vectors read)\n",(int)(model->sv_num-1));
  }
  return(model);
}
예제 #6
0
int main_classify (int argc, char* argv[])
{
  DOC *doc;   /* test example */
  WORDSVM *words;
  long max_docs,max_words_doc,lld;
  long totdoc=0,queryid,slackid;
  long correct=0,incorrect=0,no_accuracy=0;
  long res_a=0,res_b=0,res_c=0,res_d=0,wnum,pred_format;
  long j;
  double t1,runtime=0;
  double dist,doc_label,costfactor;
  char *line,*comment; 
  FILE *predfl,*docfl;
  MODEL *model; 

  read_input_parameters(argc,argv,docfile,modelfile,predictionsfile,
			&verbosity,&pred_format);

  nol_ll(docfile,&max_docs,&max_words_doc,&lld); /* scan size of input file */
  max_words_doc+=2;
  lld+=2;

  line = (char *)my_malloc(sizeof(char)*lld);
  words = (WORDSVM *)my_malloc(sizeof(WORDSVM)*(max_words_doc+10));

  model=read_model(modelfile);

  if(model->kernel_parm.kernel_type == 0) { /* linear kernel */
    /* compute weight vector */
    add_weight_vector_to_linear_model(model);
  }
  
  if(verbosity>=2) {
    printf("Classifying test examples.."); fflush(stdout);
  }

  if ((docfl = fopen (docfile, "r")) == NULL)
  { perror (docfile); exit (1); }
  if ((predfl = fopen (predictionsfile, "w")) == NULL)
  { perror (predictionsfile); exit (1); }

  while((!feof(docfl)) && fgets(line,(int)lld,docfl)) {
    if(line[0] == '#') continue;  /* line contains comments */
    parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,&wnum,
		   max_words_doc,&comment);
    totdoc++;
    if(model->kernel_parm.kernel_type == 0) {   /* linear kernel */
      for(j=0;(words[j]).wnum != 0;j++) {  /* Check if feature numbers   */
	if((words[j]).wnum>model->totwords) /* are not larger than in     */
	  (words[j]).wnum=0;               /* model. Remove feature if   */
      }                                        /* necessary.                 */
      doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
      t1=get_runtime();
      dist=classify_example_linear(model,doc);
      runtime+=(get_runtime()-t1);
      free_example(doc,1);
    }
    else {                             /* non-linear kernel */
      doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
      t1=get_runtime();
      dist=classify_example(model,doc);
      runtime+=(get_runtime()-t1);
      free_example(doc,1);
    }
    if(dist>0) {
      if(pred_format==0) { /* old weired output format */
	fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
      }
      if(doc_label>0) correct++; else incorrect++;
      if(doc_label>0) res_a++; else res_b++;
    }
    else {
      if(pred_format==0) { /* old weired output format */
	fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
      }
      if(doc_label<0) correct++; else incorrect++;
      if(doc_label>0) res_c++; else res_d++;
    }
    if(pred_format==1) { /* output the value of decision function */
      fprintf(predfl,"%.8g\n",dist);
    }
    if((int)(0.01+(doc_label*doc_label)) != 1) 
      { no_accuracy=1; } /* test data is not binary labeled */
    if(verbosity>=2) {
      if(totdoc % 100 == 0) {
	printf("%ld..",totdoc); fflush(stdout);
      }
    }
  }  
  free(line);
  free(words);
  free_model(model,1);

  if(verbosity>=2) {
    printf("done\n");

/*   Note by Gary Boone                     Date: 29 April 2000        */
/*      o Timing is inaccurate. The timer has 0.01 second resolution.  */
/*        Because classification of a single vector takes less than    */
/*        0.01 secs, the timer was underflowing.                       */
    printf("Runtime (without IO) in cpu-seconds: %.2f\n",
	   (float)(runtime/100.0));
    
  }
  if((!no_accuracy) && (verbosity>=1)) {
    printf("Accuracy on test set: %.2f%% (%ld correct, %ld incorrect, %ld total)\n",(float)(correct)*100.0/totdoc,correct,incorrect,totdoc);
    printf("Precision/recall on test set: %.2f%%/%.2f%%\n",(float)(res_a)*100.0/(res_a+res_b),(float)(res_a)*100.0/(res_a+res_c));
  }

  return(0);
}
예제 #7
0
RelaxNGSchema::RelaxNGSchema(const Document* document)
: pimpl_(new Impl)
{
  parse_document(document);
}
예제 #8
0
파일: svm_common.c 프로젝트: evankos/SCSI
void read_documents(char *docfile, DOC *docs, double *label, 
		    long int max_words_doc, long int ll, 
		    long int *totwords, long int *totdoc, KERNEL_PARM *kernel_parm)
{
  char *line;
  DOC doc;
  long dnum=0,dpos=0,dneg=0,dunlab=0;
  double doc_label;
  FILE *docfl;

  line = (char *)my_malloc(sizeof(char)*ll);

  if ((docfl = fopen (docfile, "r")) == NULL)
  { perror (docfile); exit (1); }

  if(verbosity>=1) {
    printf("Reading examples into memory..."); fflush(stdout);
  }
  dnum=0;
  (*totwords)=0;
  
  while((!feof(docfl)) && fgets(line,(int)ll,docfl)) {

      doc.docnum=dnum+1;
      if(strlen(line)==0){
         printf("\nERROR: empty line, missing end of line before end of file\n");
         exit(1);
      }
         

      if(!parse_document(line, &doc, &doc_label, totwords, max_words_doc, kernel_parm)) {
         printf("\nParsing error in line %ld!\n%s",dnum,line);
         exit(1);
      }

    label[dnum]=doc_label;
    /*  printf("Class=%ld ",doc_label);  */
    if (doc_label > 0) dpos++;
    if (doc_label < 0) dneg++;
    if (doc_label == 0) dunlab++;

    docs[dnum].queryid = doc.queryid;
    docs[dnum].costfactor = doc.costfactor;
    
    docs[dnum].forest_vec = doc.forest_vec;
	docs[dnum].num_of_trees = doc.num_of_trees;
	docs[dnum].vectors = doc.vectors;
	docs[dnum].num_of_vectors = doc.num_of_vectors;
      
    // less than 5 basic kernels and greater than 50 only vectors (to save memory)
    if (kernel_parm->kernel_type<4) { // from 0 to 3 are original kernels => no trees
       freeForest(&doc); // save memory by freeing trees
       docs[dnum].num_of_trees = 0;
       docs[dnum].forest_vec =NULL;
       kernel_parm->second_kernel=kernel_parm->kernel_type;
    }   
 
    // establish some interval to free vectors
    
//    if(kernel_parm->kernel_type>20){
//	     docs[dnum].vectors = NULL;
//         docs[dnum].num_of_vectors = 0;
//         freeVectorSet(&doc); // save memory by freeing vectors
//     }
      
    docs[dnum].docnum=dnum;

/* printf("\nNorm=%f\n",docs[dnum].twonorm_sq);  */

/*printf("parse tree number %d: ",dnum);
    writeTreeString(doc.root);
*/
/*    printf("%d\t",(int)doc_label);  
*/ 
    dnum++;  
    if(verbosity>=1) {
      if((dnum % 100) == 0) {
	           printf("%ld..",dnum); fflush(stdout);
      }
    }
  } 

  fclose(docfl);
  free(line);

  if(verbosity>=1) {
    fprintf(stdout, "OK. (%ld examples read)\n", dnum);
  }

fflush(stdout);

  (*totdoc)=dnum;
}
예제 #9
0
void SVMLightRunner::libraryReadDocuments (
    char *docfile, DOC ***docs, double **label, long int *totwords,
    long int *totdoc, bool use_gmumr, SVMConfiguration &config
) {
    LOG(
        config.log,
        LogLevel::DEBUG_LEVEL,
        __debug_prefix__ + ".libraryReadDocuments() Started."
    );

    char *line,*comment;
    WORD *words;
    long dnum=0,wpos,dpos=0,dneg=0,dunlab=0,queryid,slackid,max_docs;
    long max_words_doc, ll;
    double doc_label,costfactor;
    FILE *docfl;

    if(verbosity>=1) {
      C_PRINTF("Scanning examples..."); C_FFLUSH(stdout);
    }
    // GMUM.R changes {
    if (!use_gmumr) {
        nol_ll(docfile,&max_docs,&max_words_doc,&ll); /* scan size of input file */
    } else {
        max_docs = config.target.n_rows;
        max_words_doc = config.getDataDim();
        // ll used only for file reading
    }
    // GMUM.R changes }
    max_words_doc+=2;
    ll+=2;
    max_docs+=2;
    if(verbosity>=1) {
      C_PRINTF("done\n"); C_FFLUSH(stdout);
    }

    (*docs) = (DOC **)my_malloc(sizeof(DOC *)*max_docs);    /* feature vectors */
    (*label) = (double *)my_malloc(sizeof(double)*max_docs); /* target values */
    // GMUM.R changes {
    if (!use_gmumr) {
        line = (char *)my_malloc(sizeof(char)*ll);

        if ((docfl = fopen (docfile, "r")) == NULL)
        { perror (docfile); EXIT (1); }
    }
    // GMUM.R changes }

    words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10));
    if(verbosity>=1) {
      C_PRINTF("Reading examples into memory..."); C_FFLUSH(stdout);
    }
    dnum=0;
    (*totwords)=0;
    // GMUM.R changes {
    bool newline;
    if (!use_gmumr) {
        newline = (!feof(docfl)) && fgets(line,(int)ll,docfl);
    } else {
        newline = false;
        if (dnum < config.target.n_rows) {
            newline = true;
            std::string str = SVMConfigurationToSVMLightLearnInputLine(config, dnum);
            line = new char[str.size() + 1];
            std::copy(str.begin(), str.end(), line);
            line[str.size()] = '\0';
        }
    }
    while(newline) {
      if (use_gmumr) {
            std::string stringline = "";
      }
      // GMUM.R changes }
      if(line[0] == '#') continue;  /* line contains comments */
      if(!parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,
                 &wpos,max_words_doc,&comment)) {
        C_PRINTF("\nParsing error in line %ld!\n%s",dnum,line);
        EXIT(1);
      }
      (*label)[dnum]=doc_label;
      /* C_PRINTF("docnum=%ld: Class=%f ",dnum,doc_label); */
      if(doc_label > 0) dpos++;
      if (doc_label < 0) dneg++;
      if (doc_label == 0) {
    	  if(config.use_transductive_learning){
    		  dunlab++;
    	  }else{
    		  C_PRINTF("Please for transductive learning pass use_transductive_learning\n");
    		  EXIT(1);
    	  }
      }
      if((wpos>1) && ((words[wpos-2]).wnum>(*totwords))) 
        (*totwords)=(words[wpos-2]).wnum;
      if((*totwords) > MAXFEATNUM) {
        C_PRINTF("\nMaximum feature number exceeds limit defined in MAXFEATNUM!\n");
        EXIT(1);
      }
      (*docs)[dnum] = create_example(dnum,queryid,slackid,costfactor,
                     create_svector(words,comment,1.0));
      /* C_PRINTF("\nNorm=%f\n",((*docs)[dnum]->fvec)->twonorm_sq);  */
      dnum++;  
      if(verbosity>=1) {
        if((dnum % 100) == 0) {
        	C_PRINTF("%ld..",dnum); C_FFLUSH(stdout);
        }
      }
      // GMUM.R changes {
      if (!use_gmumr) {
          newline = (!feof(docfl)) && fgets(line,(int)ll,docfl);
      } else {
          newline = false;
          if (dnum < config.target.n_rows) {
              newline = true;
              std::string str = SVMConfigurationToSVMLightLearnInputLine(config, dnum);
              line = new char[str.size() + 1];
              std::copy(str.begin(), str.end(), line);
              line[str.size()] = '\0';
          }
      }
      // GMUM.R changes }
    } 

    if (!use_gmumr) {
        fclose(docfl);
        free(line);
    };
    free(words);
    if(verbosity>=1) {
      C_FPRINTF(stdout, "OK. (%ld examples read)\n", dnum);
    }
    (*totdoc)=dnum;
}
예제 #10
0
MODEL * SVMLightRunner::libraryReadModel(
    char *modelfile, bool use_gmumr, SVMConfiguration &config
) {
    LOG(
        config.log,
        LogLevel::DEBUG_LEVEL,
        __debug_prefix__ + ".libraryReadModel() Started."
    );
    FILE *modelfl;
    long i,queryid,slackid;
    double costfactor;
    long max_sv,max_words,ll,wpos;
    char *line,*comment;
    WORD *words;
    char version_buffer[100];
    MODEL *model;

    if(verbosity>=1) {
      C_PRINTF("Reading model..."); C_FFLUSH(stdout);
    }

    // GMUM.R changes {
    model = (MODEL *)my_malloc(sizeof(MODEL));

    if (!use_gmumr) {
        nol_ll(modelfile,&max_sv,&max_words,&ll); /* scan size of model file */
        max_words+=2;
        ll+=2;

        words = (WORD *)my_malloc(sizeof(WORD)*(max_words+10));
        line = (char *)my_malloc(sizeof(char)*ll);

        if ((modelfl = fopen (modelfile, "r")) == NULL)
        { perror (modelfile); EXIT (1); }

        fscanf(modelfl,"SVM-light Version %s\n",version_buffer);
        if(strcmp(version_buffer,VERSION)) {
          perror ("Version of model-file does not match version of svm_classify!"); 
          EXIT (1); 
        }
        fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.kernel_type);  
        fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.poly_degree);
        fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.rbf_gamma);
        fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_lin);
        fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_const);
        fscanf(modelfl,"%[^#]%*[^\n]\n", model->kernel_parm.custom);

        fscanf(modelfl,"%ld%*[^\n]\n", &model->totwords);
        fscanf(modelfl,"%ld%*[^\n]\n", &model->totdoc);
        fscanf(modelfl,"%ld%*[^\n]\n", &model->sv_num);
        fscanf(modelfl,"%lf%*[^\n]\n", &model->b);
    } else { // use_gmumr
        max_words = config.getDataDim();
        words = (WORD *)my_malloc(sizeof(WORD)*(max_words+10));

        LOG(
            config.log,
            LogLevel::DEBUG_LEVEL,
            __debug_prefix__ + ".libraryReadModel() Converting config to model..."
        );

        /* 0=linear, 1=poly, 2=rbf, 3=sigmoid, 4=custom -- same as GMUM.R! */
        model->kernel_parm.kernel_type = static_cast<long int>(config.kernel_type);
        // -d int      -> parameter d in polynomial kernel
        model->kernel_parm.poly_degree = config.degree;
        // -g float    -> parameter gamma in rbf kernel
        model->kernel_parm.rbf_gamma = config.gamma;
        // -s float    -> parameter s in sigmoid/poly kernel
        model->kernel_parm.coef_lin = config.gamma;
        // -r float    -> parameter c in sigmoid/poly kernel
        model->kernel_parm.coef_const = config.coef0;
        // -u string   -> parameter of user defined kernel
        char kernel_parm_custom[50] = "empty";
        char * model_kernel_parm_custom = model->kernel_parm.custom;
        model_kernel_parm_custom = kernel_parm_custom;
        // highest feature index
        model->totwords = config.getDataDim();
        // number of training documents
        model->totdoc = config.target.n_rows;
        // number of support vectors plus 1 (!)
        model->sv_num = config.l + 1;
        /* Threshold b (has opposite sign than SVMClient::predict())
         * In svm_common.c:57 in double classify_example_linear():
         *     return(sum-model->b);
         */
        model->b = - config.b;

        LOG(
            config.log,
            LogLevel::DEBUG_LEVEL,
            __debug_prefix__ + ".libraryReadModel() Converting config done."
        );
    }
    // GMUM.R changes }

    model->supvec = (DOC **)my_malloc(sizeof(DOC *)*model->sv_num);
    model->alpha = (double *)my_malloc(sizeof(double)*model->sv_num);
    model->index=NULL;
    model->lin_weights=NULL;

    // GMUM.R changes {
    if (!use_gmumr) {
        for(i=1;i<model->sv_num;i++) {
          fgets(line,(int)ll,modelfl);
          if(!parse_document(line,words,&(model->alpha[i]),&queryid,&slackid,
                     &costfactor,&wpos,max_words,&comment)) {
            C_PRINTF("\nParsing error while reading model file in SV %ld!\n%s",
               i,line);
            EXIT(1);
          }
          model->supvec[i] = create_example(-1,
                            0,0,
                            0.0,
                            create_svector(words,comment,1.0));
        }
        fclose(modelfl);
        free(line);
    } else {
        for(i = 1; i < model->sv_num; ++i) {
            line = SVMConfigurationToSVMLightModelSVLine(config, i-1);
            if(!parse_document(line,words,&(model->alpha[i]),&queryid,&slackid,
                       &costfactor,&wpos,max_words,&comment)) {
              C_PRINTF("\nParsing error while reading model file in SV %ld!\n%s",
                 i,line);
              EXIT(1);
            }
            model->supvec[i] = create_example(-1,
                              0,0,
                              0.0,
                              create_svector(words,comment,1.0));
            free(line);
        }
    }
    // GMUM.R changes }
    free(words);
    if(verbosity>=1) {
      C_FPRINTF(stdout, "OK. (%d support vectors read)\n",(int)(model->sv_num-1));
    }

    LOG(
        config.log,
        LogLevel::DEBUG_LEVEL,
        __debug_prefix__ + ".libraryReadModel() Done."
    );

    return(model);
}
예제 #11
0
int SVMLightRunner::librarySVMClassifyMain(
    int argc, char **argv, bool use_gmumr, SVMConfiguration &config
) {
    LOG(
        config.log,
        LogLevel::DEBUG_LEVEL,
        __debug_prefix__ + ".librarySVMClassifyMain() Started."
    );
    DOC *doc;   /* test example */
    WORD *words;
    long max_docs,max_words_doc,lld;
    long totdoc=0,queryid,slackid;
    long correct=0,incorrect=0,no_accuracy=0;
    long res_a=0,res_b=0,res_c=0,res_d=0,wnum,pred_format;
    long j;
    double t1,runtime=0;
    double dist,doc_label,costfactor;
    char *line,*comment; 
    FILE *predfl,*docfl;
    MODEL *model; 

    // GMUM.R changes {
    librarySVMClassifyReadInputParameters(
        argc, argv, docfile, modelfile, predictionsfile, &verbosity,
        &pred_format, use_gmumr, config);

    if (!use_gmumr) {
        nol_ll(docfile,&max_docs,&max_words_doc,&lld); /* scan size of input file */
        lld+=2;

        line = (char *)my_malloc(sizeof(char)*lld);
    } else {
        max_docs = config.target.n_rows;
        max_words_doc = config.getDataDim();
        config.result = arma::zeros<arma::vec>(max_docs);
        // Prevent writing to the file
        pred_format = -1;
        // lld used only for file reading
    }
    max_words_doc+=2;
    words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10));
    // GMUM.R changes }

    model=libraryReadModel(modelfile, use_gmumr, config);
    // GMUM.R changes }

    if(model->kernel_parm.kernel_type == 0) { /* linear kernel */
      /* compute weight vector */
      add_weight_vector_to_linear_model(model);
    }
    
    if(verbosity>=2) {
      C_PRINTF("Classifying test examples.."); C_FFLUSH(stdout);
    }

    // GMUM.R changes {
    bool newline;
    if (!use_gmumr) {
        if ((predfl = fopen (predictionsfile, "w")) == NULL)
        { perror (predictionsfile); EXIT (1); }
        if ((docfl = fopen (docfile, "r")) == NULL)
        { perror (docfile); EXIT (1); }

        newline = (!feof(docfl)) && fgets(line,(int)lld,docfl);
    } else {
        newline = false;
        if (totdoc < config.getDataExamplesNumber()) {
            newline = true;
            std::string str = SVMConfigurationToSVMLightLearnInputLine(config, totdoc);
            line = new char[str.size() + 1];
            std::copy(str.begin(), str.end(), line);
            line[str.size()] = '\0';
        }
    }
    while(newline) {
      if (use_gmumr) {
            std::string stringline = "";
      }
      // GMUM.R changes }
      if(line[0] == '#') continue;  /* line contains comments */
      parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,&wnum,
             max_words_doc,&comment);
      totdoc++;
      if(model->kernel_parm.kernel_type == 0) {   /* linear kernel */
        for(j=0;(words[j]).wnum != 0;j++) {  /* Check if feature numbers   */
      if((words[j]).wnum>model->totwords) /* are not larger than in     */
        (words[j]).wnum=0;               /* model. Remove feature if   */
        }                                        /* necessary.                 */
        doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
        t1=get_runtime();
        dist=classify_example_linear(model,doc);
        runtime+=(get_runtime()-t1);
        free_example(doc,1);
      }
      else {                             /* non-linear kernel */
        doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
        t1=get_runtime();
        dist=classify_example(model,doc);
        runtime+=(get_runtime()-t1);
        free_example(doc,1);
      }
      if(dist>0) {
        if(pred_format==0) { /* old weired output format */
      C_FPRINTF(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
        }
        if(doc_label>0) correct++; else incorrect++;
        if(doc_label>0) res_a++; else res_b++;
      }
      else {
        if(pred_format==0) { /* old weired output format */
      C_FPRINTF(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
        }
        if(doc_label<0) correct++; else incorrect++;
        if(doc_label>0) res_c++; else res_d++;
      }
      if(pred_format==1) { /* output the value of decision function */
        C_FPRINTF(predfl,"%.8g\n",dist);
      }
      if((int)(0.01+(doc_label*doc_label)) != 1)
        { no_accuracy=1; } /* test data is not binary labeled */
      if(verbosity>=2) {
        if(totdoc % 100 == 0) {
      C_PRINTF("%ld..",totdoc); C_FFLUSH(stdout);
        }
      }
      // GMUM.R changes {
      if (!use_gmumr) {
          newline = (!feof(docfl)) && fgets(line,(int)lld,docfl);
      } else {
          newline = false;
          // Store prediction result in config
          config.result[totdoc-1] = dist;
          // Read next line
          if (totdoc < config.getDataExamplesNumber()) {
              newline = true;
              std::string str = SVMConfigurationToSVMLightLearnInputLine(config, totdoc);
              line = new char[str.size() + 1];
              std::copy(str.begin(), str.end(), line);
              line[str.size()] = '\0';
          }
      }
    }
    if (!use_gmumr) {
        fclose(predfl);
        fclose(docfl);
        free(line);
    }
    // GMUM.R changes }
    free(words);
    free_model(model,1);

    if(verbosity>=2) {
      C_PRINTF("done\n");

  /*   Note by Gary Boone                     Date: 29 April 2000        */
  /*      o Timing is inaccurate. The timer has 0.01 second resolution.  */
  /*        Because classification of a single vector takes less than    */
  /*        0.01 secs, the timer was underflowing.                       */
      C_PRINTF("Runtime (without IO) in cpu-seconds: %.2f\n",
         (float)(runtime/100.0));

    }
    if((!no_accuracy) && (verbosity>=1)) {
      C_PRINTF("Accuracy on test set: %.2f%% (%ld correct, %ld incorrect, %ld total)\n",(float)(correct)*100.0/totdoc,correct,incorrect,totdoc);
      C_PRINTF("Precision/recall on test set: %.2f%%/%.2f%%\n",(float)(res_a)*100.0/(res_a+res_b),(float)(res_a)*100.0/(res_a+res_c));
    }

    return(0);
}
예제 #12
0
STRUCTMODEL read_struct_model(char *file, STRUCT_LEARN_PARM *sparm)
{
  /* Reads structural model sm from file file. This function is used
     only in the prediction module, not in the learning module. */
  FILE *modelfl;
  STRUCTMODEL sm;
  long i,queryid,slackid;
  double costfactor;
  long max_sv,max_words,ll,wpos;
  char *line,*comment;
  TOKEN *words;
  char version_buffer[100];
  MODEL *model;

  nol_ll(file,&max_sv,&max_words,&ll); /* scan size of model file */
  max_words+=2;
  ll+=2;

  words = (TOKEN *)my_malloc(sizeof(TOKEN)*(max_words+10));
  line = (char *)my_malloc(sizeof(char)*ll);
  model = (MODEL *)my_malloc(sizeof(MODEL));

  if ((modelfl = fopen (file, "r")) == NULL)
  { perror (file); exit (1); }

  fscanf(modelfl,"SVM-multiclass Version %s\n",version_buffer);
  if(strcmp(version_buffer,INST_VERSION)) {
    perror ("Version of model-file does not match version of svm_struct_classify!"); 
    exit (1); 
  }
  fscanf(modelfl,"%d%*[^\n]\n", &sparm->num_classes);  
  fscanf(modelfl,"%d%*[^\n]\n", &sparm->num_features);  
  fscanf(modelfl,"%d%*[^\n]\n", &sparm->loss_function);  
  fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.kernel_type);  
  fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.poly_degree);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.rbf_gamma);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_lin);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_const);
  fscanf(modelfl,"%[^#]%*[^\n]\n", model->kernel_parm.custom);

  fscanf(modelfl,"%ld%*[^\n]\n", &model->totwords);
  fscanf(modelfl,"%ld%*[^\n]\n", &model->totdoc);
  fscanf(modelfl,"%ld%*[^\n]\n", &model->sv_num);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->b);

  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*model->sv_num);
  model->alpha = (double *)my_malloc(sizeof(double)*model->sv_num);
  model->index=NULL;
  model->lin_weights=NULL;

  for(i=1;i<model->sv_num;i++) {
    fgets(line,(int)ll,modelfl);
    if(!parse_document(line,words,&(model->alpha[i]),&queryid,&slackid,
		       &costfactor,&wpos,max_words,&comment, true)) {
      printf("\nParsing error while reading model file in SV %ld!\n%s",
	     i,line);
      exit(1);
    }
    model->supvec[i] = create_example(-1,0,0,0.0,
				      create_svector(words,comment,1.0));
    model->supvec[i]->fvec->kernel_id=queryid;
  }
  fclose(modelfl);
  free(line);
  free(words);
  if(verbosity>=1) {
    fprintf(stdout, " (%d support vectors read) ",(int)(model->sv_num-1));
  }
  sm.svm_model=model;
  sm.sizePsi=model->totwords;
  sm.w=NULL;
  return(sm);
}
예제 #13
0
void read_documents(char *docfile, DOC *docs, double *label, 
		    long int max_words_doc, long int ll, 
		    long int *totwords, long int *totdoc)
{
  char *line;
  DOC doc;
  long dnum=0,wpos,i,dpos=0,dneg=0,dunlab=0;
  double doc_label;
  FILE *docfl;

  line = (char *)my_malloc(sizeof(char)*ll);

  if ((docfl = fopen (docfile, "r")) == NULL)
  { perror (docfile); exit (1); }

  doc.words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10));
  if(verbosity>=1) {
    printf("Reading examples into memory..."); fflush(stdout);
  }
  dnum=0;
  (*totwords)=0;
  while((!feof(docfl)) && fgets(line,(int)ll,docfl)) {
    if(line[0] == '#') continue;  /* line contains comments */
    if(!parse_document(line,&doc,&doc_label,&wpos,max_words_doc)) {
      printf("\nParsing error in line %ld!\n%s",dnum,line);
      exit(1);
    }
    label[dnum]=doc_label;
    /*  printf("Class=%ld ",doc_label);  */
    if(doc_label > 0) dpos++;
    if (doc_label < 0) dneg++;
    if (doc_label == 0) dunlab++;
    if((wpos>1) && ((doc.words[wpos-2]).wnum>(*totwords))) 
      (*totwords)=(doc.words[wpos-2]).wnum;
    docs[dnum].queryid = doc.queryid;
    docs[dnum].costfactor = doc.costfactor;
    docs[dnum].words = (WORD *)my_malloc(sizeof(WORD)*(wpos));
    docs[dnum].docnum=dnum;
    for(i=0;i<wpos;i++) { 
      docs[dnum].words[i]=doc.words[i];
      /*  printf("%ld::%f ",(docs[dnum].words[i]).wnum,(docs[dnum].words[i]).weight);  */

    }
    docs[dnum].twonorm_sq=doc.twonorm_sq;
    /* printf("\nNorm=%f\n",docs[dnum].twonorm_sq);  */
    dnum++;  
    if(verbosity>=1) {
      if((dnum % 100) == 0) {
	printf("%ld..",dnum); fflush(stdout);
      }
    }
  } 

  fclose(docfl);
  free(line);
  free(doc.words);
  if(verbosity>=1) {
    fprintf(stdout, "OK. (%ld examples read)\n", dnum);
  }
  (*totdoc)=dnum;
}