int _svm_learn (int argc, char* argv[])
{  
  char docfile[200];           /* file with training examples */
  char modelfile[200];         /* file for resulting classifier */
  char restartfile[200];       /* file with initial alphas */
  DOC **docs;  /* training examples */
  long totwords,totdoc,i;
  double *target;
  double *alpha_in=NULL;
  KERNEL_CACHE *kernel_cache;
  LEARN_PARM learn_parm;
  KERNEL_PARM kernel_parm;
  MODEL *model=(MODEL *)my_malloc(sizeof(MODEL));

  HIDEO_ENV *hideo_env=create_env();

  model->td_pred=NULL;
  model->n_td_pred=0;

  _read_input_parameters(argc,argv,docfile,modelfile,restartfile,&verbosity,
			&learn_parm,&kernel_parm);
  read_documents(docfile,&docs,&target,&totwords,&totdoc);
  if(restartfile[0]) alpha_in=read_alphas(restartfile,totdoc);

  if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */
    kernel_cache=NULL;
  }
  else {
    /* Always get a new kernel cache. It is not possible to use the
       same cache for two different training runs */
    kernel_cache=kernel_cache_init(totdoc,learn_parm.kernel_cache_size);
  }

  if(learn_parm.type == CLASSIFICATION) {
    svm_learn_classification(docs,target,totdoc,totwords,&learn_parm,
			     &kernel_parm,kernel_cache,model,alpha_in,hideo_env);
  }
  else if(learn_parm.type == REGRESSION) {
    svm_learn_regression(docs,target,totdoc,totwords,&learn_parm,
			 &kernel_parm,&kernel_cache,model,hideo_env);
  }
  else if(learn_parm.type == RANKING) {
    svm_learn_ranking(docs,target,totdoc,totwords,&learn_parm,
		      &kernel_parm,&kernel_cache,model,hideo_env);
  }
  else if(learn_parm.type == OPTIMIZATION) {
    svm_learn_optimization(docs,target,totdoc,totwords,&learn_parm,
			   &kernel_parm,kernel_cache,model,alpha_in,hideo_env);
  }

  if(kernel_cache) {
    /* Free the memory used for the cache. */
    kernel_cache_cleanup(kernel_cache);
  }

  /* Warning: The model contains references to the original data 'docs'.
     If you want to free the original data, and only keep the model, you 
     have to make a deep copy of 'model'. */
  /* deep_copy_of_model=copy_model(model); */
  write_model(modelfile,model);

  free(alpha_in);
  free_model(model,0);
  for(i=0;i<totdoc;i++) 
    free_example(docs[i],1);
  free(docs);
  free(target);
  free_env(hideo_env);

  return(0);
}
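The warning above recurs in every variant below. As a minimal sketch of the deep-copy route it describes, using SVMLight's copy_model() (which duplicates the support vectors) and the deep flag of free_model(), the tail of the function could instead read (illustration only, not part of the original example):

  /* Sketch: keep only the model and release the training data early. */
  MODEL *deep_copy_of_model=copy_model(model);  /* deep copy incl. support vectors */
  free_model(model,0);                          /* shallow model still shares 'docs' */
  for(i=0;i<totdoc;i++)
    free_example(docs[i],1);                    /* safe: the copy owns its vectors */
  free(docs);
  write_model(modelfile,deep_copy_of_model);
  free_model(deep_copy_of_model,1);             /* 1 = also free the copied vectors */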
Example #2
int SVMLightRunner::librarySVMLearnMain(
    int argc, char **argv, bool use_gmumr, SVMConfiguration &config
) {
    LOG(
        config.log,
        LogLevel::DEBUG_LEVEL,
        __debug_prefix__ + ".librarySVMLearnMain() Started."
    );
    char docfile[200];           /* file with training examples */
    char modelfile[200];         /* file for resulting classifier */
    char restartfile[200];       /* file with initial alphas */
    DOC **docs;  /* training examples */
    long totwords,totdoc,i;
    double *target;
    double *alpha_in=NULL;
    KERNEL_CACHE *kernel_cache;
    LEARN_PARM learn_parm;
    KERNEL_PARM kernel_parm;
    MODEL *model=(MODEL *)my_malloc(sizeof(MODEL));

    // GMUM.R changes {
    librarySVMLearnReadInputParameters(
        argc, argv, docfile, modelfile, restartfile, &verbosity, &learn_parm,
        &kernel_parm, use_gmumr, config
    );

    kernel_parm.kernel_type = static_cast<long int>(config.kernel_type);

    libraryReadDocuments(
        docfile, &docs, &target, &totwords, &totdoc, use_gmumr, config
    );
    // GMUM.R changes }

    if(restartfile[0]) alpha_in=read_alphas(restartfile,totdoc);

    if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */
      kernel_cache=NULL;
    }
    else {
      /* Always get a new kernel cache. It is not possible to use the
       * same cache for two different training runs */
      kernel_cache=kernel_cache_init(totdoc,learn_parm.kernel_cache_size);
    }

    //gmum.r
    init_global_params_QP();

    if(learn_parm.type == CLASSIFICATION) {
      svm_learn_classification(docs,target,totdoc,totwords,&learn_parm,
			     &kernel_parm,kernel_cache,model,alpha_in);
    }
    else if(learn_parm.type == REGRESSION) {
      svm_learn_regression(docs,target,totdoc,totwords,&learn_parm,
			 &kernel_parm,&kernel_cache,model);
    }
    else if(learn_parm.type == RANKING) {
      svm_learn_ranking(docs,target,totdoc,totwords,&learn_parm,
		      &kernel_parm,&kernel_cache,model);
    }
    else if(learn_parm.type == OPTIMIZATION) {
      svm_learn_optimization(docs,target,totdoc,totwords,&learn_parm,
			   &kernel_parm,kernel_cache,model,alpha_in);
    }
    //gmum.r
    config.iter = learn_parm.iterations;

    if(kernel_cache) {
      /* Free the memory used for the cache. */
      kernel_cache_cleanup(kernel_cache);
    }

    /* Warning: The model contains references to the original data 'docs'.
       If you want to free the original data, and only keep the model, you 
       have to make a deep copy of 'model'. */
    /* deep_copy_of_model=copy_model(model); */
    // GMUM.R changes {
    if (!use_gmumr) {
        write_model(modelfile,model);
    } else {
        SVMLightModelToSVMConfiguration(model, config);
    }
    // GMUM.R changes }

    free(alpha_in);
    free_model(model,0);
    for(i=0;i<totdoc;i++) 
      free_example(docs[i],1);
    free(docs);
    free(target);

    LOG(
        config.log,
        LogLevel::DEBUG_LEVEL,
        __debug_prefix__ + ".librarySVMLearnMain() Done."
    );

    return(0);
}
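For orientation, a hypothetical call site for this wrapper; only librarySVMLearnMain's signature, config.kernel_type, and config.iter are grounded in the code above, while SVMConfiguration's construction and defaults are assumptions:

    // Hypothetical driver; SVMConfiguration's constructor and defaults are assumed.
    SVMConfiguration config;                 // kernel choice travels in config.kernel_type
    char *argv[] = { (char *)"svm_learn" };  // remaining options are read from 'config'
    SVMLightRunner runner;
    runner.librarySVMLearnMain(1, argv, true, config);
    // With use_gmumr=true the trained model is written back into 'config' via
    // SVMLightModelToSVMConfiguration(), and config.iter reports the iteration count.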
Example #3
int main (int argc, char* argv[])
{  
  char docfile[200];           /* file with training examples */
  char modelfile[200];         /* file for resulting classifier */
  DOC *docs;  /* training examples */
  long max_docs,max_words_doc;
  long totwords,totdoc,ll,i;
  long kernel_cache_size;
  double *target;
  KERNEL_CACHE kernel_cache;
  LEARN_PARM learn_parm;
  KERNEL_PARM kernel_parm;
  MODEL model;

  read_input_parameters(argc,argv,docfile,modelfile,&verbosity,
			&kernel_cache_size,&learn_parm,&kernel_parm);

  if(verbosity>=1) {
    printf("Scanning examples..."); fflush(stdout);
  }
  nol_ll(docfile,&max_docs,&max_words_doc,&ll); /* scan size of input file */
  max_words_doc+=10;
  ll+=10;
  max_docs+=2;
  if(verbosity>=1) {
    printf("done\n"); fflush(stdout);
  }

  docs = (DOC *)my_malloc(sizeof(DOC)*max_docs);         /* feature vectors */
  target = (double *)my_malloc(sizeof(double)*max_docs); /* target values */
  //printf("\nMax docs: %ld, approximated number of feature occurrences %ld, maximal length of a line %ld\n\n",max_docs,max_words_doc,ll);
  read_documents(docfile,docs,target,max_words_doc,ll,&totwords,&totdoc,&kernel_parm);
  printf("\nNumber of examples: %ld, linear space size: %ld\n\n",totdoc,totwords);

  //if(kernel_parm.kernel_type==5) totwords=totdoc; // The number of features is proportional to the number of parse-trees, i.e. totdoc,
  //                                                // or should we still use totwords to approximate svm_maxqpsize for the Tree Kernel (see hideo.c)?

  if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */
    if(learn_parm.type == CLASSIFICATION) {
      svm_learn_classification(docs,target,totdoc,totwords,&learn_parm,
			       &kernel_parm,NULL,&model);
    }
    else if(learn_parm.type == REGRESSION) {
      svm_learn_regression(docs,target,totdoc,totwords,&learn_parm,
			   &kernel_parm,NULL,&model);
    }
    else if(learn_parm.type == RANKING) {
      svm_learn_ranking(docs,target,totdoc,totwords,&learn_parm,
			&kernel_parm,NULL,&model);
    }
  }
  else {
    if(learn_parm.type == CLASSIFICATION) {
      /* Always get a new kernel cache. It is not possible to use the
         same cache for two different training runs */
      kernel_cache_init(&kernel_cache,totdoc,kernel_cache_size);
      svm_learn_classification(docs,target,totdoc,totwords,&learn_parm,
			       &kernel_parm,&kernel_cache,&model);
      /* Free the memory used for the cache. */
      kernel_cache_cleanup(&kernel_cache);
    }
    else if(learn_parm.type == REGRESSION) {
      /* Always get a new kernel cache. It is not possible to use the
         same cache for two different training runs */
      kernel_cache_init(&kernel_cache,2*totdoc,kernel_cache_size);
      svm_learn_regression(docs,target,totdoc,totwords,&learn_parm,
			   &kernel_parm,&kernel_cache,&model);
      /* Free the memory used for the cache. */
      kernel_cache_cleanup(&kernel_cache);
    }
    else if(learn_parm.type == RANKING) {
      printf("Learning rankings is not implemented for non-linear kernels in this version!\n");
      exit(1);
    }
    else if(learn_parm.type == PERCEPTRON) {
      perceptron_learn_classification(docs,target,totdoc,totwords,&learn_parm,
                                      &kernel_parm,&kernel_cache,&model,modelfile);
    }
    else if(learn_parm.type == PERCEPTRON_BATCH) {
      batch_perceptron_learn_classification(docs,target,totdoc,totwords,&learn_parm,
                                            &kernel_parm,kernel_cache_size,&model);
    }

  }

  /* Warning: The model contains references to the original data 'docs'.
     If you want to free the original data, and only keep the model, you 
     have to make a deep copy of 'model'. */
  write_model(modelfile,&model);

  free(model.supvec);
  free(model.alpha);
  free(model.index);
  
  for(i=0;i<totdoc;i++) {
    freeExample(&docs[i]);
  }
  
  free(docs);
  free(target);

  return(0);
}
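Note the asymmetric cache sizes in the non-linear branch above: classification indexes totdoc rows, while regression uses 2*totdoc because SVMLight's svm_learn_regression internally rewrites the problem as a classification task over two copies of each example (one per side of the epsilon tube). A compressed sketch of the lifecycle rule both branches follow, assuming the same by-value KERNEL_CACHE API used in this example:

  /* Sketch: one fresh cache per training run, released immediately after. */
  KERNEL_CACHE kernel_cache;
  kernel_cache_init(&kernel_cache,2*totdoc,kernel_cache_size); /* 2x only for regression */
  /* ... single training run ... */
  kernel_cache_cleanup(&kernel_cache);  /* a cache is never reused across runs */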
Example #4
/* call as  model = mexsvmlearn(data,labels,options) */
void mexFunction(int nlhs, mxArray *plhs[],
				 int nrhs, const mxArray *prhs[])
{
	char **argv;
	int argc;
	DOC **docs;  /* training examples */
	long totwords,totdoc,i;
	double *target;
	double *alpha_in=NULL;
	KERNEL_CACHE *kernel_cache;
	LEARN_PARM learn_parm;
	KERNEL_PARM kernel_parm;
	MODEL model;
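	/* Note: docfile, modelfile, restartfile and verbosity are presumably
	   file-scope globals in this mex build (reset by global_init() below). */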

	/* check for valid calling format */
	if ((nrhs != 3)  || (nlhs != 1))
		mexErrMsgTxt(ERR001);

	if (mxGetM(prhs[0]) != mxGetM(prhs[1]))
		mexErrMsgTxt(ERR002);

	if (mxGetN(prhs[1]) != 1)
		mexErrMsgTxt(ERR003);

	/* reset static variables -- as a .DLL, static things are sticky  */
	global_init( );

	/* convert the parameters (given in prhs[2]) into an argv/argc combination */
	argv = make_argv((mxArray *)prhs[2],&argc); /* send the options */

	/* This was originally driven by argc/argv and was re-written for
	   MATLAB. It's cheesy, but it works: convert the options array into
	   an argc/argv pair and let svm_light handle it from there. */

	read_input_parameters(argc,argv,docfile,modelfile,restartfile,&verbosity, 
		&learn_parm,&kernel_parm);

	extract_user_opts((mxArray *)prhs[2], &kernel_parm);

	totdoc = mxGetM(prhs[0]);
	totwords = mxGetN(prhs[0]);

	/* prhs[0] = samples (mxn) array
	prhs[1] = labels (mx1) array */
	mexToDOC((mxArray *)prhs[0], (mxArray *)prhs[1], &docs, &target, NULL, NULL);

	/* TODO modify to accept this array 
	if(restartfile[0]) alpha_in=read_alphas(restartfile,totdoc); */

	if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */
		kernel_cache=NULL;
	}
	else {
		/* Always get a new kernel cache. It is not possible to use the
		same cache for two different training runs */
		kernel_cache=kernel_cache_init(totdoc,learn_parm.kernel_cache_size);
	}


	if(learn_parm.type == CLASSIFICATION) {
		svm_learn_classification(docs,target,totdoc,totwords,&learn_parm,
			&kernel_parm,kernel_cache,&model,alpha_in);

	}
	else if(learn_parm.type == REGRESSION) {
		svm_learn_regression(docs,target,totdoc,totwords,&learn_parm,
			&kernel_parm,&kernel_cache,&model);
	}
	else if(learn_parm.type == RANKING) {
		svm_learn_ranking(docs,target,totdoc,totwords,&learn_parm,
			&kernel_parm,&kernel_cache,&model);
	}
	else if(learn_parm.type == OPTIMIZATION) {
		svm_learn_optimization(docs,target,totdoc,totwords,&learn_parm,
			&kernel_parm,kernel_cache,&model,alpha_in);
	}
	else {
		mexErrMsgTxt(ERR004);
	}

	if(kernel_cache) {
		/* Free the memory used for the cache. */
		kernel_cache_cleanup(kernel_cache);
	}

	/* **********************************
	* After the training/learning portion has finished,
	* copy the model back to the output arrays for MATLAB 
	* ********************************** */
	store_model(&model, plhs);

	free_kernel();
	global_destroy( );	
}
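For completeness, the MATLAB-side invocation implied by the header comment and the mxGetM/mxGetN checks above (the option string shown is illustrative only):

	/* model = mexsvmlearn(data, labels, '-t 2 -g 0.5');
	   data:   m-by-n double matrix of training samples (prhs[0])
	   labels: m-by-1 vector (prhs[1]); options are tokenized by make_argv(). */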