int _svm_learn (int argc, char* argv[]) { char docfile[200]; /* file with training examples */ char modelfile[200]; /* file for resulting classifier */ char restartfile[200]; /* file with initial alphas */ DOC **docs; /* training examples */ long totwords,totdoc,i; double *target; double *alpha_in=NULL; KERNEL_CACHE *kernel_cache; LEARN_PARM learn_parm; KERNEL_PARM kernel_parm; MODEL *model=(MODEL *)my_malloc(sizeof(MODEL)); HIDEO_ENV *hideo_env=create_env(); model->td_pred=NULL; model->n_td_pred=0; _read_input_parameters(argc,argv,docfile,modelfile,restartfile,&verbosity, &learn_parm,&kernel_parm); read_documents(docfile,&docs,&target,&totwords,&totdoc); if(restartfile[0]) alpha_in=read_alphas(restartfile,totdoc); if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */ kernel_cache=NULL; } else { /* Always get a new kernel cache. It is not possible to use the same cache for two different training runs */ kernel_cache=kernel_cache_init(totdoc,learn_parm.kernel_cache_size); } if(learn_parm.type == CLASSIFICATION) { svm_learn_classification(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,kernel_cache,model,alpha_in,hideo_env); } else if(learn_parm.type == REGRESSION) { svm_learn_regression(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,&kernel_cache,model,hideo_env); } else if(learn_parm.type == RANKING) { svm_learn_ranking(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,&kernel_cache,model,hideo_env); } else if(learn_parm.type == OPTIMIZATION) { svm_learn_optimization(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,kernel_cache,model,alpha_in,hideo_env); } if(kernel_cache) { /* Free the memory used for the cache. */ kernel_cache_cleanup(kernel_cache); } /* Warning: The model contains references to the original data 'docs'. If you want to free the original data, and only keep the model, you have to make a deep copy of 'model'. 
*/ /* deep_copy_of_model=copy_model(model); */ write_model(modelfile,model); free(alpha_in); free_model(model,0); for(i=0;i<totdoc;i++) free_example(docs[i],1); free(docs); free(target); free_env(hideo_env); return(0); }
int SVMLightRunner::librarySVMLearnMain( int argc, char **argv, bool use_gmumr, SVMConfiguration &config ) { LOG( config.log, LogLevel::DEBUG_LEVEL, __debug_prefix__ + ".librarySVMLearnMain() Started." ); DOC **docs; /* training examples */ long totwords,totdoc,i; double *target; double *alpha_in=NULL; KERNEL_CACHE *kernel_cache; LEARN_PARM learn_parm; KERNEL_PARM kernel_parm; MODEL *model=(MODEL *)my_malloc(sizeof(MODEL)); // GMUM.R changes { librarySVMLearnReadInputParameters( argc, argv, docfile, modelfile, restartfile, &verbosity, &learn_parm, &kernel_parm, use_gmumr, config ); kernel_parm.kernel_type = static_cast<long int>(config.kernel_type); libraryReadDocuments( docfile, &docs, &target, &totwords, &totdoc, use_gmumr, config ); // GMUM.R changes } if(restartfile[0]) alpha_in=read_alphas(restartfile,totdoc); if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */ kernel_cache=NULL; } else { /* Always get a new kernel cache. It is not possible to use the * same cache for two different training runs */ kernel_cache=kernel_cache_init(totdoc,learn_parm.kernel_cache_size); } //gmum.r init_global_params_QP(); if(learn_parm.type == CLASSIFICATION) { svm_learn_classification(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,kernel_cache,model,alpha_in); } else if(learn_parm.type == REGRESSION) { svm_learn_regression(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,&kernel_cache,model); } else if(learn_parm.type == RANKING) { svm_learn_ranking(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,&kernel_cache,model); } else if(learn_parm.type == OPTIMIZATION) { svm_learn_optimization(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,kernel_cache,model,alpha_in); } //gmum.r config.iter = learn_parm.iterations; if(kernel_cache) { /* Free the memory used for the cache. */ kernel_cache_cleanup(kernel_cache); } /* Warning: The model contains references to the original data 'docs'. 
If you want to free the original data, and only keep the model, you have to make a deep copy of 'model'. */ /* deep_copy_of_model=copy_model(model); */ // GMUM.R changes { if (!use_gmumr) { write_model(modelfile,model); } else { SVMLightModelToSVMConfiguration(model, config); } // GMUM.R changes } free(alpha_in); free_model(model,0); for(i=0;i<totdoc;i++) free_example(docs[i],1); free(docs); free(target); LOG( config.log, LogLevel::DEBUG_LEVEL, __debug_prefix__ + ".librarySVMLearnMain() Done." ); return(0); }
/*
 * Command-line entry point for this SVM-light(-TK) variant.
 *
 * Scans the training file for its dimensions, loads the examples, runs the
 * learner selected by learn_parm.type (classification, regression, ranking,
 * perceptron, or batch perceptron), writes the model to modelfile, and frees
 * the training data.  docfile and modelfile are presumably globals filled in
 * by read_input_parameters — they are not declared here; verify in context.
 */
int main (int argc, char* argv[])
{
  DOC *docs;                        /* training examples */
  long max_docs,max_words_doc;
  long totwords,totdoc,ll,i;
  long kernel_cache_size;
  double *target;
  KERNEL_CACHE kernel_cache;        /* stack-allocated; initialized per branch below */
  LEARN_PARM learn_parm;
  KERNEL_PARM kernel_parm;
  MODEL model;

  read_input_parameters(argc,argv,docfile,modelfile,&verbosity,
                        &kernel_cache_size,&learn_parm,&kernel_parm);

  if(verbosity>=1) {
    printf("Scanning examples..."); fflush(stdout);
  }
  /* First pass over the input just measures sizes so buffers can be
     allocated in one shot before the real parse. */
  nol_ll(docfile,&max_docs,&max_words_doc,&ll); /* scan size of input file */
  max_words_doc+=10;  /* slack added to each measured size, presumably to */
  ll+=10;             /* absorb off-by-a-few counting in nol_ll — TODO confirm */
  max_docs+=2;
  if(verbosity>=1) {
    printf("done\n"); fflush(stdout);
  }

  docs = (DOC *)my_malloc(sizeof(DOC)*max_docs);          /* feature vectors */
  target = (double *)my_malloc(sizeof(double)*max_docs);  /* target values */
  //printf("\nMax docs: %ld, approximated number of feature occurences %ld, maximal length of a line %ld\n\n",max_docs,max_words_doc,ll);
  read_documents(docfile,docs,target,max_words_doc,ll,&totwords,&totdoc,&kernel_parm);
  printf("\nNumber of examples: %ld, linear space size: %ld\n\n",totdoc,totwords);
  //if(kernel_parm.kernel_type==5) totwords=totdoc; // The number of features is proportional to the number of parse-trees, i.e. totdoc
  // or should we still use totwords to approximate svm_maxqpsize for the Tree Kernel (see hideo.c) ???????

  if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */
    if(learn_parm.type == CLASSIFICATION) {
      svm_learn_classification(docs,target,totdoc,totwords,&learn_parm,
                               &kernel_parm,NULL,&model);
    }
    else if(learn_parm.type == REGRESSION) {
      svm_learn_regression(docs,target,totdoc,totwords,&learn_parm,
                           &kernel_parm,NULL,&model);
    }
    else if(learn_parm.type == RANKING) {
      svm_learn_ranking(docs,target,totdoc,totwords,&learn_parm,
                        &kernel_parm,NULL,&model);
    }
  }
  else {
    if(learn_parm.type == CLASSIFICATION) {
      /* Always get a new kernel cache. It is not possible to use the
         same cache for two different training runs */
      kernel_cache_init(&kernel_cache,totdoc,kernel_cache_size);
      svm_learn_classification(docs,target,totdoc,totwords,&learn_parm,
                               &kernel_parm,&kernel_cache,&model);
      /* Free the memory used for the cache. */
      kernel_cache_cleanup(&kernel_cache);
    }
    else if(learn_parm.type == REGRESSION) {
      /* Always get a new kernel cache. It is not possible to use the
         same cache for two different training runs.  Regression doubles
         the cache size (2*totdoc) — presumably because each example is
         internally split into two constraints; verify in svm_learn. */
      kernel_cache_init(&kernel_cache,2*totdoc,kernel_cache_size);
      svm_learn_regression(docs,target,totdoc,totwords,&learn_parm,
                           &kernel_parm,&kernel_cache,&model);
      /* Free the memory used for the cache. */
      kernel_cache_cleanup(&kernel_cache);
    }
    else if(learn_parm.type == RANKING) {
      printf("Learning rankings is not implemented for non-linear kernels in this version!\n");
      exit(1);
    }
    else if(learn_parm.type == PERCEPTRON) {
      /* NOTE(review): &kernel_cache is passed here without a visible
         kernel_cache_init() call — looks like either the perceptron
         learner initializes it internally or this reads an uninitialized
         struct; confirm against perceptron_learn_classification. */
      perceptron_learn_classification(docs,target,totdoc,totwords,&learn_parm,
                                      &kernel_parm,&kernel_cache,&model,modelfile);
    }
    else if(learn_parm.type == PERCEPTRON_BATCH) {
      /* Batch variant takes the cache size, not a cache object. */
      batch_perceptron_learn_classification(docs,target,totdoc,totwords,&learn_parm,
                                            &kernel_parm,kernel_cache_size,&model);
    }
  }

  /* Warning: The model contains references to the original data 'docs'.
     If you want to free the original data, and only keep the model, you
     have to make a deep copy of 'model'. */
  write_model(modelfile,&model);

  /* Model internals are freed piecemeal here (no free_model in this
     variant); examples are released individually before the arrays. */
  free(model.supvec);
  free(model.alpha);
  free(model.index);
  for(i=0;i<totdoc;i++){
    freeExample(&docs[i]);
  }
  free(docs);
  free(target);
  return(0);
}
/* call as model = mexsvmlearn(data,labels,options) */
/*
 * MATLAB MEX gateway for SVM-light training.
 *
 * prhs[0] = samples (m x n), prhs[1] = labels (m x 1), prhs[2] = options
 * string converted to an argc/argv pair.  Trains the learner selected by
 * the options and returns the model struct in plhs[0] via store_model().
 * Raises a MATLAB error (ERR001..ERR004) on bad call shapes or an unknown
 * learner type.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
  char **argv;
  int argc;
  DOC **docs;                 /* training examples */
  long totwords,totdoc,i;     /* note: i is declared but unused here */
  double *target;
  double *alpha_in=NULL;
  KERNEL_CACHE *kernel_cache;
  LEARN_PARM learn_parm;
  KERNEL_PARM kernel_parm;
  MODEL model;

  /* check for valid calling format */
  if ((nrhs != 3)  || (nlhs != 1))
    mexErrMsgTxt(ERR001);
  if (mxGetM(prhs[0]) != mxGetM(prhs[1]))
    mexErrMsgTxt(ERR002);
  if (mxGetN(prhs[1]) != 1)
    mexErrMsgTxt(ERR003);

  /* reset static variables -- as a .DLL, static things are sticky */
  global_init( );

  /* convert the parameters (given in prhs[2]) into an argv/argc combination */
  argv = make_argv((mxArray *)prhs[2],&argc); /* send the options */

  /* this was originally supposed to be argc, argv, re-written for MATLAB ...
     its cheesy - but it workss :: convert the options array into an argc,
     argv pair and let svm_lite handle it from there. */
  read_input_parameters(argc,argv,docfile,modelfile,restartfile,&verbosity,
                        &learn_parm,&kernel_parm);
  extract_user_opts((mxArray *)prhs[2], &kernel_parm);

  totdoc = mxGetM(prhs[0]);
  totwords = mxGetN(prhs[0]);

  /* prhs[0] = samples (mxn) array
     prhs[1] = labels (mx1) array */
  mexToDOC((mxArray *)prhs[0], (mxArray *)prhs[1], &docs, &target, NULL, NULL);

  /* TODO modify to accept this array
     if(restartfile[0]) alpha_in=read_alphas(restartfile,totdoc); */

  if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */
    kernel_cache=NULL;
  }
  else {
    /* Always get a new kernel cache. It is not possible to use the
       same cache for two different training runs */
    kernel_cache=kernel_cache_init(totdoc,learn_parm.kernel_cache_size);
  }

  /* Dispatch to the learner chosen in the options string; regression and
     ranking take the cache by address, the others by value. */
  if(learn_parm.type == CLASSIFICATION) {
    svm_learn_classification(docs,target,totdoc,totwords,&learn_parm,
                             &kernel_parm,kernel_cache,&model,alpha_in);
  }
  else if(learn_parm.type == REGRESSION) {
    svm_learn_regression(docs,target,totdoc,totwords,&learn_parm,
                         &kernel_parm,&kernel_cache,&model);
  }
  else if(learn_parm.type == RANKING) {
    svm_learn_ranking(docs,target,totdoc,totwords,&learn_parm,
                      &kernel_parm,&kernel_cache,&model);
  }
  else if(learn_parm.type == OPTIMIZATION) {
    svm_learn_optimization(docs,target,totdoc,totwords,&learn_parm,
                           &kernel_parm,kernel_cache,&model,alpha_in);
  }
  else {
    mexErrMsgTxt(ERR004);
  }

  if(kernel_cache) {
    /* Free the memory used for the cache. */
    kernel_cache_cleanup(kernel_cache);
  }

  /* **********************************
   * After the training/learning portion has finished,
   * copy the model back to the output arrays for MATLAB
   * ********************************** */
  store_model(&model, plhs);

  /* NOTE(review): docs/target are not freed explicitly here — presumably
     global_destroy() reclaims them along with other per-call state; verify
     against global_destroy's implementation. */
  free_kernel();
  global_destroy( );
}