SAMPLE read_struct_examples(char *file, STRUCT_LEARN_PARM *sparm)
{
  /* Reads training examples and returns them in sample. The number of
     examples must be written into sample.n */
  SAMPLE   sample;
  EXAMPLE  *examples;
  long     n;          /* number of examples */
  DOC      **docs;     /* examples in original SVM-light format */
  double   *target;
  long     totwords, i, num_classes = 0;

  /* Using the read_documents function from SVM-light */
  read_documents(file, &docs, &target, &totwords, &n);

  examples = (EXAMPLE *)my_malloc(sizeof(EXAMPLE)*n);
  for(i = 0; i < n; i++)     /* find highest class label */
    if(num_classes < (target[i]+0.1))
      num_classes = target[i]+0.1;

  for(i = 0; i < n; i++)     /* make sure all class labels are positive */
    if(target[i] < 1) {
      printf("\nERROR: The class label '%lf' of example number %ld is smaller than '1'!\n",
             target[i], i+1);
      exit(1);
    }

  for(i = 0; i < n; i++) {   /* copy docs over into new datastructure */
    examples[i].x.doc = docs[i];
    examples[i].y.classlabel = target[i]+0.1;
    examples[i].y.scores = NULL;
    examples[i].y.num_classes = num_classes;
  }
  free(target);
  free(docs);
  sample.n = n;
  sample.examples = examples;

  if(struct_verbosity >= 0)
    printf(" (%d examples) ", sample.n);
  return(sample);
}
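/* Usage sketch (added for illustration; not part of the original sources):
   loading a training sample and releasing it again. free_struct_sample()
   is assumed to be the SVM-struct API routine that frees the EXAMPLE array
   and the DOCs it wraps; load_sample_demo() is a hypothetical helper. */
void load_sample_demo(STRUCT_LEARN_PARM *sparm)
{
  SAMPLE sample = read_struct_examples("train.dat", sparm);
  printf("Read %d examples.\n", sample.n);
  free_struct_sample(sample);  /* frees examples[] and each wrapped x.doc */
}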
int _svm_learn(int argc, char *argv[])
{
  char    docfile[200];     /* file with training examples */
  char    modelfile[200];   /* file for resulting classifier */
  char    restartfile[200]; /* file with initial alphas */
  DOC     **docs;           /* training examples */
  long    totwords, totdoc, i;
  double  *target;
  double  *alpha_in = NULL;
  KERNEL_CACHE *kernel_cache;
  LEARN_PARM   learn_parm;
  KERNEL_PARM  kernel_parm;
  MODEL     *model = (MODEL *)my_malloc(sizeof(MODEL));
  HIDEO_ENV *hideo_env = create_env();

  model->td_pred = NULL;
  model->n_td_pred = 0;

  _read_input_parameters(argc, argv, docfile, modelfile, restartfile,
                         &verbosity, &learn_parm, &kernel_parm);
  read_documents(docfile, &docs, &target, &totwords, &totdoc);
  if(restartfile[0])
    alpha_in = read_alphas(restartfile, totdoc);

  if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */
    kernel_cache = NULL;
  }
  else {
    /* Always get a new kernel cache. It is not possible to use the
       same cache for two different training runs */
    kernel_cache = kernel_cache_init(totdoc, learn_parm.kernel_cache_size);
  }

  if(learn_parm.type == CLASSIFICATION) {
    svm_learn_classification(docs, target, totdoc, totwords, &learn_parm,
                             &kernel_parm, kernel_cache, model, alpha_in,
                             hideo_env);
  }
  else if(learn_parm.type == REGRESSION) {
    svm_learn_regression(docs, target, totdoc, totwords, &learn_parm,
                         &kernel_parm, &kernel_cache, model, hideo_env);
  }
  else if(learn_parm.type == RANKING) {
    svm_learn_ranking(docs, target, totdoc, totwords, &learn_parm,
                      &kernel_parm, &kernel_cache, model, hideo_env);
  }
  else if(learn_parm.type == OPTIMIZATION) {
    svm_learn_optimization(docs, target, totdoc, totwords, &learn_parm,
                           &kernel_parm, kernel_cache, model, alpha_in,
                           hideo_env);
  }

  if(kernel_cache) {
    /* Free the memory used for the cache. */
    kernel_cache_cleanup(kernel_cache);
  }

  /* Warning: The model contains references to the original data 'docs'.
     If you want to free the original data, and only keep the model, you
     have to make a deep copy of 'model'. */
  /* deep_copy_of_model=copy_model(model); */
  write_model(modelfile, model);

  free(alpha_in);
  free_model(model, 0);
  for(i = 0; i < totdoc; i++)
    free_example(docs[i], 1);
  free(docs);
  free(target);
  free_env(hideo_env);

  return(0);
}
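/* Sketch (assumption, following the warning comment in _svm_learn): if the
   trained model must outlive the training documents, deep-copy it before
   freeing 'docs'. copy_model() and free_model() are SVM-light routines;
   free_model(m,1) also frees the copied support vectors. detach_model()
   itself is a hypothetical helper. */
MODEL *detach_model(MODEL *model)
{
  MODEL *deep_copy = copy_model(model); /* copies alphas and support vectors */
  free_model(model, 0);                 /* shallow free of the original shell */
  return(deep_copy);                    /* release later with free_model(deep_copy,1) */
}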
int main(int argc, char *argv[])
{
  char   docfile[200];     /* file with training examples */
  char   modelfile[200];   /* file for resulting classifier */
  DOC    *docs;            /* training examples */
  long   max_docs, max_words_doc;
  long   totwords, totdoc, ll, i;
  long   kernel_cache_size;
  double *target;
  KERNEL_CACHE kernel_cache;
  LEARN_PARM   learn_parm;
  KERNEL_PARM  kernel_parm;
  MODEL  model;

  read_input_parameters(argc, argv, docfile, modelfile, &verbosity,
                        &kernel_cache_size, &learn_parm, &kernel_parm);

  if(verbosity >= 1) {
    printf("Scanning examples...");
    fflush(stdout);
  }
  nol_ll(docfile, &max_docs, &max_words_doc, &ll); /* scan size of input file */
  max_words_doc += 10;
  ll += 10;
  max_docs += 2;
  if(verbosity >= 1) {
    printf("done\n");
    fflush(stdout);
  }

  docs = (DOC *)my_malloc(sizeof(DOC)*max_docs);         /* feature vectors */
  target = (double *)my_malloc(sizeof(double)*max_docs); /* target values */
  //printf("\nMax docs: %ld, approximated number of feature occurrences %ld, maximal length of a line %ld\n\n",max_docs,max_words_doc,ll);

  read_documents(docfile, docs, target, max_words_doc, ll,
                 &totwords, &totdoc, &kernel_parm);
  printf("\nNumber of examples: %ld, linear space size: %ld\n\n",
         totdoc, totwords);

  //if(kernel_parm.kernel_type==5) totwords=totdoc;
  // For the Tree Kernel the number of features is proportional to the number
  // of parse trees, i.e. totdoc; it is an open question whether totwords
  // should still be used to approximate svm_maxqpsize (see hideo.c).

  if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */
    if(learn_parm.type == CLASSIFICATION) {
      svm_learn_classification(docs, target, totdoc, totwords, &learn_parm,
                               &kernel_parm, NULL, &model);
    }
    else if(learn_parm.type == REGRESSION) {
      svm_learn_regression(docs, target, totdoc, totwords, &learn_parm,
                           &kernel_parm, NULL, &model);
    }
    else if(learn_parm.type == RANKING) {
      svm_learn_ranking(docs, target, totdoc, totwords, &learn_parm,
                        &kernel_parm, NULL, &model);
    }
  }
  else {
    if(learn_parm.type == CLASSIFICATION) {
      /* Always get a new kernel cache. It is not possible to use the
         same cache for two different training runs */
      kernel_cache_init(&kernel_cache, totdoc, kernel_cache_size);
      svm_learn_classification(docs, target, totdoc, totwords, &learn_parm,
                               &kernel_parm, &kernel_cache, &model);
      /* Free the memory used for the cache. */
      kernel_cache_cleanup(&kernel_cache);
    }
    else if(learn_parm.type == REGRESSION) {
      /* Always get a new kernel cache. It is not possible to use the
         same cache for two different training runs */
      kernel_cache_init(&kernel_cache, 2*totdoc, kernel_cache_size);
      svm_learn_regression(docs, target, totdoc, totwords, &learn_parm,
                           &kernel_parm, &kernel_cache, &model);
      /* Free the memory used for the cache. */
      kernel_cache_cleanup(&kernel_cache);
    }
    else if(learn_parm.type == RANKING) {
      printf("Learning rankings is not implemented for non-linear kernels in this version!\n");
      exit(1);
    }
    else if(learn_parm.type == PERCEPTRON) {
      perceptron_learn_classification(docs, target, totdoc, totwords,
                                      &learn_parm, &kernel_parm,
                                      &kernel_cache, &model, modelfile);
    }
    else if(learn_parm.type == PERCEPTRON_BATCH) {
      batch_perceptron_learn_classification(docs, target, totdoc, totwords,
                                            &learn_parm, &kernel_parm,
                                            kernel_cache_size, &model);
    }
  }

  /* Warning: The model contains references to the original data 'docs'.
     If you want to free the original data, and only keep the model, you
     have to make a deep copy of 'model'. */
  write_model(modelfile, &model);

  free(model.supvec);
  free(model.alpha);
  free(model.index);
  for(i = 0; i < totdoc; i++) {
    freeExample(&docs[i]);
  }
  free(docs);
  free(target);

  return(0);
}
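/* Example invocation and input format (standard SVM-light conventions,
   assumed here to carry over to this variant):
     svm_learn -t 1 -d 3 train.dat model.dat
   trains with a polynomial kernel of degree 3. train.dat holds one example
   per line,
     <target> <feature>:<value> <feature>:<value> ... # optional comment
   e.g.
     +1 1:0.43 3:0.12 9284:0.2 # positive example
   with feature numbers in strictly increasing order. */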