static void bm_free_extra_models() {
	/* Discard polygon models loaded beyond the built-in Descent 2 set,
	 * then any beyond the exit-model slot; the net effect is to trim
	 * N_polygon_models down to min(N_D2_POLYGON_MODELS, exit_modelnum),
	 * freeing each trimmed slot as the counter is walked back. */
	while (N_polygon_models > N_D2_POLYGON_MODELS)
		free_model(&Polygon_models[--N_polygon_models]);
	while (N_polygon_models > exit_modelnum)
		free_model(&Polygon_models[--N_polygon_models]);
}
Classifier::~Classifier() {
	// Release every owned feature extractor.
	for (int idx = 0; idx < nFeatures; ++idx)
		delete featureExtractors[idx];
	// Release the per-zone SVM models (the second argument is forwarded
	// to free_model; presumably it also frees the support vectors — see
	// the SVM-light free_model contract).
	for (unsigned int zone = 0; zone < Globals::numZones; ++zone)
		free_model(models[zone], 1);
}
void free_struct_model(STRUCTMODEL sm)
{
  /* Release the memory held by the structural model. */
  /* sm.w is intentionally not freed here: free_model releases it. */
  if (sm.svm_model)
    free_model(sm.svm_model, 1);
  /* add free calls for user defined data here */
}
int main (int argc, char* argv[]) { MODEL *model; read_input_parameters(argc,argv,modelfile,outfile, &verbosity, &format); if (format) { model=read_binary_model(modelfile); } else { model=read_model(modelfile); if(model->kernel_parm.kernel_type == 0) { /* linear kernel */ /* compute weight vector */ add_weight_vector_to_linear_model(model); } } if(model->kernel_parm.kernel_type == 0) { /* linear kernel */ FILE* modelfl = fopen (outfile, "wb"); if (modelfl==NULL) { perror (modelfile); exit (1); } if (verbosity > 1) fprintf(modelfl,"B=%.32g\n",model->b); long i=0; for (i= 0; i< model->totwords; ++i) fprintf(modelfl,"%.32g\n",model->lin_weights[i]); } else { fprintf(stderr,"No output besides linear models\n"); } free_model(model,1); return(0); }
int main (void) { hid_t file_id; int status; detector_data *data; model *mod = NULL; double *data_stream = NULL; unsigned int *qual = NULL; /* open file -- ignore failure on this step */ status = RD_open_file(TESTDATAFILE, &file_id); if (status) return TEST_EXIT_SKIP; /* initialize data struct */ status = RD_init_data(file_id, 0, -1, &data); if (status) return TEST_EXIT_SKIP; /* get data */ status = RD_get_data(file_id, data); if (status) return TEST_EXIT_SKIP; /* close file */ status = RD_close_file(file_id); if (status) return TEST_EXIT_SKIP; /* setup flt model */ status = setup_model(MODEL_TYPE_FLT, &mod, data, FILT_NPAD, FILT_SCALE); if (status) return TEST_EXIT_SKIP; /* initialize flt model */ status = init_model(mod, data); if (status) return TEST_EXIT_SKIP; /* run flt model */ data_stream = malloc(mod->ndet*mod->nsamp*sizeof(*data_stream)); memcpy(data_stream, data->signal, mod->ndet*mod->nsamp*sizeof(*data_stream)); qual = malloc(mod->ndet*mod->nsamp*sizeof(*qual)); memcpy(qual, data->qual, mod->ndet*mod->nsamp*sizeof(*qual)); status = model_flt_calc(0, 1, mod, data_stream, qual); if (status) return EXIT_FAILURE; free(data_stream); free(qual); /* free flt model */ status = free_model(mod); if (status) return TEST_EXIT_SKIP; /* free data */ status = RD_free_data(data); if (status) return TEST_EXIT_SKIP; /* success! */ return EXIT_SUCCESS; }
void free_polygon_models()
{
	int idx;

	/* Release every loaded polygon model slot in order. */
	for (idx = 0; idx < N_polygon_models; idx++)
		free_model(&Polygon_models[idx]);
}
/* Compute the average log-likelihood of tab-separated token tuples read
 * from stdin under a pre-trained embedding model.
 *
 * Usage: prog [-z Z] modelfile
 *   -z Z  use a fixed normalization constant instead of computing one.
 *
 * Each input line must have exactly m->ntok tab-separated columns; empty
 * columns are treated as absent tokens.  Prints the number of lines and
 * the per-line average logL on exit. */
int main(int argc, char **argv) {
  // Load the model from the file given as first non-option argument.
  // Normalization constant Z, will be calculated (takes long) if not provided.
  double Z = 0;
  int opt;
  while((opt = getopt(argc, argv, "z:")) != -1) {
    switch(opt) {
    case 'z': Z = atof(optarg); break;
    default: die("%s", usage);
    }
  }
  if (optind == argc) die("%s", usage);
  msg("Loading model from %s.", argv[optind]);
  if (Z > 0) msg("Will use fixed Z of %g.", Z);
  else msg("Will calculate Z, this may take some time...");
  model_t m = load_model(argv[optind], Z);

  // Read data from stdin and calculate logL
  char **toks = _d_calloc(m->ntok, sizeof(char *));
  svec_t *x = _d_calloc(m->ntok, sizeof(svec_t));
  double *logZ = _d_calloc(m->ntok, sizeof(double));
  // Pre-compute log normalizers for columns 1..ntok-1 (column 0 is the pivot).
  for (size_t i = 1; i < m->ntok; i++) {
    logZ[i] = log(m->z[i]);
  }
  double logL = 0;
  size_t nline = 0;
  msg("Reading data from stdin (each dot = 1M lines)...");
  forline (line, NULL) {
    // Progress dot every 2^20 lines.
    if ((++nline & ((1<<20)-1)) == 0) fputc('.', stderr);
    line[strlen(line)-1] = 0; // chop newline — NOTE(review): writes before the
                              // buffer if a line is empty; confirm forline
                              // guarantees a trailing newline
    size_t ntok = split(line, "\t", toks, m->ntok);
    if (ntok != m->ntok) die("Wrong number of columns.");
    // Look up each non-empty column's vector; empty column => token absent.
    for (size_t i = 0; i < m->ntok; i++) {
      if (*toks[i] == '\0') {
        x[i] = NULL;
      } else {
        x[i] = svec(m->v[i], toks[i]);
        assert(x[i]->vec != NULL && x[i]->cnt > 0);
      }
    }
    assert(x[0] != NULL);  // the pivot column must always be present
    double logx = log(((double) x[0]->cnt) / m->n[0]);
    // Accumulate log p(x0) + log p(xi) - log Z_i - squared distance term
    // for every present companion column.
    for (size_t i = 1; i < m->ntok; i++) {
      if (x[i] == NULL) continue;
      double logy = log(((double) x[i]->cnt) / m->n[i]);
      logL += logx + logy - logZ[i] - d2(x[0]->vec, x[i]->vec, m->ndim);
    }
  }
  fputc('\n', stderr);
  logL /= nline;  // NOTE(review): divides by zero if stdin was empty
  _d_free(toks);
  _d_free(x);
  _d_free(logZ);
  free_model(m);
  msg("nlines=%zu avg-logL=%g", nline, logL);
}
/* Read tab/whitespace-separated word pairs from stdin and print
 * "w1<TAB>w2<TAB>cosine" for each, using the word2vec model named by the
 * first command-line argument. */
int main(int argc, char **argv) {
  /* BUGFIX: the original dereferenced argv[1] without checking argc and
     crashed when invoked with no arguments. */
  if (argc < 2) {
    fprintf(stderr, "usage: %s <model-file>\n", argv[0]);
    return -1;
  }
  w2v_t w2v;
  if (load_model(argv[1], &w2v) < 0) {
    return -1;
  }
  char st1[kMaxSize], st2[kMaxSize];
  /* NOTE(review): %s has no field width, so a token longer than kMaxSize-1
     overflows the buffers — consider a bounded format. */
  while (fscanf(stdin, "%s\t%s", st1, st2) == 2) {
    fprintf(stdout, "%s\t%s\t%f\n", st1, st2, cosine(&w2v, st1, st2));
  }
  free_model(&w2v);
  return 0;
}
/* Open `filename` and parse it into `model`.  Returns the result of
 * parse_model_file (negative on failure, in which case any partially
 * built model state is released), or -1 if the file cannot be opened. */
int parse_model(const char* filename, track_model_t* model)
{
  FILE* file = fopen(filename, "r");
  if (file == NULL) {
    fprintf(stderr, "Could not open %s.\n", filename);
    return -1;
  }
  model->nodes = 0;
  int rval = parse_model_file(file, model);
  if (rval < 0) {
    /* Parsing failed part-way: drop whatever was built. */
    free_model(model);
  }
  fclose(file);
  return rval;
}
/* Smoke test for the AST model: load detector data from the HDF5 test
 * file, build the model, query its memory footprint, and tear everything
 * down.  Environment problems yield TEST_EXIT_SKIP; only a failure of
 * free_model itself yields EXIT_FAILURE. */
int main (void)
{
	hid_t file_id;
	int status;
	detector_data *data;
	model *mod;
	UNUSED unsigned long memused;

	/* open file -- ignore failure on this step */
	status = RD_open_file(TESTDATAFILE, &file_id);
	if (status) return TEST_EXIT_SKIP;

	/* initialize data struct */
	status = RD_init_data(file_id, 0, -1, &data);
	if (status) return TEST_EXIT_SKIP;

	/* get data */
	status = RD_get_data(file_id, data);
	if (status) return TEST_EXIT_SKIP;

	/* close file */
	status = RD_close_file(file_id);
	if (status) return TEST_EXIT_SKIP;

	/* setup model */
	status = setup_model(MODEL_TYPE_AST, &mod, data);
	if (status) return TEST_EXIT_SKIP;

	/* get memory (value only exercised for the side effect of the call) */
	memused = getmem_model(mod);

	/* free ast model */
	status = free_model(mod);
	if (status) return EXIT_FAILURE;

	/* free data */
	status = RD_free_data(data);
	if (status) return TEST_EXIT_SKIP;

	/* success! */
	return EXIT_SUCCESS;
}
void print_struct_learning_stats(SAMPLE sample, STRUCTMODEL *sm,
				 CONSTSET cset, double *alpha,
				 STRUCT_LEARN_PARM *sparm)
{
  /* Called once after training; the hook exists for final touches to the
     model and for printing training statistics.  Here it only compacts a
     linear model: the support-vector expansion is replaced by the single
     explicit weight vector, and the old model is released. */
  MODEL *old_model = sm->svm_model;
  if (old_model->kernel_parm.kernel_type != LINEAR)
    return;
  if (struct_verbosity >= 1) {
    printf("Compacting linear model...");
    fflush(stdout);
  }
  sm->svm_model = compact_linear_model(old_model);
  sm->w = sm->svm_model->lin_weights; /* short cut to weight vector */
  free_model(old_model, 1);
  if (struct_verbosity >= 1) {
    printf("done\n");
    fflush(stdout);
  }
}
int main( int argc, char *argv[] ) { int ret = EXIT_FAILURE; serial_port_t port; struct model *model = malloc(sizeof(struct model)); struct activity *activity = malloc(sizeof(struct activity)); model->num_theta_region = CONF_NUM_THETA_REGION; char chosen_username[NAME_LEN]; char chosen_activity[NAME_LEN]; if (prompt_activity_name(chosen_activity)) goto ERROR; if (prompt_user_name(chosen_username)) goto ERROR; strncpy(activity->name, chosen_activity, NAME_LEN); strncpy(activity->user, chosen_username, NAME_LEN); activity->name[strlen(chosen_activity)] = '\0'; activity->user[strlen(chosen_username)] = '\0'; if (init_model(model)) goto ERROR; if (init_activity(activity, model)) goto ERROR; if (check_file_exists(FILE_CONF_CALIBRATION)) goto ERROR; if (serial_port_open(CONF_SERIAL_PORT, &port)) goto ERROR; if (serial_port_configure(port)) goto ERROR; if (real_time_listen(port, activity, model)) goto ERROR; file_write_activity_code(activity->files[FILE_ACTIVITY_CODE], activity, model); ret = EXIT_SUCCESS; ERROR: serial_port_close(port); free_activity(activity); free_model(model); return ret; }
/* n-slack cutting-plane trainer for structural SVMs (SVM-struct).
 *
 * Repeatedly finds the most violated constraint for each training
 * example, adds it to the working set `cset`, and re-solves the QP over
 * the working set, while gradually tightening the working precision
 * `epsilon` down to sparm->epsilon.  On return, `sm` holds the trained
 * model (sm->svm_model, with sm->w as a shortcut to the weight vector).
 *
 * alg_type selects whether the shrinking heuristic is used
 * (NSLACK_SHRINK_ALG) to skip examples that stopped producing
 * constraints in the current round. */
void svm_learn_struct(SAMPLE sample, STRUCT_LEARN_PARM *sparm,
		      LEARN_PARM *lparm, KERNEL_PARM *kparm,
		      STRUCTMODEL *sm, int alg_type)
{
  int i,j;
  int numIt=0;
  long argmax_count=0;
  long newconstraints=0, totconstraints=0, activenum=0;
  int opti_round, *opti, fullround, use_shrinking;
  long old_totconstraints=0;
  double epsilon,svmCnorm;
  long tolerance,new_precision=1,dont_stop=0;
  double lossval,factor,dist;
  double margin=0;
  double slack, *slacks, slacksum, ceps;
  double dualitygap,modellength,alphasum;
  long sizePsi;
  double *alpha=NULL;
  long *alphahist=NULL,optcount=0,lastoptcount=0;
  CONSTSET cset;
  SVECTOR *diff=NULL;
  SVECTOR *fy, *fybar, *f, **fycache=NULL;
  SVECTOR *slackvec;
  WORD slackv[2];
  MODEL *svmModel=NULL;
  KERNEL_CACHE *kcache=NULL;
  LABEL ybar;
  DOC *doc;
  long n=sample.n;
  EXAMPLE *ex=sample.examples;
  double rt_total=0, rt_opt=0, rt_init=0, rt_psi=0, rt_viol=0;
  double rt1,rt2;

  rt1=get_runtime();

  init_struct_model(sample,sm,sparm,lparm,kparm);
  sizePsi=sm->sizePsi+1; /* sm must contain size of psi on return */

  /* initialize shrinking-style example selection heuristic */
  if(alg_type == NSLACK_SHRINK_ALG)
    use_shrinking=1;
  else
    use_shrinking=0;
  opti=(int*)my_malloc(n*sizeof(int));
  for(i=0;i<n;i++) {
    opti[i]=0;
  }
  opti_round=0;

  /* normalize regularization parameter C by the number of training examples */
  svmCnorm=sparm->C/n;

  if(sparm->slack_norm == 1) {
    lparm->svm_c=svmCnorm; /* set upper bound C */
    lparm->sharedslack=1;
  }
  else if(sparm->slack_norm == 2) {
    lparm->svm_c=999999999999999.0; /* upper bound C must never be reached */
    lparm->sharedslack=0;
    if(kparm->kernel_type != LINEAR_KERNEL) {
      printf("ERROR: Kernels are not implemented for L2 slack norm!");
      fflush(stdout);
      exit(0);
    }
  }
  else {
    printf("ERROR: Slack norm must be L1 or L2!");
    fflush(stdout);
    exit(0);
  }

  epsilon=100.0; /* start with low precision and increase later */
  tolerance=MIN(n/3,MAX(n/100,5));/* increase precision, whenever less than
                                     that number of constraints is not
                                     fulfilled */
  lparm->biased_hyperplane=0; /* set threshold to zero */

  cset=init_struct_constraints(sample, sm, sparm);
  if(cset.m > 0) {
    alpha=(double *)realloc(alpha,sizeof(double)*cset.m);
    alphahist=(long *)realloc(alphahist,sizeof(long)*cset.m);
    for(i=0; i<cset.m; i++) {
      alpha[i]=0;
      alphahist[i]=-1; /* -1 makes sure these constraints are never removed */
    }
  }

  /* set initial model and slack variables */
  svmModel=(MODEL *)my_malloc(sizeof(MODEL));
  lparm->epsilon_crit=epsilon;
  if(kparm->kernel_type != LINEAR_KERNEL)
    kcache=kernel_cache_init(MAX(cset.m,1),lparm->kernel_cache_size);
  svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n,
			 lparm,kparm,kcache,svmModel,alpha);
  if(kcache)
    kernel_cache_cleanup(kcache);
  add_weight_vector_to_linear_model(svmModel);
  sm->svm_model=svmModel;
  sm->w=svmModel->lin_weights; /* short cut to weight vector */

  /* create a cache of the feature vectors for the correct labels */
  if(USE_FYCACHE) {
    fycache=(SVECTOR **)my_malloc(n*sizeof(SVECTOR *));
    for(i=0;i<n;i++) {
      fy=psi(ex[i].x,ex[i].y,sm,sparm);
      if(kparm->kernel_type == LINEAR_KERNEL) {
	diff=add_list_ss(fy); /* store difference vector directly */
	free_svector(fy);
	fy=diff;
      }
      fycache[i]=fy;
    }
  }

  rt_init+=MAX(get_runtime()-rt1,0);
  rt_total+=MAX(get_runtime()-rt1,0);

  /*****************/
  /*** main loop ***/
  /*****************/
  do { /* iteratively increase precision */

    epsilon=MAX(epsilon*0.49999999999,sparm->epsilon);
    new_precision=1;
    if(epsilon == sparm->epsilon) /* for final precision, find all SV */
      tolerance=0;
    lparm->epsilon_crit=epsilon/2; /* svm precision must be higher than eps */
    if(struct_verbosity>=1)
      printf("Setting current working precision to %g.\n",epsilon);

    do { /* iteration until (approx) all SV are found for current
            precision and tolerance */
      opti_round++;
      activenum=n;
      dont_stop=0;
      old_totconstraints=totconstraints;

      do { /* with shrinking turned on, go through examples that keep
              producing new constraints */

	if(struct_verbosity>=1) {
	  printf("Iter %i (%ld active): ",++numIt,activenum);
	  fflush(stdout);
	}

	ceps=0;
	/* NOTE(review): fullround is computed but never read below. */
	fullround=(activenum == n);

	for(i=0; i<n; i++) { /*** example loop ***/

	  rt1=get_runtime();

	  if((!use_shrinking) || (opti[i] != opti_round)) {
	    /* if the example is not shrunk away, then see if it is
	       necessary to add a new constraint */
	    rt2=get_runtime();
	    argmax_count++;
	    if(sparm->loss_type == SLACK_RESCALING)
	      ybar=find_most_violated_constraint_slackrescaling(ex[i].x,
								ex[i].y,sm,
								sparm);
	    else
	      ybar=find_most_violated_constraint_marginrescaling(ex[i].x,
								 ex[i].y,sm,
								 sparm);
	    rt_viol+=MAX(get_runtime()-rt2,0);

	    if(empty_label(ybar)) {
	      /* no violated constraint for this example: shrink it away */
	      if(opti[i] != opti_round) {
		activenum--;
		opti[i]=opti_round;
	      }
	      if(struct_verbosity>=2)
		printf("no-incorrect-found(%i) ",i);
	      continue;
	    }

	    /**** get psi(y)-psi(ybar) ****/
	    rt2=get_runtime();
	    if(fycache)
	      fy=copy_svector(fycache[i]);
	    else
	      fy=psi(ex[i].x,ex[i].y,sm,sparm);
	    fybar=psi(ex[i].x,ybar,sm,sparm);
	    rt_psi+=MAX(get_runtime()-rt2,0);

	    /**** scale feature vector and margin by loss ****/
	    lossval=loss(ex[i].y,ybar,sparm);
	    if(sparm->slack_norm == 2)
	      lossval=sqrt(lossval);
	    if(sparm->loss_type == SLACK_RESCALING)
	      factor=lossval;
	    else           /* do not rescale vector for */
	      factor=1.0;  /* margin rescaling loss type */
	    for(f=fy;f;f=f->next)
	      f->factor*=factor;
	    for(f=fybar;f;f=f->next)
	      f->factor*=-factor;
	    margin=lossval;

	    /**** create constraint for current ybar ****/
	    append_svector_list(fy,fybar);/* append the two vector lists */
	    doc=create_example(cset.m,0,i+1,1,fy);

	    /**** compute slack for this example ****/
	    slack=0;
	    for(j=0;j<cset.m;j++)
	      if(cset.lhs[j]->slackid == i+1) {
		if(sparm->slack_norm == 2) /* works only for linear kernel */
		  slack=MAX(slack,cset.rhs[j]
			    -(classify_example(svmModel,cset.lhs[j])
			      -sm->w[sizePsi+i]/(sqrt(2*svmCnorm))));
		else
		  slack=MAX(slack,
			    cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
	      }

	    /**** if `error' add constraint and recompute ****/
	    dist=classify_example(svmModel,doc);
	    ceps=MAX(ceps,margin-dist-slack);
	    if(slack > (margin-dist+0.0001)) {
	      printf("\nWARNING: Slack of most violated constraint is smaller than slack of working\n");
	      printf(" set! There is probably a bug in 'find_most_violated_constraint_*'.\n");
	      printf("Ex %d: slack=%f, newslack=%f\n",i,slack,margin-dist);
	      /* exit(1); */
	    }
	    if((dist+slack)<(margin-epsilon)) {
	      if(struct_verbosity>=2)
		{printf("(%i,eps=%.2f) ",i,margin-dist-slack); fflush(stdout);}
	      if(struct_verbosity==1)
		{printf("."); fflush(stdout);}

	      /**** resize constraint matrix and add new constraint ****/
	      cset.m++;
	      cset.lhs=(DOC **)realloc(cset.lhs,sizeof(DOC *)*cset.m);
	      if(kparm->kernel_type == LINEAR_KERNEL) {
		diff=add_list_ss(fy); /* store difference vector directly */
		if(sparm->slack_norm == 1)
		  cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1,
						    copy_svector(diff));
		else if(sparm->slack_norm == 2) {
		  /**** add squared slack variable to feature vector ****/
		  slackv[0].wnum=sizePsi+i;
		  slackv[0].weight=1/(sqrt(2*svmCnorm));
		  slackv[1].wnum=0; /*terminator*/
		  slackvec=create_svector(slackv,NULL,1.0);
		  cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1,
						    add_ss(diff,slackvec));
		  free_svector(slackvec);
		}
		free_svector(diff);
	      }
	      else { /* kernel is used */
		if(sparm->slack_norm == 1)
		  cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1,
						    copy_svector(fy));
		else if(sparm->slack_norm == 2)
		  exit(1);
	      }
	      cset.rhs=(double *)realloc(cset.rhs,sizeof(double)*cset.m);
	      cset.rhs[cset.m-1]=margin;
	      alpha=(double *)realloc(alpha,sizeof(double)*cset.m);
	      alpha[cset.m-1]=0;
	      alphahist=(long *)realloc(alphahist,sizeof(long)*cset.m);
	      alphahist[cset.m-1]=optcount;
	      newconstraints++;
	      totconstraints++;
	    }
	    else {
	      printf("+"); fflush(stdout);
	      if(opti[i] != opti_round) {
		activenum--;
		opti[i]=opti_round;
	      }
	    }

	    free_example(doc,0);
	    free_svector(fy); /* this also free's fybar */
	    free_label(ybar);
	  }

	  /**** get new QP solution ****/
	  if((newconstraints >= sparm->newconstretrain)
	     || ((newconstraints > 0) && (i == n-1))
	     || (new_precision && (i == n-1))) {
	    if(struct_verbosity>=1) {
	      printf("*");fflush(stdout);
	    }
	    rt2=get_runtime();
	    /* Drop the old model shell (support vectors are owned by cset). */
	    free_model(svmModel,0);
	    svmModel=(MODEL *)my_malloc(sizeof(MODEL));
	    /* Always get a new kernel cache. It is not possible to use the
	       same cache for two different training runs */
	    if(kparm->kernel_type != LINEAR_KERNEL)
	      kcache=kernel_cache_init(MAX(cset.m,1),lparm->kernel_cache_size);
	    /* Run the QP solver on cset. */
	    svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n,
				   lparm,kparm,kcache,svmModel,alpha);
	    if(kcache)
	      kernel_cache_cleanup(kcache);
	    /* Always add weight vector, in case part of the kernel is
	       linear. If not, ignore the weight vector since its
	       content is bogus. */
	    add_weight_vector_to_linear_model(svmModel);
	    sm->svm_model=svmModel;
	    sm->w=svmModel->lin_weights; /* short cut to weight vector */
	    optcount++;
	    /* keep track of when each constraint was last
	       active. constraints marked with -1 are not updated */
	    for(j=0;j<cset.m;j++)
	      if((alphahist[j]>-1) && (alpha[j] != 0))
		alphahist[j]=optcount;
	    rt_opt+=MAX(get_runtime()-rt2,0);

	    if(new_precision && (epsilon <= sparm->epsilon))
	      dont_stop=1; /* make sure we take one final pass */
	    new_precision=0;
	    newconstraints=0;
	  }

	  rt_total+=MAX(get_runtime()-rt1,0);

	} /* end of example loop */

	rt1=get_runtime();

	if(struct_verbosity>=1)
	  printf("(NumConst=%d, SV=%ld, CEps=%.4f, QPEps=%.4f)\n",cset.m,
		 svmModel->sv_num-1,ceps,svmModel->maxdiff);

	/* Check if some of the linear constraints have not been
	   active in a while. Those constraints are then removed to
	   avoid bloating the working set beyond necessity. */
	if(struct_verbosity>=2)
	  printf("Reducing working set...");fflush(stdout);
	remove_inactive_constraints(&cset,alpha,optcount,alphahist,
				    MAX(50,optcount-lastoptcount));
	lastoptcount=optcount;
	if(struct_verbosity>=2)
	  printf("done. (NumConst=%d)\n",cset.m);

	rt_total+=MAX(get_runtime()-rt1,0);

      } while(use_shrinking && (activenum > 0)); /* when using shrinking,
                                                    repeat until all examples
                                                    produced no constraint at
                                                    least once */

    } while(((totconstraints - old_totconstraints) > tolerance) || dont_stop);

  } while((epsilon > sparm->epsilon)
	  || finalize_iteration(ceps,0,sample,sm,cset,alpha,sparm));

  if(struct_verbosity>=1) {
    /**** compute sum of slacks ****/
    /**** WARNING: If positivity constraints are used, then the
          maximum slack id is larger than what is allocated below ****/
    slacks=(double *)my_malloc(sizeof(double)*(n+1));
    for(i=0; i<=n; i++) {
      slacks[i]=0;
    }
    if(sparm->slack_norm == 1) {
      for(j=0;j<cset.m;j++)
	slacks[cset.lhs[j]->slackid]=MAX(slacks[cset.lhs[j]->slackid],
					 cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
    }
    else if(sparm->slack_norm == 2) {
      for(j=0;j<cset.m;j++)
	slacks[cset.lhs[j]->slackid]=MAX(slacks[cset.lhs[j]->slackid],
					 cset.rhs[j]
					 -(classify_example(svmModel,cset.lhs[j])
					   -sm->w[sizePsi+cset.lhs[j]->slackid-1]/(sqrt(2*svmCnorm))));
    }
    slacksum=0;
    for(i=1; i<=n; i++)
      slacksum+=slacks[i];
    free(slacks);
    alphasum=0;
    for(i=0; i<cset.m; i++)
      alphasum+=alpha[i]*cset.rhs[i];
    modellength=model_length_s(svmModel);
    /* duality gap = primal objective bound minus dual objective value */
    dualitygap=(0.5*modellength*modellength+svmCnorm*(slacksum+n*ceps))
      -(alphasum-0.5*modellength*modellength);
    printf("Final epsilon on KKT-Conditions: %.5f\n",
	   MAX(svmModel->maxdiff,epsilon));
    printf("Upper bound on duality gap: %.5f\n", dualitygap);
    printf("Dual objective value: dval=%.5f\n",
	   alphasum-0.5*modellength*modellength);
    printf("Total number of constraints in final working set: %i (of %i)\n",(int)cset.m,(int)totconstraints);
    printf("Number of iterations: %d\n",numIt);
    printf("Number of calls to 'find_most_violated_constraint': %ld\n",argmax_count);
    if(sparm->slack_norm == 1) {
      printf("Number of SV: %ld \n",svmModel->sv_num-1);
      printf("Number of non-zero slack variables: %ld (out of %ld)\n",
	     svmModel->at_upper_bound,n);
      printf("Norm of weight vector: |w|=%.5f\n",modellength);
    }
    else if(sparm->slack_norm == 2){
      printf("Number of SV: %ld (including %ld at upper bound)\n",
	     svmModel->sv_num-1,svmModel->at_upper_bound);
      printf("Norm of weight vector (including L2-loss): |w|=%.5f\n",
	     modellength);
    }
    printf("Norm. sum of slack variables (on working set): sum(xi_i)/n=%.5f\n",slacksum/n);
    printf("Norm of longest difference vector: ||Psi(x,y)-Psi(x,ybar)||=%.5f\n",
	   length_of_longest_document_vector(cset.lhs,cset.m,kparm));
    printf("Runtime in cpu-seconds: %.2f (%.2f%% for QP, %.2f%% for Argmax, %.2f%% for Psi, %.2f%% for init)\n",
	   rt_total/100.0, (100.0*rt_opt)/rt_total, (100.0*rt_viol)/rt_total,
	   (100.0*rt_psi)/rt_total, (100.0*rt_init)/rt_total);
  }
  if(struct_verbosity>=4)
    printW(sm->w,sizePsi,n,lparm->svm_c);

  /* Hand the caller an independent copy of the final model. */
  if(svmModel) {
    sm->svm_model=copy_model(svmModel);
    sm->w=sm->svm_model->lin_weights; /* short cut to weight vector */
  }

  print_struct_learning_stats(sample,sm,cset,alpha,sparm);

  /* Tear down all working storage. */
  if(fycache) {
    for(i=0;i<n;i++)
      free_svector(fycache[i]);
    free(fycache);
  }
  if(svmModel)
    free_model(svmModel,0);
  free(alpha);
  free(alphahist);
  free(opti);
  free(cset.rhs);
  for(i=0;i<cset.m;i++)
    free_example(cset.lhs[i],1);
  free(cset.lhs);
}
static void bm_free_extra_models() {
	/* Trim the polygon model table down to the smaller of the built-in
	 * Descent 2 model count and the exit model index.  exchange() resets
	 * N_polygon_models to `base` while yielding the previous count, so
	 * the range covers exactly the slots [base, old_count) being freed. */
	const auto base = std::min(N_D2_POLYGON_MODELS.value, exit_modelnum);
	range_for (auto &p, partial_range(Polygon_models, base, exchange(N_polygon_models, base)))
		free_model(p);
}
/* Load a level-specific .HXM robot-replacement file, if present, and
 * patch the in-memory tables: robot infos, robot joints, polygon models
 * (freeing the model being replaced first), object bitmaps and object
 * bitmap pointers.  Any malformed record aborts via Error().  Sets
 * Robot_replacements_loaded on success; silently returns when the level
 * has no .HXM file. */
void load_robot_replacements(const d_fname &level_name)
{
	int t,i,j;
	char ifile_name[FILENAME_LEN];

	change_filename_extension(ifile_name, level_name, ".HXM" );

	auto fp = PHYSFSX_openReadBuffered(ifile_name);
	if (!fp)		//no robot replacement file
		return;

	t = PHYSFSX_readInt(fp);		//read id "HXM!"
	if (t!= 0x21584d48)			// 0x21584d48 == "HXM!" little-endian
		Error("ID of HXM! file incorrect");

	t = PHYSFSX_readInt(fp);		//read version
	if (t<1)
		Error("HXM! version too old (%d)",t);

	t = PHYSFSX_readInt(fp);		//read number of robots
	for (j=0;j<t;j++) {
		i = PHYSFSX_readInt(fp);	//read robot number
		if (i<0 || i>=N_robot_types)
			Error("Robots number (%d) out of range in (%s). Range = [0..%d].",i,static_cast<const char *>(level_name),N_robot_types-1);
		robot_info_read(fp, Robot_info[i]);
	}

	t = PHYSFSX_readInt(fp);		//read number of joints
	for (j=0;j<t;j++) {
		i = PHYSFSX_readInt(fp);	//read joint number
		if (i<0 || i>=N_robot_joints)
			Error("Robots joint (%d) out of range in (%s). Range = [0..%d].",i,static_cast<const char *>(level_name),N_robot_joints-1);
		jointpos_read(fp, Robot_joints[i]);
	}

	t = PHYSFSX_readInt(fp);		//read number of polygon models
	for (j=0;j<t;j++) {
		i = PHYSFSX_readInt(fp);	//read model number
		if (i<0 || i>=N_polygon_models)
			Error("Polygon model (%d) out of range in (%s). Range = [0..%d].",i,static_cast<const char *>(level_name),N_polygon_models-1);
		// Release the model currently occupying the slot before
		// reading the replacement into it.
		free_model(Polygon_models[i]);
		polymodel_read(&Polygon_models[i], fp);
		polygon_model_data_read(&Polygon_models[i], fp);
		Dying_modelnums[i] = PHYSFSX_readInt(fp);
		Dead_modelnums[i] = PHYSFSX_readInt(fp);
	}

	t = PHYSFSX_readInt(fp);		//read number of objbitmaps
	for (j=0;j<t;j++) {
		i = PHYSFSX_readInt(fp);	//read objbitmap number
		if (i < 0 || i >= ObjBitmaps.size())
			Error("Object bitmap number (%d) out of range in (%s). Range = [0..%" DXX_PRI_size_type "].", i, static_cast<const char *>(level_name), ObjBitmaps.size() - 1);
		bitmap_index_read(fp, ObjBitmaps[i]);
	}

	t = PHYSFSX_readInt(fp);		//read number of objbitmapptrs
	for (j=0;j<t;j++) {
		i = PHYSFSX_readInt(fp);	//read objbitmapptr number
		if (i < 0 || i >= ObjBitmapPtrs.size())
			Error("Object bitmap pointer (%d) out of range in (%s). Range = [0..%" DXX_PRI_size_type "].", i, static_cast<const char *>(level_name), ObjBitmapPtrs.size() - 1);
		ObjBitmapPtrs[i] = PHYSFSX_readShort(fp);
	}

	Robot_replacements_loaded = 1;
}
/* One-slack dual cutting-plane inner loop for a latent/structural SVM.
 *
 * Starting from weight vector `w` (modified in place), repeatedly:
 * find the most violated joint constraint over the m (valid) examples,
 * append it to the working set (dXc/delta), solve the dual QP via
 * mosek_qp_optimize_dual, and rebuild w from the active dual variables.
 * Terminates when the newest constraint is violated by less than
 * `epsilon` beyond the current threshold, or after MAX_ITER rounds.
 *
 * NOTE(review): `alpha` is freed and reset to NULL at the end of every
 * iteration, so each realloc starts from scratch and entries other than
 * the newest are uninitialized before the QP call — presumably the
 * solver treats alpha purely as output; confirm against
 * mosek_qp_optimize_dual.
 * NOTE(review): the rows of `qmatrix` (and `G2`, `primal_obj`) are never
 * freed/used — possible leak / dead locals inherited from upstream. */
void cutting_plane_algorithm_dual(double *w, long m, int MAX_ITER, double C,
				  double epsilon, SVECTOR **fycache, EXAMPLE *ex,
				  STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm,
				  int *valid_examples)
{
  long i,j;
  double *alpha;
  DOC **dXc; // constraint matrix
  double *delta; // rhs of constraints
  SVECTOR *new_constraint;
  int iter, size_active;
  double value;
  double threshold = 0.0;
  double margin;
  double primal_obj, cur_obj;
  double *cur_slack = NULL;
  int mv_iter;
  int *idle = NULL;
  double **G = NULL;
  double **G2 = NULL;
  double **qmatrix = NULL;
  SVECTOR *f;
  int r;

  // set parameters for hideo solver
  LEARN_PARM lparm;
  KERNEL_PARM kparm;
  MODEL *svm_model=NULL;
  lparm.biased_hyperplane = 0;
  lparm.epsilon_crit = MIN(epsilon,0.001);
  lparm.svm_c = C;
  lparm.sharedslack = 1;
  kparm.kernel_type = LINEAR;
  lparm.remove_inconsistent=0;
  lparm.skip_final_opt_check=0;
  lparm.svm_maxqpsize=10;
  lparm.svm_newvarsinqp=0;
  lparm.svm_iter_to_shrink=-9999;
  lparm.maxiter=100000;
  lparm.kernel_cache_size=40;
  lparm.eps = epsilon;
  lparm.transduction_posratio=-1.0;
  lparm.svm_costratio=1.0;
  lparm.svm_costratio_unlab=1.0;
  lparm.svm_unlabbound=1E-5;
  lparm.epsilon_a=1E-10; // changed from 1e-15
  lparm.compute_loo=0;
  lparm.rho=1.0;
  lparm.xa_depth=0;
  strcpy(lparm.alphafile,"");
  kparm.poly_degree=3;
  kparm.rbf_gamma=1.0;
  kparm.coef_lin=1;
  kparm.coef_const=1;
  strcpy(kparm.custom,"empty");

  iter = 0;
  size_active = 0;
  alpha = NULL;
  dXc = NULL;
  delta = NULL;

  //qmatrix = (double **) malloc(sizeof(double *)*10);
  //assert(qmatrix!=NULL);

  printf("Running structural SVM solver: "); fflush(stdout);

  new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
  value = margin - sprod_ns(w, new_constraint);

  while((value>threshold+epsilon)&&(iter<MAX_ITER)) {
    iter+=1;
    size_active+=1;

    printf("."); fflush(stdout);

    // add constraint
    dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active);
    assert(dXc!=NULL);
    dXc[size_active-1] = (DOC*)malloc(sizeof(DOC));
    dXc[size_active-1]->fvec = new_constraint;
    dXc[size_active-1]->slackid = 1; // only one common slackid (one-slack)
    dXc[size_active-1]->costfactor = 1.0;

    delta = (double*)realloc(delta, sizeof(double)*size_active);
    assert(delta!=NULL);
    delta[size_active-1] = margin;

    //alpha = (double*)malloc(sizeof(double)*(size_active+(sparm->phi1_size+sparm->phi2_size)));
    //assert(alpha!=NULL);
    //for(j=0; j<(sparm->phi1_size+sparm->phi2_size)+size_active; j++){
    //  alpha[j] = 0.0;
    //}
    alpha = (double*)realloc(alpha, sizeof(double)*(size_active+(sparm->phi1_size+sparm->phi2_size)));
    assert(alpha!=NULL);
    alpha[size_active-1] = 0.0;

    idle = (int *) realloc(idle, sizeof(int)*size_active);
    assert(idle!=NULL);
    idle[size_active-1] = 0;

    qmatrix = (double **) realloc(qmatrix, sizeof(double *)*size_active);
    assert(qmatrix!=NULL);
    qmatrix[size_active-1] = malloc(sizeof(double)*(sparm->phi1_size+sparm->phi2_size));
    for(j = 0; j < (sparm->phi1_size+sparm->phi2_size); j++){
      qmatrix[size_active-1][j] = (-1)*returnWeightAtIndex(dXc[size_active-1]->fvec->words, ((sparm->phi1_size+sparm->phi2_size)*2+j+1));
    }

    // update Gram matrix
    G = (double **) realloc(G, sizeof(double *)*size_active);
    assert(G!=NULL);
    G[size_active-1] = NULL;
    for(j = 0; j < size_active; j++) {
      G[j] = (double *) realloc(G[j], sizeof(double)*size_active);
      assert(G[j]!=NULL);
    }
    for(j = 0; j < size_active-1; j++) {
      G[size_active-1][j] = sprod_ss(dXc[size_active-1]->fvec, dXc[j]->fvec);
      G[size_active-1][j] = G[size_active-1][j]/2;
      G[j][size_active-1] = G[size_active-1][j];
    }
    G[size_active-1][size_active-1] = sprod_ss(dXc[size_active-1]->fvec,dXc[size_active-1]->fvec);

    // hack: add a constant to the diagonal to make sure G is PSD
    G[size_active-1][size_active-1] += 1e-6;

    // solve QP to update alpha
    //r = mosek_qp_optimize(G, delta, alpha, (long) size_active, C, &cur_obj, dXc, (sparm->phi1_size+sparm->phi2_size)*2, (sparm->phi1_size+sparm->phi2_size));
    r = mosek_qp_optimize_dual(G, qmatrix, delta, alpha, (long) size_active, (long) (sparm->phi1_size+sparm->phi2_size), C, &cur_obj, 0, 0);
    // MOSEK response codes 1293..1296 indicate a (near-)non-PSD Q matrix:
    // keep inflating the diagonal regularization until the solver accepts it.
    if(r >= 1293 && r <= 1296) {
      printf("r:%d. G might not be psd due to numerical errors.\n",r);
      fflush(stdout);
      //exit(1);
      while(r==1295) {
	printf("r:%d. G might not be psd due to numerical errors. Gram Reg=%0.7f\n",r, sparm->gram_regularization);
	fflush(stdout);
	for(i=0;i<size_active;i++) {
	  G[i][i] += 10*sparm->gram_regularization-sparm->gram_regularization;
	}
	sparm->gram_regularization *= 10;
	r = mosek_qp_optimize_dual(G, qmatrix, delta, alpha, (long) size_active, (long) (sparm->phi1_size+sparm->phi2_size), C, &cur_obj, sparm->gram_regularization, sparm->gram_regularization*0.1);
      }
    }
    else if(r) {
      printf("Error %d in mosek_qp_optimize: Check ${MOSEKHOME}/${VERSION}/tools/platform/${PLATFORM}/h/mosek.h\n",r);
      exit(1);
    }

    // rebuild w from the dual variables of the active constraints
    clear_nvector(w,sm->sizePsi);
    for (j=0;j<size_active;j++) {
      if (alpha[j]>C*ALPHA_THRESHOLD) {
	add_vector_ns(w,dXc[j]->fvec,alpha[j]);
	idle[j] = 0;
      }
      else
	idle[j]++;
    }

    // fold the equality-constraint multipliers into the third weight band
    for(j=0; j<(sparm->phi1_size+sparm->phi2_size);j++){
      if (alpha[size_active+j] > EQUALITY_EPSILON){
	w[j+1+(sparm->phi1_size+sparm->phi2_size)*2] = w[j+1+(sparm->phi1_size+sparm->phi2_size)*2] - alpha[size_active+j];
      }
    }

    // snap near-zero weights to exactly zero
    for(j=1; j<=(sparm->phi1_size+sparm->phi2_size)*3;j++){
      if((w[j]<EQUALITY_EPSILON) && (w[j]>(-1*EQUALITY_EPSILON))){
	w[j] = 0;
      }
    }

    // sanity check: the third band is expected to be non-positive
    for(j=(sparm->phi1_size+sparm->phi2_size)*2+1; j<=(sparm->phi1_size+sparm->phi2_size)*3;j++){
      //assert(w[j] <= 0);
      if(w[j]>0){
	printf("j = %ld, w[j] = %0.6f\n", j, w[j]);
	fflush(stdout);
      }
    }

    // recompute the slack of every working-set constraint under the new w
    cur_slack = (double *) realloc(cur_slack,sizeof(double)*size_active);

    for(i = 0; i < size_active; i++) {
      cur_slack[i] = 0.0;
      for(f = dXc[i]->fvec; f; f = f->next) {
	j = 0;
	while(f->words[j].wnum) {
	  cur_slack[i] += w[f->words[j].wnum]*f->words[j].weight;
	  j++;
	}
      }
      if(cur_slack[i] >= delta[i])
	cur_slack[i] = 0.0;
      else
	cur_slack[i] = delta[i]-cur_slack[i];
    }

    // track the most violated working-set constraint
    mv_iter = 0;
    if(size_active > 1) {
      for(j = 0; j < size_active; j++) {
	if(cur_slack[j] >= cur_slack[mv_iter])
	  mv_iter = j;
      }
    }

    if(size_active > 1)
      threshold = cur_slack[mv_iter];
    else
      threshold = 0.0;

    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
    value = margin - sprod_ns(w, new_constraint);

    // periodically evict constraints that have been idle for a while
    if((iter % CLEANUP_CHECK) == 0) {
      printf("+"); fflush(stdout);
      size_active = resize_cleanup(size_active, &idle, &alpha, &delta, &dXc, &G, &mv_iter);
    }

    free(alpha); alpha=NULL;
  } // end cutting plane while loop

  //primal_obj = current_obj_val(ex, fycache, m, sm, sparm, C, valid_examples);
  printf(" Inner loop optimization finished.\n"); fflush(stdout);

  // free memory
  for (j=0;j<size_active;j++) {
    free(G[j]);
    free_example(dXc[j],1);
  }
  free(G);
  free(dXc);
  free(alpha);
  free(delta);
  free_svector(new_constraint);
  free(cur_slack);
  free(idle);
  if (svm_model!=NULL)
    free_model(svm_model,0);

  //return(primal_obj);
  return;
}
/* SVM-light training driver: parse command-line options, read the
 * training documents, train a model of the requested type
 * (classification, regression, ranking or optimization), write it to
 * the model file, and free all working storage.  Returns 0. */
int _svm_learn (int argc, char* argv[])
{
  char docfile[200];     /* file with training examples */
  char modelfile[200];   /* file for resulting classifier */
  char restartfile[200]; /* file with initial alphas */
  DOC **docs;            /* training examples */
  long totwords,totdoc,i;
  double *target;
  double *alpha_in=NULL;
  KERNEL_CACHE *kernel_cache;
  LEARN_PARM learn_parm;
  KERNEL_PARM kernel_parm;
  MODEL *model=(MODEL *)my_malloc(sizeof(MODEL));
  HIDEO_ENV *hideo_env=create_env();
  model->td_pred=NULL;
  model->n_td_pred=0;

  _read_input_parameters(argc,argv,docfile,modelfile,restartfile,&verbosity,
			 &learn_parm,&kernel_parm);
  read_documents(docfile,&docs,&target,&totwords,&totdoc);
  /* warm start from a previous alpha file, if one was given */
  if(restartfile[0]) alpha_in=read_alphas(restartfile,totdoc);

  if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */
    kernel_cache=NULL;
  }
  else {
    /* Always get a new kernel cache. It is not possible to use the same
       cache for two different training runs */
    kernel_cache=kernel_cache_init(totdoc,learn_parm.kernel_cache_size);
  }

  if(learn_parm.type == CLASSIFICATION) {
    svm_learn_classification(docs,target,totdoc,totwords,&learn_parm,
			     &kernel_parm,kernel_cache,model,alpha_in,hideo_env);
  }
  else if(learn_parm.type == REGRESSION) {
    svm_learn_regression(docs,target,totdoc,totwords,&learn_parm,
			 &kernel_parm,&kernel_cache,model,hideo_env);
  }
  else if(learn_parm.type == RANKING) {
    svm_learn_ranking(docs,target,totdoc,totwords,&learn_parm,
		      &kernel_parm,&kernel_cache,model,hideo_env);
  }
  else if(learn_parm.type == OPTIMIZATION) {
    svm_learn_optimization(docs,target,totdoc,totwords,&learn_parm,
			   &kernel_parm,kernel_cache,model,alpha_in,hideo_env);
  }

  if(kernel_cache) {
    /* Free the memory used for the cache. */
    kernel_cache_cleanup(kernel_cache);
  }

  /* Warning: The model contains references to the original data 'docs'.
     If you want to free the original data, and only keep the model, you
     have to make a deep copy of 'model'. */
  /* deep_copy_of_model=copy_model(model); */
  write_model(modelfile,model);

  free(alpha_in);
  free_model(model,0); /* 0: support vectors are owned by docs, freed below */
  for(i=0;i<totdoc;i++)
    free_example(docs[i],1);
  free(docs);
  free(target);
  free_env(hideo_env);

  return(0);
}
/*
 * SVMlight classification driver: loads a model, streams test examples
 * from a file, classifies each one, writes the decision values to a
 * predictions file, and prints accuracy / precision-recall statistics
 * when the labels are binary.
 *
 * Returns 0 on completion; exits with status 1 if either file cannot
 * be opened.
 *
 * NOTE(review): `docfile`, `modelfile`, `predictionsfile` and
 * `verbosity` are not declared here -- presumably file-scope globals
 * filled in by read_input_parameters(); confirm.
 * NOTE(review): `predfl` and `docfl` are never fclose()d before
 * returning -- harmless at process exit, but worth confirming intent.
 */
int main_classify (int argc, char* argv[])
{
  DOC *doc;   /* test example */
  WORDSVM *words;
  long max_docs,max_words_doc,lld;
  long totdoc=0,queryid,slackid;
  long correct=0,incorrect=0,no_accuracy=0;
  long res_a=0,res_b=0,res_c=0,res_d=0,wnum,pred_format;
  long j;
  double t1,runtime=0;
  double dist,doc_label,costfactor;
  char *line,*comment;
  FILE *predfl,*docfl;
  MODEL *model;

  read_input_parameters(argc,argv,docfile,modelfile,predictionsfile,
                        &verbosity,&pred_format);

  nol_ll(docfile,&max_docs,&max_words_doc,&lld); /* scan size of input file */
  max_words_doc+=2;
  lld+=2;

  line = (char *)my_malloc(sizeof(char)*lld);
  words = (WORDSVM *)my_malloc(sizeof(WORDSVM)*(max_words_doc+10));

  model=read_model(modelfile);

  if(model->kernel_parm.kernel_type == 0) { /* linear kernel */
    /* compute weight vector */
    add_weight_vector_to_linear_model(model);
  }

  if(verbosity>=2) {
    printf("Classifying test examples..");
    fflush(stdout);
  }

  if ((docfl = fopen (docfile, "r")) == NULL)
  { perror (docfile); exit (1); }
  if ((predfl = fopen (predictionsfile, "w")) == NULL)
  { perror (predictionsfile); exit (1); }

  while((!feof(docfl)) && fgets(line,(int)lld,docfl)) {
    if(line[0] == '#') continue;  /* line contains comments */
    parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,&wnum,
                   max_words_doc,&comment);
    totdoc++;
    if(model->kernel_parm.kernel_type == 0) {   /* linear kernel */
      for(j=0;(words[j]).wnum != 0;j++) {   /* Check if feature numbers */
        if((words[j]).wnum>model->totwords) /* are not larger than in   */
          (words[j]).wnum=0;                /* model. Remove feature if */
      }                                     /* necessary.               */
      doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
      t1=get_runtime();
      dist=classify_example_linear(model,doc);
      runtime+=(get_runtime()-t1);
      free_example(doc,1);
    }
    else {                             /* non-linear kernel */
      doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
      t1=get_runtime();
      dist=classify_example(model,doc);
      runtime+=(get_runtime()-t1);
      free_example(doc,1);
    }
    if(dist>0) {
      if(pred_format==0) { /* old weird output format */
        fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
      }
      if(doc_label>0) correct++; else incorrect++;
      if(doc_label>0) res_a++; else res_b++;
    }
    else {
      if(pred_format==0) { /* old weird output format */
        fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
      }
      if(doc_label<0) correct++; else incorrect++;
      if(doc_label>0) res_c++; else res_d++;
    }
    if(pred_format==1) { /* output the value of decision function */
      fprintf(predfl,"%.8g\n",dist);
    }
    /* Labels must be exactly +1/-1 for accuracy stats to make sense. */
    if((int)(0.01+(doc_label*doc_label)) != 1)
      { no_accuracy=1; } /* test data is not binary labeled */
    if(verbosity>=2) {
      if(totdoc % 100 == 0) {
        printf("%ld..",totdoc); fflush(stdout);
      }
    }
  }
  free(line);
  free(words);
  free_model(model,1);

  if(verbosity>=2) {
    printf("done\n");
    /* Note by Gary Boone                     Date: 29 April 2000        */
    /*   o Timing is inaccurate. The timer has 0.01 second resolution.  */
    /*     Because classification of a single vector takes less than    */
    /*     0.01 secs, the timer was underflowing.                       */
    printf("Runtime (without IO) in cpu-seconds: %.2f\n",
           (float)(runtime/100.0));
  }
  if((!no_accuracy) && (verbosity>=1)) {
    printf("Accuracy on test set: %.2f%% (%ld correct, %ld incorrect, %ld total)\n",(float)(correct)*100.0/totdoc,correct,incorrect,totdoc);
    printf("Precision/recall on test set: %.2f%%/%.2f%%\n",(float)(res_a)*100.0/(res_a+res_b),(float)(res_a)*100.0/(res_a+res_c));
  }

  return(0);
}
/*
 * One-slack cutting-plane solver for a structural SVM: repeatedly finds
 * the most violated constraint, adds it to the working set, and re-solves
 * the QP (via MOSEK) until the violation drops below `threshold+epsilon`
 * or MAX_ITER iterations are reached.
 *
 * w              - weight vector, updated in place (1-based indexing).
 * m              - number of training examples.
 * MAX_ITER       - cap on cutting-plane iterations.
 * C, epsilon     - regularization constant and stopping tolerance.
 * fycache        - cached feature vectors Psi(x_i,y_i).
 * ex, sm, sparm  - examples, structmodel and learning parameters.
 * valid_examples - mask of examples to include.
 * Returns the primal objective value at termination.
 *
 * NOTE(review): `svm_model` is set to NULL and never reassigned, so the
 * final free_model() call is dead code kept from the hideo-solver
 * variant. Locals `alpha`, `idle`, `mv_iter`, `f` and `i` are likewise
 * unused outside commented-out code.
 */
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, double epsilon,
                               SVECTOR **fycache, EXAMPLE *ex, STRUCTMODEL *sm,
                               STRUCT_LEARN_PARM *sparm, int *valid_examples)
{
  long i,j,t;
  double *alpha;
  DOC **dXc;     /* constraint matrix */
  double *delta; /* rhs of constraints */
  SVECTOR *new_constraint;
  int iter, size_active;
  double value;
  double threshold = 0.0;
  double margin;
  double primal_obj, cur_obj;
  double *cur_slack = NULL;
  int mv_iter;
  int *idle = NULL;
  double **psiDiffs = NULL;
  SVECTOR *f;
  int r;
  long fnum, last_wnum;

  /* set parameters for hideo solver */
  LEARN_PARM lparm;
  KERNEL_PARM kparm;
  MODEL *svm_model=NULL;
  lparm.biased_hyperplane = 0;
  lparm.epsilon_crit = MIN(epsilon,0.001);
  lparm.svm_c = C;
  lparm.sharedslack = 1;
  kparm.kernel_type = LINEAR;
  lparm.remove_inconsistent=0;
  lparm.skip_final_opt_check=0;
  lparm.svm_maxqpsize=10;
  lparm.svm_newvarsinqp=0;
  lparm.svm_iter_to_shrink=-9999;
  lparm.maxiter=100000;
  lparm.kernel_cache_size=40;
  lparm.eps = epsilon;
  lparm.transduction_posratio=-1.0;
  lparm.svm_costratio=1.0;
  lparm.svm_costratio_unlab=1.0;
  lparm.svm_unlabbound=1E-5;
  lparm.epsilon_a=1E-10;  /* changed from 1e-15 */
  lparm.compute_loo=0;
  lparm.rho=1.0;
  lparm.xa_depth=0;
  strcpy(lparm.alphafile,"");
  kparm.poly_degree=3;
  kparm.rbf_gamma=1.0;
  kparm.coef_lin=1;
  kparm.coef_const=1;
  strcpy(kparm.custom,"empty");

  iter = 0;
  size_active = 0;
  alpha = NULL;
  dXc = NULL;
  delta = NULL;

  printf("Running structural SVM solver: "); fflush(stdout);

  /* Seed the loop with the first most-violated constraint. */
  new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
  value = margin - sprod_ns(w, new_constraint);

  while((value>threshold+epsilon)&&(iter<MAX_ITER)) {
    iter+=1;
    size_active+=1;

    printf("."); fflush(stdout);

    /* add constraint */
    dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active);
    assert(dXc!=NULL);
    dXc[size_active-1] = (DOC*)malloc(sizeof(DOC));
    dXc[size_active-1]->fvec = new_constraint;
    dXc[size_active-1]->slackid = 1; // only one common slackid (one-slack)
    dXc[size_active-1]->costfactor = 1.0;

    delta = (double*)realloc(delta, sizeof(double)*size_active);
    assert(delta!=NULL);
    delta[size_active-1] = margin;

    /*alpha = (double*)realloc(alpha, sizeof(double)*size_active);
    assert(alpha!=NULL);
    alpha[size_active-1] = 0.0;*/

    /*idle = (int *) realloc(idle, sizeof(int)*size_active);
    assert(idle!=NULL);
    idle[size_active-1] = 0;*/

    /* update Gram matrix: densify the sparse constraint vector into
       psiDiffs[size_active-1], zero-filling the gaps between word ids.
       Note word numbers (wnum) are 1-based, hence the t-1 indexing. */
    psiDiffs = (double **) realloc(psiDiffs, sizeof(double *)*size_active);
    assert(psiDiffs!=NULL);
    psiDiffs[size_active-1] = NULL;
    psiDiffs[size_active-1] = (double *) realloc(psiDiffs[size_active-1],
                                sizeof(double)*((sparm->phi1_size+sparm->phi2_size)*3));
    assert(psiDiffs[size_active-1]!=NULL);

    fnum = 0;
    last_wnum = 0;
    while(dXc[size_active-1]->fvec->words[fnum].wnum) {
      for (t = last_wnum+1; t < dXc[size_active-1]->fvec->words[fnum].wnum; t++) {
        psiDiffs[size_active-1][t-1] = 0;
      }
      psiDiffs[size_active-1][dXc[size_active-1]->fvec->words[fnum].wnum-1] =
        dXc[size_active-1]->fvec->words[fnum].weight;
      /*if((psiDiffs[size_active-1][dXc[size_active-1]->fvec->words[fnum].wnum-1]<EQUALITY_EPSILON) && (psiDiffs[size_active-1][dXc[size_active-1]->fvec->words[fnum].wnum-1]>(-1*EQUALITY_EPSILON))){
        psiDiffs[size_active-1][dXc[size_active-1]->fvec->words[fnum].wnum-1] = 0;
      }*/
      last_wnum = dXc[size_active-1]->fvec->words[fnum].wnum;
      fnum++;
    }
    /* Zero the tail beyond the last populated word. */
    for (t = (last_wnum+1); t <= (sparm->phi1_size+sparm->phi2_size)*3; t++) {
      psiDiffs[size_active-1][t-1] = 0;
    }

    /* solve QP to update w */
    clear_nvector(w,sm->sizePsi);
    //cur_slack = (double *) realloc(cur_slack,sizeof(double)*size_active);
    /* One-slack formulation: a single shared slack variable. */
    cur_slack = (double *) realloc(cur_slack,sizeof(double));

    r = mosek_qp_optimize(psiDiffs, delta, w, cur_slack, (long) size_active, C, &cur_obj,
                          (sparm->phi1_size+sparm->phi2_size)*3,
                          (sparm->phi1_size+sparm->phi2_size)*2);

    /* MOSEK response codes 1293-1296 indicate a (near-)non-PSD matrix. */
    if(r >= 1293 && r <= 1296) {
      printf("r:%d. G might not be psd due to numerical errors.\n",r);
      exit(1);
    }
    else if(r) {
      printf("Error %d in mosek_qp_optimize: Check ${MOSEKHOME}/${VERSION}/tools/platform/${PLATFORM}/h/mosek.h\n",r);
      exit(1);
    }

    /* Snap near-zero weights to exactly zero (w is 1-based). */
    for(j = 1; j <= (sparm->phi1_size+sparm->phi2_size)*3; j++) {
      if((w[j]<EQUALITY_EPSILON) && (w[j]>(-1*EQUALITY_EPSILON))){
        w[j] = 0;
      }
    }

    /*for (j=0;j<size_active;j++) {
      if (cur_slack[j]>ALPHA_THRESHOLD) {
        idle[j] = 0;
      }
      else
        idle[j]++;
    }*/

    /*mv_iter = 0;
    if(size_active > 1) {
      for(j = 0; j < size_active; j++) {
        if(cur_slack[j] >= cur_slack[mv_iter])
          mv_iter = j;
      }
    }*/

    if(size_active > 1)
      //threshold = cur_slack[mv_iter];
      threshold = cur_slack[0];
    else
      threshold = 0.0;

    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
    value = margin - sprod_ns(w, new_constraint);

    /*if((iter % CLEANUP_CHECK) == 0) {
      printf("+"); fflush(stdout);
      size_active = resize_cleanup(size_active, &idle, &cur_slack, &delta, &dXc, &psiDiffs, &mv_iter);
    }*/

  } // end cutting plane while loop

  primal_obj = current_obj_val(ex, fycache, m, sm, sparm, C, valid_examples);

  printf(" Inner loop optimization finished.\n"); fflush(stdout);

  /* free memory */
  for (j=0;j<size_active;j++) {
    free(psiDiffs[j]);
    free_example(dXc[j],1);
  }
  free(psiDiffs);
  free(dXc);
  //free(alpha);
  free(delta);
  /* The last constraint found was never inserted into dXc, so it is
     freed separately here. */
  free_svector(new_constraint);
  free(cur_slack);
  //free(idle);
  if (svm_model!=NULL) free_model(svm_model,0);

  return(primal_obj);
}
/**
 * Adapted SVMlight svm_classify main loop supporting two input modes:
 * classic file-based I/O (use_gmumr == false) and in-memory GMUM.R mode,
 * where test examples are synthesized from `config` one at a time and
 * decision values are stored into config.result instead of a file.
 *
 * @param argc/argv   forwarded SVMlight-style command line
 * @param use_gmumr   true = read examples from / write results to config
 * @param config      GMUM.R configuration, holds data and receives results
 * @return 0 on completion
 *
 * NOTE(review): in GMUM.R mode `line` is allocated with new[] on every
 * iteration and never delete[]'d -- looks like a per-example leak; confirm.
 * NOTE(review): the `continue` on '#' lines does not advance to the next
 * example, so a '#'-prefixed line in GMUM.R mode would loop forever --
 * presumably generated lines never start with '#'; verify.
 * NOTE(review): local `stringline` in the loop head is written and never
 * read -- leftover from a refactor.
 */
int SVMLightRunner::librarySVMClassifyMain(
    int argc, char **argv, bool use_gmumr, SVMConfiguration &config
) {
    LOG( config.log, LogLevel::DEBUG_LEVEL, __debug_prefix__ + ".librarySVMClassifyMain() Started." );
    DOC *doc;   /* test example */
    WORD *words;
    long max_docs,max_words_doc,lld;
    long totdoc=0,queryid,slackid;
    long correct=0,incorrect=0,no_accuracy=0;
    long res_a=0,res_b=0,res_c=0,res_d=0,wnum,pred_format;
    long j;
    double t1,runtime=0;
    double dist,doc_label,costfactor;
    char *line,*comment;
    FILE *predfl,*docfl;
    MODEL *model;

    // GMUM.R changes {
    librarySVMClassifyReadInputParameters(
        argc, argv, docfile, modelfile, predictionsfile, &verbosity,
        &pred_format, use_gmumr, config);

    if (!use_gmumr) {
        nol_ll(docfile,&max_docs,&max_words_doc,&lld); /* scan size of input file */
        lld+=2;
        line = (char *)my_malloc(sizeof(char)*lld);
    } else {
        max_docs = config.target.n_rows;
        max_words_doc = config.getDataDim();
        config.result = arma::zeros<arma::vec>(max_docs);
        // Prevent writing to the file
        pred_format = -1;
        // lld used only for file reading
    }
    max_words_doc+=2;
    words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10));
    // GMUM.R changes }
    model=libraryReadModel(modelfile, use_gmumr, config);
    // GMUM.R changes }

    if(model->kernel_parm.kernel_type == 0) { /* linear kernel */
      /* compute weight vector */
      add_weight_vector_to_linear_model(model);
    }

    if(verbosity>=2) {
      C_PRINTF("Classifying test examples..");
      C_FFLUSH(stdout);
    }

    // GMUM.R changes {
    bool newline;
    if (!use_gmumr) {
        if ((predfl = fopen (predictionsfile, "w")) == NULL)
        { perror (predictionsfile); EXIT (1); }
        if ((docfl = fopen (docfile, "r")) == NULL)
        { perror (docfile); EXIT (1); }

        newline = (!feof(docfl)) && fgets(line,(int)lld,docfl);
    } else {
        newline = false;
        if (totdoc < config.getDataExamplesNumber()) {
            newline = true;
            // Build an SVMlight-format input line from the config data.
            std::string str = SVMConfigurationToSVMLightLearnInputLine(config, totdoc);
            line = new char[str.size() + 1];
            std::copy(str.begin(), str.end(), line);
            line[str.size()] = '\0';
        }
    }
    while(newline) {
        if (use_gmumr) {
            std::string stringline = "";
        }
        // GMUM.R changes }
        if(line[0] == '#') continue;  /* line contains comments */
        parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,&wnum,
                       max_words_doc,&comment);
        totdoc++;
        if(model->kernel_parm.kernel_type == 0) {   /* linear kernel */
            for(j=0;(words[j]).wnum != 0;j++) {   /* Check if feature numbers */
                if((words[j]).wnum>model->totwords) /* are not larger than in   */
                    (words[j]).wnum=0;              /* model. Remove feature if */
            }                                       /* necessary.               */
            doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
            t1=get_runtime();
            dist=classify_example_linear(model,doc);
            runtime+=(get_runtime()-t1);
            free_example(doc,1);
        }
        else {                             /* non-linear kernel */
            doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
            t1=get_runtime();
            dist=classify_example(model,doc);
            runtime+=(get_runtime()-t1);
            free_example(doc,1);
        }
        if(dist>0) {
            if(pred_format==0) { /* old weired output format */
                C_FPRINTF(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
            }
            if(doc_label>0) correct++; else incorrect++;
            if(doc_label>0) res_a++; else res_b++;
        }
        else {
            if(pred_format==0) { /* old weired output format */
                C_FPRINTF(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
            }
            if(doc_label<0) correct++; else incorrect++;
            if(doc_label>0) res_c++; else res_d++;
        }
        if(pred_format==1) { /* output the value of decision function */
            C_FPRINTF(predfl,"%.8g\n",dist);
        }
        /* Labels must be exactly +1/-1 for the accuracy stats below. */
        if((int)(0.01+(doc_label*doc_label)) != 1)
        { no_accuracy=1; } /* test data is not binary labeled */
        if(verbosity>=2) {
            if(totdoc % 100 == 0) {
                C_PRINTF("%ld..",totdoc); C_FFLUSH(stdout);
            }
        }
        // GMUM.R changes {
        if (!use_gmumr) {
            newline = (!feof(docfl)) && fgets(line,(int)lld,docfl);
        } else {
            newline = false;
            // Store prediction result in config
            config.result[totdoc-1] = dist;
            // Read next line
            if (totdoc < config.getDataExamplesNumber()) {
                newline = true;
                std::string str = SVMConfigurationToSVMLightLearnInputLine(config, totdoc);
                line = new char[str.size() + 1];
                std::copy(str.begin(), str.end(), line);
                line[str.size()] = '\0';
            }
        }
    }
    if (!use_gmumr) {
        fclose(predfl);
        fclose(docfl);
        free(line);
    }
    // GMUM.R changes }
    free(words);
    free_model(model,1);

    if(verbosity>=2) {
      C_PRINTF("done\n");
      /* Note by Gary Boone                     Date: 29 April 2000        */
      /*   o Timing is inaccurate. The timer has 0.01 second resolution.  */
      /*     Because classification of a single vector takes less than    */
      /*     0.01 secs, the timer was underflowing.                       */
      C_PRINTF("Runtime (without IO) in cpu-seconds: %.2f\n",
               (float)(runtime/100.0));
    }
    if((!no_accuracy) && (verbosity>=1)) {
      C_PRINTF("Accuracy on test set: %.2f%% (%ld correct, %ld incorrect, %ld total)\n",(float)(correct)*100.0/totdoc,correct,incorrect,totdoc);
      C_PRINTF("Precision/recall on test set: %.2f%%/%.2f%%\n",(float)(res_a)*100.0/(res_a+res_b),(float)(res_a)*100.0/(res_a+res_c));
    }

    return(0);
}
// Destructor: releases all model state owned by this MedSTC instance.
MedSTC::~MedSTC(void)
{
    // Cleanup is fully delegated to the member free_model().
    this->free_model();
}
/*************************************************************************
 * Entry point for pmp_bf
 *
 * Reads a phylogenetic tree and a MEME motif file, builds per-position
 * evolutionary models for each (selected) motif, computes score
 * distributions under the background model and under a
 * position-dependent motif model, and prints the FPR/FNR at each of
 * `range` score values.
 *
 * NOTE(review): `program_name` and `verbosity` are presumably globals
 * defined elsewhere; confirm.
 * NOTE(review): `mread` is never mread_destroy()'d, and `pssm_matrix`,
 * `alignment_col_freqs` and `pos_dep_bkg` are not freed per motif --
 * possible leaks; the trailing comment mentions an unreproduced memory
 * issue, so confirm before changing.
 * NOTE(review): the usage text advertises "TPR <tpr>" but the output
 * actually prints "FNR" -- one of the two looks stale.
 *************************************************************************/
int main(int argc, char *argv[]) {
  char* bg_filename = NULL;
  char* motif_name = "motif";            // Use this motif name in the output.
  STRING_LIST_T* selected_motifs = NULL;
  double fg_rate = 1.0;
  double bg_rate = 1.0;
  double purine_pyrimidine = 1.0;        // r
  double transition_transversion = 0.5;  // R
  double pseudocount = 0.1;
  GAP_SUPPORT_T gap_support = SKIP_GAPS;
  MODEL_TYPE_T model_type = F81_MODEL;
  BOOLEAN_T use_halpern_bruno = FALSE;
  char* ustar_label = NULL;              // TLB; create uniform star tree
  int i;

  program_name = "pmp_bf";

  /**********************************************
   * COMMAND LINE PROCESSING
   **********************************************/

  // Define command line options. (FIXME: Repeated code)
  // FIXME: Note that if you add or remove options you
  // must change n_options.
  int n_options = 12;
  cmdoption const pmp_options[] = {
    {"hb", NO_VALUE},
    {"ustar", REQUIRED_VALUE},
    {"model", REQUIRED_VALUE},
    {"pur-pyr", REQUIRED_VALUE},
    {"transition-transversion", REQUIRED_VALUE},
    {"bg", REQUIRED_VALUE},
    {"fg", REQUIRED_VALUE},
    {"motif", REQUIRED_VALUE},
    {"motif-name", REQUIRED_VALUE},
    {"bgfile", REQUIRED_VALUE},
    {"pseudocount", REQUIRED_VALUE},
    {"verbosity", REQUIRED_VALUE}
  };
  int option_index = 0;

  // Define the usage message.
  char usage[1000] = "";
  strcat(usage, "USAGE: pmp [options] <tree file> <MEME file>\n");
  strcat(usage, "\n");
  strcat(usage, " Options:\n");
  // Evolutionary model parameters.
  strcat(usage, " --hb\n");
  strcat(usage, " --model single|average|jc|k2|f81|f84|hky|tn");
  strcat(usage, " (default=f81)\n");
  strcat(usage, " --pur-pyr <float> (default=1.0)\n");
  strcat(usage, " --transition-transversion <float> (default=0.5)\n");
  strcat(usage, " --bg <float> (default=1.0)\n");
  strcat(usage, " --fg <float> (default=1.0)\n");
  // Motif parameters.
  strcat(usage, " --motif <id> (default=all)\n");
  strcat(usage, " --motif-name <string> (default from motif file)\n");
  // Miscellaneous parameters
  strcat(usage, " --bgfile <background> (default from motif file)\n");
  strcat(usage, " --pseudocount <float> (default=0.1)\n");
  strcat(usage, " --ustar <label>\n"); // TLB; create uniform star tree
  strcat(usage, " --verbosity [1|2|3|4] (default 2)\n");
  strcat(usage, "\n Prints the FP and FN rate at each of 10000 score values.\n");
  strcat(usage, "\n Output format: [<motif_id> score <score> FPR <fpr> TPR <tpr>]+\n");

  // Parse the command line.
  if (simple_setopt(argc, argv, n_options, pmp_options) != NO_ERROR) {
    die("Error processing command line options: option name too long.\n");
  }
  while (TRUE) {
    int c = 0;
    char* option_name = NULL;
    char* option_value = NULL;
    const char * message = NULL;

    // Read the next option, and break if we're done.
    c = simple_getopt(&option_name, &option_value, &option_index);
    if (c == 0) {
      break;
    } else if (c < 0) {
      (void) simple_getopterror(&message);
      die("Error processing command line options (%s)\n", message);
    }

    if (strcmp(option_name, "model") == 0) {
      if (strcmp(option_value, "jc") == 0) {
        model_type = JC_MODEL;
      } else if (strcmp(option_value, "k2") == 0) {
        model_type = K2_MODEL;
      } else if (strcmp(option_value, "f81") == 0) {
        model_type = F81_MODEL;
      } else if (strcmp(option_value, "f84") == 0) {
        model_type = F84_MODEL;
      } else if (strcmp(option_value, "hky") == 0) {
        model_type = HKY_MODEL;
      } else if (strcmp(option_value, "tn") == 0) {
        model_type = TAMURA_NEI_MODEL;
      } else if (strcmp(option_value, "single") == 0) {
        model_type = SINGLE_MODEL;
      } else if (strcmp(option_value, "average") == 0) {
        model_type = AVERAGE_MODEL;
      } else {
        die("Unknown model: %s\n", option_value);
      }
    } else if (strcmp(option_name, "hb") == 0){
      use_halpern_bruno = TRUE;
    } else if (strcmp(option_name, "ustar") == 0){
      // TLB; create uniform star tree
      ustar_label = option_value;
    } else if (strcmp(option_name, "pur-pyr") == 0){
      purine_pyrimidine = atof(option_value);
    } else if (strcmp(option_name, "transition-transversion") == 0){
      transition_transversion = atof(option_value);
    } else if (strcmp(option_name, "bg") == 0){
      bg_rate = atof(option_value);
    } else if (strcmp(option_name, "fg") == 0){
      fg_rate = atof(option_value);
    } else if (strcmp(option_name, "motif") == 0){
      if (selected_motifs == NULL) {
        selected_motifs = new_string_list();
      }
      add_string(option_value, selected_motifs);
    } else if (strcmp(option_name, "motif-name") == 0){
      motif_name = option_value;
    } else if (strcmp(option_name, "bgfile") == 0){
      bg_filename = option_value;
    } else if (strcmp(option_name, "pseudocount") == 0){
      pseudocount = atof(option_value);
    } else if (strcmp(option_name, "verbosity") == 0){
      verbosity = atoi(option_value);
    }
  }

  // Must have tree and motif file names
  if (argc != option_index + 2) {
    fprintf(stderr, "%s", usage);
    exit(EXIT_FAILURE);
  }

  /**********************************************
   * Read the phylogenetic tree.
   **********************************************/
  char* tree_filename = NULL;
  TREE_T* tree = NULL;
  tree_filename = argv[option_index];
  option_index++;
  tree = read_tree_from_file(tree_filename);

  // get the species names
  STRING_LIST_T* alignment_species = make_leaf_list(tree);
  char *root_label = get_label(tree); // in case target in center
  if (strlen(root_label)>0) add_string(root_label, alignment_species);
  //write_string_list(" ", alignment_species, stderr);

  // TLB; Convert the tree to a uniform star tree with
  // the target sequence at its center.
  if (ustar_label != NULL) {
    tree = convert_to_uniform_star_tree(tree, ustar_label);
    if (tree == NULL)
      die("Tree or alignment missing target %s\n", ustar_label);
    if (verbosity >= NORMAL_VERBOSE) {
      fprintf(stderr,
        "Target %s placed at center of uniform (d=%.3f) star tree:\n",
        ustar_label, get_total_length(tree) / get_num_children(tree)
      );
      write_tree(tree, stderr);
    }
  }

  /**********************************************
   * Read the motifs.
   **********************************************/
  char* meme_filename = argv[option_index];
  option_index++;
  int num_motifs = 0;

  MREAD_T *mread;
  ALPH_T alph;
  ARRAYLST_T *motifs;
  ARRAY_T *bg_freqs;

  mread = mread_create(meme_filename, OPEN_MFILE);
  mread_set_bg_source(mread, bg_filename);
  mread_set_pseudocount(mread, pseudocount);
  // read motifs
  motifs = mread_load(mread, NULL);
  alph = mread_get_alphabet(mread);
  bg_freqs = mread_get_background(mread);
  // check
  if (arraylst_size(motifs) == 0) die("No motifs in %s.", meme_filename);

  // TLB; need to resize bg_freqs array to ALPH_SIZE items
  // or copy array breaks in HB mode.  This throws away
  // the freqs for the ambiguous characters;
  int asize = alph_size(alph, ALPH_SIZE);
  resize_array(bg_freqs, asize);

  /**************************************************************
   * Compute probability distributions for each of the selected motifs.
   **************************************************************/
  int motif_index;
  for (motif_index = 0; motif_index < arraylst_size(motifs); motif_index++) {
    MOTIF_T* motif = (MOTIF_T*)arraylst_get(motif_index, motifs);
    char* motif_id = get_motif_id(motif);
    char* bare_motif_id = motif_id;

    // We may have specified on the command line that
    // only certain motifs were to be used.
    if (selected_motifs != NULL) {
      if (*bare_motif_id == '+' || *bare_motif_id == '-') {
        // The selected motif id won't included a strand indicator.
        bare_motif_id++;
      }
      if (have_string(bare_motif_id, selected_motifs) == FALSE) {
        continue;
      }
    }
    if (verbosity >= NORMAL_VERBOSE) {
      fprintf(
        stderr,
        "Using motif %s of width %d.\n",
        motif_id, get_motif_length(motif)
      );
    }

    // Build an array of evolutionary models for each position in the motif.
    EVOMODEL_T** models = make_motif_models(
      motif,
      bg_freqs,
      model_type,
      fg_rate,
      bg_rate,
      purine_pyrimidine,
      transition_transversion,
      use_halpern_bruno
    );

    // Get the frequencies under the background model (row 0)
    // and position-dependent scores (rows 1..w)
    // for each possible alignment column.
    MATRIX_T* pssm_matrix = build_alignment_pssm_matrix(
      alph,
      alignment_species,
      get_motif_length(motif) + 1,
      models,
      tree,
      gap_support
    );
    ARRAY_T* alignment_col_freqs = allocate_array(get_num_cols(pssm_matrix));
    copy_array(get_matrix_row(0, pssm_matrix), alignment_col_freqs);
    remove_matrix_row(0, pssm_matrix); // throw away first row
    //print_col_frequencies(alph, alignment_col_freqs);

    //
    // Get the position-dependent null model alignment column frequencies
    //
    int w = get_motif_length(motif);
    int ncols = get_num_cols(pssm_matrix);
    MATRIX_T* pos_dep_bkg = allocate_matrix(w, ncols);
    for (i=0; i<w; i++) {
      // get the evo model corresponding to this column of the motif
      // and store it as the first evolutionary model.
      myfree(models[0]);
      // Use motif PSFM for equilibrium freqs. for model.
      ARRAY_T* site_specific_freqs = allocate_array(asize);
      int j = 0;
      for(j = 0; j < asize; j++) {
        double value = get_matrix_cell(i, j, get_motif_freqs(motif));
        set_array_item(j, value, site_specific_freqs);
      }
      if (use_halpern_bruno == FALSE) {
        models[0] = make_model(
          model_type,
          fg_rate,
          transition_transversion,
          purine_pyrimidine,
          site_specific_freqs,
          NULL
        );
      } else {
        models[0] = make_model(
          model_type,
          fg_rate,
          transition_transversion,
          purine_pyrimidine,
          bg_freqs,
          site_specific_freqs
        );
      }
      // get the alignment column frequencies using this model
      MATRIX_T* tmp_pssm_matrix = build_alignment_pssm_matrix(
        alph,
        alignment_species,
        2, // only interested in freqs under bkg
        models,
        tree,
        gap_support
      );
      // assemble the position-dependent background alignment column freqs.
      set_matrix_row(i, get_matrix_row(0, tmp_pssm_matrix), pos_dep_bkg);
      // chuck the pssm (not his real name)
      free_matrix(tmp_pssm_matrix);
    }

    //
    // Compute and print the score distribution under the background model
    // and under the (position-dependent) motif model.
    //
    int range = 10000; // 10^4 gives same result as 10^5, but 10^3 differs

    // under background model
    PSSM_T* pssm = build_matrix_pssm(alph, pssm_matrix, alignment_col_freqs, range);

    // under position-dependent background (motif) model
    // NOTE(review): built from the same inputs as `pssm` above; the
    // position-dependent part comes from get_pv_lookup_pos_dep() below.
    PSSM_T* pssm_pos_dep = build_matrix_pssm(alph, pssm_matrix, alignment_col_freqs, range);
    get_pv_lookup_pos_dep(
      pssm_pos_dep,
      pos_dep_bkg,
      NULL // no priors used
    );

    // print FP and FN distributions
    int num_items = get_pssm_pv_length(pssm_pos_dep);
    for (i=0; i<num_items; i++) {
      double pvf = get_pssm_pv(i, pssm);
      double pvt = get_pssm_pv(i, pssm_pos_dep);
      double fpr = pvf;
      double fnr = 1 - pvt;
      if (fpr >= 0.99999 || fnr == 0) continue;
      printf("%s score %d FPR %.3g FNR %.3g\n", motif_id, i, fpr, fnr);
    }

    // free stuff
    free_pssm(pssm);
    free_pssm(pssm_pos_dep);
    if (models != NULL) {
      int model_index;
      int num_models = get_motif_length(motif) + 1;
      for (model_index = 0; model_index < num_models; model_index++) {
        free_model(models[model_index]);
      }
      myfree(models);
    }
  } // motif
  arraylst_destroy(destroy_motif, motifs);

  /**********************************************
   * Clean up.
   **********************************************/
  // TLB may have encountered a memory corruption bug here
  // CEG has not been able to reproduce it. valgrind says all is well.
  free_array(bg_freqs);
  free_tree(TRUE, tree);
  free_string_list(selected_motifs);

  return(0);
} // main
int main(int argc, char** argv) { std::string source_file; std::string output_file; try { namespace po=boost::program_options; po::options_description desc("Options"); desc.add_options() ("help,h", "Print help messages") ("source,s", po::value<std::string>(&source_file)->required(), "Specify an source file") ("output,o", po::value<std::string>(&output_file)->default_value(boost::filesystem::current_path().string<std::string>()+"/detector.data"), "Specify an output file"); po::positional_options_description p; p.add("source",-1); po::variables_map vm; po::store(po::command_line_parser(argc,argv).options(desc).positional(p).run(), vm); if (vm.count("help")) { std::cout << "Usage: " << argv[0] << " [options] source" << std::endl; std::cout << desc; return 0; } po::notify(vm); } catch(std::exception& e) { std::cerr << "Error: " << e.what() << std::endl; return 1; } catch(...) { std::cerr << "Exception of unknown type!" << std::endl; return 1; } std::vector<float> single_detector_vector; std::vector<unsigned int> single_detector_vector_indices; char model_file[source_file.size()+1]; strcpy(model_file, source_file.c_str()); MODEL *model=read_model(model_file); DOC** supveclist = model->supvec; single_detector_vector.clear(); single_detector_vector.resize(model->totwords, 0.); for (long ssv = 1; ssv < model->sv_num; ++ssv) { DOC* single_support_vector = supveclist[ssv]; SVECTOR* single_support_vector_values = single_support_vector->fvec; WORD single_support_vector_component; for (long singleFeature = 0; singleFeature < model->totwords; ++singleFeature) { single_support_vector_component = single_support_vector_values->words[singleFeature]; single_detector_vector.at(single_support_vector_component.wnum-1) += (single_support_vector_component.weight * model->alpha[ssv]); } } free_model(model,1); std::ofstream result_data; result_data.open(output_file.c_str(), std::ofstream::out|std::ofstream::app); for(std::vector<float>::iterator iter=single_detector_vector.begin(); 
iter!=single_detector_vector.end(); iter++) { result_data << *iter << std::endl; } }
/*
 * One-slack ("joint constraint") cutting-plane training for structural SVMs.
 *
 * Iteratively finds the most violated joint constraint over all training
 * examples, adds it to the working set `cset`, and re-solves the QP via
 * svm_learn_optimization() until the violation drops below sparm->epsilon.
 * Depending on alg_type it may use a precomputed Gram matrix
 * (ONESLACK_DUAL_ALG / ONESLACK_DUAL_CACHE_ALG) and a cache of previously
 * computed per-example constraints (ONESLACK_DUAL_CACHE_ALG).
 *
 * Parameters:
 *   sample    - training examples (sample.n examples in sample.examples)
 *   sparm     - structural learning parameters (C, epsilon, slack_norm,
 *               batch_size, ccache_size, ...)
 *   lparm     - SVM-light QP solver parameters (modified in place)
 *   kparm     - kernel parameters (kernel_type is temporarily switched to
 *               GRAM for the dual algorithms; gram_matrix is allocated here)
 *   sm        - (out) learned structural model; sm->svm_model receives a
 *               copy of the final SVM model, sm->w the weight vector
 *   alg_type  - which one-slack variant to run (see above)
 *
 * Only L1 slack norm is supported; slack_norm==2 (or anything else)
 * terminates the program with an error message.
 */
void svm_learn_struct_joint(SAMPLE sample, STRUCT_LEARN_PARM *sparm,
                            LEARN_PARM *lparm, KERNEL_PARM *kparm,
                            STRUCTMODEL *sm, int alg_type)
{
  int i,j;
  int numIt=0;
  long argmax_count=0;
  long totconstraints=0;
  long kernel_type_org;
  double epsilon,epsilon_cached;
  double lhsXw,rhs_i;
  double rhs=0;
  double slack,ceps;
  double dualitygap,modellength,alphasum;
  long sizePsi;
  double *alpha=NULL;
  long *alphahist=NULL,optcount=0;
  CONSTSET cset;
  SVECTOR *diff=NULL;
  double *lhs_n=NULL;
  SVECTOR *fy, *fydelta, **fycache, *lhs;
  MODEL *svmModel=NULL;
  DOC *doc;

  long n=sample.n;
  EXAMPLE *ex=sample.examples;
  double rt_total=0,rt_opt=0,rt_init=0,rt_psi=0,rt_viol=0,rt_kernel=0;
  double rt_cacheupdate=0,rt_cacheconst=0,rt_cacheadd=0,rt_cachesum=0;
  double rt1=0,rt2=0;
  long progress;
  /* SVECTOR ***fydelta_cache=NULL; double **loss_cache=NULL; int cache_size=0; */
  CCACHE *ccache=NULL;
  int cached_constraint;
  double viol,viol_est,epsilon_est=0;
  long uptr=0;
  long *randmapping=NULL;
  long batch_size=n;

  rt1=get_runtime();

  /* batch_size is given as a percentage of n when below 100 */
  if(sparm->batch_size<100)
    batch_size=sparm->batch_size*n/100.0;

  init_struct_model(sample,sm,sparm,lparm,kparm);
  sizePsi=sm->sizePsi+1;          /* sm must contain size of psi on return */

  if(sparm->slack_norm == 1) {
    lparm->svm_c=sparm->C;          /* set upper bound C */
    lparm->sharedslack=1;
  }
  else if(sparm->slack_norm == 2) {
    printf("ERROR: The joint algorithm does not apply to L2 slack norm!");
    fflush(stdout);
    exit(0);
  }
  else {
    printf("ERROR: Slack norm must be L1 or L2!");
    fflush(stdout);
    exit(0);
  }

  lparm->biased_hyperplane=0;     /* set threshold to zero */
  epsilon=100.0;                  /* start with low precision and increase later */
  epsilon_cached=epsilon;         /* epsilon to use for iterations using constraints
                                     constructed from the constraint cache */

  cset=init_struct_constraints(sample, sm, sparm);
  if(cset.m > 0) {
    alpha=(double *)realloc(alpha,sizeof(double)*cset.m);
    alphahist=(long *)realloc(alphahist,sizeof(long)*cset.m);
    for(i=0; i<cset.m; i++) {
      alpha[i]=0;
      alphahist[i]=-1;            /* -1 makes sure these constraints are never removed */
    }
  }
  kparm->gram_matrix=NULL;
  if((alg_type == ONESLACK_DUAL_ALG) || (alg_type == ONESLACK_DUAL_CACHE_ALG))
    kparm->gram_matrix=init_kernel_matrix(&cset,kparm);

  /* set initial model and slack variables */
  svmModel=(MODEL *)my_malloc(sizeof(MODEL));
  lparm->epsilon_crit=epsilon;
  svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi,
                         lparm,kparm,NULL,svmModel,alpha);
  add_weight_vector_to_linear_model(svmModel);
  sm->svm_model=svmModel;
  sm->w=svmModel->lin_weights;    /* short cut to weight vector */

  /* create a cache of the feature vectors for the correct labels */
  fycache=(SVECTOR **)my_malloc(n*sizeof(SVECTOR *));
  for(i=0;i<n;i++) {
    if(USE_FYCACHE) {
      fy=psi(ex[i].x,ex[i].y,sm,sparm);
      if(kparm->kernel_type == LINEAR_KERNEL) { /* store difference vector directly */
        diff=add_list_sort_ss_r(fy,COMPACT_ROUNDING_THRESH);
        free_svector(fy);
        fy=diff;
      }
    }
    else
      fy=NULL;
    fycache[i]=fy;
  }

  /* initialize the constraint cache */
  if(alg_type == ONESLACK_DUAL_CACHE_ALG) {
    ccache=create_constraint_cache(sample,sparm,sm);
    /* NOTE: the cached algorithm requires a loss function with loss(y,y)=0 */
    for(i=0;i<n;i++)
      if(loss(ex[i].y,ex[i].y,sparm) != 0) {
        printf("ERROR: Loss function returns non-zero value loss(y_%d,y_%d)\n",i,i);
        printf(" W4 algorithm assumes that loss(y_i,y_i)=0 for all i.\n");
        exit(1);
      }
  }

  if(kparm->kernel_type == LINEAR_KERNEL)
    lhs_n=create_nvector(sm->sizePsi);

  /* randomize order or training examples */
  if(batch_size<n)
    randmapping=random_order(n);

  rt_init+=MAX(get_runtime()-rt1,0);
  rt_total+=rt_init;

  /*****************/
  /*** main loop ***/
  /*****************/
  do { /* iteratively find and add constraints to working set */

    if(struct_verbosity>=1) {
      printf("Iter %i: ",++numIt);
      fflush(stdout);
    }

    rt1=get_runtime();

    /**** compute current slack ****/
    alphasum=0;
    for(j=0;(j<cset.m);j++)
      alphasum+=alpha[j];
    /* slack = max violation among constraints with above-average alpha */
    for(j=0,slack=-1;(j<cset.m) && (slack==-1);j++)
      if(alpha[j] > alphasum/cset.m)
        slack=MAX(0,cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
    slack=MAX(0,slack);

    rt_total+=MAX(get_runtime()-rt1,0);

    /**** find a violated joint constraint ****/
    lhs=NULL;
    rhs=0;
    if(alg_type == ONESLACK_DUAL_CACHE_ALG) {
      rt1=get_runtime();
      /* Compute violation of constraints in cache for current w */
      if(struct_verbosity>=2) rt2=get_runtime();
      update_constraint_cache_for_model(ccache, svmModel);
      if(struct_verbosity>=2) rt_cacheupdate+=MAX(get_runtime()-rt2,0);
      /* Is there is a sufficiently violated constraint in cache? */
      viol=compute_violation_of_constraint_in_cache(ccache,epsilon_est/2);
      if(viol-slack > MAX(epsilon_est/10,sparm->epsilon)) {
        /* There is a sufficiently violated constraint in cache, so
           use this constraint in this iteration. */
        if(struct_verbosity>=2) rt2=get_runtime();
        viol=find_most_violated_joint_constraint_in_cache(ccache,
                                   epsilon_est/2,lhs_n,&lhs,&rhs);
        if(struct_verbosity>=2) rt_cacheconst+=MAX(get_runtime()-rt2,0);
        cached_constraint=1;
      }
      else {
        /* There is no sufficiently violated constraint in cache, so
           update cache by computing most violated constraint
           explicitly for batch_size examples. */
        viol_est=0;
        progress=0;
        viol=compute_violation_of_constraint_in_cache(ccache,0);
        for(j=0;(j<batch_size) || ((j<n)&&(viol-slack<sparm->epsilon));j++) {
          if(struct_verbosity>=1)
            print_percent_progress(&progress,n,10,".");
          uptr=uptr % n;
          if(randmapping)
            i=randmapping[uptr];
          else
            i=uptr;
          /* find most violating fydelta=fy-fybar and rhs for example i */
          find_most_violated_constraint(&fydelta,&rhs_i,&ex[i],
                                        fycache[i],n,sm,sparm,
                                        &rt_viol,&rt_psi,&argmax_count);
          /* add current fy-fybar and loss to cache */
          if(struct_verbosity>=2) rt2=get_runtime();
          viol+=add_constraint_to_constraint_cache(ccache,sm->svm_model,
                          i,fydelta,rhs_i,0.0001*sparm->epsilon/n,
                          sparm->ccache_size,&rt_cachesum);
          if(struct_verbosity>=2) rt_cacheadd+=MAX(get_runtime()-rt2,0);
          viol_est+=ccache->constlist[i]->viol;
          uptr++;
        }
        cached_constraint=(j<n);
        if(struct_verbosity>=2) rt2=get_runtime();
        if(cached_constraint)
          viol=find_most_violated_joint_constraint_in_cache(ccache,
                                     epsilon_est/2,lhs_n,&lhs,&rhs);
        else
          viol=find_most_violated_joint_constraint_in_cache(ccache,0,lhs_n,
                                                            &lhs,&rhs);
        if(struct_verbosity>=2) rt_cacheconst+=MAX(get_runtime()-rt2,0);
        viol_est*=((double)n/j);
        epsilon_est=(1-(double)j/n)*epsilon_est+(double)j/n*(viol_est-slack);
        if((struct_verbosity >= 1) && (j!=n))
          printf("(upd=%5.1f%%,eps^=%.4f,eps*=%.4f)",
                 100.0*j/n,viol_est-slack,epsilon_est);
      }
      lhsXw=rhs-viol;

      rt_total+=MAX(get_runtime()-rt1,0);
    }
    else {
      /* do not use constraint from cache */
      rt1=get_runtime();
      cached_constraint=0;
      if(kparm->kernel_type == LINEAR_KERNEL)
        clear_nvector(lhs_n,sm->sizePsi);
      progress=0;
      rt_total+=MAX(get_runtime()-rt1,0);

      for(i=0; i<n; i++) {
        rt1=get_runtime();

        if(struct_verbosity>=1)
          print_percent_progress(&progress,n,10,".");

        /* compute most violating fydelta=fy-fybar and rhs for example i */
        find_most_violated_constraint(&fydelta,&rhs_i,&ex[i],fycache[i],n,
                                      sm,sparm,&rt_viol,&rt_psi,&argmax_count);
        /* add current fy-fybar to lhs of constraint */
        if(kparm->kernel_type == LINEAR_KERNEL) {
          add_list_n_ns(lhs_n,fydelta,1.0);   /* add fy-fybar to sum */
          free_svector(fydelta);
        }
        else {
          append_svector_list(fydelta,lhs);   /* add fy-fybar to vector list */
          lhs=fydelta;
        }
        rhs+=rhs_i;                           /* add loss to rhs */

        rt_total+=MAX(get_runtime()-rt1,0);
      } /* end of example loop */

      rt1=get_runtime();

      /* create sparse vector from dense sum */
      if(kparm->kernel_type == LINEAR_KERNEL)
        lhs=create_svector_n_r(lhs_n,sm->sizePsi,NULL,1.0,
                               COMPACT_ROUNDING_THRESH);
      doc=create_example(cset.m,0,1,1,lhs);
      lhsXw=classify_example(svmModel,doc);
      free_example(doc,0);
      viol=rhs-lhsXw;

      rt_total+=MAX(get_runtime()-rt1,0);
    } /* end of finding most violated joint constraint */

    rt1=get_runtime();

    /**** if `error', then add constraint and recompute QP ****/
    if(slack > (rhs-lhsXw+0.000001)) {
      printf("\nWARNING: Slack of most violated constraint is smaller than slack of working\n");
      printf(" set! There is probably a bug in 'find_most_violated_constraint_*'.\n");
      printf("slack=%f, newslack=%f\n",slack,rhs-lhsXw);
      /* exit(1); */
    }
    ceps=MAX(0,rhs-lhsXw-slack);
    if((ceps > sparm->epsilon) || cached_constraint) {
      /**** resize constraint matrix and add new constraint ****/
      cset.lhs=(DOC **)realloc(cset.lhs,sizeof(DOC *)*(cset.m+1));
      cset.lhs[cset.m]=create_example(cset.m,0,1,1,lhs);
      cset.rhs=(double *)realloc(cset.rhs,sizeof(double)*(cset.m+1));
      cset.rhs[cset.m]=rhs;
      alpha=(double *)realloc(alpha,sizeof(double)*(cset.m+1));
      alpha[cset.m]=0;
      alphahist=(long *)realloc(alphahist,sizeof(long)*(cset.m+1));
      alphahist[cset.m]=optcount;
      cset.m++;
      totconstraints++;
      if((alg_type == ONESLACK_DUAL_ALG) || (alg_type == ONESLACK_DUAL_CACHE_ALG)) {
        if(struct_verbosity>=2) rt2=get_runtime();
        kparm->gram_matrix=update_kernel_matrix(kparm->gram_matrix,cset.m-1,
                                                &cset,kparm);
        if(struct_verbosity>=2) rt_kernel+=MAX(get_runtime()-rt2,0);
      }

      /**** get new QP solution ****/
      if(struct_verbosity>=1) {
        printf("*");fflush(stdout);
      }
      if(struct_verbosity>=2) rt2=get_runtime();
      /* set svm precision so that higher than eps of most violated constr */
      if(cached_constraint) {
        epsilon_cached=MIN(epsilon_cached,ceps);
        lparm->epsilon_crit=epsilon_cached/2;
      }
      else {
        epsilon=MIN(epsilon,ceps);            /* best eps so far */
        lparm->epsilon_crit=epsilon/2;
        epsilon_cached=epsilon;
      }
      free_model(svmModel,0);
      svmModel=(MODEL *)my_malloc(sizeof(MODEL));
      /* Run the QP solver on cset. */
      kernel_type_org=kparm->kernel_type;
      if((alg_type == ONESLACK_DUAL_ALG) || (alg_type == ONESLACK_DUAL_CACHE_ALG))
        kparm->kernel_type=GRAM;              /* use kernel stored in kparm */
      svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi,
                             lparm,kparm,NULL,svmModel,alpha);
      kparm->kernel_type=kernel_type_org;
      svmModel->kernel_parm.kernel_type=kernel_type_org;
      /* Always add weight vector, in case part of the kernel is
         linear. If not, ignore the weight vector since its content is bogus. */
      add_weight_vector_to_linear_model(svmModel);
      sm->svm_model=svmModel;
      sm->w=svmModel->lin_weights;            /* short cut to weight vector */
      optcount++;
      /* keep track of when each constraint was last
         active. constraints marked with -1 are not updated */
      for(j=0;j<cset.m;j++)
        if((alphahist[j]>-1) && (alpha[j] != 0))
          alphahist[j]=optcount;
      if(struct_verbosity>=2) rt_opt+=MAX(get_runtime()-rt2,0);

      /* Check if some of the linear constraints have not been
         active in a while. Those constraints are then removed to
         avoid bloating the working set beyond necessity. */
      if(struct_verbosity>=3)
        printf("Reducing working set...");
      fflush(stdout);  /* note: runs unconditionally (no braces on the if above) */
      remove_inactive_constraints(&cset,alpha,optcount,alphahist,50);
      if(struct_verbosity>=3)
        printf("done. ");
    }
    else {
      free_svector(lhs);
    }

    if(struct_verbosity>=1)
      printf("(NumConst=%d, SV=%ld, CEps=%.4f, QPEps=%.4f)\n",cset.m,
             svmModel->sv_num-1,ceps,svmModel->maxdiff);

    rt_total+=MAX(get_runtime()-rt1,0);

  } while(finalize_iteration(ceps,cached_constraint,sample,sm,cset,alpha,sparm)||
          cached_constraint || (ceps > sparm->epsilon) );
  /* originally like below ... finalize_iteration was not called because of
     short-circuit evaluation:
     } while(cached_constraint || (ceps > sparm->epsilon) ||
             finalize_iteration(ceps,cached_constraint,sample,sm,cset,alpha,sparm)
           ); */

  if(struct_verbosity>=1) {
    printf("Final epsilon on KKT-Conditions: %.5f\n",
           MAX(svmModel->maxdiff,ceps));

    slack=0;
    for(j=0;j<cset.m;j++)
      slack=MAX(slack,
                cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
    alphasum=0;
    for(i=0; i<cset.m; i++)
      alphasum+=alpha[i]*cset.rhs[i];
    if(kparm->kernel_type == LINEAR_KERNEL)
      modellength=model_length_n(svmModel);
    else
      modellength=model_length_s(svmModel);
    dualitygap=(0.5*modellength*modellength+sparm->C*viol)
               -(alphasum-0.5*modellength*modellength);
    printf("Upper bound on duality gap: %.5f\n", dualitygap);
    printf("Dual objective value: dval=%.5f\n",
           alphasum-0.5*modellength*modellength);
    printf("Primal objective value: pval=%.5f\n",
           0.5*modellength*modellength+sparm->C*viol);
    printf("Total number of constraints in final working set: %i (of %i)\n",(int)cset.m,(int)totconstraints);
    printf("Number of iterations: %d\n",numIt);
    printf("Number of calls to 'find_most_violated_constraint': %ld\n",argmax_count);
    printf("Number of SV: %ld \n",svmModel->sv_num-1);
    printf("Norm of weight vector: |w|=%.5f\n",modellength);
    printf("Value of slack variable (on working set): xi=%.5f\n",slack);
    printf("Value of slack variable (global): xi=%.5f\n",viol);
    printf("Norm of longest difference vector: ||Psi(x,y)-Psi(x,ybar)||=%.5f\n",
           length_of_longest_document_vector(cset.lhs,cset.m,kparm));
    if(struct_verbosity>=2)
      printf("Runtime in cpu-seconds: %.2f (%.2f%% for QP, %.2f%% for kernel, %.2f%% for Argmax, %.2f%% for Psi, %.2f%% for init, %.2f%% for cache update, %.2f%% for cache const, %.2f%% for cache add (incl. %.2f%% for sum))\n",
             rt_total/100.0, (100.0*rt_opt)/rt_total,
             (100.0*rt_kernel)/rt_total, (100.0*rt_viol)/rt_total,
             (100.0*rt_psi)/rt_total, (100.0*rt_init)/rt_total,(100.0*rt_cacheupdate)/rt_total,
             (100.0*rt_cacheconst)/rt_total,(100.0*rt_cacheadd)/rt_total,
             (100.0*rt_cachesum)/rt_total);
    else if(struct_verbosity==1)
      printf("Runtime in cpu-seconds: %.2f\n",rt_total/100.0);
  }
  if(ccache) {
    long cnum=0;
    CCACHEELEM *celem;
    for(i=0;i<n;i++)
      for(celem=ccache->constlist[i];celem;celem=celem->next)
        cnum++;
    printf("Final number of constraints in cache: %ld\n",cnum);
  }
  if(struct_verbosity>=4)
    printW(sm->w,sizePsi,n,lparm->svm_c);

  /* hand a private copy of the final model to the caller */
  if(svmModel) {
    sm->svm_model=copy_model(svmModel);
    sm->w=sm->svm_model->lin_weights;   /* short cut to weight vector */
    free_model(svmModel,0);
  }

  print_struct_learning_stats(sample,sm,cset,alpha,sparm);

  if(lhs_n)
    free_nvector(lhs_n);
  if(ccache)
    free_constraint_cache(ccache);
  for(i=0;i<n;i++)
    if(fycache[i])
      free_svector(fycache[i]);
  free(fycache);
  free(alpha);
  free(alphahist);
  free(cset.rhs);
  for(i=0;i<cset.m;i++)
    free_example(cset.lhs[i],1);
  free(cset.lhs);
  if(kparm->gram_matrix)
    free_matrix(kparm->gram_matrix);
}
//================================================================ void free_track_section_model(int scene, int trackid, int sectionid) { if(!is_track(scene,trackid)) return; free_model(trackscenevec[scene]->trackvec[trackid], sectionid); }
/*
 * Writes an SVM-light model to `modelfile` in the standard SVM-light text
 * format: a version line, the kernel parameters, totwords/totdoc, the
 * support-vector count and threshold b, then one line per support vector
 * (alpha*y followed by "index:weight" pairs and an optional "#userdefined"
 * trailer). Exits the process if the file cannot be opened.
 */
void write_model(char *modelfile, MODEL *model)
{
  FILE *modelfl;
  long j,i,sv_num;
  SVECTOR *v;
  MODEL *compact_model=NULL;

  if(verbosity>=1) {
    printf("Writing model file...");
    fflush(stdout);
  }

  /* Replace SV with single weight vector */
  /* NOTE: deliberately disabled via the `0 &&` -- the compaction branch is
     kept for reference but never taken. */
  if(0 && model->kernel_parm.kernel_type == LINEAR) {
    if(verbosity>=1) {
      printf("(compacting...");
      fflush(stdout);
    }
    compact_model=compact_linear_model(model);
    model=compact_model;   /* write the compacted copy instead */
    if(verbosity>=1) {
      printf("done)");
      fflush(stdout);
    }
  }

  if ((modelfl = fopen (modelfile, "w")) == NULL) {
    perror (modelfile);
    exit (1);
  }
  fprintf(modelfl,"SVM-light Version %s\n",VERSION);
  fprintf(modelfl,"%ld # kernel type\n", model->kernel_parm.kernel_type);
  fprintf(modelfl,"%ld # kernel parameter -d \n", model->kernel_parm.poly_degree);
  fprintf(modelfl,"%.8g # kernel parameter -g \n", model->kernel_parm.rbf_gamma);
  fprintf(modelfl,"%.8g # kernel parameter -s \n", model->kernel_parm.coef_lin);
  fprintf(modelfl,"%.8g # kernel parameter -r \n", model->kernel_parm.coef_const);
  fprintf(modelfl,"%s# kernel parameter -u \n",model->kernel_parm.custom);
  fprintf(modelfl,"%ld # highest feature index \n",model->totwords);
  fprintf(modelfl,"%ld # number of training documents \n",model->totdoc);

  /* count one entry per SVECTOR in each support vector's list (index 0 of
     supvec is unused, hence the +1 convention) */
  sv_num=1;
  for(i=1;i<model->sv_num;i++) {
    for(v=model->supvec[i]->fvec;v;v=v->next)
      sv_num++;
  }
  fprintf(modelfl,"%ld # number of support vectors plus 1 \n",sv_num);
  fprintf(modelfl,"%.8g # threshold b, each following line is a SV (starting with alpha*y)\n",model->b);

  for(i=1;i<model->sv_num;i++) {
    for(v=model->supvec[i]->fvec;v;v=v->next) {
      fprintf(modelfl,"%.32g ",model->alpha[i]*v->factor);
      /* sparse word list is terminated by wnum==0 */
      for (j=0; (v->words[j]).wnum; j++) {
        fprintf(modelfl,"%ld:%.8g ",
                (long)(v->words[j]).wnum,
                (double)(v->words[j]).weight);
      }
      if(v->userdefined)
        fprintf(modelfl,"#%s\n",v->userdefined);
      else
        fprintf(modelfl,"#\n");
      /* NOTE: this could be made more efficient by summing the alpha's of
         identical vectors before writing them to the file. */
    }
  }
  fclose(modelfl);
  if(compact_model)
    free_model(compact_model,1);
  if(verbosity>=1) {
    printf("done\n");
  }
}
/*
 * Releases the SVM model owned by the target classifier and wipes the
 * whole classifier structure so stale pointers cannot be reused.
 */
IMP_VOID ipReleaseTargetClassifieri( IpTargetClassifier *pstTgtClfier )
{
    /* deep-free the model held in the embedded parameter block */
    free_model( pstTgtClfier->stPara.pstModel, 1 );
    /* zero out all classifier state, including the now-dangling pointer */
    memset( pstTgtClfier, 0, sizeof(IpTargetClassifier) );
}
int main(int argc, char *argv[]) { int i, k, n=0; int iformat, oformat; nip_model model = NULL; time_series* ts_set = NULL; if(argc < 6){ printf("You must specify: \n"); printf(" - the NET file for the model, \n"); printf(" - input format ('univariate'), \n"); printf(" - input file name, \n"); printf(" - output format ('unary'), \n"); printf(" - output file name, please!\n"); return 0; } /* read the model */ model = parse_model(argv[1]); if(!model){ printf("Unable to parse the NET file: %s?\n", argv[1]); return -1; } /* read file formats */ /* Reminder: strcasecmp() is NOT ANSI C. */ if(strcasecmp(argv[2], S_UNIVARIATE) == 0) iformat = UNIVARIATE; /* additional formats here */ else{ printf("Invalid input file format: %s?\n", argv[2]); free_model(model); return -1; } if(strcasecmp(argv[4], S_UNARY) == 0) oformat = UNARY; /* additional formats here */ else{ printf("Invalid output file format: %s?\n", argv[4]); free_model(model); return -1; } /* Read the input data file */ switch (iformat) { case UNIVARIATE: case MULTIVARIATE: n = read_timeseries(model, argv[3], &ts_set); break; default: n = 0; /* should be impossible */ } if(n < 1){ fprintf(stderr, "There were errors while reading %s\n", argv[3]); free_model(model); /* no ts_set to free (?) */ return -1; } /* Write the results to the file */ k = NIP_NO_ERROR; switch (oformat) { case UNARY: k = write_unary_timeseries(ts_set, n, argv[5]); break; default: ; /* shouldn't happen */ } if(k != NIP_NO_ERROR){ fprintf(stderr, "Failed to write the data into %s\n", argv[5]); nip_report_error(__FILE__, __LINE__, k, 1); for(i = 0; i < n; i++) free_timeseries(ts_set[i]); free(ts_set); free_model(model); return -1; } for(i = 0; i < n; i++) free_timeseries(ts_set[i]); free(ts_set); free_model(model); return 0; }
/*
 * N-slack cutting-plane training for structural SVMs.
 *
 * For each example, repeatedly finds the most violated constraint (via
 * slack- or margin-rescaling, per sparm->loss_type), adds it to the working
 * set `cset`, and periodically re-solves the QP with svm_learn_optimization().
 * Precision `epsilon` is annealed from 1.0 down to sparm->epsilon; a
 * shrinking heuristic (`opti`/`opti_round`) skips examples that produced no
 * new constraint in the current round.
 *
 * Parameters:
 *   sample - training examples (sample.n examples in sample.examples)
 *   sparm  - structural learning parameters (C, epsilon, slack_norm,
 *            loss_type, newconstretrain, ...)
 *   lparm  - SVM-light QP solver parameters (modified in place)
 *   kparm  - kernel parameters; non-linear kernels are only supported for
 *            L1 slack norm
 *   sm     - (out) learned structural model; sm->svm_model receives a copy
 *            of the final SVM model, sm->w the weight vector
 *
 * L2 slack norm is emulated by appending a per-example slack feature
 * (weight 1/sqrt(2C)) to each constraint vector; see the slackv code below.
 */
void svm_learn_struct(SAMPLE sample, STRUCT_LEARN_PARM *sparm,
                      LEARN_PARM *lparm, KERNEL_PARM *kparm, STRUCTMODEL *sm)
{
  int i,j;
  int numIt=0;
  long newconstraints=0, activenum=0;
  int opti_round, *opti;
  long old_numConst=0;
  double epsilon;
  long tolerance;
  double lossval,factor;
  double margin=0;
  double slack, *slacks, slacksum;
  long sizePsi;
  double *alpha=NULL;
  CONSTSET cset;
  SVECTOR *diff=NULL;
  SVECTOR *fy, *fybar, *f;
  SVECTOR *slackvec;
  WORD slackv[2];
  MODEL *svmModel=NULL;
  KERNEL_CACHE *kcache=NULL;
  LABEL ybar;
  DOC *doc;

  long n=sample.n;
  EXAMPLE *ex=sample.examples;
  double rt_total=0.0, rt_opt=0.0;
  long rt1,rt2;

  init_struct_model(sample,sm,sparm);
  sizePsi=sm->sizePsi+1;          /* sm must contain size of psi on return */

  /* initialize example selection heuristic */
  opti=(int*)my_malloc(n*sizeof(int));
  for(i=0;i<n;i++) {
    opti[i]=0;
  }
  opti_round=0;

  if(sparm->slack_norm == 1) {
    lparm->svm_c=sparm->C;        /* set upper bound C */
    lparm->sharedslack=1;
  }
  else if(sparm->slack_norm == 2) {
    lparm->svm_c=999999999999999.0; /* upper bound C must never be reached */
    lparm->sharedslack=0;
    if(kparm->kernel_type != LINEAR) {
      printf("ERROR: Kernels are not implemented for L2 slack norm!");
      fflush(stdout);
      exit(0);
    }
  }
  else {
    printf("ERROR: Slack norm must be L1 or L2!");
    fflush(stdout);
    exit(0);
  }

  epsilon=1.0;                    /* start with low precision and increase later */
  tolerance=n/100;                /* increase precision, whenever less than that
                                     number of constraints is not fulfilled */
  lparm->biased_hyperplane=0;     /* set threshold to zero */

  cset=init_struct_constraints(sample, sm, sparm);
  if(cset.m > 0) {
    alpha=realloc(alpha,sizeof(double)*cset.m);
    for(i=0; i<cset.m; i++)
      alpha[i]=0;
  }

  /* set initial model and slack variables*/
  /* QP has sizePsi+n variables: weights plus one slack feature per example */
  svmModel=(MODEL *)my_malloc(sizeof(MODEL));
  svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n,
                         lparm,kparm,NULL,svmModel,alpha);
  add_weight_vector_to_linear_model(svmModel);
  sm->svm_model=svmModel;
  sm->w=svmModel->lin_weights;    /* short cut to weight vector */

  printf("Starting Iterations\n");

  /*****************/
  /*** main loop ***/
  /*****************/
  do { /* iteratively increase precision */

    epsilon=MAX(epsilon*0.09999999999,sparm->epsilon);
    if(epsilon == sparm->epsilon) /* for final precision, find all SV */
      tolerance=0;
    lparm->epsilon_crit=epsilon/2; /* svm precision must be higher than eps */
    if(struct_verbosity>=1)
      printf("Setting current working precision to %g.\n",epsilon);

    do { /* iteration until (approx) all SV are found for current
            precision and tolerance */

      old_numConst=cset.m;
      opti_round++;
      activenum=n;

      do { /* go through examples that keep producing new constraints */

        if(struct_verbosity>=1) {
          printf("--Iteration %i (%ld active): ",++numIt,activenum);
          fflush(stdout);
        }

        for(i=0; i<n; i++) { /*** example loop ***/

          rt1=get_runtime();

          if(opti[i] != opti_round) {/* if the example is not shrunk away,
                                        then see if it is necessary to add a
                                        new constraint */
            if(sparm->loss_type == SLACK_RESCALING)
              ybar=find_most_violated_constraint_slackrescaling(ex[i].x,
                                                                ex[i].y,sm,
                                                                sparm);
            else
              ybar=find_most_violated_constraint_marginrescaling(ex[i].x,
                                                                 ex[i].y,sm,
                                                                 sparm);

            if(empty_label(ybar)) {
              /* no violating label found -> shrink this example away */
              if(opti[i] != opti_round) {
                activenum--;
                opti[i]=opti_round;
              }
              if(struct_verbosity>=2)
                printf("no-incorrect-found(%i) ",i);
              continue;
            }

            /**** get psi(y)-psi(ybar) ****/
            fy=psi(ex[i].x,ex[i].y,sm,sparm);
            fybar=psi(ex[i].x,ybar,sm,sparm);

            /**** scale feature vector and margin by loss ****/
            lossval=loss(ex[i].y,ybar,sparm);
            if(sparm->slack_norm == 2)
              lossval=sqrt(lossval);
            if(sparm->loss_type == SLACK_RESCALING)
              factor=lossval;
            else                    /* do not rescale vector for */
              factor=1.0;           /* margin rescaling loss type */
            for(f=fy;f;f=f->next)
              f->factor*=factor;
            for(f=fybar;f;f=f->next)
              f->factor*=-factor;
            margin=lossval;

            /**** create constraint for current ybar ****/
            append_svector_list(fy,fybar);/* append the two vector lists */
            doc=create_example(cset.m,0,i+1,1,fy);

            /**** compute slack for this example ****/
            slack=0;
            for(j=0;j<cset.m;j++)
              if(cset.lhs[j]->slackid == i+1) {
                if(sparm->slack_norm == 2) /* works only for linear kernel */
                  slack=MAX(slack,cset.rhs[j]
                            -(classify_example(svmModel,cset.lhs[j])
                              -sm->w[sizePsi+i]/(sqrt(2*sparm->C))));
                else
                  slack=MAX(slack,
                            cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
              }

            /**** if `error' add constraint and recompute ****/
            if((classify_example(svmModel,doc)+slack)<(margin-epsilon)) {
              if(struct_verbosity>=2)
                {printf("(%i) ",i); fflush(stdout);}
              if(struct_verbosity==1)
                {printf("."); fflush(stdout);}

              /**** resize constraint matrix and add new constraint ****/
              cset.m++;
              cset.lhs=realloc(cset.lhs,sizeof(DOC *)*cset.m);
              if(kparm->kernel_type == LINEAR) {
                diff=add_list_ss(fy); /* store difference vector directly */
                if(sparm->slack_norm == 1)
                  cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1,
                                                    copy_svector(diff));
                else if(sparm->slack_norm == 2) {
                  /**** add squared slack variable to feature vector ****/
                  slackv[0].wnum=sizePsi+i;
                  slackv[0].weight=1/(sqrt(2*sparm->C));
                  slackv[1].wnum=0; /*terminator*/
                  slackvec=create_svector(slackv,"",1.0);
                  cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1,
                                                    add_ss(diff,slackvec));
                  free_svector(slackvec);
                }
                free_svector(diff);
              }
              else { /* kernel is used */
                if(sparm->slack_norm == 1)
                  cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1,
                                                    copy_svector(fy));
                else if(sparm->slack_norm == 2)
                  exit(1);  /* unreachable: rejected at function entry */
              }
              cset.rhs=realloc(cset.rhs,sizeof(double)*cset.m);
              cset.rhs[cset.m-1]=margin;
              alpha=realloc(alpha,sizeof(double)*cset.m);
              alpha[cset.m-1]=0;
              newconstraints++;
            }
            else {
              printf("+"); fflush(stdout);
              if(opti[i] != opti_round) {
                activenum--;
                opti[i]=opti_round;
              }
            }

            free_example(doc,0);
            free_svector(fy); /* this also free's fybar */
            free_label(ybar);
          }

          /**** get new QP solution ****/
          if((newconstraints >= sparm->newconstretrain)
             || ((newconstraints > 0) && (i == n-1))) {
            if(struct_verbosity>=1) {
              printf("*");fflush(stdout);
            }
            rt2=get_runtime();
            free_model(svmModel,0);
            svmModel=(MODEL *)my_malloc(sizeof(MODEL));
            /* Always get a new kernel cache. It is not possible to use the
               same cache for two different training runs */
            if(kparm->kernel_type != LINEAR)
              kcache=kernel_cache_init(cset.m,lparm->kernel_cache_size);
            /* Run the QP solver on cset. */
            svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n,
                                   lparm,kparm,kcache,svmModel,alpha);
            if(kcache)
              kernel_cache_cleanup(kcache);
            /* Always add weight vector, in case part of the kernel is
               linear. If not, ignore the weight vector since its content is
               bogus. */
            add_weight_vector_to_linear_model(svmModel);
            sm->svm_model=svmModel;
            sm->w=svmModel->lin_weights; /* short cut to weight vector */
            rt_opt+=MAX(get_runtime()-rt2,0);

            newconstraints=0;
          }

          rt_total+=MAX(get_runtime()-rt1,0);
        } /* end of example loop */

        if(struct_verbosity>=1)
          printf("(NumConst=%d, SV=%ld, Eps=%.4f)\n",cset.m,svmModel->sv_num-1,
                 svmModel->maxdiff);

      } while(activenum > 0); /* repeat until all examples produced no
                                 constraint at least once */

    } while((cset.m - old_numConst) > tolerance) ;

  } while(epsilon > sparm->epsilon);

  if(struct_verbosity>=1) {
    /**** compute sum of slacks ****/
    slacks=(double *)my_malloc(sizeof(double)*(n+1));
    for(i=0; i<=n; i++) {
      slacks[i]=0;
    }
    if(sparm->slack_norm == 1) {
      for(j=0;j<cset.m;j++)
        slacks[cset.lhs[j]->slackid]=MAX(slacks[cset.lhs[j]->slackid],
                                         cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
    }
    else if(sparm->slack_norm == 2) {
      for(j=0;j<cset.m;j++)
        slacks[cset.lhs[j]->slackid]=MAX(slacks[cset.lhs[j]->slackid],
                cset.rhs[j]
                -(classify_example(svmModel,cset.lhs[j])
                  -sm->w[sizePsi+cset.lhs[j]->slackid-1]/(sqrt(2*sparm->C))));
    }
    slacksum=0;
    for(i=0; i<=n; i++)
      slacksum+=slacks[i];
    free(slacks);

    printf("Final epsilon on KKT-Conditions: %.5f\n",
           MAX(svmModel->maxdiff,epsilon));
    printf("Total number of constraints added: %i\n",(int)cset.m);
    if(sparm->slack_norm == 1) {
      printf("Number of SV: %ld \n",svmModel->sv_num-1);
      printf("Number of non-zero slack variables: %ld (out of %ld)\n",
             svmModel->at_upper_bound,n);
      printf("Norm of weight vector: |w|=%.5f\n",
             model_length_s(svmModel,kparm));
    }
    else if(sparm->slack_norm == 2){
      printf("Number of SV: %ld (including %ld at upper bound)\n",
             svmModel->sv_num-1,svmModel->at_upper_bound);
      printf("Norm of weight vector (including L2-loss): |w|=%.5f\n",
             model_length_s(svmModel,kparm));
    }
    printf("Sum of slack variables: sum(xi_i)=%.5f\n",slacksum);
    printf("Norm of longest difference vector: ||Psi(x,y)-Psi(x,ybar)||=%.5f\n",
           length_of_longest_document_vector(cset.lhs,cset.m,kparm));
    printf("Runtime in cpu-seconds: %.2f (%.2f%% for SVM optimization)\n",
           rt_total/100.0, 100.0*rt_opt/rt_total);
  }
  if(struct_verbosity>=4)
    printW(sm->w,sizePsi,n,lparm->svm_c);

  /* hand a private copy of the final model to the caller */
  if(svmModel) {
    sm->svm_model=copy_model(svmModel);
    sm->w=sm->svm_model->lin_weights; /* short cut to weight vector */
  }

  print_struct_learning_stats(sample,sm,cset,alpha,sparm);

  if(svmModel)
    free_model(svmModel,0);
  free(alpha);
  free(opti);
  free(cset.rhs);
  for(i=0;i<cset.m;i++)
    free_example(cset.lhs[i],1);
  free(cset.lhs);
}
/**
 * Port of SVM-light's svm_learn main() used by the GMUM.R wrapper.
 *
 * Parses the (synthetic) command line into LEARN_PARM/KERNEL_PARM, reads the
 * training documents, runs the learning routine matching learn_parm.type
 * (classification / regression / ranking / optimization), then either writes
 * the model to `modelfile` (when use_gmumr is false) or copies it into
 * `config` via SVMLightModelToSVMConfiguration().
 *
 * @param argc,argv   SVM-light style arguments
 * @param use_gmumr   when true, results go into `config` instead of a file
 * @param config      GMUM.R configuration; kernel_type is taken from it and
 *                    the final iteration count is written back to config.iter
 * @return 0 on completion
 *
 * NOTE: relies on file-scope globals (docfile, modelfile, restartfile,
 * verbosity) filled in by librarySVMLearnReadInputParameters().
 * Regression and ranking pass &kernel_cache because those SVM-light routines
 * rebuild the cache themselves.
 */
int SVMLightRunner::librarySVMLearnMain(
    int argc, char **argv, bool use_gmumr, SVMConfiguration &config
) {
    LOG(
        config.log,
        LogLevel::DEBUG_LEVEL,
        __debug_prefix__ + ".librarySVMLearnMain() Started."
    );
    DOC **docs;  /* training examples */
    long totwords,totdoc,i;
    double *target;
    double *alpha_in=NULL;
    KERNEL_CACHE *kernel_cache;
    LEARN_PARM learn_parm;
    KERNEL_PARM kernel_parm;
    MODEL *model=(MODEL *)my_malloc(sizeof(MODEL));

    // GMUM.R changes {
    librarySVMLearnReadInputParameters(
        argc, argv, docfile, modelfile, restartfile, &verbosity,
        &learn_parm, &kernel_parm, use_gmumr, config
    );
    kernel_parm.kernel_type = static_cast<long int>(config.kernel_type);
    libraryReadDocuments(
        docfile, &docs, &target, &totwords, &totdoc, use_gmumr, config
    );
    // GMUM.R changes }

    /* optional warm start from a previously saved alpha file */
    if(restartfile[0]) alpha_in=read_alphas(restartfile,totdoc);

    if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */
        kernel_cache=NULL;
    }
    else {
        /* Always get a new kernel cache. It is not possible to use the
         * same cache for two different training runs */
        kernel_cache=kernel_cache_init(totdoc,learn_parm.kernel_cache_size);
    }

    //gmum.r
    init_global_params_QP();

    if(learn_parm.type == CLASSIFICATION) {
        svm_learn_classification(docs,target,totdoc,totwords,&learn_parm,
                                 &kernel_parm,kernel_cache,model,alpha_in);
    }
    else if(learn_parm.type == REGRESSION) {
        svm_learn_regression(docs,target,totdoc,totwords,&learn_parm,
                             &kernel_parm,&kernel_cache,model);
    }
    else if(learn_parm.type == RANKING) {
        svm_learn_ranking(docs,target,totdoc,totwords,&learn_parm,
                          &kernel_parm,&kernel_cache,model);
    }
    else if(learn_parm.type == OPTIMIZATION) {
        svm_learn_optimization(docs,target,totdoc,totwords,&learn_parm,
                               &kernel_parm,kernel_cache,model,alpha_in);
    }
    //gmum.r
    config.iter = learn_parm.iterations;

    if(kernel_cache) {
        /* Free the memory used for the cache. */
        kernel_cache_cleanup(kernel_cache);
    }

    /* Warning: The model contains references to the original data 'docs'.
       If you want to free the original data, and only keep the model, you
       have to make a deep copy of 'model'. */
    /* deep_copy_of_model=copy_model(model); */
    // GMUM.R changes {
    if (!use_gmumr) {
        write_model(modelfile,model);
    } else {
        SVMLightModelToSVMConfiguration(model, config);
    }
    // GMUM.R changes }

    free(alpha_in);
    free_model(model,0);  /* shallow free: docs are released separately below */
    for(i=0;i<totdoc;i++)
        free_example(docs[i],1);
    free(docs);
    free(target);
    LOG(
        config.log,
        LogLevel::DEBUG_LEVEL,
        __debug_prefix__ + ".librarySVMLearnMain() Done."
    );

    return(0);
}