void svm_learn_struct(SAMPLE sample, STRUCT_LEARN_PARM *sparm, LEARN_PARM *lparm, KERNEL_PARM *kparm, STRUCTMODEL *sm) { int i,j; int numIt=0; long newconstraints=0, activenum=0; int opti_round, *opti; long old_numConst=0; double epsilon; long tolerance; double lossval,factor; double margin=0; double slack, *slacks, slacksum; long sizePsi; double *alpha=NULL; CONSTSET cset; SVECTOR *diff=NULL; SVECTOR *fy, *fybar, *f; SVECTOR *slackvec; WORD slackv[2]; MODEL *svmModel=NULL; KERNEL_CACHE *kcache=NULL; LABEL ybar; DOC *doc; long n=sample.n; EXAMPLE *ex=sample.examples; double rt_total=0.0, rt_opt=0.0; long rt1,rt2; init_struct_model(sample,sm,sparm); sizePsi=sm->sizePsi+1; /* sm must contain size of psi on return */ /* initialize example selection heuristic */ opti=(int*)my_malloc(n*sizeof(int)); for(i=0;i<n;i++) { opti[i]=0; } opti_round=0; if(sparm->slack_norm == 1) { lparm->svm_c=sparm->C; /* set upper bound C */ lparm->sharedslack=1; } else if(sparm->slack_norm == 2) { lparm->svm_c=999999999999999.0; /* upper bound C must never be reached */ lparm->sharedslack=0; if(kparm->kernel_type != LINEAR) { printf("ERROR: Kernels are not implemented for L2 slack norm!"); fflush(stdout); exit(0); } } else { printf("ERROR: Slack norm must be L1 or L2!"); fflush(stdout); exit(0); } epsilon=1.0; /* start with low precision and increase later */ tolerance=n/100; /* increase precision, whenever less than that number of constraints is not fulfilled */ lparm->biased_hyperplane=0; /* set threshold to zero */ cset=init_struct_constraints(sample, sm, sparm); if(cset.m > 0) { alpha=realloc(alpha,sizeof(double)*cset.m); for(i=0; i<cset.m; i++) alpha[i]=0; } /* set initial model and slack variables*/ svmModel=(MODEL *)my_malloc(sizeof(MODEL)); svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n, lparm,kparm,NULL,svmModel,alpha); add_weight_vector_to_linear_model(svmModel); sm->svm_model=svmModel; sm->w=svmModel->lin_weights; /* short cut to weight vector */ printf("Starting Iterations\n"); /*****************/ /*** main loop ***/ /*****************/ do { /* iteratively increase precision */ epsilon=MAX(epsilon*0.09999999999,sparm->epsilon); if(epsilon == sparm->epsilon) /* for final precision, find all SV */ tolerance=0; lparm->epsilon_crit=epsilon/2; /* svm precision must be higher than eps */ if(struct_verbosity>=1) printf("Setting current working precision to %g.\n",epsilon); do { /* iteration until (approx) all SV are found for current precision and tolerance */ old_numConst=cset.m; opti_round++; activenum=n; do { /* go through examples that keep producing new constraints */ if(struct_verbosity>=1) { printf("--Iteration %i (%ld active): ",++numIt,activenum); fflush(stdout); } for(i=0; i<n; i++) { /*** example loop ***/ rt1=get_runtime(); if(opti[i] != opti_round) {/* if the example is not shrunk away, then see if it is necessary to add a new constraint */ if(sparm->loss_type == SLACK_RESCALING) ybar=find_most_violated_constraint_slackrescaling(ex[i].x, ex[i].y,sm, sparm); else ybar=find_most_violated_constraint_marginrescaling(ex[i].x, ex[i].y,sm, sparm); if(empty_label(ybar)) { if(opti[i] != opti_round) { activenum--; opti[i]=opti_round; } if(struct_verbosity>=2) printf("no-incorrect-found(%i) ",i); continue; } /**** get psi(y)-psi(ybar) ****/ fy=psi(ex[i].x,ex[i].y,sm,sparm); fybar=psi(ex[i].x,ybar,sm,sparm); /**** scale feature vector and margin by loss ****/ lossval=loss(ex[i].y,ybar,sparm); if(sparm->slack_norm == 2) lossval=sqrt(lossval); if(sparm->loss_type == SLACK_RESCALING) factor=lossval; else /* do not rescale vector for */ factor=1.0; /* margin rescaling loss type */ for(f=fy;f;f=f->next) f->factor*=factor; for(f=fybar;f;f=f->next) f->factor*=-factor; margin=lossval; /**** create constraint for current ybar ****/ append_svector_list(fy,fybar);/* append the two vector lists */ doc=create_example(cset.m,0,i+1,1,fy); /**** compute slack for this example ****/ slack=0; for(j=0;j<cset.m;j++) if(cset.lhs[j]->slackid == i+1) { if(sparm->slack_norm == 2) /* works only for linear kernel */ slack=MAX(slack,cset.rhs[j] -(classify_example(svmModel,cset.lhs[j]) -sm->w[sizePsi+i]/(sqrt(2*sparm->C)))); else slack=MAX(slack, cset.rhs[j]-classify_example(svmModel,cset.lhs[j])); } /**** if `error' add constraint and recompute ****/ if((classify_example(svmModel,doc)+slack)<(margin-epsilon)) { if(struct_verbosity>=2) {printf("(%i) ",i); fflush(stdout);} if(struct_verbosity==1) {printf("."); fflush(stdout);} /**** resize constraint matrix and add new constraint ****/ cset.m++; cset.lhs=realloc(cset.lhs,sizeof(DOC *)*cset.m); if(kparm->kernel_type == LINEAR) { diff=add_list_ss(fy); /* store difference vector directly */ if(sparm->slack_norm == 1) cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1, copy_svector(diff)); else if(sparm->slack_norm == 2) { /**** add squared slack variable to feature vector ****/ slackv[0].wnum=sizePsi+i; slackv[0].weight=1/(sqrt(2*sparm->C)); slackv[1].wnum=0; /*terminator*/ slackvec=create_svector(slackv,"",1.0); cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1, add_ss(diff,slackvec)); free_svector(slackvec); } free_svector(diff); } else { /* kernel is used */ if(sparm->slack_norm == 1) cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1, copy_svector(fy)); else if(sparm->slack_norm == 2) exit(1); } cset.rhs=realloc(cset.rhs,sizeof(double)*cset.m); cset.rhs[cset.m-1]=margin; alpha=realloc(alpha,sizeof(double)*cset.m); alpha[cset.m-1]=0; newconstraints++; } else { printf("+"); fflush(stdout); if(opti[i] != opti_round) { activenum--; opti[i]=opti_round; } } free_example(doc,0); free_svector(fy); /* this also free's fybar */ free_label(ybar); } /**** get new QP solution ****/ if((newconstraints >= sparm->newconstretrain) || ((newconstraints > 0) && (i == n-1))) { if(struct_verbosity>=1) { printf("*");fflush(stdout); } rt2=get_runtime(); free_model(svmModel,0); svmModel=(MODEL *)my_malloc(sizeof(MODEL)); /* Always get a new kernel cache. It is not possible to use the same cache for two different training runs */ if(kparm->kernel_type != LINEAR) kcache=kernel_cache_init(cset.m,lparm->kernel_cache_size); /* Run the QP solver on cset. */ svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n, lparm,kparm,kcache,svmModel,alpha); if(kcache) kernel_cache_cleanup(kcache); /* Always add weight vector, in case part of the kernel is linear. If not, ignore the weight vector since its content is bogus. */ add_weight_vector_to_linear_model(svmModel); sm->svm_model=svmModel; sm->w=svmModel->lin_weights; /* short cut to weight vector */ rt_opt+=MAX(get_runtime()-rt2,0); newconstraints=0; } rt_total+=MAX(get_runtime()-rt1,0); } /* end of example loop */ if(struct_verbosity>=1) printf("(NumConst=%d, SV=%ld, Eps=%.4f)\n",cset.m,svmModel->sv_num-1, svmModel->maxdiff); } while(activenum > 0); /* repeat until all examples produced no constraint at least once */ } while((cset.m - old_numConst) > tolerance) ; } while(epsilon > sparm->epsilon); if(struct_verbosity>=1) { /**** compute sum of slacks ****/ slacks=(double *)my_malloc(sizeof(double)*(n+1)); for(i=0; i<=n; i++) { slacks[i]=0; } if(sparm->slack_norm == 1) { for(j=0;j<cset.m;j++) slacks[cset.lhs[j]->slackid]=MAX(slacks[cset.lhs[j]->slackid], cset.rhs[j]-classify_example(svmModel,cset.lhs[j])); } else if(sparm->slack_norm == 2) { for(j=0;j<cset.m;j++) slacks[cset.lhs[j]->slackid]=MAX(slacks[cset.lhs[j]->slackid], cset.rhs[j] -(classify_example(svmModel,cset.lhs[j]) -sm->w[sizePsi+cset.lhs[j]->slackid-1]/(sqrt(2*sparm->C)))); } slacksum=0; for(i=0; i<=n; i++) slacksum+=slacks[i]; free(slacks); printf("Final epsilon on KKT-Conditions: %.5f\n", MAX(svmModel->maxdiff,epsilon)); printf("Total number of constraints added: %i\n",(int)cset.m); if(sparm->slack_norm == 1) { printf("Number of SV: %ld \n",svmModel->sv_num-1); printf("Number of non-zero slack variables: %ld (out of %ld)\n", svmModel->at_upper_bound,n); printf("Norm of weight vector: |w|=%.5f\n", model_length_s(svmModel,kparm)); } else if(sparm->slack_norm == 2){ printf("Number of SV: %ld (including %ld at upper bound)\n", svmModel->sv_num-1,svmModel->at_upper_bound); printf("Norm of weight vector (including L2-loss): |w|=%.5f\n", model_length_s(svmModel,kparm)); } printf("Sum of slack variables: sum(xi_i)=%.5f\n",slacksum); printf("Norm of longest difference vector: ||Psi(x,y)-Psi(x,ybar)||=%.5f\n", length_of_longest_document_vector(cset.lhs,cset.m,kparm)); printf("Runtime in cpu-seconds: %.2f (%.2f%% for SVM optimization)\n", rt_total/100.0, 100.0*rt_opt/rt_total); } if(struct_verbosity>=4) printW(sm->w,sizePsi,n,lparm->svm_c); if(svmModel) { sm->svm_model=copy_model(svmModel); sm->w=sm->svm_model->lin_weights; /* short cut to weight vector */ } print_struct_learning_stats(sample,sm,cset,alpha,sparm); if(svmModel) free_model(svmModel,0); free(alpha); free(opti); free(cset.rhs); for(i=0;i<cset.m;i++) free_example(cset.lhs[i],1); free(cset.lhs); }
void svm_learn_struct(SAMPLE sample, STRUCT_LEARN_PARM *sparm, LEARN_PARM *lparm, KERNEL_PARM *kparm, STRUCTMODEL *sm, int alg_type) { int i,j; int numIt=0; long argmax_count=0; long newconstraints=0, totconstraints=0, activenum=0; int opti_round, *opti, fullround, use_shrinking; long old_totconstraints=0; double epsilon,svmCnorm; long tolerance,new_precision=1,dont_stop=0; double lossval,factor,dist; double margin=0; double slack, *slacks, slacksum, ceps; double dualitygap,modellength,alphasum; long sizePsi; double *alpha=NULL; long *alphahist=NULL,optcount=0,lastoptcount=0; CONSTSET cset; SVECTOR *diff=NULL; SVECTOR *fy, *fybar, *f, **fycache=NULL; SVECTOR *slackvec; WORD slackv[2]; MODEL *svmModel=NULL; KERNEL_CACHE *kcache=NULL; LABEL ybar; DOC *doc; long n=sample.n; EXAMPLE *ex=sample.examples; double rt_total=0, rt_opt=0, rt_init=0, rt_psi=0, rt_viol=0; double rt1,rt2; rt1=get_runtime(); init_struct_model(sample,sm,sparm,lparm,kparm); sizePsi=sm->sizePsi+1; /* sm must contain size of psi on return */ /* initialize shrinking-style example selection heuristic */ if(alg_type == NSLACK_SHRINK_ALG) use_shrinking=1; else use_shrinking=0; opti=(int*)my_malloc(n*sizeof(int)); for(i=0;i<n;i++) { opti[i]=0; } opti_round=0; /* normalize regularization parameter C by the number of training examples */ svmCnorm=sparm->C/n; if(sparm->slack_norm == 1) { lparm->svm_c=svmCnorm; /* set upper bound C */ lparm->sharedslack=1; } else if(sparm->slack_norm == 2) { lparm->svm_c=999999999999999.0; /* upper bound C must never be reached */ lparm->sharedslack=0; if(kparm->kernel_type != LINEAR_KERNEL) { printf("ERROR: Kernels are not implemented for L2 slack norm!"); fflush(stdout); exit(0); } } else { printf("ERROR: Slack norm must be L1 or L2!"); fflush(stdout); exit(0); } epsilon=100.0; /* start with low precision and increase later */ tolerance=MIN(n/3,MAX(n/100,5));/* increase precision, whenever less than that number of constraints is not fulfilled */ lparm->biased_hyperplane=0; /* set threshold to zero */ cset=init_struct_constraints(sample, sm, sparm); if(cset.m > 0) { alpha=(double *)realloc(alpha,sizeof(double)*cset.m); alphahist=(long *)realloc(alphahist,sizeof(long)*cset.m); for(i=0; i<cset.m; i++) { alpha[i]=0; alphahist[i]=-1; /* -1 makes sure these constraints are never removed */ } } /* set initial model and slack variables*/ svmModel=(MODEL *)my_malloc(sizeof(MODEL)); lparm->epsilon_crit=epsilon; if(kparm->kernel_type != LINEAR_KERNEL) kcache=kernel_cache_init(MAX(cset.m,1),lparm->kernel_cache_size); svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n, lparm,kparm,kcache,svmModel,alpha); if(kcache) kernel_cache_cleanup(kcache); add_weight_vector_to_linear_model(svmModel); sm->svm_model=svmModel; sm->w=svmModel->lin_weights; /* short cut to weight vector */ /* create a cache of the feature vectors for the correct labels */ if(USE_FYCACHE) { fycache=(SVECTOR **)my_malloc(n*sizeof(SVECTOR *)); for(i=0;i<n;i++) { fy=psi(ex[i].x,ex[i].y,sm,sparm); if(kparm->kernel_type == LINEAR_KERNEL) { diff=add_list_ss(fy); /* store difference vector directly */ free_svector(fy); fy=diff; } fycache[i]=fy; } } rt_init+=MAX(get_runtime()-rt1,0); rt_total+=MAX(get_runtime()-rt1,0); /*****************/ /*** main loop ***/ /*****************/ do { /* iteratively increase precision */ epsilon=MAX(epsilon*0.49999999999,sparm->epsilon); new_precision=1; if(epsilon == sparm->epsilon) /* for final precision, find all SV */ tolerance=0; lparm->epsilon_crit=epsilon/2; /* svm precision must be higher than eps */ if(struct_verbosity>=1) printf("Setting current working precision to %g.\n",epsilon); do { /* iteration until (approx) all SV are found for current precision and tolerance */ opti_round++; activenum=n; dont_stop=0; old_totconstraints=totconstraints; do { /* with shrinking turned on, go through examples that keep producing new constraints */ if(struct_verbosity>=1) { printf("Iter %i (%ld active): ",++numIt,activenum); fflush(stdout); } ceps=0; fullround=(activenum == n); for(i=0; i<n; i++) { /*** example loop ***/ rt1=get_runtime(); if((!use_shrinking) || (opti[i] != opti_round)) { /* if the example is not shrunk away, then see if it is necessary to add a new constraint */ rt2=get_runtime(); argmax_count++; if(sparm->loss_type == SLACK_RESCALING) ybar=find_most_violated_constraint_slackrescaling(ex[i].x, ex[i].y,sm, sparm); else ybar=find_most_violated_constraint_marginrescaling(ex[i].x, ex[i].y,sm, sparm); rt_viol+=MAX(get_runtime()-rt2,0); if(empty_label(ybar)) { if(opti[i] != opti_round) { activenum--; opti[i]=opti_round; } if(struct_verbosity>=2) printf("no-incorrect-found(%i) ",i); continue; } /**** get psi(y)-psi(ybar) ****/ rt2=get_runtime(); if(fycache) fy=copy_svector(fycache[i]); else fy=psi(ex[i].x,ex[i].y,sm,sparm); fybar=psi(ex[i].x,ybar,sm,sparm); rt_psi+=MAX(get_runtime()-rt2,0); /**** scale feature vector and margin by loss ****/ lossval=loss(ex[i].y,ybar,sparm); if(sparm->slack_norm == 2) lossval=sqrt(lossval); if(sparm->loss_type == SLACK_RESCALING) factor=lossval; else /* do not rescale vector for */ factor=1.0; /* margin rescaling loss type */ for(f=fy;f;f=f->next) f->factor*=factor; for(f=fybar;f;f=f->next) f->factor*=-factor; margin=lossval; /**** create constraint for current ybar ****/ append_svector_list(fy,fybar);/* append the two vector lists */ doc=create_example(cset.m,0,i+1,1,fy); /**** compute slack for this example ****/ slack=0; for(j=0;j<cset.m;j++) if(cset.lhs[j]->slackid == i+1) { if(sparm->slack_norm == 2) /* works only for linear kernel */ slack=MAX(slack,cset.rhs[j] -(classify_example(svmModel,cset.lhs[j]) -sm->w[sizePsi+i]/(sqrt(2*svmCnorm)))); else slack=MAX(slack, cset.rhs[j]-classify_example(svmModel,cset.lhs[j])); } /**** if `error' add constraint and recompute ****/ dist=classify_example(svmModel,doc); ceps=MAX(ceps,margin-dist-slack); if(slack > (margin-dist+0.0001)) { printf("\nWARNING: Slack of most violated constraint is smaller than slack of working\n"); printf(" set! There is probably a bug in 'find_most_violated_constraint_*'.\n"); printf("Ex %d: slack=%f, newslack=%f\n",i,slack,margin-dist); /* exit(1); */ } if((dist+slack)<(margin-epsilon)) { if(struct_verbosity>=2) {printf("(%i,eps=%.2f) ",i,margin-dist-slack); fflush(stdout);} if(struct_verbosity==1) {printf("."); fflush(stdout);} /**** resize constraint matrix and add new constraint ****/ cset.m++; cset.lhs=(DOC **)realloc(cset.lhs,sizeof(DOC *)*cset.m); if(kparm->kernel_type == LINEAR_KERNEL) { diff=add_list_ss(fy); /* store difference vector directly */ if(sparm->slack_norm == 1) cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1, copy_svector(diff)); else if(sparm->slack_norm == 2) { /**** add squared slack variable to feature vector ****/ slackv[0].wnum=sizePsi+i; slackv[0].weight=1/(sqrt(2*svmCnorm)); slackv[1].wnum=0; /*terminator*/ slackvec=create_svector(slackv,NULL,1.0); cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1, add_ss(diff,slackvec)); free_svector(slackvec); } free_svector(diff); } else { /* kernel is used */ if(sparm->slack_norm == 1) cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1, copy_svector(fy)); else if(sparm->slack_norm == 2) exit(1); } cset.rhs=(double *)realloc(cset.rhs,sizeof(double)*cset.m); cset.rhs[cset.m-1]=margin; alpha=(double *)realloc(alpha,sizeof(double)*cset.m); alpha[cset.m-1]=0; alphahist=(long *)realloc(alphahist,sizeof(long)*cset.m); alphahist[cset.m-1]=optcount; newconstraints++; totconstraints++; } else { printf("+"); fflush(stdout); if(opti[i] != opti_round) { activenum--; opti[i]=opti_round; } } free_example(doc,0); free_svector(fy); /* this also free's fybar */ free_label(ybar); } /**** get new QP solution ****/ if((newconstraints >= sparm->newconstretrain) || ((newconstraints > 0) && (i == n-1)) || (new_precision && (i == n-1))) { if(struct_verbosity>=1) { printf("*");fflush(stdout); } rt2=get_runtime(); free_model(svmModel,0); svmModel=(MODEL *)my_malloc(sizeof(MODEL)); /* Always get a new kernel cache. It is not possible to use the same cache for two different training runs */ if(kparm->kernel_type != LINEAR_KERNEL) kcache=kernel_cache_init(MAX(cset.m,1),lparm->kernel_cache_size); /* Run the QP solver on cset. */ svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n, lparm,kparm,kcache,svmModel,alpha); if(kcache) kernel_cache_cleanup(kcache); /* Always add weight vector, in case part of the kernel is linear. If not, ignore the weight vector since its content is bogus. */ add_weight_vector_to_linear_model(svmModel); sm->svm_model=svmModel; sm->w=svmModel->lin_weights; /* short cut to weight vector */ optcount++; /* keep track of when each constraint was last active. constraints marked with -1 are not updated */ for(j=0;j<cset.m;j++) if((alphahist[j]>-1) && (alpha[j] != 0)) alphahist[j]=optcount; rt_opt+=MAX(get_runtime()-rt2,0); if(new_precision && (epsilon <= sparm->epsilon)) dont_stop=1; /* make sure we take one final pass */ new_precision=0; newconstraints=0; } rt_total+=MAX(get_runtime()-rt1,0); } /* end of example loop */ rt1=get_runtime(); if(struct_verbosity>=1) printf("(NumConst=%d, SV=%ld, CEps=%.4f, QPEps=%.4f)\n",cset.m, svmModel->sv_num-1,ceps,svmModel->maxdiff); /* Check if some of the linear constraints have not been active in a while. Those constraints are then removed to avoid bloating the working set beyond necessity. */ if(struct_verbosity>=2) printf("Reducing working set...");fflush(stdout); remove_inactive_constraints(&cset,alpha,optcount,alphahist, MAX(50,optcount-lastoptcount)); lastoptcount=optcount; if(struct_verbosity>=2) printf("done. (NumConst=%d)\n",cset.m); rt_total+=MAX(get_runtime()-rt1,0); } while(use_shrinking && (activenum > 0)); /* when using shrinking, repeat until all examples produced no constraint at least once */ } while(((totconstraints - old_totconstraints) > tolerance) || dont_stop); } while((epsilon > sparm->epsilon) || finalize_iteration(ceps,0,sample,sm,cset,alpha,sparm)); if(struct_verbosity>=1) { /**** compute sum of slacks ****/ /**** WARNING: If positivity constraints are used, then the maximum slack id is larger than what is allocated below ****/ slacks=(double *)my_malloc(sizeof(double)*(n+1)); for(i=0; i<=n; i++) { slacks[i]=0; } if(sparm->slack_norm == 1) { for(j=0;j<cset.m;j++) slacks[cset.lhs[j]->slackid]=MAX(slacks[cset.lhs[j]->slackid], cset.rhs[j]-classify_example(svmModel,cset.lhs[j])); } else if(sparm->slack_norm == 2) { for(j=0;j<cset.m;j++) slacks[cset.lhs[j]->slackid]=MAX(slacks[cset.lhs[j]->slackid], cset.rhs[j] -(classify_example(svmModel,cset.lhs[j]) -sm->w[sizePsi+cset.lhs[j]->slackid-1]/(sqrt(2*svmCnorm)))); } slacksum=0; for(i=1; i<=n; i++) slacksum+=slacks[i]; free(slacks); alphasum=0; for(i=0; i<cset.m; i++) alphasum+=alpha[i]*cset.rhs[i]; modellength=model_length_s(svmModel); dualitygap=(0.5*modellength*modellength+svmCnorm*(slacksum+n*ceps)) -(alphasum-0.5*modellength*modellength); printf("Final epsilon on KKT-Conditions: %.5f\n", MAX(svmModel->maxdiff,epsilon)); printf("Upper bound on duality gap: %.5f\n", dualitygap); printf("Dual objective value: dval=%.5f\n", alphasum-0.5*modellength*modellength); printf("Total number of constraints in final working set: %i (of %i)\n",(int)cset.m,(int)totconstraints); printf("Number of iterations: %d\n",numIt); printf("Number of calls to 'find_most_violated_constraint': %ld\n",argmax_count); if(sparm->slack_norm == 1) { printf("Number of SV: %ld \n",svmModel->sv_num-1); printf("Number of non-zero slack variables: %ld (out of %ld)\n", svmModel->at_upper_bound,n); printf("Norm of weight vector: |w|=%.5f\n",modellength); } else if(sparm->slack_norm == 2){ printf("Number of SV: %ld (including %ld at upper bound)\n", svmModel->sv_num-1,svmModel->at_upper_bound); printf("Norm of weight vector (including L2-loss): |w|=%.5f\n", modellength); } printf("Norm. sum of slack variables (on working set): sum(xi_i)/n=%.5f\n",slacksum/n); printf("Norm of longest difference vector: ||Psi(x,y)-Psi(x,ybar)||=%.5f\n", length_of_longest_document_vector(cset.lhs,cset.m,kparm)); printf("Runtime in cpu-seconds: %.2f (%.2f%% for QP, %.2f%% for Argmax, %.2f%% for Psi, %.2f%% for init)\n", rt_total/100.0, (100.0*rt_opt)/rt_total, (100.0*rt_viol)/rt_total, (100.0*rt_psi)/rt_total, (100.0*rt_init)/rt_total); } if(struct_verbosity>=4) printW(sm->w,sizePsi,n,lparm->svm_c); if(svmModel) { sm->svm_model=copy_model(svmModel); sm->w=sm->svm_model->lin_weights; /* short cut to weight vector */ } print_struct_learning_stats(sample,sm,cset,alpha,sparm); if(fycache) { for(i=0;i<n;i++) free_svector(fycache[i]); free(fycache); } if(svmModel) free_model(svmModel,0); free(alpha); free(alphahist); free(opti); free(cset.rhs); for(i=0;i<cset.m;i++) free_example(cset.lhs[i],1); free(cset.lhs); }
int _svm_learn (int argc, char* argv[]) { char docfile[200]; /* file with training examples */ char modelfile[200]; /* file for resulting classifier */ char restartfile[200]; /* file with initial alphas */ DOC **docs; /* training examples */ long totwords,totdoc,i; double *target; double *alpha_in=NULL; KERNEL_CACHE *kernel_cache; LEARN_PARM learn_parm; KERNEL_PARM kernel_parm; MODEL *model=(MODEL *)my_malloc(sizeof(MODEL)); HIDEO_ENV *hideo_env=create_env(); model->td_pred=NULL; model->n_td_pred=0; _read_input_parameters(argc,argv,docfile,modelfile,restartfile,&verbosity, &learn_parm,&kernel_parm); read_documents(docfile,&docs,&target,&totwords,&totdoc); if(restartfile[0]) alpha_in=read_alphas(restartfile,totdoc); if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */ kernel_cache=NULL; } else { /* Always get a new kernel cache. It is not possible to use the same cache for two different training runs */ kernel_cache=kernel_cache_init(totdoc,learn_parm.kernel_cache_size); } if(learn_parm.type == CLASSIFICATION) { svm_learn_classification(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,kernel_cache,model,alpha_in,hideo_env); } else if(learn_parm.type == REGRESSION) { svm_learn_regression(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,&kernel_cache,model,hideo_env); } else if(learn_parm.type == RANKING) { svm_learn_ranking(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,&kernel_cache,model,hideo_env); } else if(learn_parm.type == OPTIMIZATION) { svm_learn_optimization(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,kernel_cache,model,alpha_in,hideo_env); } if(kernel_cache) { /* Free the memory used for the cache. */ kernel_cache_cleanup(kernel_cache); } /* Warning: The model contains references to the original data 'docs'. If you want to free the original data, and only keep the model, you have to make a deep copy of 'model'. */ /* deep_copy_of_model=copy_model(model); */ write_model(modelfile,model); free(alpha_in); free_model(model,0); for(i=0;i<totdoc;i++) free_example(docs[i],1); free(docs); free(target); free_env(hideo_env); return(0); }
int main (int argc, char* argv[]) { DOC *docs; /* training examples */ long max_docs,max_words_doc; long totwords,totdoc,ll,i; long kernel_cache_size; double *target; KERNEL_CACHE kernel_cache; LEARN_PARM learn_parm; KERNEL_PARM kernel_parm; MODEL model; read_input_parameters(argc,argv,docfile,modelfile,&verbosity, &kernel_cache_size,&learn_parm,&kernel_parm); if(verbosity>=1) { printf("Scanning examples..."); fflush(stdout); } nol_ll(docfile,&max_docs,&max_words_doc,&ll); /* scan size of input file */ max_words_doc+=10; ll+=10; max_docs+=2; if(verbosity>=1) { printf("done\n"); fflush(stdout); } docs = (DOC *)my_malloc(sizeof(DOC)*max_docs); /* feature vectors */ target = (double *)my_malloc(sizeof(double)*max_docs); /* target values */ //printf("\nMax docs: %ld, approximated number of feature occurences %ld, maximal length of a line %ld\n\n",max_docs,max_words_doc,ll); read_documents(docfile,docs,target,max_words_doc,ll,&totwords,&totdoc,&kernel_parm); printf("\nNumber of examples: %ld, linear space size: %ld\n\n",totdoc,totwords); //if(kernel_parm.kernel_type==5) totwords=totdoc; // The number of features is proportional to the number of parse-trees, i.e. totdoc // or should we still use totwords to approximate svm_maxqpsize for the Tree Kernel (see hideo.c) ??????? if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */ if(learn_parm.type == CLASSIFICATION) { svm_learn_classification(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,NULL,&model); } else if(learn_parm.type == REGRESSION) { svm_learn_regression(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,NULL,&model); } else if(learn_parm.type == RANKING) { svm_learn_ranking(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,NULL,&model); } } else { if(learn_parm.type == CLASSIFICATION) { /* Always get a new kernel cache. It is not possible to use the same cache for two different training runs */ kernel_cache_init(&kernel_cache,totdoc,kernel_cache_size); svm_learn_classification(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,&kernel_cache,&model); /* Free the memory used for the cache. */ kernel_cache_cleanup(&kernel_cache); } else if(learn_parm.type == REGRESSION) { /* Always get a new kernel cache. It is not possible to use the same cache for two different training runs */ kernel_cache_init(&kernel_cache,2*totdoc,kernel_cache_size); svm_learn_regression(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,&kernel_cache,&model); /* Free the memory used for the cache. */ kernel_cache_cleanup(&kernel_cache); } else if(learn_parm.type == RANKING) { printf("Learning rankings is not implemented for non-linear kernels in this version!\n"); exit(1); } else if(learn_parm.type == PERCEPTRON) { perceptron_learn_classification(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,&kernel_cache,&model,modelfile); } else if(learn_parm.type == PERCEPTRON_BATCH) { batch_perceptron_learn_classification(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,kernel_cache_size,&model); } } /* Warning: The model contains references to the original data 'docs'. If you want to free the original data, and only keep the model, you have to make a deep copy of 'model'. */ write_model(modelfile,&model); free(model.supvec); free(model.alpha); free(model.index); for(i=0;i<totdoc;i++){ freeExample(&docs[i]); } free(docs); free(target); return(0); }
int SVMLightRunner::librarySVMLearnMain( int argc, char **argv, bool use_gmumr, SVMConfiguration &config ) { LOG( config.log, LogLevel::DEBUG_LEVEL, __debug_prefix__ + ".librarySVMLearnMain() Started." ); DOC **docs; /* training examples */ long totwords,totdoc,i; double *target; double *alpha_in=NULL; KERNEL_CACHE *kernel_cache; LEARN_PARM learn_parm; KERNEL_PARM kernel_parm; MODEL *model=(MODEL *)my_malloc(sizeof(MODEL)); // GMUM.R changes { librarySVMLearnReadInputParameters( argc, argv, docfile, modelfile, restartfile, &verbosity, &learn_parm, &kernel_parm, use_gmumr, config ); kernel_parm.kernel_type = static_cast<long int>(config.kernel_type); libraryReadDocuments( docfile, &docs, &target, &totwords, &totdoc, use_gmumr, config ); // GMUM.R changes } if(restartfile[0]) alpha_in=read_alphas(restartfile,totdoc); if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */ kernel_cache=NULL; } else { /* Always get a new kernel cache. It is not possible to use the * same cache for two different training runs */ kernel_cache=kernel_cache_init(totdoc,learn_parm.kernel_cache_size); } //gmum.r init_global_params_QP(); if(learn_parm.type == CLASSIFICATION) { svm_learn_classification(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,kernel_cache,model,alpha_in); } else if(learn_parm.type == REGRESSION) { svm_learn_regression(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,&kernel_cache,model); } else if(learn_parm.type == RANKING) { svm_learn_ranking(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,&kernel_cache,model); } else if(learn_parm.type == OPTIMIZATION) { svm_learn_optimization(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,kernel_cache,model,alpha_in); } //gmum.r config.iter = learn_parm.iterations; if(kernel_cache) { /* Free the memory used for the cache. */ kernel_cache_cleanup(kernel_cache); } /* Warning: The model contains references to the original data 'docs'. If you want to free the original data, and only keep the model, you have to make a deep copy of 'model'. */ /* deep_copy_of_model=copy_model(model); */ // GMUM.R changes { if (!use_gmumr) { write_model(modelfile,model); } else { SVMLightModelToSVMConfiguration(model, config); } // GMUM.R changes } free(alpha_in); free_model(model,0); for(i=0;i<totdoc;i++) free_example(docs[i],1); free(docs); free(target); LOG( config.log, LogLevel::DEBUG_LEVEL, __debug_prefix__ + ".librarySVMLearnMain() Done." ); return(0); }
/* call as model = mexsvmlearn(data,labels,options) */ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { char **argv; int argc; DOC **docs; /* training examples */ long totwords,totdoc,i; double *target; double *alpha_in=NULL; KERNEL_CACHE *kernel_cache; LEARN_PARM learn_parm; KERNEL_PARM kernel_parm; MODEL model; /* check for valid calling format */ if ((nrhs != 3) || (nlhs != 1)) mexErrMsgTxt(ERR001); if (mxGetM(prhs[0]) != mxGetM(prhs[1])) mexErrMsgTxt(ERR002); if (mxGetN(prhs[1]) != 1) mexErrMsgTxt(ERR003); /* reset static variables -- as a .DLL, static things are sticky */ global_init( ); /* convert the parameters (given in prhs[2]) into an argv/argc combination */ argv = make_argv((mxArray *)prhs[2],&argc); /* send the options */ /* this was originally supposed to be argc, argv, re-written for MATLAB ... its cheesy - but it workss :: convert the options array into an argc, argv pair and let svm_lite handle it from there. */ read_input_parameters(argc,argv,docfile,modelfile,restartfile,&verbosity, &learn_parm,&kernel_parm); extract_user_opts((mxArray *)prhs[2], &kernel_parm); totdoc = mxGetM(prhs[0]); totwords = mxGetN(prhs[0]); /* prhs[0] = samples (mxn) array prhs[1] = labels (mx1) array */ mexToDOC((mxArray *)prhs[0], (mxArray *)prhs[1], &docs, &target, NULL, NULL); /* TODO modify to accept this array if(restartfile[0]) alpha_in=read_alphas(restartfile,totdoc); */ if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */ kernel_cache=NULL; } else { /* Always get a new kernel cache. It is not possible to use the same cache for two different training runs */ kernel_cache=kernel_cache_init(totdoc,learn_parm.kernel_cache_size); } if(learn_parm.type == CLASSIFICATION) { svm_learn_classification(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,kernel_cache,&model,alpha_in); } else if(learn_parm.type == REGRESSION) { svm_learn_regression(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,&kernel_cache,&model); } else if(learn_parm.type == RANKING) { svm_learn_ranking(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,&kernel_cache,&model); } else if(learn_parm.type == OPTIMIZATION) { svm_learn_optimization(docs,target,totdoc,totwords,&learn_parm, &kernel_parm,kernel_cache,&model,alpha_in); } else { mexErrMsgTxt(ERR004); } if(kernel_cache) { /* Free the memory used for the cache. */ kernel_cache_cleanup(kernel_cache); } /* ********************************** * After the training/learning portion has finished, * copy the model back to the output arrays for MATLAB * ********************************** */ store_model(&model, plhs); free_kernel(); global_destroy( ); }