int main(int argc, char* argv[]) { double avgloss,l; long i, correct; char testfile[1024]; char modelfile[1024]; STRUCTMODEL model; STRUCT_LEARN_PARM sparm; LEARN_PARM lparm; KERNEL_PARM kparm; SAMPLE testsample; LABEL y; LATENT_VAR h; /* read input parameters */ read_input_parameters(argc,argv,testfile,modelfile,&sparm); /* read model file */ printf("Reading model..."); fflush(stdout); // model = read_struct_model(modelfile, &sparm); printf("done.\n"); /* read test examples */ printf("Reading test examples..."); fflush(stdout); testsample = read_struct_examples(testfile,&sparm); printf("done.\n"); init_struct_model(testsample,&model,&sparm,&lparm,&kparm); avgloss = 0.0; correct = 0; for (i=0;i<testsample.n;i++) { classify_struct_example(testsample.examples[i].x,&y,&h,&model,&sparm); l = loss(testsample.examples[i].y,y,h,&sparm); avgloss += l; if (l==0) correct++; free_label(y); free_latent_var(h); } printf("Average loss on test set: %.4f\n", avgloss/testsample.n); printf("Zero/one error on test set: %.4f\n", 1.0 - ((float) correct)/testsample.n); free_struct_sample(testsample); free_struct_model(model,&sparm); return(0); }
int main(int argc, char* argv[]) { double *scores = NULL; long i; char testfile[1024]; char modelfile[1024]; char scoreFile[1024]; FILE *fscore; STRUCTMODEL model; STRUCT_LEARN_PARM sparm; LEARN_PARM lparm; KERNEL_PARM kparm; SAMPLE testsample; /* read input parameters */ read_input_parameters(argc,argv,testfile,modelfile,scoreFile,&sparm); fscore = fopen(scoreFile,"w"); /* read model file */ printf("Reading model..."); fflush(stdout); model = read_struct_model(modelfile, &sparm); printf("done.\n"); /* read test examples */ printf("Reading test examples..."); fflush(stdout); testsample = read_struct_test_examples(testfile,&sparm); printf("done.\n"); init_struct_model(testsample,&model,&sparm,&lparm,&kparm); scores = classify_struct_example(testsample.examples[0].x,&model); for(i = 0; i < (testsample.examples[0].n_pos+testsample.examples[0].n_neg); i++){ fprintf(fscore, "%0.5f\n", scores[i]); } fclose(fscore); //free_struct_sample(testsample); TODO: Uncomment this, and fix this function. It frees h.h_is which was never allocated while classifying. free_struct_model(model,&sparm); return(0); }
/*
 * svm_learn_struct: trains the structural model `sm` on `sample` by
 * repeatedly searching each example for a violated constraint, appending it
 * to the constraint set `cset`, and re-solving the QP with
 * svm_learn_optimization().
 *
 *   sample - training set; sample.n examples stored in sample.examples.
 *   sparm  - read here: slack_norm (must be 1 or 2), C, epsilon (target
 *            precision), loss_type (SLACK_RESCALING selects slack rescaling,
 *            anything else margin rescaling) and newconstretrain (number of
 *            new constraints that triggers a re-solve).
 *   lparm  - overwritten here: svm_c, sharedslack, biased_hyperplane and
 *            epsilon_crit; kernel_cache_size is read for non-linear kernels.
 *   kparm  - kernel_type is read; slack_norm == 2 requires LINEAR.
 *   sm     - initialised via init_struct_model(); on return sm->svm_model is
 *            a copy_model() copy of the final model and sm->w points at its
 *            lin_weights.
 *
 * Loop structure (outermost first):
 *   1. precision loop: epsilon starts at 1.0 and is multiplied by roughly 0.1
 *      per round until it reaches sparm->epsilon; at the final precision the
 *      tolerance is set to 0.
 *   2. repeat while more than `tolerance` constraints were added in the round.
 *   3. repeat while activenum > 0; opti[i] == opti_round marks example i as
 *      having produced no new constraint in this round, so it is skipped.
 *   4. example loop: builds psi(x,y)-psi(x,ybar), compares it against the
 *      current slack of example i, and adds a constraint when
 *      classify_example(doc)+slack < margin-epsilon.  The QP is re-solved when
 *      newconstraints >= sparm->newconstretrain or at the last example if any
 *      constraint is pending.
 *
 * An unsupported slack norm (or a non-linear kernel with slack_norm == 2)
 * prints an error and terminates the process with exit(0).
 *
 * NOTE(review): realloc() results are stored without a NULL check.
 * NOTE(review): the "+" progress marker is printed regardless of
 *               struct_verbosity.
 * NOTE(review): the runtime summary divides by rt_total, which stays 0 if
 *               get_runtime() never advances.
 */
void svm_learn_struct(SAMPLE sample, STRUCT_LEARN_PARM *sparm, LEARN_PARM *lparm, KERNEL_PARM *kparm, STRUCTMODEL *sm) { int i,j; int numIt=0; long newconstraints=0, activenum=0; int opti_round, *opti; long old_numConst=0; double epsilon; long tolerance; double lossval,factor; double margin=0; double slack, *slacks, slacksum; long sizePsi; double *alpha=NULL; CONSTSET cset; SVECTOR *diff=NULL; SVECTOR *fy, *fybar, *f; SVECTOR *slackvec; WORD slackv[2]; MODEL *svmModel=NULL; KERNEL_CACHE *kcache=NULL; LABEL ybar; DOC *doc; long n=sample.n; EXAMPLE *ex=sample.examples; double rt_total=0.0, rt_opt=0.0; long rt1,rt2; init_struct_model(sample,sm,sparm); sizePsi=sm->sizePsi+1; /* sm must contain size of psi on return */ /* initialize example selection heuristic */ opti=(int*)my_malloc(n*sizeof(int)); for(i=0;i<n;i++) { opti[i]=0; } opti_round=0; if(sparm->slack_norm == 1) { lparm->svm_c=sparm->C; /* set upper bound C */ lparm->sharedslack=1; } else if(sparm->slack_norm == 2) { lparm->svm_c=999999999999999.0; /* upper bound C must never be reached */ lparm->sharedslack=0; if(kparm->kernel_type != LINEAR) { printf("ERROR: Kernels are not implemented for L2 slack norm!"); fflush(stdout); exit(0); } } else { printf("ERROR: Slack norm must be L1 or L2!"); fflush(stdout); exit(0); } epsilon=1.0; /* start with low precision and increase later */ tolerance=n/100; /* increase precision, whenever less than that number of constraints is not fulfilled */ lparm->biased_hyperplane=0; /* set threshold to zero */ cset=init_struct_constraints(sample, sm, sparm); if(cset.m > 0) { alpha=realloc(alpha,sizeof(double)*cset.m); for(i=0; i<cset.m; i++) alpha[i]=0; } /* set initial model and slack variables*/ svmModel=(MODEL *)my_malloc(sizeof(MODEL)); svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n, lparm,kparm,NULL,svmModel,alpha); add_weight_vector_to_linear_model(svmModel); sm->svm_model=svmModel; sm->w=svmModel->lin_weights; /* short cut to weight vector */ printf("Starting 
Iterations\n"); /*****************/ /*** main loop ***/ /*****************/ do { /* iteratively increase precision */ epsilon=MAX(epsilon*0.09999999999,sparm->epsilon); if(epsilon == sparm->epsilon) /* for final precision, find all SV */ tolerance=0; lparm->epsilon_crit=epsilon/2; /* svm precision must be higher than eps */ if(struct_verbosity>=1) printf("Setting current working precision to %g.\n",epsilon); do { /* iteration until (approx) all SV are found for current precision and tolerance */ old_numConst=cset.m; opti_round++; activenum=n; do { /* go through examples that keep producing new constraints */ if(struct_verbosity>=1) { printf("--Iteration %i (%ld active): ",++numIt,activenum); fflush(stdout); } for(i=0; i<n; i++) { /*** example loop ***/ rt1=get_runtime(); if(opti[i] != opti_round) {/* if the example is not shrunk away, then see if it is necessary to add a new constraint */ if(sparm->loss_type == SLACK_RESCALING) ybar=find_most_violated_constraint_slackrescaling(ex[i].x, ex[i].y,sm, sparm); else ybar=find_most_violated_constraint_marginrescaling(ex[i].x, ex[i].y,sm, sparm); if(empty_label(ybar)) { if(opti[i] != opti_round) { activenum--; opti[i]=opti_round; } if(struct_verbosity>=2) printf("no-incorrect-found(%i) ",i); continue; } /**** get psi(y)-psi(ybar) ****/ fy=psi(ex[i].x,ex[i].y,sm,sparm); fybar=psi(ex[i].x,ybar,sm,sparm); /**** scale feature vector and margin by loss ****/ lossval=loss(ex[i].y,ybar,sparm); if(sparm->slack_norm == 2) lossval=sqrt(lossval); if(sparm->loss_type == SLACK_RESCALING) factor=lossval; else /* do not rescale vector for */ factor=1.0; /* margin rescaling loss type */ for(f=fy;f;f=f->next) f->factor*=factor; for(f=fybar;f;f=f->next) f->factor*=-factor; margin=lossval; /**** create constraint for current ybar ****/ append_svector_list(fy,fybar);/* append the two vector lists */ doc=create_example(cset.m,0,i+1,1,fy); /**** compute slack for this example ****/ slack=0; for(j=0;j<cset.m;j++) if(cset.lhs[j]->slackid == i+1) 
{ if(sparm->slack_norm == 2) /* works only for linear kernel */ slack=MAX(slack,cset.rhs[j] -(classify_example(svmModel,cset.lhs[j]) -sm->w[sizePsi+i]/(sqrt(2*sparm->C)))); else slack=MAX(slack, cset.rhs[j]-classify_example(svmModel,cset.lhs[j])); } /**** if `error' add constraint and recompute ****/ if((classify_example(svmModel,doc)+slack)<(margin-epsilon)) { if(struct_verbosity>=2) {printf("(%i) ",i); fflush(stdout);} if(struct_verbosity==1) {printf("."); fflush(stdout);} /**** resize constraint matrix and add new constraint ****/ cset.m++; cset.lhs=realloc(cset.lhs,sizeof(DOC *)*cset.m); if(kparm->kernel_type == LINEAR) { diff=add_list_ss(fy); /* store difference vector directly */ if(sparm->slack_norm == 1) cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1, copy_svector(diff)); else if(sparm->slack_norm == 2) { /**** add squared slack variable to feature vector ****/ slackv[0].wnum=sizePsi+i; slackv[0].weight=1/(sqrt(2*sparm->C)); slackv[1].wnum=0; /*terminator*/ slackvec=create_svector(slackv,"",1.0); cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1, add_ss(diff,slackvec)); free_svector(slackvec); } free_svector(diff); } else { /* kernel is used */ if(sparm->slack_norm == 1) cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1, copy_svector(fy)); else if(sparm->slack_norm == 2) exit(1); } cset.rhs=realloc(cset.rhs,sizeof(double)*cset.m); cset.rhs[cset.m-1]=margin; alpha=realloc(alpha,sizeof(double)*cset.m); alpha[cset.m-1]=0; newconstraints++; } else { printf("+"); fflush(stdout); if(opti[i] != opti_round) { activenum--; opti[i]=opti_round; } } free_example(doc,0); free_svector(fy); /* this also free's fybar */ free_label(ybar); } /**** get new QP solution ****/ if((newconstraints >= sparm->newconstretrain) || ((newconstraints > 0) && (i == n-1))) { if(struct_verbosity>=1) { printf("*");fflush(stdout); } rt2=get_runtime(); free_model(svmModel,0); svmModel=(MODEL *)my_malloc(sizeof(MODEL)); /* Always get a new kernel cache. 
It is not possible to use the same cache for two different training runs */ if(kparm->kernel_type != LINEAR) kcache=kernel_cache_init(cset.m,lparm->kernel_cache_size); /* Run the QP solver on cset. */ svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n, lparm,kparm,kcache,svmModel,alpha); if(kcache) kernel_cache_cleanup(kcache); /* Always add weight vector, in case part of the kernel is linear. If not, ignore the weight vector since its content is bogus. */ add_weight_vector_to_linear_model(svmModel); sm->svm_model=svmModel; sm->w=svmModel->lin_weights; /* short cut to weight vector */ rt_opt+=MAX(get_runtime()-rt2,0); newconstraints=0; } rt_total+=MAX(get_runtime()-rt1,0); } /* end of example loop */ if(struct_verbosity>=1) printf("(NumConst=%d, SV=%ld, Eps=%.4f)\n",cset.m,svmModel->sv_num-1, svmModel->maxdiff); } while(activenum > 0); /* repeat until all examples produced no constraint at least once */ } while((cset.m - old_numConst) > tolerance) ; } while(epsilon > sparm->epsilon); if(struct_verbosity>=1) { /**** compute sum of slacks ****/ slacks=(double *)my_malloc(sizeof(double)*(n+1)); for(i=0; i<=n; i++) { slacks[i]=0; } if(sparm->slack_norm == 1) { for(j=0;j<cset.m;j++) slacks[cset.lhs[j]->slackid]=MAX(slacks[cset.lhs[j]->slackid], cset.rhs[j]-classify_example(svmModel,cset.lhs[j])); } else if(sparm->slack_norm == 2) { for(j=0;j<cset.m;j++) slacks[cset.lhs[j]->slackid]=MAX(slacks[cset.lhs[j]->slackid], cset.rhs[j] -(classify_example(svmModel,cset.lhs[j]) -sm->w[sizePsi+cset.lhs[j]->slackid-1]/(sqrt(2*sparm->C)))); } slacksum=0; for(i=0; i<=n; i++) slacksum+=slacks[i]; free(slacks); printf("Final epsilon on KKT-Conditions: %.5f\n", MAX(svmModel->maxdiff,epsilon)); printf("Total number of constraints added: %i\n",(int)cset.m); if(sparm->slack_norm == 1) { printf("Number of SV: %ld \n",svmModel->sv_num-1); printf("Number of non-zero slack variables: %ld (out of %ld)\n", svmModel->at_upper_bound,n); printf("Norm of weight vector: |w|=%.5f\n", 
model_length_s(svmModel,kparm)); } else if(sparm->slack_norm == 2){ printf("Number of SV: %ld (including %ld at upper bound)\n", svmModel->sv_num-1,svmModel->at_upper_bound); printf("Norm of weight vector (including L2-loss): |w|=%.5f\n", model_length_s(svmModel,kparm)); } printf("Sum of slack variables: sum(xi_i)=%.5f\n",slacksum); printf("Norm of longest difference vector: ||Psi(x,y)-Psi(x,ybar)||=%.5f\n", length_of_longest_document_vector(cset.lhs,cset.m,kparm)); printf("Runtime in cpu-seconds: %.2f (%.2f%% for SVM optimization)\n", rt_total/100.0, 100.0*rt_opt/rt_total); } if(struct_verbosity>=4) printW(sm->w,sizePsi,n,lparm->svm_c); if(svmModel) { sm->svm_model=copy_model(svmModel); sm->w=sm->svm_model->lin_weights; /* short cut to weight vector */ } print_struct_learning_stats(sample,sm,cset,alpha,sparm); if(svmModel) free_model(svmModel,0); free(alpha); free(opti); free(cset.rhs); for(i=0;i<cset.m;i++) free_example(cset.lhs[i],1); free(cset.lhs); }
int main(int argc, char* argv[]) { // The file to create the online version of the code printf("Runs with F1 loss in the loss-augmented objective .. only positive data .. with weighting of Fscores .. no regions file"); // double *w; /* weight vector */ double C, epsilon, Cdash; LEARN_PARM learn_parm; KERNEL_PARM kernel_parm; char trainfile[1024]; char modelfile[1024]; int MAX_ITER; SAMPLE sample; STRUCT_LEARN_PARM sparm; STRUCTMODEL sm; /* read input parameters */ my_read_input_parameters(argc, argv, trainfile, modelfile, &learn_parm, &kernel_parm, &sparm); epsilon = learn_parm.eps; C = learn_parm.svm_c; Cdash = learn_parm.Cdash; MAX_ITER = learn_parm.maxiter; /* read in examples */ //strcpy(trainfile, "dataset/reidel_trainSVM.small.data"); sample = read_struct_examples(trainfile,&sparm); /* initialization */ init_struct_model(sample,&sm,&sparm,&learn_parm,&kernel_parm); // (OnlineSVM : Commenting 'w' as they are replaced by 'w_iters' // w = create_nvector(sm.sizePsi); // clear_nvector(w, sm.sizePsi); // sm.w = w; /* establish link to w, as long as w does not change pointer */ double *zeroes = create_nvector(sm.sizePsi); clear_nvector(zeroes, sm.sizePsi); // printf("Addr. of w (init) %x\t%x\n",w,sm.w); time_t time_start_full, time_end_full; int eid,totalEpochs=learn_parm.totalEpochs; int chunkid, numChunks=learn_parm.numChunks; double primal_obj_sum, primal_obj; char chunk_trainfile[1024]; SAMPLE * chunk_dataset = (SAMPLE *) malloc(sizeof(SAMPLE)*numChunks); /** * If we have ‘k’ instances and do ‘n’ epochs, after processing each chunk we update the weight. * Since we do ‘k’ updates, we will have ‘k’ weight vectors after each epoch. * After ‘n’ epochs, we will have ‘k*n’ weight vectors. 
*/ // -------------------------------------------------------------------------------------------------------------------------------- double ***w_iters = (double**) malloc(totalEpochs*sizeof(double**)); // printf("--2: After 1st malloc -- %x; sz = %d\n", w_iters, totalEpochs*sizeof(double**)); for(eid = 0; eid < totalEpochs; eid++){ w_iters[eid] = (double*) malloc(numChunks*sizeof(double*)); // printf("2.5... id = %d, .. allocated ... %x; sz = %d\n",eid, w_iters[eid],numChunks*sizeof(double*)); } printf("--3: After 2nd malloc \n"); for(eid = 0; eid < totalEpochs; eid++){ for(chunkid = 0; chunkid < numChunks; chunkid++){ w_iters[eid][chunkid] = create_nvector(sm.sizePsi); // printf("Confirming memory location : %x\n",w_iters[eid][chunkid]); clear_nvector(w_iters[eid][chunkid], sm.sizePsi); } } sm.w_iters = w_iters; printf("(ONLINE SVM) Completed the memory alloc for the parameters\n"); // -------------------------------------------------------------------------------------------------------------------------------- /** * Having divided the dataset (X,Y) into set of 'k' chunks / sub-datasets (X_1,Y_1) ... (X_k, Y_k) * Do the following do while routine for one set of datapoints (sub-datasets) */ // -------------------------------------------------------------------------------------------------------------------------------- printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX Changed .... 
Calling Java to split dataset\n"); char *cmd = malloc(1000); strcpy(cmd,"java -Xmx1G -cp java/bin:java/lib/* " " javaHelpers.splitDataset "); strcat(cmd, trainfile); strcat(cmd, " "); char numChunks_str[10]; sprintf(numChunks_str, "%d", numChunks); strcat(cmd, numChunks_str); strcat(cmd, " "); printf("Executing cmd : %s\n", cmd);fflush(stdout); system(cmd); // -------------------------------------------------------------------------------------------------------------------------------- for(chunkid = 0; chunkid < numChunks; chunkid++) { memset(chunk_trainfile, 0, 1024); strcat(chunk_trainfile,trainfile); strcat(chunk_trainfile,".chunks/chunk."); // NOTE: Name hard-coded according to the convention used to create chunked files char chunkid_str[10];sprintf(chunkid_str, "%d", chunkid); strcat(chunk_trainfile,chunkid_str); printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX Changed .... Reading chunked dataset\n"); printf("Chunk trainfile : %s\n",chunk_trainfile); chunk_dataset[chunkid] = read_struct_examples_chunk(chunk_trainfile); } time(&time_start_full); for(eid = 0; eid < totalEpochs; eid++) { printf("(ONLINE LEARNING) : EPOCH %d\n",eid); primal_obj_sum = 0.0; for(chunkid = 0; chunkid < numChunks; chunkid++) // NOTE: Chunkid starts from 1 and goes upto numChumks { int sz = sample.n / numChunks; int datasetStartIdx = (chunkid) * sz; int chunkSz = (numChunks-1 == chunkid) ? 
(sample.n - ((numChunks-1)*sz) ) : (sz); primal_obj = optimizeMultiVariatePerfMeasure(chunk_dataset[chunkid], datasetStartIdx, chunkSz, &sm, &sparm, C, Cdash, epsilon, MAX_ITER, &learn_parm, trainfile, w_iters, eid, chunkid, numChunks, zeroes); printf("(ONLINE LEARNING) : FINISHED PROCESSING CHUNK (PSEUDO-DATAPOINT) %d of %d\n",chunkid+1, numChunks); primal_obj_sum += primal_obj; printf("(OnlineSVM) : Processed pseudo-datapoint -- primal objective sum: %.4f\n", primal_obj_sum); } // After the completion of one epoch, warm start the 2nd epoch with the values of the // weight vectors seen at the end of the last chunk in previous epoch if(eid + 1 < totalEpochs){ //init w_iters[eid+1][0] to w_iters[eid][numChunks-1] copy_vector(w_iters[eid+1][0], w_iters[eid][numChunks-1], sm.sizePsi); printf("(ONLINE LEARNING) : WARM START ACROSS EPOCHS ..... DONE....\n"); } printf("(OnlineSVM) : EPOCH COMPLETE -- primal objective: %.4f\n", primal_obj); printf("(ONLINE LEARNING) : EPOCH %d DONE! .....\n",eid); } time(&time_end_full); char msg[20]; sprintf(msg,"(ONLINE LEARNING) : Total Time Taken : "); print_time(time_start_full, time_end_full, msg); printf("(ONLINE LEARNING) Reached here\n"); /* write structural model */ write_struct_model_online(modelfile, &sm, &sparm, totalEpochs, numChunks); // skip testing for the moment printf("(ONLINE LEARNING) Complete dumping\n"); /* free memory */ //TODO: Need to change this ... free_struct_sample(sample); free_struct_model(sm, &sparm); return(0); }
/*
 * svm_learn_struct_joint: trains the structural model `sm` on `sample` using
 * one joint constraint per iteration.  The per-example fydelta vectors and
 * rhs_i values returned by find_most_violated_constraint() are summed into a
 * single lhs/rhs pair; that pair is appended to the working set `cset` when
 * it is violated by more than sparm->epsilon (or when it was taken from the
 * constraint cache), and the QP is then re-solved with
 * svm_learn_optimization().
 *
 *   sample   - training set; sample.n examples stored in sample.examples.
 *   sparm    - read here: slack_norm (must be 1), C, epsilon, batch_size
 *              (values below 100 are used as a percentage of n) and
 *              ccache_size.
 *   lparm    - overwritten here: svm_c, sharedslack, biased_hyperplane and
 *              epsilon_crit.
 *   kparm    - kernel_type is read (and temporarily switched to GRAM around
 *              the QP solve for the dual algorithms); gram_matrix is set
 *              here and freed before returning.
 *   sm       - initialised via init_struct_model(); on return sm->svm_model
 *              is a copy_model() copy of the final model and sm->w points at
 *              its lin_weights.
 *   alg_type - ONESLACK_DUAL_ALG and ONESLACK_DUAL_CACHE_ALG maintain a
 *              kernel (gram) matrix over cset; ONESLACK_DUAL_CACHE_ALG also
 *              draws constraints from a constraint cache (ccache).
 *
 * The main loop repeats while finalize_iteration() returns non-zero, the
 * last constraint came from the cache, or ceps > sparm->epsilon.
 * finalize_iteration() is the first operand of the || chain, so it is called
 * on every iteration.  After each QP solve, remove_inactive_constraints() is
 * called with a threshold argument of 50 (presumably the number of
 * optimisation rounds a constraint may stay inactive -- confirm against its
 * definition).
 *
 * A slack norm other than 1 prints an error and terminates the process with
 * exit(0); with ONESLACK_DUAL_CACHE_ALG a non-zero loss(y_i,y_i) terminates
 * with exit(1).
 *
 * NOTE(review): realloc() results are stored without a NULL check.
 * NOTE(review): the fflush(stdout) following "Reducing working set..." is
 *               not governed by the preceding if and runs at any verbosity.
 * NOTE(review): viol_est is scaled by n/j, where j is 0 if the batch loop
 *               does not execute.
 * NOTE(review): kparm->gram_matrix is freed at the end but not reset to
 *               NULL.
 */
void svm_learn_struct_joint(SAMPLE sample, STRUCT_LEARN_PARM *sparm, LEARN_PARM *lparm, KERNEL_PARM *kparm, STRUCTMODEL *sm, int alg_type) { int i,j; int numIt=0; long argmax_count=0; long totconstraints=0; long kernel_type_org; double epsilon,epsilon_cached; double lhsXw,rhs_i; double rhs=0; double slack,ceps; double dualitygap,modellength,alphasum; long sizePsi; double *alpha=NULL; long *alphahist=NULL,optcount=0; CONSTSET cset; SVECTOR *diff=NULL; double *lhs_n=NULL; SVECTOR *fy, *fydelta, **fycache, *lhs; MODEL *svmModel=NULL; DOC *doc; long n=sample.n; EXAMPLE *ex=sample.examples; double rt_total=0,rt_opt=0,rt_init=0,rt_psi=0,rt_viol=0,rt_kernel=0; double rt_cacheupdate=0,rt_cacheconst=0,rt_cacheadd=0,rt_cachesum=0; double rt1=0,rt2=0; long progress; /* SVECTOR ***fydelta_cache=NULL; double **loss_cache=NULL; int cache_size=0; */ CCACHE *ccache=NULL; int cached_constraint; double viol,viol_est,epsilon_est=0; long uptr=0; long *randmapping=NULL; long batch_size=n; rt1=get_runtime(); if(sparm->batch_size<100) batch_size=sparm->batch_size*n/100.0; init_struct_model(sample,sm,sparm,lparm,kparm); sizePsi=sm->sizePsi+1; /* sm must contain size of psi on return */ if(sparm->slack_norm == 1) { lparm->svm_c=sparm->C; /* set upper bound C */ lparm->sharedslack=1; } else if(sparm->slack_norm == 2) { printf("ERROR: The joint algorithm does not apply to L2 slack norm!"); fflush(stdout); exit(0); } else { printf("ERROR: Slack norm must be L1 or L2!"); fflush(stdout); exit(0); } lparm->biased_hyperplane=0; /* set threshold to zero */ epsilon=100.0; /* start with low precision and increase later */ epsilon_cached=epsilon; /* epsilon to use for iterations using constraints constructed from the constraint cache */ cset=init_struct_constraints(sample, sm, sparm); if(cset.m > 0) { alpha=(double *)realloc(alpha,sizeof(double)*cset.m); alphahist=(long *)realloc(alphahist,sizeof(long)*cset.m); for(i=0; i<cset.m; i++) { alpha[i]=0; alphahist[i]=-1; /* -1 makes sure these constraints 
are never removed */ } } kparm->gram_matrix=NULL; if((alg_type == ONESLACK_DUAL_ALG) || (alg_type == ONESLACK_DUAL_CACHE_ALG)) kparm->gram_matrix=init_kernel_matrix(&cset,kparm); /* set initial model and slack variables */ svmModel=(MODEL *)my_malloc(sizeof(MODEL)); lparm->epsilon_crit=epsilon; svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi, lparm,kparm,NULL,svmModel,alpha); add_weight_vector_to_linear_model(svmModel); sm->svm_model=svmModel; sm->w=svmModel->lin_weights; /* short cut to weight vector */ /* create a cache of the feature vectors for the correct labels */ fycache=(SVECTOR **)my_malloc(n*sizeof(SVECTOR *)); for(i=0;i<n;i++) { if(USE_FYCACHE) { fy=psi(ex[i].x,ex[i].y,sm,sparm); if(kparm->kernel_type == LINEAR_KERNEL) { /* store difference vector directly */ diff=add_list_sort_ss_r(fy,COMPACT_ROUNDING_THRESH); free_svector(fy); fy=diff; } } else fy=NULL; fycache[i]=fy; } /* initialize the constraint cache */ if(alg_type == ONESLACK_DUAL_CACHE_ALG) { ccache=create_constraint_cache(sample,sparm,sm); /* NOTE: */ for(i=0;i<n;i++) if(loss(ex[i].y,ex[i].y,sparm) != 0) { printf("ERROR: Loss function returns non-zero value loss(y_%d,y_%d)\n",i,i); printf(" W4 algorithm assumes that loss(y_i,y_i)=0 for all i.\n"); exit(1); } } if(kparm->kernel_type == LINEAR_KERNEL) lhs_n=create_nvector(sm->sizePsi); /* randomize order or training examples */ if(batch_size<n) randmapping=random_order(n); rt_init+=MAX(get_runtime()-rt1,0); rt_total+=rt_init; /*****************/ /*** main loop ***/ /*****************/ do { /* iteratively find and add constraints to working set */ if(struct_verbosity>=1) { printf("Iter %i: ",++numIt); fflush(stdout); } rt1=get_runtime(); /**** compute current slack ****/ alphasum=0; for(j=0;(j<cset.m);j++) alphasum+=alpha[j]; for(j=0,slack=-1;(j<cset.m) && (slack==-1);j++) if(alpha[j] > alphasum/cset.m) slack=MAX(0,cset.rhs[j]-classify_example(svmModel,cset.lhs[j])); slack=MAX(0,slack); rt_total+=MAX(get_runtime()-rt1,0); /**** find a 
violated joint constraint ****/ lhs=NULL; rhs=0; if(alg_type == ONESLACK_DUAL_CACHE_ALG) { rt1=get_runtime(); /* Compute violation of constraints in cache for current w */ if(struct_verbosity>=2) rt2=get_runtime(); update_constraint_cache_for_model(ccache, svmModel); if(struct_verbosity>=2) rt_cacheupdate+=MAX(get_runtime()-rt2,0); /* Is there is a sufficiently violated constraint in cache? */ viol=compute_violation_of_constraint_in_cache(ccache,epsilon_est/2); if(viol-slack > MAX(epsilon_est/10,sparm->epsilon)) { /* There is a sufficiently violated constraint in cache, so use this constraint in this iteration. */ if(struct_verbosity>=2) rt2=get_runtime(); viol=find_most_violated_joint_constraint_in_cache(ccache, epsilon_est/2,lhs_n,&lhs,&rhs); if(struct_verbosity>=2) rt_cacheconst+=MAX(get_runtime()-rt2,0); cached_constraint=1; } else { /* There is no sufficiently violated constraint in cache, so update cache by computing most violated constraint explicitly for batch_size examples. */ viol_est=0; progress=0; viol=compute_violation_of_constraint_in_cache(ccache,0); for(j=0;(j<batch_size) || ((j<n)&&(viol-slack<sparm->epsilon));j++) { if(struct_verbosity>=1) print_percent_progress(&progress,n,10,"."); uptr=uptr % n; if(randmapping) i=randmapping[uptr]; else i=uptr; /* find most violating fydelta=fy-fybar and rhs for example i */ find_most_violated_constraint(&fydelta,&rhs_i,&ex[i], fycache[i],n,sm,sparm, &rt_viol,&rt_psi,&argmax_count); /* add current fy-fybar and loss to cache */ if(struct_verbosity>=2) rt2=get_runtime(); viol+=add_constraint_to_constraint_cache(ccache,sm->svm_model, i,fydelta,rhs_i,0.0001*sparm->epsilon/n, sparm->ccache_size,&rt_cachesum); if(struct_verbosity>=2) rt_cacheadd+=MAX(get_runtime()-rt2,0); viol_est+=ccache->constlist[i]->viol; uptr++; } cached_constraint=(j<n); if(struct_verbosity>=2) rt2=get_runtime(); if(cached_constraint) viol=find_most_violated_joint_constraint_in_cache(ccache, epsilon_est/2,lhs_n,&lhs,&rhs); else 
viol=find_most_violated_joint_constraint_in_cache(ccache,0,lhs_n, &lhs,&rhs); if(struct_verbosity>=2) rt_cacheconst+=MAX(get_runtime()-rt2,0); viol_est*=((double)n/j); epsilon_est=(1-(double)j/n)*epsilon_est+(double)j/n*(viol_est-slack); if((struct_verbosity >= 1) && (j!=n)) printf("(upd=%5.1f%%,eps^=%.4f,eps*=%.4f)", 100.0*j/n,viol_est-slack,epsilon_est); } lhsXw=rhs-viol; rt_total+=MAX(get_runtime()-rt1,0); } else { /* do not use constraint from cache */ rt1=get_runtime(); cached_constraint=0; if(kparm->kernel_type == LINEAR_KERNEL) clear_nvector(lhs_n,sm->sizePsi); progress=0; rt_total+=MAX(get_runtime()-rt1,0); for(i=0; i<n; i++) { rt1=get_runtime(); if(struct_verbosity>=1) print_percent_progress(&progress,n,10,"."); /* compute most violating fydelta=fy-fybar and rhs for example i */ find_most_violated_constraint(&fydelta,&rhs_i,&ex[i],fycache[i],n, sm,sparm,&rt_viol,&rt_psi,&argmax_count); /* add current fy-fybar to lhs of constraint */ if(kparm->kernel_type == LINEAR_KERNEL) { add_list_n_ns(lhs_n,fydelta,1.0); /* add fy-fybar to sum */ free_svector(fydelta); } else { append_svector_list(fydelta,lhs); /* add fy-fybar to vector list */ lhs=fydelta; } rhs+=rhs_i; /* add loss to rhs */ rt_total+=MAX(get_runtime()-rt1,0); } /* end of example loop */ rt1=get_runtime(); /* create sparse vector from dense sum */ if(kparm->kernel_type == LINEAR_KERNEL) lhs=create_svector_n_r(lhs_n,sm->sizePsi,NULL,1.0, COMPACT_ROUNDING_THRESH); doc=create_example(cset.m,0,1,1,lhs); lhsXw=classify_example(svmModel,doc); free_example(doc,0); viol=rhs-lhsXw; rt_total+=MAX(get_runtime()-rt1,0); } /* end of finding most violated joint constraint */ rt1=get_runtime(); /**** if `error', then add constraint and recompute QP ****/ if(slack > (rhs-lhsXw+0.000001)) { printf("\nWARNING: Slack of most violated constraint is smaller than slack of working\n"); printf(" set! 
There is probably a bug in 'find_most_violated_constraint_*'.\n"); printf("slack=%f, newslack=%f\n",slack,rhs-lhsXw); /* exit(1); */ } ceps=MAX(0,rhs-lhsXw-slack); if((ceps > sparm->epsilon) || cached_constraint) { /**** resize constraint matrix and add new constraint ****/ cset.lhs=(DOC **)realloc(cset.lhs,sizeof(DOC *)*(cset.m+1)); cset.lhs[cset.m]=create_example(cset.m,0,1,1,lhs); cset.rhs=(double *)realloc(cset.rhs,sizeof(double)*(cset.m+1)); cset.rhs[cset.m]=rhs; alpha=(double *)realloc(alpha,sizeof(double)*(cset.m+1)); alpha[cset.m]=0; alphahist=(long *)realloc(alphahist,sizeof(long)*(cset.m+1)); alphahist[cset.m]=optcount; cset.m++; totconstraints++; if((alg_type == ONESLACK_DUAL_ALG) || (alg_type == ONESLACK_DUAL_CACHE_ALG)) { if(struct_verbosity>=2) rt2=get_runtime(); kparm->gram_matrix=update_kernel_matrix(kparm->gram_matrix,cset.m-1, &cset,kparm); if(struct_verbosity>=2) rt_kernel+=MAX(get_runtime()-rt2,0); } /**** get new QP solution ****/ if(struct_verbosity>=1) { printf("*");fflush(stdout); } if(struct_verbosity>=2) rt2=get_runtime(); /* set svm precision so that higher than eps of most violated constr */ if(cached_constraint) { epsilon_cached=MIN(epsilon_cached,ceps); lparm->epsilon_crit=epsilon_cached/2; } else { epsilon=MIN(epsilon,ceps); /* best eps so far */ lparm->epsilon_crit=epsilon/2; epsilon_cached=epsilon; } free_model(svmModel,0); svmModel=(MODEL *)my_malloc(sizeof(MODEL)); /* Run the QP solver on cset. */ kernel_type_org=kparm->kernel_type; if((alg_type == ONESLACK_DUAL_ALG) || (alg_type == ONESLACK_DUAL_CACHE_ALG)) kparm->kernel_type=GRAM; /* use kernel stored in kparm */ svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi, lparm,kparm,NULL,svmModel,alpha); kparm->kernel_type=kernel_type_org; svmModel->kernel_parm.kernel_type=kernel_type_org; /* Always add weight vector, in case part of the kernel is linear. If not, ignore the weight vector since its content is bogus. 
*/ add_weight_vector_to_linear_model(svmModel); sm->svm_model=svmModel; sm->w=svmModel->lin_weights; /* short cut to weight vector */ optcount++; /* keep track of when each constraint was last active. constraints marked with -1 are not updated */ for(j=0;j<cset.m;j++) if((alphahist[j]>-1) && (alpha[j] != 0)) alphahist[j]=optcount; if(struct_verbosity>=2) rt_opt+=MAX(get_runtime()-rt2,0); /* Check if some of the linear constraints have not been active in a while. Those constraints are then removed to avoid bloating the working set beyond necessity. */ if(struct_verbosity>=3) printf("Reducing working set...");fflush(stdout); remove_inactive_constraints(&cset,alpha,optcount,alphahist,50); if(struct_verbosity>=3) printf("done. "); } else { free_svector(lhs); } if(struct_verbosity>=1) printf("(NumConst=%d, SV=%ld, CEps=%.4f, QPEps=%.4f)\n",cset.m, svmModel->sv_num-1,ceps,svmModel->maxdiff); rt_total+=MAX(get_runtime()-rt1,0); } while(finalize_iteration(ceps,cached_constraint,sample,sm,cset,alpha,sparm)|| cached_constraint || (ceps > sparm->epsilon) ); // originally like below ... 
finalize_iteration was not called because of short-circuit evaluation // } while(cached_constraint || (ceps > sparm->epsilon) || // finalize_iteration(ceps,cached_constraint,sample,sm,cset,alpha,sparm) // ); if(struct_verbosity>=1) { printf("Final epsilon on KKT-Conditions: %.5f\n", MAX(svmModel->maxdiff,ceps)); slack=0; for(j=0;j<cset.m;j++) slack=MAX(slack, cset.rhs[j]-classify_example(svmModel,cset.lhs[j])); alphasum=0; for(i=0; i<cset.m; i++) alphasum+=alpha[i]*cset.rhs[i]; if(kparm->kernel_type == LINEAR_KERNEL) modellength=model_length_n(svmModel); else modellength=model_length_s(svmModel); dualitygap=(0.5*modellength*modellength+sparm->C*viol) -(alphasum-0.5*modellength*modellength); printf("Upper bound on duality gap: %.5f\n", dualitygap); printf("Dual objective value: dval=%.5f\n", alphasum-0.5*modellength*modellength); printf("Primal objective value: pval=%.5f\n", 0.5*modellength*modellength+sparm->C*viol); printf("Total number of constraints in final working set: %i (of %i)\n",(int)cset.m,(int)totconstraints); printf("Number of iterations: %d\n",numIt); printf("Number of calls to 'find_most_violated_constraint': %ld\n",argmax_count); printf("Number of SV: %ld \n",svmModel->sv_num-1); printf("Norm of weight vector: |w|=%.5f\n",modellength); printf("Value of slack variable (on working set): xi=%.5f\n",slack); printf("Value of slack variable (global): xi=%.5f\n",viol); printf("Norm of longest difference vector: ||Psi(x,y)-Psi(x,ybar)||=%.5f\n", length_of_longest_document_vector(cset.lhs,cset.m,kparm)); if(struct_verbosity>=2) printf("Runtime in cpu-seconds: %.2f (%.2f%% for QP, %.2f%% for kernel, %.2f%% for Argmax, %.2f%% for Psi, %.2f%% for init, %.2f%% for cache update, %.2f%% for cache const, %.2f%% for cache add (incl. 
%.2f%% for sum))\n", rt_total/100.0, (100.0*rt_opt)/rt_total, (100.0*rt_kernel)/rt_total, (100.0*rt_viol)/rt_total, (100.0*rt_psi)/rt_total, (100.0*rt_init)/rt_total,(100.0*rt_cacheupdate)/rt_total, (100.0*rt_cacheconst)/rt_total,(100.0*rt_cacheadd)/rt_total, (100.0*rt_cachesum)/rt_total); else if(struct_verbosity==1) printf("Runtime in cpu-seconds: %.2f\n",rt_total/100.0); } if(ccache) { long cnum=0; CCACHEELEM *celem; for(i=0;i<n;i++) for(celem=ccache->constlist[i];celem;celem=celem->next) cnum++; printf("Final number of constraints in cache: %ld\n",cnum); } if(struct_verbosity>=4) printW(sm->w,sizePsi,n,lparm->svm_c); if(svmModel) { sm->svm_model=copy_model(svmModel); sm->w=sm->svm_model->lin_weights; /* short cut to weight vector */ free_model(svmModel,0); } print_struct_learning_stats(sample,sm,cset,alpha,sparm); if(lhs_n) free_nvector(lhs_n); if(ccache) free_constraint_cache(ccache); for(i=0;i<n;i++) if(fycache[i]) free_svector(fycache[i]); free(fycache); free(alpha); free(alphahist); free(cset.rhs); for(i=0;i<cset.m;i++) free_example(cset.lhs[i],1); free(cset.lhs); if(kparm->gram_matrix) free_matrix(kparm->gram_matrix); }
void svm_learn_struct(SAMPLE sample, STRUCT_LEARN_PARM *sparm, LEARN_PARM *lparm, KERNEL_PARM *kparm, STRUCTMODEL *sm, int alg_type) { int i,j; int numIt=0; long argmax_count=0; long newconstraints=0, totconstraints=0, activenum=0; int opti_round, *opti, fullround, use_shrinking; long old_totconstraints=0; double epsilon,svmCnorm; long tolerance,new_precision=1,dont_stop=0; double lossval,factor,dist; double margin=0; double slack, *slacks, slacksum, ceps; double dualitygap,modellength,alphasum; long sizePsi; double *alpha=NULL; long *alphahist=NULL,optcount=0,lastoptcount=0; CONSTSET cset; SVECTOR *diff=NULL; SVECTOR *fy, *fybar, *f, **fycache=NULL; SVECTOR *slackvec; WORD slackv[2]; MODEL *svmModel=NULL; KERNEL_CACHE *kcache=NULL; LABEL ybar; DOC *doc; long n=sample.n; EXAMPLE *ex=sample.examples; double rt_total=0, rt_opt=0, rt_init=0, rt_psi=0, rt_viol=0; double rt1,rt2; rt1=get_runtime(); init_struct_model(sample,sm,sparm,lparm,kparm); sizePsi=sm->sizePsi+1; /* sm must contain size of psi on return */ /* initialize shrinking-style example selection heuristic */ if(alg_type == NSLACK_SHRINK_ALG) use_shrinking=1; else use_shrinking=0; opti=(int*)my_malloc(n*sizeof(int)); for(i=0;i<n;i++) { opti[i]=0; } opti_round=0; /* normalize regularization parameter C by the number of training examples */ svmCnorm=sparm->C/n; if(sparm->slack_norm == 1) { lparm->svm_c=svmCnorm; /* set upper bound C */ lparm->sharedslack=1; } else if(sparm->slack_norm == 2) { lparm->svm_c=999999999999999.0; /* upper bound C must never be reached */ lparm->sharedslack=0; if(kparm->kernel_type != LINEAR_KERNEL) { printf("ERROR: Kernels are not implemented for L2 slack norm!"); fflush(stdout); exit(0); } } else { printf("ERROR: Slack norm must be L1 or L2!"); fflush(stdout); exit(0); } epsilon=100.0; /* start with low precision and increase later */ tolerance=MIN(n/3,MAX(n/100,5));/* increase precision, whenever less than that number of constraints is not fulfilled */ lparm->biased_hyperplane=0; 
/* set threshold to zero */ cset=init_struct_constraints(sample, sm, sparm); if(cset.m > 0) { alpha=(double *)realloc(alpha,sizeof(double)*cset.m); alphahist=(long *)realloc(alphahist,sizeof(long)*cset.m); for(i=0; i<cset.m; i++) { alpha[i]=0; alphahist[i]=-1; /* -1 makes sure these constraints are never removed */ } } /* set initial model and slack variables*/ svmModel=(MODEL *)my_malloc(sizeof(MODEL)); lparm->epsilon_crit=epsilon; if(kparm->kernel_type != LINEAR_KERNEL) kcache=kernel_cache_init(MAX(cset.m,1),lparm->kernel_cache_size); svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n, lparm,kparm,kcache,svmModel,alpha); if(kcache) kernel_cache_cleanup(kcache); add_weight_vector_to_linear_model(svmModel); sm->svm_model=svmModel; sm->w=svmModel->lin_weights; /* short cut to weight vector */ /* create a cache of the feature vectors for the correct labels */ if(USE_FYCACHE) { fycache=(SVECTOR **)my_malloc(n*sizeof(SVECTOR *)); for(i=0;i<n;i++) { fy=psi(ex[i].x,ex[i].y,sm,sparm); if(kparm->kernel_type == LINEAR_KERNEL) { diff=add_list_ss(fy); /* store difference vector directly */ free_svector(fy); fy=diff; } fycache[i]=fy; } } rt_init+=MAX(get_runtime()-rt1,0); rt_total+=MAX(get_runtime()-rt1,0); /*****************/ /*** main loop ***/ /*****************/ do { /* iteratively increase precision */ epsilon=MAX(epsilon*0.49999999999,sparm->epsilon); new_precision=1; if(epsilon == sparm->epsilon) /* for final precision, find all SV */ tolerance=0; lparm->epsilon_crit=epsilon/2; /* svm precision must be higher than eps */ if(struct_verbosity>=1) printf("Setting current working precision to %g.\n",epsilon); do { /* iteration until (approx) all SV are found for current precision and tolerance */ opti_round++; activenum=n; dont_stop=0; old_totconstraints=totconstraints; do { /* with shrinking turned on, go through examples that keep producing new constraints */ if(struct_verbosity>=1) { printf("Iter %i (%ld active): ",++numIt,activenum); fflush(stdout); } ceps=0; 
fullround=(activenum == n); for(i=0; i<n; i++) { /*** example loop ***/ rt1=get_runtime(); if((!use_shrinking) || (opti[i] != opti_round)) { /* if the example is not shrunk away, then see if it is necessary to add a new constraint */ rt2=get_runtime(); argmax_count++; if(sparm->loss_type == SLACK_RESCALING) ybar=find_most_violated_constraint_slackrescaling(ex[i].x, ex[i].y,sm, sparm); else ybar=find_most_violated_constraint_marginrescaling(ex[i].x, ex[i].y,sm, sparm); rt_viol+=MAX(get_runtime()-rt2,0); if(empty_label(ybar)) { if(opti[i] != opti_round) { activenum--; opti[i]=opti_round; } if(struct_verbosity>=2) printf("no-incorrect-found(%i) ",i); continue; } /**** get psi(y)-psi(ybar) ****/ rt2=get_runtime(); if(fycache) fy=copy_svector(fycache[i]); else fy=psi(ex[i].x,ex[i].y,sm,sparm); fybar=psi(ex[i].x,ybar,sm,sparm); rt_psi+=MAX(get_runtime()-rt2,0); /**** scale feature vector and margin by loss ****/ lossval=loss(ex[i].y,ybar,sparm); if(sparm->slack_norm == 2) lossval=sqrt(lossval); if(sparm->loss_type == SLACK_RESCALING) factor=lossval; else /* do not rescale vector for */ factor=1.0; /* margin rescaling loss type */ for(f=fy;f;f=f->next) f->factor*=factor; for(f=fybar;f;f=f->next) f->factor*=-factor; margin=lossval; /**** create constraint for current ybar ****/ append_svector_list(fy,fybar);/* append the two vector lists */ doc=create_example(cset.m,0,i+1,1,fy); /**** compute slack for this example ****/ slack=0; for(j=0;j<cset.m;j++) if(cset.lhs[j]->slackid == i+1) { if(sparm->slack_norm == 2) /* works only for linear kernel */ slack=MAX(slack,cset.rhs[j] -(classify_example(svmModel,cset.lhs[j]) -sm->w[sizePsi+i]/(sqrt(2*svmCnorm)))); else slack=MAX(slack, cset.rhs[j]-classify_example(svmModel,cset.lhs[j])); } /**** if `error' add constraint and recompute ****/ dist=classify_example(svmModel,doc); ceps=MAX(ceps,margin-dist-slack); if(slack > (margin-dist+0.0001)) { printf("\nWARNING: Slack of most violated constraint is smaller than slack of working\n"); 
printf(" set! There is probably a bug in 'find_most_violated_constraint_*'.\n"); printf("Ex %d: slack=%f, newslack=%f\n",i,slack,margin-dist); /* exit(1); */ } if((dist+slack)<(margin-epsilon)) { if(struct_verbosity>=2) {printf("(%i,eps=%.2f) ",i,margin-dist-slack); fflush(stdout);} if(struct_verbosity==1) {printf("."); fflush(stdout);} /**** resize constraint matrix and add new constraint ****/ cset.m++; cset.lhs=(DOC **)realloc(cset.lhs,sizeof(DOC *)*cset.m); if(kparm->kernel_type == LINEAR_KERNEL) { diff=add_list_ss(fy); /* store difference vector directly */ if(sparm->slack_norm == 1) cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1, copy_svector(diff)); else if(sparm->slack_norm == 2) { /**** add squared slack variable to feature vector ****/ slackv[0].wnum=sizePsi+i; slackv[0].weight=1/(sqrt(2*svmCnorm)); slackv[1].wnum=0; /*terminator*/ slackvec=create_svector(slackv,NULL,1.0); cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1, add_ss(diff,slackvec)); free_svector(slackvec); } free_svector(diff); } else { /* kernel is used */ if(sparm->slack_norm == 1) cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1, copy_svector(fy)); else if(sparm->slack_norm == 2) exit(1); } cset.rhs=(double *)realloc(cset.rhs,sizeof(double)*cset.m); cset.rhs[cset.m-1]=margin; alpha=(double *)realloc(alpha,sizeof(double)*cset.m); alpha[cset.m-1]=0; alphahist=(long *)realloc(alphahist,sizeof(long)*cset.m); alphahist[cset.m-1]=optcount; newconstraints++; totconstraints++; } else { printf("+"); fflush(stdout); if(opti[i] != opti_round) { activenum--; opti[i]=opti_round; } } free_example(doc,0); free_svector(fy); /* this also free's fybar */ free_label(ybar); } /**** get new QP solution ****/ if((newconstraints >= sparm->newconstretrain) || ((newconstraints > 0) && (i == n-1)) || (new_precision && (i == n-1))) { if(struct_verbosity>=1) { printf("*");fflush(stdout); } rt2=get_runtime(); free_model(svmModel,0); svmModel=(MODEL *)my_malloc(sizeof(MODEL)); /* Always get a new kernel cache. 
It is not possible to use the same cache for two different training runs */ if(kparm->kernel_type != LINEAR_KERNEL) kcache=kernel_cache_init(MAX(cset.m,1),lparm->kernel_cache_size); /* Run the QP solver on cset. */ svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n, lparm,kparm,kcache,svmModel,alpha); if(kcache) kernel_cache_cleanup(kcache); /* Always add weight vector, in case part of the kernel is linear. If not, ignore the weight vector since its content is bogus. */ add_weight_vector_to_linear_model(svmModel); sm->svm_model=svmModel; sm->w=svmModel->lin_weights; /* short cut to weight vector */ optcount++; /* keep track of when each constraint was last active. constraints marked with -1 are not updated */ for(j=0;j<cset.m;j++) if((alphahist[j]>-1) && (alpha[j] != 0)) alphahist[j]=optcount; rt_opt+=MAX(get_runtime()-rt2,0); if(new_precision && (epsilon <= sparm->epsilon)) dont_stop=1; /* make sure we take one final pass */ new_precision=0; newconstraints=0; } rt_total+=MAX(get_runtime()-rt1,0); } /* end of example loop */ rt1=get_runtime(); if(struct_verbosity>=1) printf("(NumConst=%d, SV=%ld, CEps=%.4f, QPEps=%.4f)\n",cset.m, svmModel->sv_num-1,ceps,svmModel->maxdiff); /* Check if some of the linear constraints have not been active in a while. Those constraints are then removed to avoid bloating the working set beyond necessity. */ if(struct_verbosity>=2) printf("Reducing working set...");fflush(stdout); remove_inactive_constraints(&cset,alpha,optcount,alphahist, MAX(50,optcount-lastoptcount)); lastoptcount=optcount; if(struct_verbosity>=2) printf("done. 
(NumConst=%d)\n",cset.m); rt_total+=MAX(get_runtime()-rt1,0); } while(use_shrinking && (activenum > 0)); /* when using shrinking, repeat until all examples produced no constraint at least once */ } while(((totconstraints - old_totconstraints) > tolerance) || dont_stop); } while((epsilon > sparm->epsilon) || finalize_iteration(ceps,0,sample,sm,cset,alpha,sparm)); if(struct_verbosity>=1) { /**** compute sum of slacks ****/ /**** WARNING: If positivity constraints are used, then the maximum slack id is larger than what is allocated below ****/ slacks=(double *)my_malloc(sizeof(double)*(n+1)); for(i=0; i<=n; i++) { slacks[i]=0; } if(sparm->slack_norm == 1) { for(j=0;j<cset.m;j++) slacks[cset.lhs[j]->slackid]=MAX(slacks[cset.lhs[j]->slackid], cset.rhs[j]-classify_example(svmModel,cset.lhs[j])); } else if(sparm->slack_norm == 2) { for(j=0;j<cset.m;j++) slacks[cset.lhs[j]->slackid]=MAX(slacks[cset.lhs[j]->slackid], cset.rhs[j] -(classify_example(svmModel,cset.lhs[j]) -sm->w[sizePsi+cset.lhs[j]->slackid-1]/(sqrt(2*svmCnorm)))); } slacksum=0; for(i=1; i<=n; i++) slacksum+=slacks[i]; free(slacks); alphasum=0; for(i=0; i<cset.m; i++) alphasum+=alpha[i]*cset.rhs[i]; modellength=model_length_s(svmModel); dualitygap=(0.5*modellength*modellength+svmCnorm*(slacksum+n*ceps)) -(alphasum-0.5*modellength*modellength); printf("Final epsilon on KKT-Conditions: %.5f\n", MAX(svmModel->maxdiff,epsilon)); printf("Upper bound on duality gap: %.5f\n", dualitygap); printf("Dual objective value: dval=%.5f\n", alphasum-0.5*modellength*modellength); printf("Total number of constraints in final working set: %i (of %i)\n",(int)cset.m,(int)totconstraints); printf("Number of iterations: %d\n",numIt); printf("Number of calls to 'find_most_violated_constraint': %ld\n",argmax_count); if(sparm->slack_norm == 1) { printf("Number of SV: %ld \n",svmModel->sv_num-1); printf("Number of non-zero slack variables: %ld (out of %ld)\n", svmModel->at_upper_bound,n); printf("Norm of weight vector: 
|w|=%.5f\n",modellength); } else if(sparm->slack_norm == 2){ printf("Number of SV: %ld (including %ld at upper bound)\n", svmModel->sv_num-1,svmModel->at_upper_bound); printf("Norm of weight vector (including L2-loss): |w|=%.5f\n", modellength); } printf("Norm. sum of slack variables (on working set): sum(xi_i)/n=%.5f\n",slacksum/n); printf("Norm of longest difference vector: ||Psi(x,y)-Psi(x,ybar)||=%.5f\n", length_of_longest_document_vector(cset.lhs,cset.m,kparm)); printf("Runtime in cpu-seconds: %.2f (%.2f%% for QP, %.2f%% for Argmax, %.2f%% for Psi, %.2f%% for init)\n", rt_total/100.0, (100.0*rt_opt)/rt_total, (100.0*rt_viol)/rt_total, (100.0*rt_psi)/rt_total, (100.0*rt_init)/rt_total); } if(struct_verbosity>=4) printW(sm->w,sizePsi,n,lparm->svm_c); if(svmModel) { sm->svm_model=copy_model(svmModel); sm->w=sm->svm_model->lin_weights; /* short cut to weight vector */ } print_struct_learning_stats(sample,sm,cset,alpha,sparm); if(fycache) { for(i=0;i<n;i++) free_svector(fycache[i]); free(fycache); } if(svmModel) free_model(svmModel,0); free(alpha); free(alphahist); free(opti); free(cset.rhs); for(i=0;i<cset.m;i++) free_example(cset.lhs[i],1); free(cset.lhs); }
int main(int argc, char* argv[]) { double *w; /* weight vector */ long m, i; double C, epsilon; LEARN_PARM learn_parm; KERNEL_PARM kernel_parm; char trainfile[1024]; char modelfile[1024]; int MAX_ITER; /* new struct variables */ SVECTOR **fycache, *diff, *fy; EXAMPLE *ex; SAMPLE alldata; SAMPLE sample; SAMPLE val; STRUCT_LEARN_PARM sparm; STRUCTMODEL sm; double primal_obj; double stop_crit; char itermodelfile[2000]; /* self-paced learning variables */ double init_spl_weight; double spl_weight; double spl_factor; int *valid_examples; /* read input parameters */ my_read_input_parameters(argc, argv, trainfile, modelfile, &learn_parm, &kernel_parm, &sparm, &init_spl_weight, &spl_factor); epsilon = learn_parm.eps; C = learn_parm.svm_c; MAX_ITER = learn_parm.maxiter; /* read in examples */ alldata = read_struct_examples(trainfile,&sparm); int ntrain = (int) round(1.0*alldata.n); /* no validation set */ if(ntrain < alldata.n) { long *perm = randperm(alldata.n); sample = generate_train_set(alldata, perm, ntrain); val = generate_validation_set(alldata, perm, ntrain); free(perm); } else { sample = alldata; } ex = sample.examples; m = sample.n; /* initialization */ init_struct_model(alldata,&sm,&sparm,&learn_parm,&kernel_parm); w = create_nvector(sm.sizePsi); clear_nvector(w, sm.sizePsi); sm.w = w; /* establish link to w, as long as w does not change pointer */ /* some training information */ printf("C: %.8g\n", C); printf("spl weight: %.8g\n",init_spl_weight); printf("epsilon: %.8g\n", epsilon); printf("sample.n: %d\n", sample.n); printf("sm.sizePsi: %ld\n", sm.sizePsi); fflush(stdout); /* prepare feature vector cache for correct labels with imputed latent variables */ fycache = (SVECTOR**)malloc(m*sizeof(SVECTOR*)); for (i=0;i<m;i++) { fy = psi(ex[i].x, ex[i].y, &sm, &sparm); diff = add_list_ss(fy); free_svector(fy); fy = diff; fycache[i] = fy; } /* learn initial weight vector using all training examples */ valid_examples = (int *) malloc(m*sizeof(int)); /* errors for 
validation set */ double cur_loss, best_loss = DBL_MAX; int loss_iter; /* initializations */ spl_weight = init_spl_weight; /* solve biconvex self-paced learning problem */ primal_obj = alternate_convex_search(w, m, MAX_ITER, C, epsilon, fycache, ex, &sm, &sparm, valid_examples, spl_weight); printf("primal objective: %.4f\n", primal_obj); fflush(stdout); //alternate_convex_search(w, m, MAX_ITER, C, epsilon, fycache, ex, &sm, &sparm, valid_examples, spl_weight); int nValid = 0; for (i=0;i<m;i++) { if(valid_examples[i]) { nValid++; } } if(ntrain < alldata.n) { cur_loss = compute_current_loss(val,&sm,&sparm); printf("CURRENT LOSS: %f\n",cur_loss); } /* write structural model */ write_struct_model(modelfile, &sm, &sparm); // skip testing for the moment /* free memory */ free_struct_sample(alldata); if(ntrain < alldata.n) { free(sample.examples); free(val.examples); } free_struct_model(sm, &sparm); for(i=0;i<m;i++) { free_svector(fycache[i]); } free(fycache); free(valid_examples); return(0); }
int main(int argc, char* argv[]) { double avghingeloss; LABEL y; long i, correct; double weighted_correct; char testfile[1024]; char modelfile[1024]; char labelfile[1024]; char latentfile[1024]; char scorefile[1024]; FILE *flabel; FILE *flatent; FILE *fscore; STRUCTMODEL model; STRUCT_LEARN_PARM sparm; SAMPLE testsample; /* read input parameters */ read_input_parameters(argc,argv,testfile,modelfile,labelfile,latentfile,scorefile,model.kernel_info_file,model.filestub, &sparm); printf("C: %f\n",sparm.C); flabel = fopen(labelfile,"w"); flatent = fopen(latentfile,"w"); fscore = fopen(scorefile, "w"); init_struct_model(model.kernel_info_file, &model, &sparm); read_struct_model(modelfile, &model); /* read test examples */ printf("Reading test examples..."); fflush(stdout); testsample = read_struct_examples(testfile, &model, &sparm); printf("done.\n"); IMAGE_KERNEL_CACHE ** cached_images = init_cached_images(testsample.examples,&model); avghingeloss = 0.0; correct = 0; weighted_correct=0.0; int *valid_example_kernel = (int *) malloc(5*sizeof(int)); for(i = 0; i < model.num_kernels; i++) valid_example_kernel[i] = 1; double total_example_weight = 0; int num_distinct_examples = 0; int last_image_id = -1; LATENT_VAR h = make_latent_var(&model); double * scores = (double *)calloc(sparm.n_classes, sizeof(double)); for (i=0;i<testsample.n;i++) { while (testsample.examples[i].x.image_id == last_image_id) i++; last_image_id = testsample.examples[i].x.image_id; num_distinct_examples++; // if(finlatent) { // read_latent_var(&h,finlatent); //printf("%d %d\n",h.position_x,h.position_y); // } //printf("%f\n",sparm.C); struct timeval start_time; struct timeval finish_time; gettimeofday(&start_time, NULL); classify_struct_example(testsample.examples[i].x,&y,&h,cached_images,&model,&sparm,1); gettimeofday(&finish_time, NULL); double microseconds = 1e6 * (finish_time.tv_sec - start_time.tv_sec) + (finish_time.tv_usec - start_time.tv_usec); //printf("This ESS call took %f milliseconds.\n", 
microseconds/1e3); total_example_weight += testsample.examples[i].x.example_cost; //double hinge_l = get_hinge_l_from_pos_score(pos_score,testsample.examples[i].y); //printf("with a pos_score of %f, a label of %d we get a hinge_l of %f\n", pos_score, testsample.examples[i].y.label, hinge_l); // double weighted_hinge_l = hinge_l * testsample.examples[i].x.example_cost; //avghingeloss += weighted_hinge_l; //if (hinge_l<1) { //A classification is considered "correct" if it guesses one of the objects in the image if (y.label == testsample.examples[i].y.label || testsample.examples[i].x.also_correct[y.label]) { correct++; weighted_correct+=testsample.examples[i].x.example_cost; } print_label(y, flabel); fprintf(flabel,"\n"); fflush(flabel); print_latent_var(testsample.examples[i].x, h, flatent); get_class_scores(testsample.examples[i].x, cached_images, scores, &model, &sparm); fprintf(fscore, "%s ", testsample.examples[i].x.image_path); for (int j = 0; j < sparm.n_classes; ++j) { fprintf(fscore, "%f ", scores[j]); } fprintf(fscore, "\n"); } free_latent_var(h); fclose(flabel); fclose(flatent); free(scores); //double w_cost = regularizaton_cost(model.w_curr.get_vec(), model.sizePsi); //avghingeloss = avghingeloss/testsample.n; printf("\n"); //printf("Objective Value with C=%f is %f\n\n\n", sparm.C, (sparm.C * avghingeloss) + w_cost); //printf("Average hinge loss on dataset: %.4f\n", avghingeloss); printf("Zero/one error on test set: %.4f\n", 1.0 - ((float) correct) / (1.0 * num_distinct_examples)); printf("Weighted zero/one error on the test set %.4f\n", 1.0 - (weighted_correct/total_example_weight)); printf("zeroone %.4f weightedzeroone %.4f\n", 1.0 - ((float) correct) / (1.0 * num_distinct_examples), 1.0 - (weighted_correct/total_example_weight)); fclose(fscore); free_cached_images(cached_images, &model); //free_struct_sample(testsample); free_struct_model(model,&sparm); return(0); }
int main(int argc, char* argv[]) { double *w; /* weight vector */ int outer_iter; long m, i; double C, epsilon; LEARN_PARM learn_parm; KERNEL_PARM kernel_parm; char trainfile[1024]; char modelfile[1024]; int MAX_ITER; /* new struct variables */ SVECTOR **fycache, *diff, *fy; EXAMPLE *ex; SAMPLE sample; STRUCT_LEARN_PARM sparm; STRUCTMODEL sm; //double decrement; double primal_obj;//, last_primal_obj; //double cooling_eps; //double stop_crit; DebugConfiguration::VerbosityLevel = VerbosityLevel::None; /* read input parameters */ my_read_input_parameters(argc, argv, trainfile, modelfile, &learn_parm, &kernel_parm, &sparm); epsilon = learn_parm.eps; C = learn_parm.svm_c; MAX_ITER = learn_parm.maxiter; /* read in examples */ sample = read_struct_examples(trainfile,&sparm); ex = sample.examples; m = sample.n; /* initialization */ init_struct_model(sample,&sm,&sparm,&learn_parm,&kernel_parm); w = sm.w; //w = create_nvector(sm.sizePsi); //clear_nvector(w, sm.sizePsi); //sm.w = w; /* establish link to w, as long as w does not change pointer */ /* some training information */ printf("C: %.8g\n", C); printf("epsilon: %.8g\n", epsilon); printf("sample.n: %ld\n", sample.n); printf("sm.sizePsi: %ld\n", sm.sizePsi); fflush(stdout); /* impute latent variable for first iteration */ init_latent_variables(&sample,&learn_parm,&sm,&sparm); /* prepare feature vector cache for correct labels with imputed latent variables */ fycache = (SVECTOR**)malloc(m*sizeof(SVECTOR*)); for (i=0; i<m; i++) { fy = psi(ex[i].x, ex[i].y, ex[i].h, &sm, &sparm); /* DEBUG */ printf("true_psi[%d]=", i); for (int j = 0; j < sm.sizePsi; ++j) printf("%.4lf ", fy->words[j].weight); printf("\n"); diff = add_list_ss(fy); free_svector(fy); fy = diff; fycache[i] = fy; } /* outer loop: latent variable imputation */ outer_iter = 1; //last_primal_obj = 0; //decrement = 0; //cooling_eps = 0.5*C*epsilon; //while ((outer_iter<=MIN_OUTER_ITER)||((!stop_crit)&&(outer_iter<MAX_OUTER_ITER))) { while (outer_iter<MAX_OUTER_ITER) 
{ LearningTracker::NextOuterIteration(); printf("OUTER ITER %d\n", outer_iter); /* cutting plane algorithm */ primal_obj = cutting_plane_algorithm(w, m, MAX_ITER, C, /*cooling_eps, */fycache, ex, &sm, &sparm); /* compute decrement in objective in this outer iteration */ /* decrement = last_primal_obj - primal_obj; last_primal_obj = primal_obj; printf("primal objective: %.4f\n", primal_obj); printf("decrement: %.4f\n", decrement); fflush(stdout); stop_crit = (decrement<C*epsilon)&&(cooling_eps<0.5*C*epsilon+1E-8); cooling_eps = -decrement*0.01; cooling_eps = MAX(cooling_eps, 0.5*C*epsilon); printf("cooling_eps: %.8g\n", cooling_eps); */ /* print new weights */ printf("W="); for (i = 1; i <= sm.sizePsi; ++i) printf("%.3f ", sm.w[i]); printf("\n"); /* Save model */ char modelfile_tmp[1024]; sprintf(modelfile_tmp, "%s.%d", modelfile, outer_iter); write_struct_model(modelfile_tmp, &sm, &sparm); /* impute latent variable using updated weight vector */ for (i=0; i<m; i++) { free_latent_var(ex[i].h); ex[i].h = infer_latent_variables(ex[i].x, ex[i].y, &sm, &sparm); } /* re-compute feature vector cache */ for (i=0; i<m; i++) { free_svector(fycache[i]); fy = psi(ex[i].x, ex[i].y, ex[i].h, &sm, &sparm); /* DEBUG */ printf("true_psi[%d]=", i); for (int j = 0; j < sm.sizePsi; ++j) printf("%.4lf ", fy->words[j].weight); printf("\n"); diff = add_list_ss(fy); free_svector(fy); fy = diff; fycache[i] = fy; } outer_iter++; } // end outer loop /* write structural model */ write_struct_model(modelfile, &sm, &sparm); // skip testing for the moment /* free memory */ free_struct_sample(sample); free_struct_model(sm, &sparm); for(i=0; i<m; i++) { free_svector(fycache[i]); } free(fycache); return(0); }
void svm_learn_struct_joint(SAMPLE sample, STRUCT_LEARN_PARM *sparm, LEARN_PARM *lparm, KERNEL_PARM *kparm, STRUCTMODEL *sm, int alg_type) { int i,j; int numIt=0; long argmax_count=0; long totconstraints=0; long kernel_type_org; double epsilon,epsilon_cached; double lossval,factor,dist; double margin=0; double slack, slacksum, ceps; double dualitygap,modellength,alphasum; long sizePsi; double *alpha=NULL; long *alphahist=NULL,optcount=0; CONSTSET cset; SVECTOR *diff=NULL; double *diff_n=NULL; SVECTOR *fy, *fybar, *f, **fycache, *lhs; MODEL *svmModel=NULL; LABEL ybar; DOC *doc; long n=sample.n; EXAMPLE *ex=sample.examples; double rt_total=0,rt_opt=0,rt_init=0,rt_psi=0,rt_viol=0,rt_kernel=0; double rt1,rt2; double progress,progress_old; /* SVECTOR ***fydelta_cache=NULL; double **loss_cache=NULL; int cache_size=0; */ CCACHE *ccache=NULL; int cached_constraint; rt1=get_runtime(); init_struct_model(sample,sm,sparm,lparm,kparm); sizePsi=sm->sizePsi+1; /* sm must contain size of psi on return */ if(sparm->slack_norm == 1) { lparm->svm_c=sparm->C; /* set upper bound C */ lparm->sharedslack=1; } else if(sparm->slack_norm == 2) { printf("ERROR: The joint algorithm does not apply to L2 slack norm!"); fflush(stdout); exit(0); } else { printf("ERROR: Slack norm must be L1 or L2!"); fflush(stdout); exit(0); } lparm->biased_hyperplane=0; /* set threshold to zero */ epsilon=100.0; /* start with low precision and increase later */ epsilon_cached=epsilon; /* epsilon to use for iterations using constraints constructed from the constraint cache */ cset=init_struct_constraints(sample, sm, sparm); if(cset.m > 0) { alpha=(double *)realloc(alpha,sizeof(double)*cset.m); alphahist=(long *)realloc(alphahist,sizeof(long)*cset.m); for(i=0; i<cset.m; i++) { alpha[i]=0; alphahist[i]=-1; /* -1 makes sure these constraints are never removed */ } } kparm->gram_matrix=NULL; if((alg_type == DUAL_ALG) || (alg_type == DUAL_CACHE_ALG)) kparm->gram_matrix=init_kernel_matrix(&cset,kparm); /* set initial 
model and slack variables */ svmModel=(MODEL *)my_malloc(sizeof(MODEL)); lparm->epsilon_crit=epsilon; svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n, lparm,kparm,NULL,svmModel,alpha); add_weight_vector_to_linear_model(svmModel); sm->svm_model=svmModel; sm->w=svmModel->lin_weights; /* short cut to weight vector */ /* create a cache of the feature vectors for the correct labels */ fycache=(SVECTOR **)malloc(n*sizeof(SVECTOR *)); for(i=0;i<n;i++) { fy=psi(ex[i].x,ex[i].y,sm,sparm); if(kparm->kernel_type == LINEAR) { diff=add_list_ss(fy); /* store difference vector directly */ free_svector(fy); fy=diff; } fycache[i]=fy; } /* initialize the constraint cache */ if(alg_type == DUAL_CACHE_ALG) { ccache=create_constraint_cache(sample,sparm); } rt_init+=MAX(get_runtime()-rt1,0); rt_total+=MAX(get_runtime()-rt1,0); /*****************/ /*** main loop ***/ /*****************/ do { /* iteratively find and add constraints to working set */ if(struct_verbosity>=1) { printf("Iter %i: ",++numIt); fflush(stdout); } rt1=get_runtime(); /**** compute current slack ****/ slack=0; for(j=0;j<cset.m;j++) slack=MAX(slack,cset.rhs[j]-classify_example(svmModel,cset.lhs[j])); /**** find a violated joint constraint ****/ lhs=NULL; dist=0; if(alg_type == DUAL_CACHE_ALG) { /* see if it is possible to construct violated constraint from cache */ update_constraint_cache_for_model(ccache, svmModel); dist=find_most_violated_joint_constraint_in_cache(ccache,&lhs,&margin); } rt_total+=MAX(get_runtime()-rt1,0); /* Is there a sufficiently violated constraint in cache? 
*/ if(dist-slack > MAX(epsilon/10,sparm->epsilon)) { /* use constraint from cache */ rt1=get_runtime(); cached_constraint=1; if(kparm->kernel_type == LINEAR) { diff=add_list_ns(lhs); /* Linear case: compute weighted sum */ free_svector_shallow(lhs); } else { /* Non-linear case: make sure we have deep copy for cset */ diff=copy_svector(lhs); free_svector_shallow(lhs); } rt_total+=MAX(get_runtime()-rt1,0); } else { /* do not use constraint from cache */ rt1=get_runtime(); cached_constraint=0; if(lhs) free_svector_shallow(lhs); lhs=NULL; if(kparm->kernel_type == LINEAR) { diff_n=create_nvector(sm->sizePsi); clear_nvector(diff_n,sm->sizePsi); } margin=0; progress=0; progress_old=progress; rt_total+=MAX(get_runtime()-rt1,0); /**** find most violated joint constraint ***/ for(i=0; i<n; i++) { rt1=get_runtime(); progress+=10.0/n; if((struct_verbosity==1) && (((int)progress_old) != ((int)progress))) {printf(".");fflush(stdout); progress_old=progress;} if(struct_verbosity>=2) {printf("."); fflush(stdout);} rt2=get_runtime(); argmax_count++; if(sparm->loss_type == SLACK_RESCALING) ybar=find_most_violated_constraint_slackrescaling(ex[i].x, ex[i].y,sm, sparm); else ybar=find_most_violated_constraint_marginrescaling(ex[i].x, ex[i].y,sm, sparm); rt_viol+=MAX(get_runtime()-rt2,0); if(empty_label(ybar)) { printf("ERROR: empty label was returned for example (%i)\n",i); /* exit(1); */ continue; } /**** get psi(x,y) and psi(x,ybar) ****/ rt2=get_runtime(); fy=copy_svector(fycache[i]); /*<= fy=psi(ex[i].x,ex[i].y,sm,sparm);*/ fybar=psi(ex[i].x,ybar,sm,sparm); rt_psi+=MAX(get_runtime()-rt2,0); lossval=loss(ex[i].y,ybar,sparm); free_label(ybar); /**** scale feature vector and margin by loss ****/ if(sparm->loss_type == SLACK_RESCALING) factor=lossval/n; else /* do not rescale vector for */ factor=1.0/n; /* margin rescaling loss type */ for(f=fy;f;f=f->next) f->factor*=factor; for(f=fybar;f;f=f->next) f->factor*=-factor; append_svector_list(fybar,fy); /* compute fy-fybar */ /**** add 
current fy-fybar and loss to cache ****/ if(alg_type == DUAL_CACHE_ALG) { if(kparm->kernel_type == LINEAR) add_constraint_to_constraint_cache(ccache,svmModel,i, add_list_ss(fybar), lossval/n,sparm->ccache_size); else add_constraint_to_constraint_cache(ccache,svmModel,i, copy_svector(fybar), lossval/n,sparm->ccache_size); } /**** add current fy-fybar to constraint and margin ****/ if(kparm->kernel_type == LINEAR) { add_list_n_ns(diff_n,fybar,1.0); /* add fy-fybar to sum */ free_svector(fybar); } else { append_svector_list(fybar,lhs); /* add fy-fybar to vector list */ lhs=fybar; } margin+=lossval/n; /* add loss to rhs */ rt_total+=MAX(get_runtime()-rt1,0); } /* end of example loop */ rt1=get_runtime(); /* create sparse vector from dense sum */ if(kparm->kernel_type == LINEAR) { diff=create_svector_n(diff_n,sm->sizePsi,"",1.0); free_nvector(diff_n); } else { diff=lhs; } rt_total+=MAX(get_runtime()-rt1,0); } /* end of finding most violated joint constraint */ rt1=get_runtime(); /**** if `error', then add constraint and recompute QP ****/ doc=create_example(cset.m,0,1,1,diff); dist=classify_example(svmModel,doc); ceps=MAX(0,margin-dist-slack); if(slack > (margin-dist+0.000001)) { printf("\nWARNING: Slack of most violated constraint is smaller than slack of working\n"); printf(" set! 
There is probably a bug in 'find_most_violated_constraint_*'.\n"); printf("slack=%f, newslack=%f\n",slack,margin-dist); /* exit(1); */ } if(ceps > sparm->epsilon) { /**** resize constraint matrix and add new constraint ****/ cset.lhs=(DOC **)realloc(cset.lhs,sizeof(DOC *)*(cset.m+1)); if(sparm->slack_norm == 1) cset.lhs[cset.m]=create_example(cset.m,0,1,1,diff); else if(sparm->slack_norm == 2) exit(1); cset.rhs=(double *)realloc(cset.rhs,sizeof(double)*(cset.m+1)); cset.rhs[cset.m]=margin; alpha=(double *)realloc(alpha,sizeof(double)*(cset.m+1)); alpha[cset.m]=0; alphahist=(long *)realloc(alphahist,sizeof(long)*(cset.m+1)); alphahist[cset.m]=optcount; cset.m++; totconstraints++; if((alg_type == DUAL_ALG) || (alg_type == DUAL_CACHE_ALG)) { if(struct_verbosity>=1) { printf(":");fflush(stdout); } rt2=get_runtime(); kparm->gram_matrix=update_kernel_matrix(kparm->gram_matrix,cset.m-1, &cset,kparm); rt_kernel+=MAX(get_runtime()-rt2,0); } /**** get new QP solution ****/ if(struct_verbosity>=1) { printf("*");fflush(stdout); } rt2=get_runtime(); /* set svm precision so that higher than eps of most violated constr */ if(cached_constraint) { epsilon_cached=MIN(epsilon_cached,MAX(ceps,sparm->epsilon)); lparm->epsilon_crit=epsilon_cached/2; } else { epsilon=MIN(epsilon,MAX(ceps,sparm->epsilon)); /* best eps so far */ lparm->epsilon_crit=epsilon/2; epsilon_cached=epsilon; } free_model(svmModel,0); svmModel=(MODEL *)my_malloc(sizeof(MODEL)); /* Run the QP solver on cset. */ kernel_type_org=kparm->kernel_type; if((alg_type == DUAL_ALG) || (alg_type == DUAL_CACHE_ALG)) kparm->kernel_type=GRAM; /* use kernel stored in kparm */ svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n, lparm,kparm,NULL,svmModel,alpha); kparm->kernel_type=kernel_type_org; svmModel->kernel_parm.kernel_type=kernel_type_org; /* Always add weight vector, in case part of the kernel is linear. If not, ignore the weight vector since its content is bogus. 
*/ add_weight_vector_to_linear_model(svmModel); sm->svm_model=svmModel; sm->w=svmModel->lin_weights; /* short cut to weight vector */ optcount++; /* keep track of when each constraint was last active. constraints marked with -1 are not updated */ for(j=0;j<cset.m;j++) if((alphahist[j]>-1) && (alpha[j] != 0)) alphahist[j]=optcount; rt_opt+=MAX(get_runtime()-rt2,0); /* Check if some of the linear constraints have not been active in a while. Those constraints are then removed to avoid bloating the working set beyond necessity. */ if(struct_verbosity>=2) printf("Reducing working set...");fflush(stdout); remove_inactive_constraints(&cset,alpha,optcount,alphahist,50); if(struct_verbosity>=2) printf("done. (NumConst=%d) ",cset.m); } else { free_svector(diff); } if(struct_verbosity>=1) printf("(NumConst=%d, SV=%ld, CEps=%.4f, QPEps=%.4f)\n",cset.m, svmModel->sv_num-1,ceps,svmModel->maxdiff); free_example(doc,0); rt_total+=MAX(get_runtime()-rt1,0); } while((ceps > sparm->epsilon) || finalize_iteration(ceps,cached_constraint,sample,sm,cset,alpha,sparm) ); if(struct_verbosity>=1) { /**** compute sum of slacks ****/ /**** WARNING: If positivity constraints are used, then the maximum slack id is larger than what is allocated below ****/ slacksum=0; if(sparm->slack_norm == 1) { for(j=0;j<cset.m;j++) slacksum=MAX(slacksum, cset.rhs[j]-classify_example(svmModel,cset.lhs[j])); } else if(sparm->slack_norm == 2) { exit(1); } alphasum=0; for(i=0; i<cset.m; i++) alphasum+=alpha[i]*cset.rhs[i]; modellength=model_length_s(svmModel,kparm); dualitygap=(0.5*modellength*modellength+sparm->C*(slacksum+ceps)) -(alphasum-0.5*modellength*modellength); printf("Final epsilon on KKT-Conditions: %.5f\n", MAX(svmModel->maxdiff,ceps)); printf("Upper bound on duality gap: %.5f\n", dualitygap); printf("Dual objective value: dval=%.5f\n", alphasum-0.5*modellength*modellength); printf("Total number of constraints in final working set: %i (of %i)\n",(int)cset.m,(int)totconstraints); printf("Number of 
iterations: %d\n",numIt); printf("Number of calls to 'find_most_violated_constraint': %ld\n",argmax_count); if(sparm->slack_norm == 1) { printf("Number of SV: %ld \n",svmModel->sv_num-1); printf("Norm of weight vector: |w|=%.5f\n", model_length_s(svmModel,kparm)); } else if(sparm->slack_norm == 2){ printf("Number of SV: %ld (including %ld at upper bound)\n", svmModel->sv_num-1,svmModel->at_upper_bound); printf("Norm of weight vector (including L2-loss): |w|=%.5f\n", model_length_s(svmModel,kparm)); } printf("Value of slack variable (on working set): xi=%.5f\n",slacksum); printf("Norm of longest difference vector: ||Psi(x,y)-Psi(x,ybar)||=%.5f\n", length_of_longest_document_vector(cset.lhs,cset.m,kparm)); printf("Runtime in cpu-seconds: %.2f (%.2f%% for QP, %.2f%% for kernel, %.2f%% for Argmax, %.2f%% for Psi, %.2f%% for init)\n", rt_total/100.0, (100.0*rt_opt)/rt_total, (100.0*rt_kernel)/rt_total, (100.0*rt_viol)/rt_total, (100.0*rt_psi)/rt_total, (100.0*rt_init)/rt_total); } if(ccache) { long cnum=0; CCACHEELEM *celem; for(i=0;i<n;i++) for(celem=ccache->constlist[i];celem;celem=celem->next) cnum++; printf("Final number of constraints in cache: %ld\n",cnum); } if(struct_verbosity>=4) printW(sm->w,sizePsi,n,lparm->svm_c); if(svmModel) { sm->svm_model=copy_model(svmModel); sm->w=sm->svm_model->lin_weights; /* short cut to weight vector */ } print_struct_learning_stats(sample,sm,cset,alpha,sparm); if(ccache) free_constraint_cache(ccache); for(i=0;i<n;i++) free_svector(fycache[i]); free(fycache); if(svmModel) free_model(svmModel,0); free(alpha); free(alphahist); free(cset.rhs); for(i=0;i<cset.m;i++) free_example(cset.lhs[i],1); free(cset.lhs); if(kparm->gram_matrix) free_matrix(kparm->gram_matrix); }