/*
 * Score one example with a linear model.
 *
 * The result is the sparse product of model->lin_weights with the words of
 * the example's first vector (ex->vectors[0]), minus the threshold model->b.
 *
 * Preconditions carried over from the original notes:
 *   - the model must already have its linear weight vector computed;
 *   - no feature number in the example may be larger than the weight vector.
 */
double classify_example_linear(MODEL *model, DOC *ex)
{
  return (double)(sprod_ns(model->lin_weights, ex->vectors[0]->words)
                  - model->b);
}
/*
 * Score one example with a linear model.
 *
 * Every vector in the example's fvec list contributes
 * factor * <model->lin_weights, vector>; the threshold model->b is subtracted
 * from the accumulated total.
 *
 * Preconditions carried over from the original notes:
 *   - the model must already have its linear weight vector computed
 *     (use: add_weight_vector_to_linear_model(&model));
 *   - no feature number in the example may be larger than the weight vector.
 */
double classify_example_linear(MODEL *model, DOC *ex)
{
  double score = 0;
  SVECTOR *part = ex->fvec;

  while (part != NULL) {
    score += part->factor * sprod_ns(model->lin_weights, part);
    part = part->next;
  }

  return score - model->b;
}
/*
 * cutting_plane_algorithm -- inner-loop solver that grows a working set of
 * constraints returned by find_cutting_plane() and, after each addition,
 * re-solves a QP (mosek_qp_optimize) stabilised around a centre vector w_b.
 *
 * Parameters, as used by the body:
 *   w          in/out weight vector, read and written at indices
 *              1..sm->sizePsi.  It seeds w_b on entry, is overwritten by
 *              every QP solution, and receives the final w_b before return.
 *   MAX_ITER   upper bound on the number of loop iterations.
 *   C          weight of the `value` term in the objective; also handed to
 *              the QP solver and used as max_rho.
 *   epsilon    the loop only continues while expected_descent < -epsilon.
 *   Cdash, numChunks
 *              the `margin` term enters the objective as Cdash*margin/numChunks.
 *   w_prev     sprod_ns(w_prev, constraint) is subtracted from `margin` after
 *              every find_cutting_plane() call.
 *   sm         supplies sizePsi and is forwarded to find_cutting_plane().
 *   m, fycache, ex, sparm, tmpdir, trainfile, frac_sim, Fweight,
 *   dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation,
 *   datasetStartIdx, chunkSz, eid, chunkid
 *              only forwarded to find_cutting_plane().
 *
 * Returns primal_obj_b, the objective value recorded for the centre w_b.
 *
 * Objective used throughout:
 *   0.5 * sprod_nn(v, v) + C * value + Cdash * margin / numChunks
 * with value = margin2 - sprod_ns(w, constraint); margin and margin2 are
 * both outputs of find_cutting_plane().
 *
 * One loop iteration:
 *   1. Append new_constraint to dXc and grow delta (set to margin2), alpha,
 *      idle, proximal_rhs, cut_error, gammaG0 and the Gram matrix G (entries
 *      from sprod_ss).
 *   2. Refresh gammaG0[i] = sprod_ns(w_b, dXc[i]->fvec).  While null_step is
 *      still 1 only the newest entry is computed; null_step is cleared by the
 *      first serious step and is never set back to 1 in this function, so
 *      from then on all entries are recomputed on every iteration.
 *   3. proximal_rhs[i] = delta[i] - rho/(1+rho) * gammaG0[i]; solve the QP.
 *   4. Rebuild w from the constraints with alpha[j] > C*ALPHA_THRESHOLD, each
 *      scaled by alpha[j]/(1+rho), then combine with w_b through
 *      add_vector_nn(w, w_b, sizePsi, rho/(1+rho)).
 *   5. Fetch the next cutting plane and recompute primal_obj.
 *   6. Step classification:
 *        - primal_obj < primal_obj_b + kappa*expected_descent and
 *          (gTd > m2*v_k or rho at its minimum): serious step -- cut_error is
 *          re-based, w_b and primal_obj_b take the new values.
 *        - same decrease test passes but the extra condition fails:
 *          rho = MAX(rho/10, min_rho).
 *        - decrease test fails: rho = MIN(10*rho, max_rho) when the newest
 *          cut_error exceeds m3*last_sigma_k and |obj_difference| exceeds
 *          last_z_k_norm + last_sigma_k; otherwise rho is left alone.
 *   7. suff_decrease_cond is raised (ending the loop) once
 *      primal_obj_b/initial_primal_obj < 1 - decrease_proportion.
 *   8. Every CLEANUP_CHECK iterations resize_cleanup() recomputes size_active.
 *
 * NOTE(review): the return code r of mosek_qp_optimize is stored but never
 * inspected here -- confirm that a failed solve cannot leave alpha unusable.
 * NOTE(review): the final clean-up calls free_example(dXc[j],0); nothing
 * visible in this function releases dXc[j]->fvec -- confirm whether the
 * constraint vectors are freed elsewhere or leak.
 * NOTE(review): dXc entries come from an unchecked malloc and only fvec,
 * slackid and costfactor are assigned -- confirm no other DOC field is read.
 */
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, char *tmpdir, char * trainfile, double frac_sim, double Fweight, char *dataset_stats_file, double rho_admm, long isExhaustive, long isLPrelaxation, double Cdash, int datasetStartIdx, int chunkSz, int eid, int chunkid, double *w_prev, int numChunks) { // printf("Addr. of w (inside cp_algo) %x\t%x\n",w,sm->w); long i,j; double xi; double *alpha; double **G; /* Gram matrix */ DOC **dXc; /* constraint matrix */ double *delta; /* rhs of constraints */ SVECTOR *new_constraint; double dual_obj, alphasum; int iter, size_active; double value; int r; int *idle; /* for cleaning up */ double margin; double primal_obj; double *proximal_rhs; double *gammaG0=NULL; double min_rho = 0.001; double max_rho; double serious_counter=0; double rho = 1.0; /* temporarily set it to 1 first */ double expected_descent, primal_obj_b=-1, reg_master_obj; int null_step=1; double *w_b; double kappa=0.1; double temp_var; double proximal_term, primal_lower_bound; double v_k; double obj_difference; double *cut_error; // cut_error[i] = alpha_{k,i} at current center x_k double sigma_k; double m2 = 0.2; double m3 = 0.9; double gTd; double last_sigma_k=0; double initial_primal_obj; int suff_decrease_cond=0; double decrease_proportion = 0.2; // start from 0.2 first double z_k_norm; double last_z_k_norm=0; w_b = create_nvector(sm->sizePsi); clear_nvector(w_b,sm->sizePsi); /* warm start */ for (i=1;i<sm->sizePsi+1;i++) { w_b[i] = w[i]; } iter = 0; size_active = 0; xi = 0.0; alpha = NULL; G = NULL; dXc = NULL; delta = NULL; idle = NULL; proximal_rhs = NULL; cut_error = NULL; printf("ITER 0 \n(before cutting plane) \n"); double margin2; new_constraint = find_cutting_plane (ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile, frac_sim, Fweight, dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation, &margin2, datasetStartIdx, chunkSz, eid, 
chunkid); value = margin2 - sprod_ns(w, new_constraint); margin -= sprod_ns(w_prev, new_constraint); //(Ajay: ONLINE LEARNING) IMPT NOTE --> constant addition to the loss .. // model score using w_prev values ('-' is used because the terms are reversed in the code) primal_obj_b = 0.5*sprod_nn(w_b,w_b,sm->sizePsi)+C*value + Cdash*margin/numChunks; // Ajay: Change in obj involing both hamming and F1 loss primal_obj = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value + Cdash*margin/numChunks; // Ajay: Change in obj involing both hamming and F1 loss; primal_lower_bound = 0; expected_descent = -primal_obj_b; initial_primal_obj = primal_obj_b; max_rho = C; printf("Running CCCP inner loop solver: \n"); fflush(stdout); time_t iter_start, iter_end; while ((!suff_decrease_cond)&&(expected_descent<-epsilon)&&(iter<MAX_ITER)) { iter+=1; size_active+=1; time(&iter_start); #if (DEBUG_LEVEL>0) printf("ITER %d\n", iter); #endif printf("."); fflush(stdout); /* add constraint */ dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active); assert(dXc!=NULL); dXc[size_active-1] = (DOC*)malloc(sizeof(DOC)); dXc[size_active-1]->fvec = new_constraint; dXc[size_active-1]->slackid = 1; // only one common slackid (one-slack) dXc[size_active-1]->costfactor = 1.0; delta = (double*)realloc(delta, sizeof(double)*size_active); assert(delta!=NULL); delta[size_active-1] = margin2; // Ajay: changing for the formulation combining hamming and F1loss alpha = (double*)realloc(alpha, sizeof(double)*size_active); assert(alpha!=NULL); alpha[size_active-1] = 0.0; idle = (int*)realloc(idle, sizeof(int)*size_active); assert(idle!=NULL); idle[size_active-1] = 0; /* proximal point */ proximal_rhs = (double*)realloc(proximal_rhs, sizeof(double)*size_active); assert(proximal_rhs!=NULL); cut_error = (double*)realloc(cut_error, sizeof(double)*size_active); assert(cut_error!=NULL); // note g_i = - new_constraint cut_error[size_active-1] = C*(sprod_ns(w_b, new_constraint) - sprod_ns(w, new_constraint)); cut_error[size_active-1] += 
(primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi)); cut_error[size_active-1] -= (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi)); gammaG0 = (double*)realloc(gammaG0, sizeof(double)*size_active); assert(gammaG0!=NULL); /* update Gram matrix */ G = (double**)realloc(G, sizeof(double*)*size_active); assert(G!=NULL); G[size_active-1] = NULL; for (j=0;j<size_active;j++) { G[j] = (double*)realloc(G[j], sizeof(double)*size_active); assert(G[j]!=NULL); } for (j=0;j<size_active-1;j++) { G[size_active-1][j] = sprod_ss(dXc[size_active-1]->fvec, dXc[j]->fvec); G[j][size_active-1] = G[size_active-1][j]; } G[size_active-1][size_active-1] = sprod_ss(dXc[size_active-1]->fvec,dXc[size_active-1]->fvec); /* update gammaG0 */ if (null_step==1) { gammaG0[size_active-1] = sprod_ns(w_b, dXc[size_active-1]->fvec); } else { for (i=0;i<size_active;i++) { gammaG0[i] = sprod_ns(w_b, dXc[i]->fvec); } } /* update proximal_rhs */ for (i=0;i<size_active;i++) { proximal_rhs[i] = delta[i] - rho/(1+rho)*gammaG0[i]; } /* solve QP to update alpha */ dual_obj = 0; time_t mosek_start, mosek_end; time(&mosek_start); r = mosek_qp_optimize(G, proximal_rhs, alpha, (long) size_active, C, &dual_obj,rho); time(&mosek_end); #if(DEBUG_LEVEL == 1) print_time(mosek_start, mosek_end, "Mosek solver"); #endif /* DEBUG */ //printf("r: %d\n", r); fflush(stdout); /* END DEBUG */ clear_nvector(w,sm->sizePsi); for (j=0;j<size_active;j++) { if (alpha[j]>C*ALPHA_THRESHOLD) { add_vector_ns(w,dXc[j]->fvec,alpha[j]/(1+rho)); } } z_k_norm = sqrt(sprod_nn(w,w,sm->sizePsi)); add_vector_nn(w, w_b, sm->sizePsi, rho/(1+rho)); /* detect if step size too small */ sigma_k = 0; alphasum = 0; for (j=0;j<size_active;j++) { sigma_k += alpha[j]*cut_error[j]; alphasum+=alpha[j]; } sigma_k/=C; gTd = -C*(sprod_ns(w,new_constraint) - sprod_ns(w_b,new_constraint)); #if (DEBUG_LEVEL>0) for (j=0;j<size_active;j++) { printf("alpha[%d]: %.8g, cut_error[%d]: %.8g\n", j, alpha[j], j, cut_error[j]); } printf("sigma_k: %.8g\n", sigma_k); printf("alphasum: 
%.8g\n", alphasum); printf("g^T d: %.8g\n", gTd); fflush(stdout); #endif /* update cleanup information */ for (j=0;j<size_active;j++) { if (alpha[j]<ALPHA_THRESHOLD*C) { idle[j]++; } else { idle[j]=0; } } new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile, frac_sim, Fweight, dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation, &margin2, datasetStartIdx, chunkSz, eid, chunkid); // new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile, frac_sim, Fweight, dataset_stats_file, rho); value = margin2 - sprod_ns(w, new_constraint); margin -= sprod_ns(w_prev, new_constraint); //(Ajay: ONLINE LEARNING) IMPT NOTE --> constant addition to the loss .. // model score using w_prev values ('-' is used because the terms are reversed in the code) /* print primal objective */ primal_obj = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value + Cdash*margin/numChunks; // Ajay: Change in obj involing both hamming and F1 loss; #if (DEBUG_LEVEL>0) printf("ITER PRIMAL_OBJ %.4f\n", primal_obj); fflush(stdout); #endif temp_var = sprod_nn(w_b,w_b,sm->sizePsi); proximal_term = 0.0; for (i=1;i<sm->sizePsi+1;i++) { proximal_term += (w[i]-w_b[i])*(w[i]-w_b[i]); } reg_master_obj = -dual_obj+0.5*rho*temp_var/(1+rho); expected_descent = reg_master_obj - primal_obj_b; v_k = (reg_master_obj - proximal_term*rho/2) - primal_obj_b; primal_lower_bound = MAX(primal_lower_bound, reg_master_obj - 0.5*rho*(1+rho)*proximal_term); #if (DEBUG_LEVEL>0) printf("ITER REG_MASTER_OBJ: %.4f\n", reg_master_obj); printf("ITER EXPECTED_DESCENT: %.4f\n", expected_descent); printf("ITER PRIMLA_OBJ_B: %.4f\n", primal_obj_b); printf("ITER RHO: %.4f\n", rho); printf("ITER ||w-w_b||^2: %.4f\n", proximal_term); printf("ITER PRIMAL_LOWER_BOUND: %.4f\n", primal_lower_bound); printf("ITER V_K: %.4f\n", v_k); #endif obj_difference = primal_obj - primal_obj_b; if (primal_obj<primal_obj_b+kappa*expected_descent) { /* extra condition to be met */ if 
((gTd>m2*v_k)||(rho<min_rho+1E-8)) { #if (DEBUG_LEVEL>0) printf("SERIOUS STEP\n"); #endif /* update cut_error */ for (i=0;i<size_active;i++) { cut_error[i] -= (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi)); cut_error[i] -= C*sprod_ns(w_b, dXc[i]->fvec); cut_error[i] += (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi)); cut_error[i] += C*sprod_ns(w, dXc[i]->fvec); } primal_obj_b = primal_obj; for (i=1;i<sm->sizePsi+1;i++) { w_b[i] = w[i]; } null_step = 0; serious_counter++; } else { /* increase step size */ #if (DEBUG_LEVEL>0) printf("NULL STEP: SS(ii) FAILS.\n"); #endif serious_counter--; rho = MAX(rho/10,min_rho); } } else { /* no sufficient decrease */ serious_counter--; if ((cut_error[size_active-1]>m3*last_sigma_k)&&(fabs(obj_difference)>last_z_k_norm+last_sigma_k)) { #if (DEBUG_LEVEL>0) printf("NULL STEP: NS(ii) FAILS.\n"); #endif rho = MIN(10*rho,max_rho); } #if (DEBUG_LEVEL>0) else printf("NULL STEP\n"); #endif } /* update last_sigma_k */ last_sigma_k = sigma_k; last_z_k_norm = z_k_norm; /* break away from while loop if more than certain proportioal decrease in primal objective */ if (primal_obj_b/initial_primal_obj<1-decrease_proportion) { suff_decrease_cond = 1; } /* clean up */ if (iter % CLEANUP_CHECK == 0) { size_active = resize_cleanup(size_active, idle, alpha, delta, gammaG0, proximal_rhs, G, dXc, cut_error); } time(&iter_end); #if (DEBUG_LEVEL==1) char msg[20]; sprintf(msg,"ITER %d",iter); print_time(iter_start, iter_end, msg); #endif } // end cutting plane while loop printf(" Inner loop optimization finished.\n"); fflush(stdout); /* free memory */ for (j=0;j<size_active;j++) { free(G[j]); free_example(dXc[j],0); } free(G); free(dXc); free(alpha); free(delta); free_svector(new_constraint); free(idle); free(gammaG0); free(proximal_rhs); free(cut_error); /* copy and free */ for (i=1;i<sm->sizePsi+1;i++) { w[i] = w_b[i]; } free(w_b); return(primal_obj_b); }
/*
 * cutting_plane_algorithm_dual -- one-slack cutting-plane solver that works
 * on the dual QP (mosek_qp_optimize_dual).
 *
 * Parameters, as used by the body:
 *   w          in/out weight vector, written at indices 1..sm->sizePsi; it
 *              is cleared and rebuilt from the dual solution every iteration.
 *   MAX_ITER   upper bound on the number of loop iterations.
 *   C          passed to the QP; C*ALPHA_THRESHOLD decides whether a
 *              constraint counts as active.
 *   epsilon    the loop continues while value > threshold + epsilon.
 *   sm         supplies sizePsi and is forwarded to find_cutting_plane().
 *   sparm      supplies phi1_size, phi2_size and gram_regularization;
 *              gram_regularization is multiplied by 10 (and stays modified)
 *              each time the solver returns code 1295.
 *   m, fycache, ex, valid_examples
 *              only forwarded to find_cutting_plane().
 *
 * Per iteration:
 *   1. Append new_constraint to dXc / delta / idle, and add a qmatrix row
 *      holding the negated weights of features 2*n_phi+1 .. 3*n_phi of the
 *      new constraint (n_phi = phi1_size + phi2_size).
 *   2. Extend the Gram matrix G: off-diagonal entries are sprod_ss/2, the new
 *      diagonal entry is sprod_ss + 1e-6.
 *   3. Solve the QP.  Codes 1293..1296 are reported; while the code is 1295
 *      the diagonal of G is increased and the solve is retried.  Any other
 *      non-zero code terminates the process with exit(1).
 *   4. Rebuild w from alpha: active constraints are added with weight
 *      alpha[j]; alpha[size_active + j] is subtracted from w[2*n_phi+1+j];
 *      entries of w[1..3*n_phi] with magnitude below EQUALITY_EPSILON are
 *      set to 0.
 *   5. Recompute the slack of every stored constraint; the largest one
 *      becomes the new threshold (0 while only one constraint is stored).
 *   6. Fetch the next cutting plane; every CLEANUP_CHECK iterations
 *      resize_cleanup() recomputes size_active.
 *
 * Changes relative to the previous revision:
 *   - qmatrix and its rows are now released (they were never freed, and rows
 *     were overwritten or dropped after resize_cleanup shrank size_active);
 *   - the DOC, qmatrix-row and cur_slack allocations are asserted like the
 *     other allocations in this function;
 *   - the unused solver-parameter setup and unused locals were removed.
 *
 * NOTE(review): resize_cleanup() is not given qmatrix, so after a clean-up
 * qmatrix rows 0..size_active-1 may no longer line up with the surviving
 * constraints in dXc -- confirm against resize_cleanup().
 * NOTE(review): only alpha[size_active-1] is initialised before the solve;
 * this assumes mosek_qp_optimize_dual treats alpha purely as output -- confirm.
 */
void cutting_plane_algorithm_dual(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, int *valid_examples) {
  long i, j;
  double *alpha = NULL;
  DOC **dXc = NULL;            /* constraint matrix */
  double *delta = NULL;        /* rhs of constraints */
  SVECTOR *new_constraint;
  SVECTOR *f;
  int iter = 0;
  int size_active = 0;
  double value;
  double threshold = 0.0;
  double margin;
  double cur_obj;
  double *cur_slack = NULL;
  int mv_iter;
  int *idle = NULL;
  double **G = NULL;
  double **qmatrix = NULL;
  long n_qrows = 0;            /* number of qmatrix rows currently allocated */
  int r;

  printf("Running structural SVM solver: ");
  fflush(stdout);

  new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
  value = margin - sprod_ns(w, new_constraint);

  while ((value > threshold + epsilon) && (iter < MAX_ITER)) {
    const long n_phi = sparm->phi1_size + sparm->phi2_size;

    iter += 1;
    size_active += 1;

    printf(".");
    fflush(stdout);

    /* add constraint */
    dXc = (DOC **)realloc(dXc, sizeof(DOC *) * size_active);
    assert(dXc != NULL);
    dXc[size_active-1] = (DOC *)malloc(sizeof(DOC));
    assert(dXc[size_active-1] != NULL);
    dXc[size_active-1]->fvec = new_constraint;
    dXc[size_active-1]->slackid = 1;  /* only one common slackid (one-slack) */
    dXc[size_active-1]->costfactor = 1.0;

    delta = (double *)realloc(delta, sizeof(double) * size_active);
    assert(delta != NULL);
    delta[size_active-1] = margin;

    /* size_active multipliers followed by n_phi extra dual variables */
    alpha = (double *)realloc(alpha, sizeof(double) * (size_active + n_phi));
    assert(alpha != NULL);
    alpha[size_active-1] = 0.0;

    idle = (int *)realloc(idle, sizeof(int) * size_active);
    assert(idle != NULL);
    idle[size_active-1] = 0;

    /* Rows at or beyond the slot about to be filled are left over from
       before a resize_cleanup(); release them so they are not lost when the
       pointer array is resized and the slot is overwritten. */
    for (i = size_active - 1; i < n_qrows; i++) {
      free(qmatrix[i]);
    }
    qmatrix = (double **)realloc(qmatrix, sizeof(double *) * size_active);
    assert(qmatrix != NULL);
    qmatrix[size_active-1] = malloc(sizeof(double) * n_phi);
    assert(qmatrix[size_active-1] != NULL || n_phi <= 0);
    n_qrows = size_active;
    for (j = 0; j < n_phi; j++) {
      qmatrix[size_active-1][j] = (-1) * returnWeightAtIndex(dXc[size_active-1]->fvec->words, (n_phi*2 + j + 1));
    }

    /* update Gram matrix */
    G = (double **)realloc(G, sizeof(double *) * size_active);
    assert(G != NULL);
    G[size_active-1] = NULL;
    for (j = 0; j < size_active; j++) {
      G[j] = (double *)realloc(G[j], sizeof(double) * size_active);
      assert(G[j] != NULL);
    }
    for (j = 0; j < size_active-1; j++) {
      G[size_active-1][j] = sprod_ss(dXc[size_active-1]->fvec, dXc[j]->fvec);
      G[size_active-1][j] = G[size_active-1][j] / 2;
      G[j][size_active-1] = G[size_active-1][j];
    }
    G[size_active-1][size_active-1] = sprod_ss(dXc[size_active-1]->fvec, dXc[size_active-1]->fvec);

    /* hack: add a constant to the diagonal to make sure G is PSD */
    G[size_active-1][size_active-1] += 1e-6;

    /* solve QP to update alpha */
    r = mosek_qp_optimize_dual(G, qmatrix, delta, alpha, (long) size_active, n_phi, C, &cur_obj, 0, 0);

    if (r >= 1293 && r <= 1296) {
      printf("r:%d. G might not be psd due to numerical errors.\n", r);
      fflush(stdout);
      while (r == 1295) {
        printf("r:%d. G might not be psd due to numerical errors. Gram Reg=%0.7f\n", r, sparm->gram_regularization);
        fflush(stdout);
        /* raise the diagonal shift from the current regularisation to ten
           times that value, then retry */
        for (i = 0; i < size_active; i++) {
          G[i][i] += 10*sparm->gram_regularization - sparm->gram_regularization;
        }
        sparm->gram_regularization *= 10;
        r = mosek_qp_optimize_dual(G, qmatrix, delta, alpha, (long) size_active, n_phi, C, &cur_obj, sparm->gram_regularization, sparm->gram_regularization*0.1);
      }
    }
    else if (r) {
      printf("Error %d in mosek_qp_optimize: Check ${MOSEKHOME}/${VERSION}/tools/platform/${PLATFORM}/h/mosek.h\n", r);
      exit(1);
    }

    /* rebuild w from the dual solution and update the idle counters */
    clear_nvector(w, sm->sizePsi);
    for (j = 0; j < size_active; j++) {
      if (alpha[j] > C*ALPHA_THRESHOLD) {
        add_vector_ns(w, dXc[j]->fvec, alpha[j]);
        idle[j] = 0;
      }
      else {
        idle[j]++;
      }
    }
    for (j = 0; j < n_phi; j++) {
      if (alpha[size_active+j] > EQUALITY_EPSILON) {
        w[j+1+n_phi*2] = w[j+1+n_phi*2] - alpha[size_active+j];
      }
    }
    /* snap near-zero weights to exactly zero */
    for (j = 1; j <= n_phi*3; j++) {
      if ((w[j] < EQUALITY_EPSILON) && (w[j] > (-1*EQUALITY_EPSILON))) {
        w[j] = 0;
      }
    }
    /* report (but do not abort on) positive weights in the last third */
    for (j = n_phi*2+1; j <= n_phi*3; j++) {
      if (w[j] > 0) {
        printf("j = %ld, w[j] = %0.6f\n", j, w[j]);
        fflush(stdout);
      }
    }

    /* slack of every stored constraint under the new w */
    cur_slack = (double *)realloc(cur_slack, sizeof(double) * size_active);
    assert(cur_slack != NULL);
    for (i = 0; i < size_active; i++) {
      cur_slack[i] = 0.0;
      for (f = dXc[i]->fvec; f; f = f->next) {
        j = 0;
        while (f->words[j].wnum) {
          cur_slack[i] += w[f->words[j].wnum] * f->words[j].weight;
          j++;
        }
      }
      if (cur_slack[i] >= delta[i])
        cur_slack[i] = 0.0;
      else
        cur_slack[i] = delta[i] - cur_slack[i];
    }

    /* index of the largest slack (last one wins on ties) */
    mv_iter = 0;
    if (size_active > 1) {
      for (j = 0; j < size_active; j++) {
        if (cur_slack[j] >= cur_slack[mv_iter])
          mv_iter = j;
      }
    }

    if (size_active > 1)
      threshold = cur_slack[mv_iter];
    else
      threshold = 0.0;

    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
    value = margin - sprod_ns(w, new_constraint);

    if ((iter % CLEANUP_CHECK) == 0) {
      printf("+");
      fflush(stdout);
      size_active = resize_cleanup(size_active, &idle, &alpha, &delta, &dXc, &G, &mv_iter);
    }

    /* alpha is rebuilt from scratch on the next iteration */
    free(alpha);
    alpha = NULL;
  } /* end cutting plane while loop */

  printf(" Inner loop optimization finished.\n");
  fflush(stdout);

  /* free memory */
  for (j = 0; j < size_active; j++) {
    free(G[j]);
    free_example(dXc[j], 1);
  }
  for (j = 0; j < n_qrows; j++) {
    free(qmatrix[j]);
  }
  free(qmatrix);
  free(G);
  free(dXc);
  free(alpha);
  free(delta);
  free_svector(new_constraint);
  free(cur_slack);
  free(idle);

  return;
}
/*
 * cutting_plane_algorithm -- one-slack cutting-plane solver that hands the
 * stored constraints to mosek_qp_optimize() as dense rows and lets the
 * solver write w directly.
 *
 * Parameters, as used by the body:
 *   w          in/out weight vector; cleared (clear_nvector over sm->sizePsi)
 *              and then filled by the QP solver on every iteration.
 *   MAX_ITER   upper bound on the number of loop iterations.
 *   C          passed to the QP and to current_obj_val().
 *   epsilon    the loop continues while value > threshold + epsilon.
 *   sm         supplies sizePsi and is forwarded to the helpers.
 *   sparm      supplies phi1_size and phi2_size; each dense row has
 *              3*(phi1_size+phi2_size) entries.
 *   m, fycache, ex, valid_examples
 *              forwarded to find_cutting_plane() and current_obj_val().
 *
 * Returns the value of current_obj_val() evaluated after the loop.
 *
 * Per iteration:
 *   1. Append new_constraint to dXc / delta.
 *   2. Expand the first vector of the constraint (fvec->words) into a dense
 *      row of psiDiffs, zero-filling every position that has no entry.
 *   3. Solve the QP; any non-zero return code ends the process with exit(1).
 *   4. Set entries of w[1..row length] with magnitude below EQUALITY_EPSILON
 *      to 0.
 *   5. threshold = cur_slack[0] once more than one constraint is stored,
 *      0 otherwise; then fetch the next cutting plane.
 *
 * Changes relative to the previous revision:
 *   - the DOC and cur_slack allocations are asserted like the others;
 *   - feature numbers are asserted to fit inside the dense row before the
 *     row is written (previously an oversized wnum wrote past the buffer);
 *   - the unused solver-parameter setup and unused locals were removed.
 *
 * NOTE(review): only the first SVECTOR of each constraint is densified;
 * fvec->next is ignored -- confirm constraints are always single vectors.
 * NOTE(review): the zero-fill assumes wnum values are strictly increasing
 * within words[] -- confirm against the SVECTOR invariants.
 */
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, int *valid_examples) {
  long j, t;
  DOC **dXc = NULL;            /* constraint matrix */
  double *delta = NULL;        /* rhs of constraints */
  SVECTOR *new_constraint;
  int iter = 0;
  int size_active = 0;
  double value;
  double threshold = 0.0;
  double margin;
  double primal_obj, cur_obj;
  double *cur_slack = NULL;
  double **psiDiffs = NULL;    /* one dense row per stored constraint */
  int r;
  long fnum, last_wnum;

  printf("Running structural SVM solver: ");
  fflush(stdout);

  new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
  value = margin - sprod_ns(w, new_constraint);

  while ((value > threshold + epsilon) && (iter < MAX_ITER)) {
    const long dense_dim = (sparm->phi1_size+sparm->phi2_size)*3;
    SVECTOR *fv;
    double *row;

    iter += 1;
    size_active += 1;

    printf(".");
    fflush(stdout);

    /* add constraint */
    dXc = (DOC **)realloc(dXc, sizeof(DOC *) * size_active);
    assert(dXc != NULL);
    dXc[size_active-1] = (DOC *)malloc(sizeof(DOC));
    assert(dXc[size_active-1] != NULL);
    dXc[size_active-1]->fvec = new_constraint;
    dXc[size_active-1]->slackid = 1;  /* only one common slackid (one-slack) */
    dXc[size_active-1]->costfactor = 1.0;

    delta = (double *)realloc(delta, sizeof(double) * size_active);
    assert(delta != NULL);
    delta[size_active-1] = margin;

    /* densify the new constraint into its own row */
    psiDiffs = (double **)realloc(psiDiffs, sizeof(double *) * size_active);
    assert(psiDiffs != NULL);
    psiDiffs[size_active-1] = (double *)malloc(sizeof(double) * dense_dim);
    assert(psiDiffs[size_active-1] != NULL);

    fv = dXc[size_active-1]->fvec;
    row = psiDiffs[size_active-1];
    fnum = 0;
    last_wnum = 0;
    while (fv->words[fnum].wnum) {
      /* feature numbers are 1-based; they must fit inside the row */
      assert(fv->words[fnum].wnum <= dense_dim);
      /* zero the gap since the previous stored feature */
      for (t = last_wnum+1; t < fv->words[fnum].wnum; t++) {
        row[t-1] = 0;
      }
      row[fv->words[fnum].wnum-1] = fv->words[fnum].weight;
      last_wnum = fv->words[fnum].wnum;
      fnum++;
    }
    /* zero the tail after the last stored feature */
    for (t = (last_wnum+1); t <= dense_dim; t++) {
      row[t-1] = 0;
    }

    /* solve QP to update w */
    clear_nvector(w, sm->sizePsi);
    cur_slack = (double *)realloc(cur_slack, sizeof(double));
    assert(cur_slack != NULL);
    r = mosek_qp_optimize(psiDiffs, delta, w, cur_slack, (long) size_active, C, &cur_obj, (sparm->phi1_size+sparm->phi2_size)*3, (sparm->phi1_size+sparm->phi2_size)*2);

    if (r >= 1293 && r <= 1296) {
      printf("r:%d. G might not be psd due to numerical errors.\n", r);
      exit(1);
    }
    else if (r) {
      printf("Error %d in mosek_qp_optimize: Check ${MOSEKHOME}/${VERSION}/tools/platform/${PLATFORM}/h/mosek.h\n", r);
      exit(1);
    }

    /* snap near-zero weights to exactly zero */
    for (j = 1; j <= dense_dim; j++) {
      if ((w[j] < EQUALITY_EPSILON) && (w[j] > (-1*EQUALITY_EPSILON))) {
        w[j] = 0;
      }
    }

    if (size_active > 1)
      threshold = cur_slack[0];
    else
      threshold = 0.0;

    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
    value = margin - sprod_ns(w, new_constraint);
  } /* end cutting plane while loop */

  primal_obj = current_obj_val(ex, fycache, m, sm, sparm, C, valid_examples);

  printf(" Inner loop optimization finished.\n");
  fflush(stdout);

  /* free memory */
  for (j = 0; j < size_active; j++) {
    free(psiDiffs[j]);
    free_example(dXc[j], 1);
  }
  free(psiDiffs);
  free(dXc);
  free(delta);
  free_svector(new_constraint);
  free(cur_slack);

  return(primal_obj);
}
void find_most_violated_constraint_marginrescaling(PATTERN x, LABEL y, LABEL *ybar, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm) { /* Finds the most violated constraint (loss-augmented inference), i.e., computing argmax_{(ybar,hbar)} [<w,psi(x,ybar,hbar)> + loss(y,ybar,hbar)]. The output (ybar,hbar) are stored at location pointed by pointers *ybar and *hbar. */ int i, j; SVECTOR *temp_sub=NULL; double *unary_pos = (double*)malloc((x.n_pos+x.n_neg)*sizeof(double)); double *unary_neg = (double*)malloc((x.n_pos+x.n_neg)*sizeof(double)); double **binary = (double**)malloc((x.n_pos+x.n_neg)*sizeof(double *)); for (i = 0; i < (x.n_pos+x.n_neg); i++){ binary[i] = (double*)malloc((x.n_pos+x.n_neg)*sizeof(double)); // compute unary potential for ybar.labels[i] == 1 unary_pos[i] = sprod_ns(sm->w, x.x_is[i].phi1phi2_pos); if(unary_pos[i] != 0){ unary_pos[i] = (float)(-1*unary_pos[i])/(float)(x.n_pos+x.n_neg); } // compute unary potential for ybar.labels[i] == -1 unary_neg[i] = sprod_ns(sm->w, x.x_is[i].phi1phi2_neg); if(unary_neg[i] != 0){ unary_neg[i] = (float)(-1*unary_neg[i])/(float)(x.n_pos+x.n_neg); } if(y.labels[i] == 1){ // add 1/n to 'ybar == -1' unary term unary_neg[i] -= (float)1/(float)(x.n_pos+x.n_neg); } else{ // add 1/n to 'ybar == 1' unary term unary_pos[i] -= (float)1/(float)(x.n_pos+x.n_neg); } for (j = (i+1); j < (x.n_pos+x.n_neg); j++){ if(x.neighbors[i][j]){ temp_sub = sub_ss_sq(x.x_is[i].phi1phi2_shift, x.x_is[j].phi1phi2_shift); binary[i][j] = sprod_ns(sm->w, temp_sub); assert(binary[i][j] <= 0); free_svector(temp_sub); } else{ binary[i][j] = 0; } } } if (x.n_neighbors){ for (i = 0; i < (x.n_pos+x.n_neg); i++){ for (j = (i+1); j < (x.n_pos+x.n_neg); j++){ if(binary[i][j] != 0){ binary[i][j] = (double)(-1*binary[i][j])/(double)x.n_neighbors; } } } } ybar->labels = maxflowwrapper(unary_pos, unary_neg, binary, x.n_pos, x.n_neg); free(unary_pos); free(unary_neg); for (i = 0; i < (x.n_pos+x.n_neg); i++){ free(binary[i]); } free(binary); return; }
/*
 * classify_struct_example -- prediction for pattern x with the weights in
 * sm->w.
 *
 * Builds unary and pairwise potentials over the x.n_pos + x.n_neg nodes of
 * x and stores the labelling returned by maxflowwrapper() in y->labels.
 *
 * Potentials (n = x.n_pos + x.n_neg):
 *   unary_pos[i] = -<w, phi1phi2_pos(i)> / n   (evaluated in float precision)
 *   unary_neg[i] = -<w, phi1phi2_neg(i)> / n   (evaluated in float precision)
 *   binary[i][j], j > i, is <w, sub_ss_sq(shift(i), shift(j))> for
 *   neighbouring nodes (asserted to be <= 0), later replaced by its negation
 *   divided by x.n_neighbors when x.n_neighbors is non-zero; it is 0 for
 *   non-neighbours.
 *
 * Only the strict upper triangle of `binary` is computed.  The remaining
 * entries are now zero-initialised (calloc) instead of being passed to
 * maxflowwrapper() uninitialised, and every allocation is checked.
 *
 * sparm is not used.
 *
 * NOTE(review): ownership of the array returned by maxflowwrapper() is not
 * visible here -- presumably the caller releases y->labels; confirm.
 */
void classify_struct_example(PATTERN x, LABEL *y, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm) {
  int i, j;
  SVECTOR *temp_sub = NULL;
  double *unary_pos = (double *)malloc((x.n_pos+x.n_neg)*sizeof(double));
  double *unary_neg = (double *)malloc((x.n_pos+x.n_neg)*sizeof(double));
  double **binary = (double **)malloc((x.n_pos+x.n_neg)*sizeof(double *));

  /* malloc(0) may legitimately return NULL, so only insist on success when
     there is at least one node to store */
  assert((x.n_pos+x.n_neg) <= 0 ||
         (unary_pos != NULL && unary_neg != NULL && binary != NULL));

  for (i = 0; i < (x.n_pos+x.n_neg); i++) {
    /* zero-filled so the diagonal and lower triangle are well defined */
    binary[i] = (double *)calloc((x.n_pos+x.n_neg), sizeof(double));
    assert(binary[i] != NULL);

    /* unary potential for label +1 at node i */
    unary_pos[i] = sprod_ns(sm->w, x.x_is[i].phi1phi2_pos);
    if (unary_pos[i] != 0) {
      unary_pos[i] = (float)(-1*unary_pos[i])/(float)(x.n_pos+x.n_neg);
    }

    /* unary potential for label -1 at node i */
    unary_neg[i] = sprod_ns(sm->w, x.x_is[i].phi1phi2_neg);
    if (unary_neg[i] != 0) {
      unary_neg[i] = (float)(-1*unary_neg[i])/(float)(x.n_pos+x.n_neg);
    }

    /* pairwise potentials, strict upper triangle only */
    for (j = (i+1); j < (x.n_pos+x.n_neg); j++) {
      if (x.neighbors[i][j]) {
        temp_sub = sub_ss_sq(x.x_is[i].phi1phi2_shift, x.x_is[j].phi1phi2_shift);
        binary[i][j] = sprod_ns(sm->w, temp_sub);
        assert(binary[i][j] <= 0);
        free_svector(temp_sub);
      }
      else {
        binary[i][j] = 0;
      }
    }
  }

  /* negate and normalise the pairwise terms by the neighbour count */
  if (x.n_neighbors) {
    for (i = 0; i < (x.n_pos+x.n_neg); i++) {
      for (j = (i+1); j < (x.n_pos+x.n_neg); j++) {
        if (binary[i][j] != 0) {
          binary[i][j] = (double)(-1*binary[i][j])/(double)x.n_neighbors;
        }
      }
    }
  }

  y->labels = maxflowwrapper(unary_pos, unary_neg, binary, x.n_pos, x.n_neg);

  free(unary_pos);
  free(unary_neg);
  for (i = 0; i < (x.n_pos+x.n_neg); i++) {
    free(binary[i]);
  }
  free(binary);

  return;
}
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, /*double epsilon,*/ SVECTOR **fycache, EXAMPLE *ex, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm) { long i,j; double *alpha; double **G; /* Gram matrix */ DOC **dXc; /* constraint matrix */ double *delta; /* rhs of constraints */ SVECTOR *new_constraint; double dual_obj/*, alphasum*/; int iter, size_active, no_violation_iter; double value; //int r; //int *idle; /* for cleaning up */ double margin; //double primal_obj; double lower_bound, approx_upper_bound; double *proximal_rhs; //double *gammaG0=NULL; //double min_rho = 0.001; //double max_rho; //double serious_counter=0; //double rho = 1.0; //double expected_descent, primal_obj_b=-1, reg_master_obj; //int null_step=1; //double *w_b; //double kappa=0.01; //double temp_var; //double proximal_term, primal_lower_bound; //double v_k; //double obj_difference; // double *cut_error; // cut_error[i] = alpha_{k,i} at current center x_k //double sigma_k; //double m2 = 0.2; //double m3 = 0.9; //double gTd; //double last_sigma_k=0; //double initial_primal_obj; //int suff_decrease_cond=0; //double decrease_proportion = 0.2; // start from 0.2 first //double z_k_norm; //double last_z_k_norm=0; /* w_b = create_nvector(sm->sizePsi); clear_nvector(w_b,sm->sizePsi); // warm start for (i=1;i<sm->sizePsi+1;i++) { w_b[i] = w[i]; }*/ iter = 0; no_violation_iter = 0; size_active = 0; alpha = NULL; G = NULL; dXc = NULL; delta = NULL; //idle = NULL; proximal_rhs = NULL; //cut_error = NULL; new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm); value = margin - sprod_ns(w, new_constraint); //primal_obj_b = 0.5*sprod_nn(w_b,w_b,sm->sizePsi)+C*value; //primal_obj = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value; //primal_lower_bound = 0; //expected_descent = -primal_obj_b; //initial_primal_obj = primal_obj_b; //max_rho = C; // Non negative weight constraints int nNonNeg = sm->sizePsi - sm->firstNonNegWeightIndex + 1; G = (double**)malloc(sizeof(double*)*nNonNeg); 
for (j=0; j<nNonNeg; j++) { G[j] = (double*)malloc(sizeof(double)*nNonNeg); for (int k=0; k<nNonNeg; k++) { G[j][k] = 0; } G[j][j] = 1.0; } double* alphabeta = NULL; while (/*(!suff_decrease_cond)&&(expected_descent<-epsilon)&&*/(iter<MAX_ITER)&&(no_violation_iter<MAX_INNER_ITER_NO_VIOLATION)) { LearningTracker::NextInnerIteration(); iter+=1; size_active+=1; #if (DEBUG_LEVEL>0) printf("INNER ITER %d\n", iter); #endif /* add constraint */ dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active); assert(dXc!=NULL); dXc[size_active-1] = (DOC*)malloc(sizeof(DOC)); dXc[size_active-1]->fvec = new_constraint; dXc[size_active-1]->slackid = 1; // only one common slackid (one-slack) dXc[size_active-1]->costfactor = 1.0; delta = (double*)realloc(delta, sizeof(double)*size_active); assert(delta!=NULL); delta[size_active-1] = margin; alphabeta = (double*)realloc(alphabeta, sizeof(double)*(size_active+nNonNeg)); assert(alphabeta!=NULL); alphabeta[size_active+nNonNeg-1] = 0.0; /*idle = (int*)realloc(idle, sizeof(int)*size_active); assert(idle!=NULL); idle[size_active-1] = 0;*/ /* proximal point */ proximal_rhs = (double*)realloc(proximal_rhs, sizeof(double)*(size_active+nNonNeg)); assert(proximal_rhs!=NULL); /*cut_error = (double*)realloc(cut_error, sizeof(double)*size_active); assert(cut_error!=NULL); // note g_i = - new_constraint cut_error[size_active-1] = C*(sprod_ns(w_b, new_constraint) - sprod_ns(w, new_constraint)); cut_error[size_active-1] += (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi)); cut_error[size_active-1] -= (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi)); */ /*gammaG0 = (double*)realloc(gammaG0, sizeof(double)*size_active); assert(gammaG0!=NULL);*/ /* update Gram matrix */ G = (double**)realloc(G, sizeof(double*)*(size_active+nNonNeg)); assert(G!=NULL); G[size_active+nNonNeg-1] = NULL; for (j=0; j<size_active+nNonNeg; j++) { G[j] = (double*)realloc(G[j], sizeof(double)*(size_active+nNonNeg)); assert(G[j]!=NULL); } for (j=0; j<size_active-1; j++) { 
G[size_active+nNonNeg-1][j+nNonNeg] = sprod_ss(dXc[size_active-1]->fvec, dXc[j]->fvec); G[j+nNonNeg][size_active+nNonNeg-1] = G[size_active+nNonNeg-1][j+nNonNeg]; } G[size_active+nNonNeg-1][size_active+nNonNeg-1] = sprod_ss(dXc[size_active-1]->fvec,dXc[size_active-1]->fvec); for (j=0; j<nNonNeg; j++) { WORD indicator[2]; indicator[0].wnum = j + sm->firstNonNegWeightIndex; indicator[0].weight = 1.0; indicator[1].wnum = 0; indicator[1].weight = 0.0; SVECTOR* indicator_vec = create_svector(indicator, NULL, 1.0); G[size_active+nNonNeg-1][j] = sprod_ss(dXc[size_active-1]->fvec, indicator_vec); G[j][size_active+nNonNeg-1] = G[size_active+nNonNeg-1][j]; free_svector(indicator_vec); } /* update gammaG0 */ /*if (null_step==1) { gammaG0[size_active-1] = sprod_ns(w_b, dXc[size_active-1]->fvec); } else { for (i=0;i<size_active;i++) { gammaG0[i] = sprod_ns(w_b, dXc[i]->fvec); } }*/ /* update proximal_rhs */ for (i=0; i<size_active; i++) { proximal_rhs[i+nNonNeg] = -delta[i]; //(1+rho) * (rho * gammaG0[i] - (1 + rho) * delta[i]); } for (i=0; i<nNonNeg; i++) { proximal_rhs[i] = 0; //w_b[i + 1]*rho * (1+rho); } /* DEBUG */ /* for (i = 0; i < size_active + nNonNeg; ++i) { printf("G[%d]=", i); for (j = 0; j < size_active + nNonNeg; ++j) { printf("%.4f ", G[i][j]); } printf("\n"); } printf("\n"); for (i = 0; i < size_active + nNonNeg; ++i) printf("proximal_rhs[%d]=%.4f\n", i, proximal_rhs[i]); */ /* solve QP to update alpha */ dual_obj = 0; mosek_qp_optimize(G, proximal_rhs, alphabeta, (long) size_active+nNonNeg, C, &dual_obj, nNonNeg); printf("dual_obj=%.4lf\n", dual_obj); alpha = alphabeta + nNonNeg; clear_nvector(w,sm->sizePsi); for (i = 0; i < nNonNeg; i++) { w[sm->firstNonNegWeightIndex + i] = alphabeta[i];//alphabeta[i]/(1+rho); // add betas } for (j=0; j<size_active; j++) { if (alpha[j]>C*ALPHA_THRESHOLD) { //add_vector_ns(w,dXc[j]->fvec,alpha[j]/(1+rho)); add_vector_ns(w,dXc[j]->fvec,alpha[j]); } } //z_k_norm = sqrt(sprod_nn(w,w,sm->sizePsi)); //add_vector_nn(w, w_b, 
sm->sizePsi, rho/(1+rho)); LearningTracker::ReportWeights(w, sm->sizePsi); /* detect if step size too small */ /* sigma_k = 0; alphasum = 0; for (j=0;j<size_active;j++) { sigma_k += alpha[j]*cut_error[j]; alphasum+=alpha[j]; } sigma_k/=C; gTd = -C*(sprod_ns(w,new_constraint) - sprod_ns(w_b,new_constraint)); #if (DEBUG_LEVEL>0) for (j=0;j<size_active;j++) { printf("alpha[%d]: %.8g, cut_error[%d]: %.8g\n", j, alpha[j], j, cut_error[j]); } printf("sigma_k: %.8g\n", sigma_k); printf("alphasum: %.8g\n", alphasum); printf("g^T d: %.8g\n", gTd); fflush(stdout); #endif */ /* update cleanup information */ /* for (j=0;j<size_active;j++) { if (alpha[j]<ALPHA_THRESHOLD*C) { idle[j]++; } else { idle[j]=0; } } */ // update lower bound double xi = -1e+20; for (i = 0; i < size_active; ++i) { xi = MAX(xi, delta[i] - sprod_ns(w, dXc[i]->fvec)); } lower_bound = 0.5*sprod_nn(w,w,sm->sizePsi)+C*xi; printf("lower_bound=%.4lf\n", lower_bound); assert(fabs(lower_bound + dual_obj) < 1e-6); LearningTracker::ReportLowerBound(lower_bound); // find new constraint new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm); value = margin - sprod_ns(w, new_constraint); double violation = value - xi; if (violation > CUTTING_PLANE_EPS) { printf("New constraint is violated by %.4lf\n", violation); no_violation_iter = 0; } else { ++no_violation_iter; printf("New constraint is underviolated by %.4lf\n", violation); printf("%d more such constraints to stop\n", MAX_INNER_ITER_NO_VIOLATION - no_violation_iter); } // update upper bound approx_upper_bound = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value; printf("approx_upper_bound=%.4lf\n", approx_upper_bound); LearningTracker::ReportUpperBound(approx_upper_bound); /* temp_var = sprod_nn(w_b,w_b,sm->sizePsi); proximal_term = 0.0; for (i=1;i<sm->sizePsi+1;i++) { proximal_term += (w[i]-w_b[i])*(w[i]-w_b[i]); } reg_master_obj = -dual_obj+0.5*rho*temp_var/(1+rho); expected_descent = reg_master_obj - primal_obj_b; v_k = (reg_master_obj - 
proximal_term*rho/2) - primal_obj_b; primal_lower_bound = MAX(primal_lower_bound, reg_master_obj - 0.5*rho*(1+rho)*proximal_term); LearningTracker::ReportLowerBoundValue(reg_master_obj); #if (DEBUG_LEVEL>0) printf("ITER REG_MASTER_OBJ: %.4f\n", reg_master_obj); printf("ITER EXPECTED_DESCENT: %.4f\n", expected_descent); printf("ITER PRIMAL_OBJ_B: %.4f\n", primal_obj_b); printf("ITER RHO: %.4f\n", rho); printf("ITER ||w-w_b||^2: %.4f\n", proximal_term); printf("ITER PRIMAL_LOWER_BOUND: %.4f\n", primal_lower_bound); printf("ITER V_K: %.4f\n", v_k); #endif obj_difference = primal_obj - primal_obj_b; if (primal_obj<primal_obj_b+kappa*expected_descent) { // extra condition to be met if ((gTd>m2*v_k)||(rho<min_rho+1E-8)) { #if (DEBUG_LEVEL>0) printf("SERIOUS STEP\n"); #endif // update cut_error for (i=0;i<size_active;i++) { cut_error[i] -= (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi)); cut_error[i] -= C*sprod_ns(w_b, dXc[i]->fvec); cut_error[i] += (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi)); cut_error[i] += C*sprod_ns(w, dXc[i]->fvec); } primal_obj_b = primal_obj; for (i=1;i<sm->sizePsi+1;i++) { w_b[i] = w[i]; } null_step = 0; serious_counter++; } else { // increase step size #if (DEBUG_LEVEL>0) printf("NULL STEP: SS(ii) FAILS.\n"); #endif serious_counter--; rho = MAX(rho/10,min_rho); } } else { // no sufficient decrease serious_counter--; if ((cut_error[size_active-1]>m3*last_sigma_k)&&(fabs(obj_difference)>last_z_k_norm+last_sigma_k)) { #if (DEBUG_LEVEL>0) printf("NULL STEP: NS(ii) FAILS.\n"); #endif rho = MIN(10*rho,max_rho); } #if (DEBUG_LEVEL>0) else printf("NULL STEP\n"); #endif } // update last_sigma_k last_sigma_k = sigma_k; last_z_k_norm = z_k_norm; // break away from while loop if more than certain proportioal decrease in primal objective if (primal_obj_b/initial_primal_obj<1-decrease_proportion) { suff_decrease_cond = 1; } // clean up if (iter % CLEANUP_CHECK == 0) { size_active = resize_cleanup(size_active, idle, alpha, delta, gammaG0, proximal_rhs, G, 
dXc, cut_error); } */ } // end cutting plane while loop printf("Inner loop optimization finished.\n"); fflush(stdout); /* free memory */ for (j=0; j<size_active; j++) { free(G[j]); free_example(dXc[j],0); } free(G); free(dXc); free(alphabeta); free(delta); free_svector(new_constraint); //free(idle); //free(gammaG0); free(proximal_rhs); //free(cut_error); /* copy and free */ /*for (i=1;i<sm->sizePsi+1;i++) { w[i] = w_b[i]; } free(w_b);*/ //return(primal_obj_b); return lower_bound; }
/*
 * Proximal cutting-plane solver for the CCCP inner loop, with the QP solved
 * by svm_learn_optimization() (the "hideo" solver).
 *
 * Keeps a stability centre w_b (initialised from w) and a proximal weight
 * rho. Each iteration adds the current cutting plane to the working set,
 * solves the proximal QP, forms a candidate w, and then either accepts it as
 * the new centre ("serious step") or keeps the centre and possibly adjusts
 * rho ("null step"). Every CLEANUP_CHECK iterations resize_cleanup() is
 * called on the working set.
 *
 * The loop ends when the expected descent is no longer below -epsilon, when
 * MAX_ITER is reached, or when the centre objective has dropped by more than
 * decrease_proportion relative to its initial value.
 *
 *   w        in/out weight vector, indexed 1..sm->sizePsi; on return it holds
 *            the final stability centre
 *   m        forwarded to find_cutting_plane()
 *   MAX_ITER iteration cap
 *   C        slack penalty
 *   epsilon  stopping tolerance; also used for the solver parameters
 *   fycache, ex, sm, sparm   forwarded to find_cutting_plane()
 *
 * Returns the objective value recorded for the final stability centre.
 */
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm) {
  long i, j;
  double *alpha = NULL;
  double **G = NULL;               /* Gram matrix */
  DOC **dXc = NULL;                /* constraint matrix */
  double *delta = NULL;            /* rhs of constraints */
  SVECTOR *new_constraint;
  double dual_obj, alphasum;
  int iter = 0, size_active = 0;
  double value;
  int *idle = NULL;                /* for cleaning up */
  double margin;
  double primal_obj;
  double *proximal_rhs = NULL;
  double *gammaG0 = NULL;          /* <w_b, constraint_i> for each plane */
  double min_rho = 0.001;
  double max_rho;
  double serious_counter = 0;
  double rho = 1.0;                /* temporarily set it to 1 first */
  double expected_descent, primal_obj_b = -1, reg_master_obj;
  int null_step = 1;
  double *w_b;                     /* stability centre */
  double kappa = 0.1;
  double temp_var;
  double proximal_term, primal_lower_bound;
  double v_k;
  double obj_difference;
  double *cut_error = NULL;        /* cut_error[i] = alpha_{k,i} at current center x_k */
  double sigma_k;
  double m2 = 0.2;
  double m3 = 0.9;
  double gTd;
  double last_sigma_k = 0;
  double initial_primal_obj;
  int suff_decrease_cond = 0;
  double decrease_proportion = 0.2; /* start from 0.2 first */
  double z_k_norm;
  double last_z_k_norm = 0;

  /* set parameters for hideo solver */
  LEARN_PARM lparm;
  KERNEL_PARM kparm;
  MODEL *svmModel = NULL;

  lparm.biased_hyperplane = 0;
  lparm.epsilon_crit = MIN(epsilon, 0.001);
  lparm.svm_c = C;
  lparm.sharedslack = 1;
  kparm.kernel_type = LINEAR;

  lparm.remove_inconsistent = 0;
  lparm.skip_final_opt_check = 0;
  lparm.svm_maxqpsize = 10;
  lparm.svm_newvarsinqp = 0;
  lparm.svm_iter_to_shrink = -9999;
  lparm.maxiter = 100000;
  lparm.kernel_cache_size = 40;
  lparm.eps = epsilon;
  lparm.transduction_posratio = -1.0;
  lparm.svm_costratio = 1.0;
  lparm.svm_costratio_unlab = 1.0;
  lparm.svm_unlabbound = 1E-5;
  lparm.epsilon_a = 1E-10; /* changed from 1e-15 */
  lparm.compute_loo = 0;
  lparm.rho = 1.0;
  lparm.xa_depth = 0;
  strcpy(lparm.alphafile, "");
  kparm.poly_degree = 3;
  kparm.rbf_gamma = 1.0;
  kparm.coef_lin = 1;
  kparm.coef_const = 1;
  strcpy(kparm.custom, "empty");

  w_b = create_nvector(sm->sizePsi);
  clear_nvector(w_b, sm->sizePsi);
  /* warm start */
  for (i = 1; i < sm->sizePsi + 1; i++) {
    w_b[i] = w[i];
  }

  new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm);
  value = margin - sprod_ns(w, new_constraint);

  primal_obj_b = 0.5 * sprod_nn(w_b, w_b, sm->sizePsi) + C * value;
  primal_obj = 0.5 * sprod_nn(w, w, sm->sizePsi) + C * value;
  primal_lower_bound = 0;
  expected_descent = -primal_obj_b;
  initial_primal_obj = primal_obj_b;

  max_rho = 100 * C; /* tree-edge loss not within 0-1 */

  printf("Running CCCP inner loop solver: ");
  fflush(stdout);

  while ((!suff_decrease_cond) && (expected_descent < -epsilon) && (iter < MAX_ITER)) {
    int last;   /* index of the plane added in this iteration */

    iter += 1;
    size_active += 1;
    last = size_active - 1;

#if (DEBUG_LEVEL > 0)
    printf("ITER %d\n", iter);
#endif
    printf(".");
    fflush(stdout);

    /* add constraint */
    dXc = (DOC **) realloc(dXc, sizeof(DOC *) * size_active);
    assert(dXc != NULL);
    dXc[last] = (DOC *) malloc(sizeof(DOC));
    assert(dXc[last] != NULL);
    dXc[last]->fvec = new_constraint;
    dXc[last]->slackid = 1; /* only one common slackid (one-slack) */
    dXc[last]->costfactor = 1.0;

    delta = (double *) realloc(delta, sizeof(double) * size_active);
    assert(delta != NULL);
    delta[last] = margin;

    alpha = (double *) realloc(alpha, sizeof(double) * size_active);
    assert(alpha != NULL);
    alpha[last] = 0.0;

    idle = (int *) realloc(idle, sizeof(int) * size_active);
    assert(idle != NULL);
    idle[last] = 0;

    /* proximal point */
    proximal_rhs = (double *) realloc(proximal_rhs, sizeof(double) * size_active);
    assert(proximal_rhs != NULL);

    cut_error = (double *) realloc(cut_error, sizeof(double) * size_active);
    assert(cut_error != NULL);
    /* note g_i = - new_constraint */
    cut_error[last] = C * (sprod_ns(w_b, new_constraint) - sprod_ns(w, new_constraint));
    cut_error[last] += (primal_obj_b - 0.5 * sprod_nn(w_b, w_b, sm->sizePsi));
    cut_error[last] -= (primal_obj - 0.5 * sprod_nn(w, w, sm->sizePsi));

    gammaG0 = (double *) realloc(gammaG0, sizeof(double) * size_active);
    assert(gammaG0 != NULL);

    /* update Gram matrix: grow by one row and one column */
    G = (double **) realloc(G, sizeof(double *) * size_active);
    assert(G != NULL);
    G[last] = NULL;
    for (j = 0; j < size_active; j++) {
      G[j] = (double *) realloc(G[j], sizeof(double) * size_active);
      assert(G[j] != NULL);
    }
    for (j = 0; j < last; j++) {
      G[last][j] = sprod_ss(dXc[last]->fvec, dXc[j]->fvec);
      G[j][last] = G[last][j];
    }
    G[last][last] = sprod_ss(dXc[last]->fvec, dXc[last]->fvec);

    /* update gammaG0: only the new entry is needed while the centre has not
       moved; after a serious step every entry is recomputed */
    if (null_step == 1) {
      gammaG0[last] = sprod_ns(w_b, dXc[last]->fvec);
    } else {
      for (i = 0; i < size_active; i++) {
        gammaG0[i] = sprod_ns(w_b, dXc[i]->fvec);
      }
    }

    /* update proximal_rhs */
    for (i = 0; i < size_active; i++) {
      proximal_rhs[i] = (1 + rho) * delta[i] - rho * gammaG0[i];
    }

    /* solve QP to update alpha */
    if (size_active > 1) {
      if (svmModel != NULL)
        free_model(svmModel, 0);
      svmModel = (MODEL *) my_malloc(sizeof(MODEL));
      svm_learn_optimization(dXc, proximal_rhs, size_active, sm->sizePsi, &lparm, &kparm, NULL, svmModel, alpha);
    } else {
      /* a single constraint is not sent to the solver */
      assert(size_active == 1);
      alpha[0] = C;
    }

    clear_nvector(w, sm->sizePsi);
    for (j = 0; j < size_active; j++) {
      if (alpha[j] > C * ALPHA_THRESHOLD) {
        add_vector_ns(w, dXc[j]->fvec, alpha[j] / (1 + rho));
      }
    }

    /* compute dual obj */
    dual_obj = +0.5 * (1 + rho) * sprod_nn(w, w, sm->sizePsi);
    for (j = 0; j < size_active; j++) {
      dual_obj -= proximal_rhs[j] / (1 + rho) * alpha[j];
    }

    z_k_norm = sqrt(sprod_nn(w, w, sm->sizePsi));

    /* candidate point: w += rho/(1+rho) * w_b */
    add_vector_nn(w, w_b, sm->sizePsi, rho / (1 + rho));

    /* detect if step size too small */
    sigma_k = 0;
    alphasum = 0;
    for (j = 0; j < size_active; j++) {
      sigma_k += alpha[j] * cut_error[j];
      alphasum += alpha[j];
    }
    sigma_k /= C;
    gTd = -C * (sprod_ns(w, new_constraint) - sprod_ns(w_b, new_constraint));

#if (DEBUG_LEVEL > 0)
    for (j=0;j<size_active;j++) {
      printf("alpha[%d]: %.8g, cut_error[%d]: %.8g\n", j, alpha[j], j, cut_error[j]);
    }
    printf("sigma_k: %.8g\n", sigma_k);
    printf("alphasum: %.8g\n", alphasum);
    printf("g^T d: %.8g\n", gTd);
    fflush(stdout);
#endif

    /* update cleanup information */
    for (j = 0; j < size_active; j++) {
      if (alpha[j] < ALPHA_THRESHOLD * C) {
        idle[j]++;
      } else {
        idle[j] = 0;
      }
    }

    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm);
    value = margin - sprod_ns(w, new_constraint);

    /* print primal objective */
    primal_obj = 0.5 * sprod_nn(w, w, sm->sizePsi) + C * value;

#if (DEBUG_LEVEL > 0)
    printf("ITER PRIMAL_OBJ %.4f\n", primal_obj);
    fflush(stdout);
#endif

    temp_var = sprod_nn(w_b, w_b, sm->sizePsi);
    proximal_term = 0.0;
    for (i = 1; i < sm->sizePsi + 1; i++) {
      proximal_term += (w[i] - w_b[i]) * (w[i] - w_b[i]);
    }

    reg_master_obj = -dual_obj + 0.5 * rho * temp_var / (1 + rho);
    expected_descent = reg_master_obj - primal_obj_b;

    v_k = (reg_master_obj - proximal_term * rho / 2) - primal_obj_b;

    primal_lower_bound = MAX(primal_lower_bound, reg_master_obj - 0.5 * rho * (1 + rho) * proximal_term);

#if (DEBUG_LEVEL > 0)
    printf("ITER REG_MASTER_OBJ: %.4f\n", reg_master_obj);
    printf("ITER EXPECTED_DESCENT: %.4f\n", expected_descent);
    printf("ITER PRIMLA_OBJ_B: %.4f\n", primal_obj_b);
    printf("ITER RHO: %.4f\n", rho);
    printf("ITER ||w-w_b||^2: %.4f\n", proximal_term);
    printf("ITER PRIMAL_LOWER_BOUND: %.4f\n", primal_lower_bound);
    printf("ITER V_K: %.4f\n", v_k);
#endif

    obj_difference = primal_obj - primal_obj_b;

    if (primal_obj < primal_obj_b + kappa * expected_descent) {
      /* extra condition to be met */
      if ((gTd > m2 * v_k) || (rho < min_rho + 1E-8)) {
        /* loop-invariant parts of the cut_error shift, computed once;
           the per-element order of operations is unchanged */
        double centre_offset = primal_obj_b - 0.5 * sprod_nn(w_b, w_b, sm->sizePsi);
        double candidate_offset = primal_obj - 0.5 * sprod_nn(w, w, sm->sizePsi);
#if (DEBUG_LEVEL > 0)
        printf("SERIOUS STEP\n");
#endif
        /* update cut_error: re-express every cut relative to the new centre */
        for (i = 0; i < size_active; i++) {
          cut_error[i] -= centre_offset;
          cut_error[i] -= C * sprod_ns(w_b, dXc[i]->fvec);
          cut_error[i] += candidate_offset;
          cut_error[i] += C * sprod_ns(w, dXc[i]->fvec);
        }
        primal_obj_b = primal_obj;
        for (i = 1; i < sm->sizePsi + 1; i++) {
          w_b[i] = w[i];
        }
        null_step = 0;
        serious_counter++;
      } else {
        /* increase step size */
#if (DEBUG_LEVEL > 0)
        printf("NULL STEP: SS(ii) FAILS.\n");
#endif
        serious_counter--;
        rho = MAX(rho / 10, min_rho);
      }
    } else { /* no sufficient decrease */
      serious_counter--;
      if ((cut_error[size_active - 1] > m3 * last_sigma_k) && (fabs(obj_difference) > last_z_k_norm + last_sigma_k)) {
#if (DEBUG_LEVEL > 0)
        printf("NULL STEP: NS(ii) FAILS.\n");
#endif
        rho = MIN(10 * rho, max_rho);
      }
#if (DEBUG_LEVEL > 0)
      else printf("NULL STEP\n");
#endif
    }

    /* update last_sigma_k */
    last_sigma_k = sigma_k;
    last_z_k_norm = z_k_norm;

    /* break away from while loop if more than certain proportional decrease
       in primal objective */
    if (primal_obj_b / initial_primal_obj < 1 - decrease_proportion) {
      suff_decrease_cond = 1;
    }

    /* clean up */
    if (iter % CLEANUP_CHECK == 0) {
      size_active = resize_cleanup(size_active, &idle, &alpha, &delta, &gammaG0, &proximal_rhs, &G, &dXc, &cut_error);
    }
  } /* end cutting plane while loop */

  printf(" Inner loop optimization finished.\n");
  fflush(stdout);

  /* free memory */
  /* The model from the last QP solve was never released; use the same
     non-deep call as inside the loop. */
  if (svmModel != NULL) {
    free_model(svmModel, 0);
    svmModel = NULL;
  }
  /* NOTE(review): deep=0 as in the original; whether the constraint vectors
     stored in fvec are released elsewhere is not visible here -- confirm. */
  for (j = 0; j < size_active; j++) {
    free(G[j]);
    free_example(dXc[j], 0);
  }
  free(G);
  free(dXc);
  free(alpha);
  free(delta);
  free_svector(new_constraint);   /* the last plane was never added to dXc */
  free(idle);
  free(gammaG0);
  free(proximal_rhs);
  free(cut_error);

  /* copy and free */
  for (i = 1; i < sm->sizePsi + 1; i++) {
    w[i] = w_b[i];
  }
  free(w_b);

  return (primal_obj_b);
}