/*
 * Fill one row of the linear-term matrix handed to mosek_qp_optimize_dual.
 * Entry k is minus the weight that returnWeightAtIndex() reports for feature
 * index 2*num_phi+k+1 of the constraint's first feature vector.
 */
static void fill_dual_qmatrix_row(double *row, SVECTOR *fvec, long num_phi)
{
  long k;

  for (k = 0; k < num_phi; k++) {
    row[k] = (-1)*returnWeightAtIndex(fvec->words, (num_phi*2+k+1));
  }
}

/*
 * cutting_plane_algorithm_dual
 *
 * One-slack cutting-plane loop.  Each iteration appends the constraint
 * returned by find_cutting_plane() to the active set, extends the Gram
 * matrix, solves the QP with mosek_qp_optimize_dual() and rebuilds w from
 * the returned multipliers.  The loop stops when
 *     margin - <w, new_constraint>  <=  threshold + epsilon
 * or after MAX_ITER iterations; threshold is the largest slack among the
 * active constraints (0 while there is only one).
 *
 * Parameters
 *   w              weight vector, indexed from 1; cleared and overwritten.
 *   m              passed through to find_cutting_plane().
 *   MAX_ITER       upper bound on the number of iterations.
 *   C              regularisation constant given to the QP solver.
 *   epsilon        stopping tolerance on the constraint violation.
 *   fycache, ex, sm, valid_examples
 *                  passed through to find_cutting_plane(); sm->sizePsi is
 *                  the length used when clearing w.
 *   sparm          supplies phi1_size/phi2_size; gram_regularization is
 *                  multiplied by 10 each time the solver returns 1295.
 *
 * Side effects: prints progress to stdout; calls exit(1) when the solver
 * returns a non-zero code outside 1293..1296.
 */
void cutting_plane_algorithm_dual(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, int *valid_examples)
{
  long i, j;
  /* Number of extra dual variables; read once on the assumption that the phi
     sizes do not change while this function runs. */
  long num_phi = sparm->phi1_size + sparm->phi2_size;
  double *alpha = NULL;
  DOC **dXc = NULL;        /* constraint matrix */
  double *delta = NULL;    /* rhs of constraints */
  SVECTOR *new_constraint;
  int iter = 0;
  int size_active = 0;
  double value;
  double threshold = 0.0;
  double margin;
  double cur_obj;
  double *cur_slack = NULL;
  int mv_iter;
  int *idle = NULL;
  double **G = NULL;
  double **qmatrix = NULL;
  SVECTOR *f;
  int r;

  printf("Running structural SVM solver: "); fflush(stdout);

  new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
  value = margin - sprod_ns(w, new_constraint);

  while ((value > threshold + epsilon) && (iter < MAX_ITER)) {
    iter += 1;
    size_active += 1;

    printf("."); fflush(stdout);

    /* add constraint */
    dXc = (DOC **) realloc(dXc, sizeof(DOC *)*size_active);
    assert(dXc != NULL);
    dXc[size_active-1] = (DOC *) malloc(sizeof(DOC));
    assert(dXc[size_active-1] != NULL);
    dXc[size_active-1]->fvec = new_constraint;
    dXc[size_active-1]->slackid = 1; /* only one common slackid (one-slack) */
    dXc[size_active-1]->costfactor = 1.0;

    delta = (double *) realloc(delta, sizeof(double)*size_active);
    assert(delta != NULL);
    delta[size_active-1] = margin;

    /* alpha is released at the end of every iteration, so this is a fresh
       buffer; only the slot of the new constraint is initialised here and
       the solver is expected to write the rest. */
    alpha = (double *) realloc(alpha, sizeof(double)*(size_active+num_phi));
    assert(alpha != NULL);
    alpha[size_active-1] = 0.0;

    idle = (int *) realloc(idle, sizeof(int)*size_active);
    assert(idle != NULL);
    idle[size_active-1] = 0;

    qmatrix = (double **) realloc(qmatrix, sizeof(double *)*size_active);
    assert(qmatrix != NULL);
    qmatrix[size_active-1] = malloc(sizeof(double)*num_phi);
    assert(qmatrix[size_active-1] != NULL || num_phi == 0);
    fill_dual_qmatrix_row(qmatrix[size_active-1], dXc[size_active-1]->fvec, num_phi);

    /* update Gram matrix: grow every row by one column, then fill the new
       row/column */
    G = (double **) realloc(G, sizeof(double *)*size_active);
    assert(G != NULL);
    G[size_active-1] = NULL;
    for (j = 0; j < size_active; j++) {
      G[j] = (double *) realloc(G[j], sizeof(double)*size_active);
      assert(G[j] != NULL);
    }
    for (j = 0; j < size_active-1; j++) {
      G[size_active-1][j] = sprod_ss(dXc[size_active-1]->fvec, dXc[j]->fvec);
      /* off-diagonal entries are stored halved */
      G[size_active-1][j] = G[size_active-1][j]/2;
      G[j][size_active-1] = G[size_active-1][j];
    }
    G[size_active-1][size_active-1] = sprod_ss(dXc[size_active-1]->fvec, dXc[size_active-1]->fvec);

    /* hack: add a constant to the diagonal to make sure G is PSD */
    G[size_active-1][size_active-1] += 1e-6;

    /* solve QP to update alpha */
    r = mosek_qp_optimize_dual(G, qmatrix, delta, alpha, (long) size_active, (long) num_phi, C, &cur_obj, 0, 0);

    if (r >= 1293 && r <= 1296) {
      printf("r:%d. G might not be psd due to numerical errors.\n",r);
      fflush(stdout);
      /* Only code 1295 is retried: raise the diagonal so that the total added
         regularisation is ten times the previous value, then re-solve. */
      while (r == 1295) {
        printf("r:%d. G might not be psd due to numerical errors. Gram Reg=%0.7f\n",r, sparm->gram_regularization);
        fflush(stdout);
        for (i = 0; i < size_active; i++) {
          G[i][i] += 10*sparm->gram_regularization-sparm->gram_regularization;
        }
        sparm->gram_regularization *= 10;
        r = mosek_qp_optimize_dual(G, qmatrix, delta, alpha, (long) size_active, (long) num_phi, C, &cur_obj, sparm->gram_regularization, sparm->gram_regularization*0.1);
      }
    }
    else if (r) {
      printf("Error %d in mosek_qp_optimize: Check ${MOSEKHOME}/${VERSION}/tools/platform/${PLATFORM}/h/mosek.h\n",r);
      exit(1);
    }

    /* rebuild w from the active constraints and refresh the idle counters */
    clear_nvector(w,sm->sizePsi);
    for (j = 0; j < size_active; j++) {
      if (alpha[j] > C*ALPHA_THRESHOLD) {
        add_vector_ns(w,dXc[j]->fvec,alpha[j]);
        idle[j] = 0;
      }
      else {
        idle[j]++;
      }
    }

    /* the trailing num_phi multipliers are subtracted from the third block
       of w (indices 2*num_phi+1 .. 3*num_phi) */
    for (j = 0; j < num_phi; j++) {
      if (alpha[size_active+j] > EQUALITY_EPSILON) {
        w[j+1+num_phi*2] = w[j+1+num_phi*2] - alpha[size_active+j];
      }
    }

    /* snap near-zero weights to exactly zero */
    for (j = 1; j <= num_phi*3; j++) {
      if ((w[j] < EQUALITY_EPSILON) && (w[j] > (-1*EQUALITY_EPSILON))) {
        w[j] = 0;
      }
    }

    /* report positive weights in the third block */
    for (j = num_phi*2+1; j <= num_phi*3; j++) {
      if (w[j] > 0) {
        printf("j = %ld, w[j] = %0.6f\n", j, w[j]);
        fflush(stdout);
      }
    }

    /* slack of every active constraint under the new w */
    cur_slack = (double *) realloc(cur_slack,sizeof(double)*size_active);
    assert(cur_slack != NULL);
    for (i = 0; i < size_active; i++) {
      cur_slack[i] = 0.0;
      for (f = dXc[i]->fvec; f; f = f->next) {
        j = 0;
        while (f->words[j].wnum) {   /* word list ends at wnum == 0 */
          cur_slack[i] += w[f->words[j].wnum]*f->words[j].weight;
          j++;
        }
      }
      if (cur_slack[i] >= delta[i]) {
        cur_slack[i] = 0.0;
      }
      else {
        cur_slack[i] = delta[i]-cur_slack[i];
      }
    }

    /* most violated active constraint (last one wins on ties) */
    mv_iter = 0;
    if (size_active > 1) {
      for (j = 0; j < size_active; j++) {
        if (cur_slack[j] >= cur_slack[mv_iter]) {
          mv_iter = j;
        }
      }
    }

    if (size_active > 1) {
      threshold = cur_slack[mv_iter];
    }
    else {
      threshold = 0.0;
    }

    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
    value = margin - sprod_ns(w, new_constraint);

    if ((iter % CLEANUP_CHECK) == 0) {
      int old_size = size_active;

      printf("+"); fflush(stdout);
      size_active = resize_cleanup(size_active, &idle, &alpha, &delta, &dXc, &G, &mv_iter);

      /* resize_cleanup does not receive qmatrix, so bring it back in step
         with the surviving constraints: release the rows that fell off the
         end and recompute the remaining ones from dXc.
         NOTE(review): assumes resize_cleanup compacts dXc in place so that
         dXc[0..size_active-1] are the surviving constraints - confirm. */
      for (j = size_active; j < old_size; j++) {
        free(qmatrix[j]);
        qmatrix[j] = NULL;
      }
      for (j = 0; j < size_active; j++) {
        fill_dual_qmatrix_row(qmatrix[j], dXc[j]->fvec, num_phi);
      }
    }

    free(alpha);
    alpha = NULL;

  } /* end cutting plane while loop */

  printf(" Inner loop optimization finished.\n"); fflush(stdout);

  /* free memory */
  for (j = 0; j < size_active; j++) {
    free(G[j]);
    free(qmatrix[j]);
    free_example(dXc[j],1);
  }
  free(G);
  free(qmatrix);
  free(dXc);
  free(alpha);
  free(delta);
  free_svector(new_constraint);
  free(cur_slack);
  free(idle);

  return;
}
/*
 * cutting_plane_algorithm (online / chunked variant)
 *
 * Proximal cutting-plane inner loop.  Starting from the stability centre
 * w_b = w, each iteration adds the constraint returned by
 * find_cutting_plane(), solves the QP with mosek_qp_optimize(), forms the
 * candidate
 *     w = sum_j alpha[j]/(1+rho) * constraint_j  +  rho/(1+rho) * w_b
 * and then either accepts it as the new centre ("serious step") or keeps
 * the centre and adapts rho ("null step").
 *
 * The loop ends when one of these holds:
 *   - primal_obj_b / initial_primal_obj < 1 - decrease_proportion (0.2),
 *   - expected_descent >= -epsilon,
 *   - iter reaches MAX_ITER.
 *
 * Objective used here:
 *     0.5*||w||^2 + C*(margin2 - <w,c>) + Cdash*(margin - <w_prev,c>)/numChunks
 * where c is the current cutting plane and margin/margin2 are the two
 * values filled in by find_cutting_plane().
 *
 * Parameters
 *   w          weight vector, indexed 1..sm->sizePsi; on return it holds the
 *              final stability centre.
 *   w_prev     weights used for the Cdash term; only read.
 *   MAX_ITER, C, epsilon, Cdash, numChunks
 *              as described above.
 *   remaining arguments are forwarded unchanged to find_cutting_plane().
 *
 * Returns the objective value at the final stability centre.
 */
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, char *tmpdir, char * trainfile, double frac_sim, double Fweight, char *dataset_stats_file, double rho_admm, long isExhaustive, long isLPrelaxation, double Cdash, int datasetStartIdx, int chunkSz, int eid, int chunkid, double *w_prev, int numChunks)
{
  long i,j;
  double *alpha;
  double **G; /* Gram matrix */
  DOC **dXc; /* constraint matrix */
  double *delta; /* rhs of constraints */
  SVECTOR *new_constraint;
  double dual_obj, alphasum;
  int iter, size_active;
  double value;
  int r;
  int *idle; /* for cleaning up */
  double margin;
  double margin2;
  double primal_obj;
  double *proximal_rhs;
  double *gammaG0=NULL;
  double min_rho = 0.001;
  double max_rho;
  double serious_counter=0;
  double rho = 1.0; /* temporarily set it to 1 first */

  double expected_descent, primal_obj_b=-1, reg_master_obj;
  int null_step=1;
  double *w_b;
  double kappa=0.1;
  double temp_var;
  double proximal_term, primal_lower_bound;

  double v_k;
  double obj_difference;
  double *cut_error; /* cut_error[i] = alpha_{k,i} at current center x_k */
  double sigma_k;
  double m2 = 0.2;
  double m3 = 0.9;
  double gTd;
  double last_sigma_k=0;

  double initial_primal_obj;
  int suff_decrease_cond=0;
  double decrease_proportion = 0.2; /* start from 0.2 first */

  double z_k_norm;
  double last_z_k_norm=0;

  time_t iter_start, iter_end;

  w_b = create_nvector(sm->sizePsi);
  clear_nvector(w_b,sm->sizePsi);
  /* warm start */
  for (i=1;i<sm->sizePsi+1;i++) {
    w_b[i] = w[i];
  }

  iter = 0;
  size_active = 0;
  alpha = NULL;
  G = NULL;
  dXc = NULL;
  delta = NULL;
  idle = NULL;

  proximal_rhs = NULL;
  cut_error = NULL;

  printf("ITER 0 \n(before cutting plane) \n");
  new_constraint = find_cutting_plane (ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile, frac_sim, Fweight, dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation, &margin2, datasetStartIdx, chunkSz, eid, chunkid);
  value = margin2 - sprod_ns(w, new_constraint);

  /* online learning: constant addition to the loss, the model score under
     w_prev ('-' because the terms are reversed in the code) */
  margin -= sprod_ns(w_prev, new_constraint);

  /* objective combining both loss terms */
  primal_obj_b = 0.5*sprod_nn(w_b,w_b,sm->sizePsi)+C*value + Cdash*margin/numChunks;
  primal_obj = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value + Cdash*margin/numChunks;
  primal_lower_bound = 0;
  expected_descent = -primal_obj_b;
  initial_primal_obj = primal_obj_b;

  max_rho = C;

  printf("Running CCCP inner loop solver: \n"); fflush(stdout);

  while ((!suff_decrease_cond)&&(expected_descent<-epsilon)&&(iter<MAX_ITER)) {
    time_t mosek_start, mosek_end;

    iter+=1;
    size_active+=1;

    time(&iter_start);

#if (DEBUG_LEVEL>0)
    printf("ITER %d\n", iter);
#endif
    printf("."); fflush(stdout);

    /* add constraint */
    dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active);
    assert(dXc!=NULL);
    dXc[size_active-1] = (DOC*)malloc(sizeof(DOC));
    assert(dXc[size_active-1]!=NULL);
    dXc[size_active-1]->fvec = new_constraint;
    dXc[size_active-1]->slackid = 1; /* only one common slackid (one-slack) */
    dXc[size_active-1]->costfactor = 1.0;

    delta = (double*)realloc(delta, sizeof(double)*size_active);
    assert(delta!=NULL);
    delta[size_active-1] = margin2; /* rhs uses margin2, not margin */
    alpha = (double*)realloc(alpha, sizeof(double)*size_active);
    assert(alpha!=NULL);
    alpha[size_active-1] = 0.0;
    idle = (int*)realloc(idle, sizeof(int)*size_active);
    assert(idle!=NULL);
    idle[size_active-1] = 0;
    /* proximal point */
    proximal_rhs = (double*)realloc(proximal_rhs, sizeof(double)*size_active);
    assert(proximal_rhs!=NULL);
    cut_error = (double*)realloc(cut_error, sizeof(double)*size_active);
    assert(cut_error!=NULL);
    /* note g_i = - new_constraint */
    cut_error[size_active-1] = C*(sprod_ns(w_b, new_constraint) - sprod_ns(w, new_constraint));
    cut_error[size_active-1] += (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi));
    cut_error[size_active-1] -= (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi));

    gammaG0 = (double*)realloc(gammaG0, sizeof(double)*size_active);
    assert(gammaG0!=NULL);

    /* update Gram matrix */
    G = (double**)realloc(G, sizeof(double*)*size_active);
    assert(G!=NULL);
    G[size_active-1] = NULL;
    for (j=0;j<size_active;j++) {
      G[j] = (double*)realloc(G[j], sizeof(double)*size_active);
      assert(G[j]!=NULL);
    }
    for (j=0;j<size_active-1;j++) {
      G[size_active-1][j] = sprod_ss(dXc[size_active-1]->fvec, dXc[j]->fvec);
      G[j][size_active-1] = G[size_active-1][j];
    }
    G[size_active-1][size_active-1] = sprod_ss(dXc[size_active-1]->fvec,dXc[size_active-1]->fvec);

    /* update gammaG0: while no serious step has happened w_b is unchanged,
       so only the new entry is needed; afterwards all entries are recomputed */
    if (null_step==1) {
      gammaG0[size_active-1] = sprod_ns(w_b, dXc[size_active-1]->fvec);
    } else {
      for (i=0;i<size_active;i++) {
        gammaG0[i] = sprod_ns(w_b, dXc[i]->fvec);
      }
    }

    /* update proximal_rhs */
    for (i=0;i<size_active;i++) {
      proximal_rhs[i] = delta[i] - rho/(1+rho)*gammaG0[i];
    }

    /* solve QP to update alpha */
    dual_obj = 0;
    time(&mosek_start);
    /* NOTE(review): the solver's return code is stored but never checked. */
    r = mosek_qp_optimize(G, proximal_rhs, alpha, (long) size_active, C, &dual_obj,rho);
    time(&mosek_end);

#if(DEBUG_LEVEL == 1)
    print_time(mosek_start, mosek_end, "Mosek solver");
#endif

    clear_nvector(w,sm->sizePsi);
    for (j=0;j<size_active;j++) {
      if (alpha[j]>C*ALPHA_THRESHOLD) {
        add_vector_ns(w,dXc[j]->fvec,alpha[j]/(1+rho));
      }
    }

    /* norm of the aggregated constraint part, taken before w_b is mixed in */
    z_k_norm = sqrt(sprod_nn(w,w,sm->sizePsi));

    add_vector_nn(w, w_b, sm->sizePsi, rho/(1+rho));

    /* detect if step size too small */
    sigma_k = 0;
    alphasum = 0;
    for (j=0;j<size_active;j++) {
      sigma_k += alpha[j]*cut_error[j];
      alphasum+=alpha[j];
    }
    sigma_k/=C;
    gTd = -C*(sprod_ns(w,new_constraint) - sprod_ns(w_b,new_constraint));

#if (DEBUG_LEVEL>0)
    for (j=0;j<size_active;j++) {
      /* j is long, so it must be printed with %ld */
      printf("alpha[%ld]: %.8g, cut_error[%ld]: %.8g\n", j, alpha[j], j, cut_error[j]);
    }
    printf("sigma_k: %.8g\n", sigma_k);
    printf("alphasum: %.8g\n", alphasum);
    printf("g^T d: %.8g\n", gTd);
    fflush(stdout);
#endif

    /* update cleanup information */
    for (j=0;j<size_active;j++) {
      if (alpha[j]<ALPHA_THRESHOLD*C) {
        idle[j]++;
      } else {
        idle[j]=0;
      }
    }

    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile, frac_sim, Fweight, dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation, &margin2, datasetStartIdx, chunkSz, eid, chunkid);
    value = margin2 - sprod_ns(w, new_constraint);

    /* online learning: constant addition to the loss, the model score under
       w_prev ('-' because the terms are reversed in the code) */
    margin -= sprod_ns(w_prev, new_constraint);

    /* print primal objective */
    primal_obj = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value + Cdash*margin/numChunks;

#if (DEBUG_LEVEL>0)
    printf("ITER PRIMAL_OBJ %.4f\n", primal_obj); fflush(stdout);
#endif

    temp_var = sprod_nn(w_b,w_b,sm->sizePsi);
    proximal_term = 0.0;
    for (i=1;i<sm->sizePsi+1;i++) {
      proximal_term += (w[i]-w_b[i])*(w[i]-w_b[i]);
    }

    reg_master_obj = -dual_obj+0.5*rho*temp_var/(1+rho);
    expected_descent = reg_master_obj - primal_obj_b;

    v_k = (reg_master_obj - proximal_term*rho/2) - primal_obj_b;

    primal_lower_bound = MAX(primal_lower_bound, reg_master_obj - 0.5*rho*(1+rho)*proximal_term);

#if (DEBUG_LEVEL>0)
    printf("ITER REG_MASTER_OBJ: %.4f\n", reg_master_obj);
    printf("ITER EXPECTED_DESCENT: %.4f\n", expected_descent);
    printf("ITER PRIMLA_OBJ_B: %.4f\n", primal_obj_b);
    printf("ITER RHO: %.4f\n", rho);
    printf("ITER ||w-w_b||^2: %.4f\n", proximal_term);
    printf("ITER PRIMAL_LOWER_BOUND: %.4f\n", primal_lower_bound);
    printf("ITER V_K: %.4f\n", v_k);
#endif
    obj_difference = primal_obj - primal_obj_b;

    if (primal_obj<primal_obj_b+kappa*expected_descent) {
      /* extra condition to be met */
      if ((gTd>m2*v_k)||(rho<min_rho+1E-8)) {
        /* both terms are the same for every i, so evaluate them once */
        double center_loss = primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi);
        double candidate_loss = primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi);
#if (DEBUG_LEVEL>0)
        printf("SERIOUS STEP\n");
#endif
        /* update cut_error: re-express every linearisation error relative
           to the new centre w */
        for (i=0;i<size_active;i++) {
          cut_error[i] -= center_loss;
          cut_error[i] -= C*sprod_ns(w_b, dXc[i]->fvec);
          cut_error[i] += candidate_loss;
          cut_error[i] += C*sprod_ns(w, dXc[i]->fvec);
        }
        primal_obj_b = primal_obj;
        for (i=1;i<sm->sizePsi+1;i++) {
          w_b[i] = w[i];
        }
        null_step = 0;
        serious_counter++;
      } else {
        /* increase step size */
#if (DEBUG_LEVEL>0)
        printf("NULL STEP: SS(ii) FAILS.\n");
#endif
        serious_counter--;
        rho = MAX(rho/10,min_rho);
      }
    } else { /* no sufficient decrease */
      serious_counter--;
      if ((cut_error[size_active-1]>m3*last_sigma_k)&&(fabs(obj_difference)>last_z_k_norm+last_sigma_k)) {
#if (DEBUG_LEVEL>0)
        printf("NULL STEP: NS(ii) FAILS.\n");
#endif
        rho = MIN(10*rho,max_rho);
      }
#if (DEBUG_LEVEL>0)
      else printf("NULL STEP\n");
#endif
    }
    /* update last_sigma_k */
    last_sigma_k = sigma_k;
    last_z_k_norm = z_k_norm;

    /* break away from while loop if more than certain proportional decrease
       in primal objective */
    if (primal_obj_b/initial_primal_obj<1-decrease_proportion) {
      suff_decrease_cond = 1;
    }

    /* clean up */
    if (iter % CLEANUP_CHECK == 0) {
      /* NOTE(review): the arrays are passed by value here, so any
         reallocation done inside resize_cleanup cannot update the local
         pointers - confirm against resize_cleanup's definition. */
      size_active = resize_cleanup(size_active, idle, alpha, delta, gammaG0, proximal_rhs, G, dXc, cut_error);
    }

    time(&iter_end);

#if (DEBUG_LEVEL==1)
    char msg[20];
    snprintf(msg, sizeof msg, "ITER %d", iter);
    print_time(iter_start, iter_end, msg);
#endif
  } /* end cutting plane while loop */

  printf(" Inner loop optimization finished.\n"); fflush(stdout);

  /* free memory */
  /* NOTE(review): free_example is called with 0 as its second argument;
     check whether the constraints' feature vectors need a separate release. */
  for (j=0;j<size_active;j++) {
    free(G[j]);
    free_example(dXc[j],0);
  }
  free(G);
  free(dXc);
  free(alpha);
  free(delta);
  free_svector(new_constraint);
  free(idle);
  free(gammaG0);
  free(proximal_rhs);
  free(cut_error);

  /* copy and free */
  for (i=1;i<sm->sizePsi+1;i++) {
    w[i] = w_b[i];
  }
  free(w_b);

  return(primal_obj_b);
}
/*
 * cutting_plane_algorithm
 *
 * Proximal cutting-plane inner loop.  Starting from the stability centre
 * w_b = w, each iteration adds the constraint returned by
 * find_cutting_plane(), obtains the multipliers alpha from
 * svm_learn_optimization() (or sets alpha[0] = C while there is a single
 * constraint), forms the candidate
 *     w = sum_j alpha[j]/(1+rho) * constraint_j  +  rho/(1+rho) * w_b
 * and then either accepts it as the new centre ("serious step") or keeps
 * the centre and adapts rho ("null step").
 *
 * The loop ends when one of these holds:
 *   - primal_obj_b / initial_primal_obj < 1 - decrease_proportion (0.2),
 *   - expected_descent >= -epsilon,
 *   - iter reaches MAX_ITER.
 *
 * Parameters
 *   w         weight vector, indexed 1..sm->sizePsi; on return it holds the
 *             final stability centre.
 *   MAX_ITER  iteration cap.
 *   C         slack weight; objective is 0.5*||w||^2 + C*(margin - <w,c>).
 *   epsilon   stopping tolerance; also used for lparm.eps/epsilon_crit.
 *   m, fycache, ex, sm, sparm
 *             forwarded to find_cutting_plane().
 *
 * Returns the objective value at the final stability centre.
 */
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm)
{
  long i, j;
  double *alpha;
  double **G; /* Gram matrix */
  DOC **dXc; /* constraint matrix */
  double *delta; /* rhs of constraints */
  SVECTOR *new_constraint;
  double dual_obj, alphasum;
  int iter, size_active;
  double value;
  int *idle; /* for cleaning up */
  double margin;
  double primal_obj;
  double *proximal_rhs;
  double *gammaG0 = NULL;
  double min_rho = 0.001;
  double max_rho;
  double serious_counter = 0;
  double rho = 1.0; /* temporarily set it to 1 first */

  double expected_descent, primal_obj_b = -1, reg_master_obj;
  int null_step = 1;
  double *w_b;
  double kappa = 0.1;
  double temp_var;
  double proximal_term, primal_lower_bound;

  double v_k;
  double obj_difference;
  double *cut_error; /* cut_error[i] = alpha_{k,i} at current center x_k */
  double sigma_k;
  double m2 = 0.2;
  double m3 = 0.9;
  double gTd;
  double last_sigma_k = 0;

  double initial_primal_obj;
  int suff_decrease_cond = 0;
  double decrease_proportion = 0.2; /* start from 0.2 first */

  double z_k_norm;
  double last_z_k_norm = 0;

  /* set parameters for hideo solver */
  LEARN_PARM lparm;
  KERNEL_PARM kparm;
  MODEL *svmModel = NULL;
  lparm.biased_hyperplane = 0;
  lparm.epsilon_crit = MIN(epsilon, 0.001);
  lparm.svm_c = C;
  lparm.sharedslack = 1;
  kparm.kernel_type = LINEAR;

  lparm.remove_inconsistent = 0;
  lparm.skip_final_opt_check = 0;
  lparm.svm_maxqpsize = 10;
  lparm.svm_newvarsinqp = 0;
  lparm.svm_iter_to_shrink = -9999;
  lparm.maxiter = 100000;
  lparm.kernel_cache_size = 40;
  lparm.eps = epsilon;
  lparm.transduction_posratio = -1.0;
  lparm.svm_costratio = 1.0;
  lparm.svm_costratio_unlab = 1.0;
  lparm.svm_unlabbound = 1E-5;
  lparm.epsilon_a = 1E-10; /* changed from 1e-15 */
  lparm.compute_loo = 0;
  lparm.rho = 1.0;
  lparm.xa_depth = 0;
  strcpy(lparm.alphafile, "");
  kparm.poly_degree = 3;
  kparm.rbf_gamma = 1.0;
  kparm.coef_lin = 1;
  kparm.coef_const = 1;
  strcpy(kparm.custom, "empty");

  w_b = create_nvector(sm->sizePsi);
  clear_nvector(w_b, sm->sizePsi);
  /* warm start */
  for (i = 1; i < sm->sizePsi + 1; i++) {
    w_b[i] = w[i];
  }

  iter = 0;
  size_active = 0;
  alpha = NULL;
  G = NULL;
  dXc = NULL;
  delta = NULL;
  idle = NULL;

  proximal_rhs = NULL;
  cut_error = NULL;

  new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm);
  value = margin - sprod_ns(w, new_constraint);

  primal_obj_b = 0.5 * sprod_nn(w_b, w_b, sm->sizePsi) + C * value;
  primal_obj = 0.5 * sprod_nn(w, w, sm->sizePsi) + C * value;
  primal_lower_bound = 0;
  expected_descent = -primal_obj_b;
  initial_primal_obj = primal_obj_b;

  max_rho = 100 * C; /* tree-edge loss not within 0-1 */

  printf("Running CCCP inner loop solver: "); fflush(stdout);

  while ((!suff_decrease_cond) && (expected_descent < -epsilon) && (iter < MAX_ITER)) {
    iter += 1;
    size_active += 1;

#if (DEBUG_LEVEL > 0)
    printf("ITER %d\n", iter);
#endif
    printf("."); fflush(stdout);

    /* add constraint */
    dXc = (DOC **) realloc(dXc, sizeof(DOC *) * size_active);
    assert(dXc != NULL);
    dXc[size_active - 1] = (DOC *) malloc(sizeof(DOC));
    assert(dXc[size_active - 1] != NULL);
    dXc[size_active - 1]->fvec = new_constraint;
    dXc[size_active - 1]->slackid = 1; /* only one common slackid (one-slack) */
    dXc[size_active - 1]->costfactor = 1.0;

    delta = (double *) realloc(delta, sizeof(double) * size_active);
    assert(delta != NULL);
    delta[size_active - 1] = margin;
    alpha = (double *) realloc(alpha, sizeof(double) * size_active);
    assert(alpha != NULL);
    alpha[size_active - 1] = 0.0;
    idle = (int *) realloc(idle, sizeof(int) * size_active);
    assert(idle != NULL);
    idle[size_active - 1] = 0;
    /* proximal point */
    proximal_rhs = (double *) realloc(proximal_rhs, sizeof(double) * size_active);
    assert(proximal_rhs != NULL);
    cut_error = (double *) realloc(cut_error, sizeof(double) * size_active);
    assert(cut_error != NULL);
    /* note g_i = - new_constraint */
    cut_error[size_active - 1] = C * (sprod_ns(w_b, new_constraint) - sprod_ns(w, new_constraint));
    cut_error[size_active - 1] += (primal_obj_b - 0.5 * sprod_nn(w_b, w_b, sm->sizePsi));
    cut_error[size_active - 1] -= (primal_obj - 0.5 * sprod_nn(w, w, sm->sizePsi));

    gammaG0 = (double *) realloc(gammaG0, sizeof(double) * size_active);
    assert(gammaG0 != NULL);

    /* update Gram matrix */
    G = (double **) realloc(G, sizeof(double *) * size_active);
    assert(G != NULL);
    G[size_active - 1] = NULL;
    for (j = 0; j < size_active; j++) {
      G[j] = (double *) realloc(G[j], sizeof(double) * size_active);
      assert(G[j] != NULL);
    }
    for (j = 0; j < size_active - 1; j++) {
      G[size_active - 1][j] = sprod_ss(dXc[size_active - 1]->fvec, dXc[j]->fvec);
      G[j][size_active - 1] = G[size_active - 1][j];
    }
    G[size_active - 1][size_active - 1] = sprod_ss(dXc[size_active - 1]->fvec, dXc[size_active - 1]->fvec);

    /* update gammaG0: while no serious step has happened w_b is unchanged,
       so only the new entry is needed; afterwards all entries are recomputed */
    if (null_step == 1) {
      gammaG0[size_active - 1] = sprod_ns(w_b, dXc[size_active - 1]->fvec);
    } else {
      for (i = 0; i < size_active; i++) {
        gammaG0[i] = sprod_ns(w_b, dXc[i]->fvec);
      }
    }

    /* update proximal_rhs */
    for (i = 0; i < size_active; i++) {
      proximal_rhs[i] = (1 + rho) * delta[i] - rho * gammaG0[i];
    }

    /* solve QP to update alpha */
    if (size_active > 1) {
      /* the model from the previous iteration is no longer needed */
      if (svmModel != NULL) free_model(svmModel, 0);
      svmModel = (MODEL *) my_malloc(sizeof(MODEL));
      svm_learn_optimization(dXc, proximal_rhs, size_active, sm->sizePsi, &lparm, &kparm, NULL, svmModel, alpha);
    } else {
      assert(size_active == 1);
      alpha[0] = C;
    }

    clear_nvector(w, sm->sizePsi);
    for (j = 0; j < size_active; j++) {
      if (alpha[j] > C * ALPHA_THRESHOLD) {
        add_vector_ns(w, dXc[j]->fvec, alpha[j] / (1 + rho));
      }
    }

    /* compute dual obj (w holds only the aggregated constraint part here) */
    dual_obj = +0.5 * (1 + rho) * sprod_nn(w, w, sm->sizePsi);
    for (j = 0; j < size_active; j++) {
      dual_obj -= proximal_rhs[j] / (1 + rho) * alpha[j];
    }

    /* norm of the aggregated constraint part, taken before w_b is mixed in */
    z_k_norm = sqrt(sprod_nn(w, w, sm->sizePsi));

    add_vector_nn(w, w_b, sm->sizePsi, rho / (1 + rho));

    /* detect if step size too small */
    sigma_k = 0;
    alphasum = 0;
    for (j = 0; j < size_active; j++) {
      sigma_k += alpha[j] * cut_error[j];
      alphasum += alpha[j];
    }
    sigma_k /= C;
    gTd = -C * (sprod_ns(w, new_constraint) - sprod_ns(w_b, new_constraint));

#if (DEBUG_LEVEL > 0)
    for (j = 0; j < size_active; j++) {
      /* j is long, so it must be printed with %ld */
      printf("alpha[%ld]: %.8g, cut_error[%ld]: %.8g\n", j, alpha[j], j, cut_error[j]);
    }
    printf("sigma_k: %.8g\n", sigma_k);
    printf("alphasum: %.8g\n", alphasum);
    printf("g^T d: %.8g\n", gTd);
    fflush(stdout);
#endif

    /* update cleanup information */
    for (j = 0; j < size_active; j++) {
      if (alpha[j] < ALPHA_THRESHOLD * C) {
        idle[j]++;
      } else {
        idle[j] = 0;
      }
    }

    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm);
    value = margin - sprod_ns(w, new_constraint);

    /* print primal objective */
    primal_obj = 0.5 * sprod_nn(w, w, sm->sizePsi) + C * value;

#if (DEBUG_LEVEL > 0)
    printf("ITER PRIMAL_OBJ %.4f\n", primal_obj); fflush(stdout);
#endif

    temp_var = sprod_nn(w_b, w_b, sm->sizePsi);
    proximal_term = 0.0;
    for (i = 1; i < sm->sizePsi + 1; i++) {
      proximal_term += (w[i] - w_b[i]) * (w[i] - w_b[i]);
    }

    reg_master_obj = -dual_obj + 0.5 * rho * temp_var / (1 + rho);
    expected_descent = reg_master_obj - primal_obj_b;

    v_k = (reg_master_obj - proximal_term * rho / 2) - primal_obj_b;

    primal_lower_bound = MAX(primal_lower_bound, reg_master_obj - 0.5 * rho * (1 + rho) * proximal_term);

#if (DEBUG_LEVEL > 0)
    printf("ITER REG_MASTER_OBJ: %.4f\n", reg_master_obj);
    printf("ITER EXPECTED_DESCENT: %.4f\n", expected_descent);
    printf("ITER PRIMLA_OBJ_B: %.4f\n", primal_obj_b);
    printf("ITER RHO: %.4f\n", rho);
    printf("ITER ||w-w_b||^2: %.4f\n", proximal_term);
    printf("ITER PRIMAL_LOWER_BOUND: %.4f\n", primal_lower_bound);
    printf("ITER V_K: %.4f\n", v_k);
#endif
    obj_difference = primal_obj - primal_obj_b;

    if (primal_obj < primal_obj_b + kappa * expected_descent) {
      /* extra condition to be met */
      if ((gTd > m2 * v_k) || (rho < min_rho + 1E-8)) {
        /* both terms are the same for every i, so evaluate them once */
        double center_loss = primal_obj_b - 0.5 * sprod_nn(w_b, w_b, sm->sizePsi);
        double candidate_loss = primal_obj - 0.5 * sprod_nn(w, w, sm->sizePsi);
#if (DEBUG_LEVEL > 0)
        printf("SERIOUS STEP\n");
#endif
        /* update cut_error: re-express every linearisation error relative
           to the new centre w */
        for (i = 0; i < size_active; i++) {
          cut_error[i] -= center_loss;
          cut_error[i] -= C * sprod_ns(w_b, dXc[i]->fvec);
          cut_error[i] += candidate_loss;
          cut_error[i] += C * sprod_ns(w, dXc[i]->fvec);
        }
        primal_obj_b = primal_obj;
        for (i = 1; i < sm->sizePsi + 1; i++) {
          w_b[i] = w[i];
        }
        null_step = 0;
        serious_counter++;
      } else {
        /* increase step size */
#if (DEBUG_LEVEL > 0)
        printf("NULL STEP: SS(ii) FAILS.\n");
#endif
        serious_counter--;
        rho = MAX(rho / 10, min_rho);
      }
    } else { /* no sufficient decrease */
      serious_counter--;
      if ((cut_error[size_active - 1] > m3 * last_sigma_k) && (fabs(obj_difference) > last_z_k_norm + last_sigma_k)) {
#if (DEBUG_LEVEL > 0)
        printf("NULL STEP: NS(ii) FAILS.\n");
#endif
        rho = MIN(10 * rho, max_rho);
      }
#if (DEBUG_LEVEL > 0)
      else printf("NULL STEP\n");
#endif
    }
    /* update last_sigma_k */
    last_sigma_k = sigma_k;
    last_z_k_norm = z_k_norm;

    /* break away from while loop if more than certain proportional decrease
       in primal objective */
    if (primal_obj_b / initial_primal_obj < 1 - decrease_proportion) {
      suff_decrease_cond = 1;
    }

    /* clean up */
    if (iter % CLEANUP_CHECK == 0) {
      size_active = resize_cleanup(size_active, &idle, &alpha, &delta, &gammaG0, &proximal_rhs, &G, &dXc, &cut_error);
    }

  } /* end cutting plane while loop */

  printf(" Inner loop optimization finished.\n"); fflush(stdout);

  /* free memory */
  /* Release the last solver model; the loop only frees the previous one
     before allocating the next. */
  if (svmModel != NULL) free_model(svmModel, 0);
  /* NOTE(review): free_example is called with 0 as its second argument;
     check whether the constraints' feature vectors need a separate release. */
  for (j = 0; j < size_active; j++) {
    free(G[j]);
    free_example(dXc[j], 0);
  }
  free(G);
  free(dXc);
  free(alpha);
  free(delta);
  free_svector(new_constraint);
  free(idle);
  free(gammaG0);
  free(proximal_rhs);
  free(cut_error);

  /* copy and free */
  for (i = 1; i < sm->sizePsi + 1; i++) {
    w[i] = w_b[i];
  }
  free(w_b);

  return (primal_obj_b);
}