/*
 * Evaluate the kernel selected by kernel_parm->kernel_type on the two
 * sparse vectors a and b and return the value as CFLOAT.
 *
 *   0  linear      sprod_ss(a,b)
 *   1  polynomial  (coef_lin * sprod_ss(a,b) + coef_const) ^ poly_degree
 *   2  RBF         exp(-rbf_gamma * (|a|^2 - 2*sprod_ss(a,b) + |b|^2)),
 *                  using the twonorm_sq values stored in a and b
 *   3  sigmoid     tanh(coef_lin * sprod_ss(a,b) + coef_const)
 *   4  custom      delegated to custom_kernel()
 *
 * Every call increments the global kernel_cache_statistic counter.
 * An unknown kernel type is reported and is fatal.
 */
CFLOAT single_kernel(KERNEL_PARM *kernel_parm, SVECTOR *a, SVECTOR *b)
{
  double arg;

  kernel_cache_statistic++;

  switch (kernel_parm->kernel_type) {
  case 0: /* linear */
    return (CFLOAT)sprod_ss(a, b);

  case 1: /* polynomial */
    arg = kernel_parm->coef_lin*sprod_ss(a,b)+kernel_parm->coef_const;
    return (CFLOAT)pow(arg, (double)kernel_parm->poly_degree);

  case 2: /* radial basis function */
    arg = -kernel_parm->rbf_gamma*(a->twonorm_sq-2*sprod_ss(a,b)+b->twonorm_sq);
    return (CFLOAT)exp(arg);

  case 3: /* sigmoid neural net */
    arg = kernel_parm->coef_lin*sprod_ss(a,b)+kernel_parm->coef_const;
    return (CFLOAT)tanh(arg);

  case 4: /* custom-kernel supplied in file kernel.h */
    return (CFLOAT)custom_kernel(kernel_parm, a, b);

  default:
#ifdef MATLAB_MEX
    mexErrMsgTxt(ERR005);
#else
    printf("Error: Unknown kernel function\n");
    exit(1);
#endif
  }
}
/*
 * Allocate a new SVECTOR that owns deep copies of `words` and `userdefined`.
 *
 * words       array of WORD entries terminated by an entry with wnum == 0;
 *             the terminator is copied as well.
 * userdefined NUL-terminated string to copy. NULL is accepted and is stored
 *             as an empty string, so callers that have no user data do not
 *             crash and readers of vec->userdefined always see a valid string.
 * factor      value stored in vec->factor.
 *
 * Returns the new vector with kernel_id = 0, next = NULL and twonorm_sq set
 * to sprod_ss(vec, vec). Memory comes from my_malloc().
 */
SVECTOR *create_svector(WORD *words,char *userdefined,double factor)
{
  SVECTOR *vec;
  long nwords, ulen, i;

  /* number of word entries, including the wnum == 0 terminator */
  nwords = 0;
  while (words[nwords].wnum) {
    nwords++;
  }
  nwords++;

  vec = (SVECTOR *)my_malloc(sizeof(SVECTOR));

  /* Initialise the plain fields first so that vec is never handed to
     sprod_ss() below with indeterminate members. */
  vec->kernel_id = 0;
  vec->next = NULL;
  vec->factor = factor;
  vec->userdefined = NULL;

  vec->words = (WORD *)my_malloc(sizeof(WORD)*(nwords));
  for (i = 0; i < nwords; i++) {
    vec->words[i] = words[i];
  }
  vec->twonorm_sq = sprod_ss(vec,vec);

  /* length of the user string; a NULL pointer counts as "" */
  ulen = 0;
  if (userdefined != NULL) {
    while (userdefined[ulen]) {
      ulen++;
    }
  }
  vec->userdefined = (char *)my_malloc(sizeof(char)*(ulen+1));
  for (i = 0; i < ulen; i++) {
    vec->userdefined[i] = userdefined[i];
  }
  vec->userdefined[ulen] = '\0';

  return(vec);
}
/*
 * Evaluate the kernel selected by kernel_parm->kernel_type between the two
 * documents a and b and return it as CFLOAT.
 *
 * The built-in kernels (types 0-3) work on a->words / b->words through
 * sprod_ss(); the RBF kernel additionally reads the twonorm_sq stored in
 * each document. Type 4 hands both documents to custom_kernel().
 * Every call increments the global kernel_cache_statistic counter.
 * An unknown kernel type prints an error and terminates with exit(1).
 */
CFLOAT kernel(KERNEL_PARM *kernel_parm, DOC *a, DOC *b)
{
  double arg;

  kernel_cache_statistic++;

  switch (kernel_parm->kernel_type) {
  case 0: /* linear */
    return (CFLOAT)sprod_ss(a->words, b->words);

  case 1: /* polynomial */
    arg = kernel_parm->coef_lin*sprod_ss(a->words,b->words)+kernel_parm->coef_const;
    return (CFLOAT)pow(arg, (double)kernel_parm->poly_degree);

  case 2: /* radial basis function */
    arg = -kernel_parm->rbf_gamma*(a->twonorm_sq-2*sprod_ss(a->words,b->words)+b->twonorm_sq);
    return (CFLOAT)exp(arg);

  case 3: /* sigmoid neural net */
    arg = kernel_parm->coef_lin*sprod_ss(a->words,b->words)+kernel_parm->coef_const;
    return (CFLOAT)tanh(arg);

  case 4: /* custom-kernel supplied in file kernel.h */
    return (CFLOAT)custom_kernel(kernel_parm, a, b);

  default:
    printf("Error: Unknown kernel function\n");
    exit(1);
  }
}
/*
 * Dot product between the i-th vector of document a and the j-th vector of
 * document b (compatibility shim for standard svm-light kernels).
 *
 * Returns 0 when either document holds no vectors. If both hold vectors but
 * one of the selected slots is NULL, an error is printed and the process
 * terminates with exit(-1).
 */
double sprod_i(DOC *a, DOC *b, int i, int j)
{
  /* nothing to multiply unless both documents carry vectors */
  if (!(a->num_of_vectors > 0 && b->num_of_vectors > 0)) {
    return 0;
  }

  if (a->vectors[i] == NULL || b->vectors[j] == NULL) {
    printf("ERROR: first vector not defined (with a traditional kernel it must be defined)\n");
    exit(-1);
  }

  return sprod_ss(a->vectors[i]->words, b->vectors[j]->words);
}
/*
 * Compute the length (2-norm) of the weight vector of a linear model.
 *
 * The support vectors with index 1 .. sv_num-1 are accumulated, scaled by
 * their alpha, into a dense vector of totwords+1 entries; that vector is
 * converted to sparse form and the square root of its self dot product is
 * returned. Terminates with exit(1) when the model's kernel is not LINEAR.
 */
double model_length_n(MODEL *model)
{
  long n_entries = model->totwords+1;
  long sv;
  double sq_norm;
  double *dense_w;
  SVECTOR *sparse_w;

  if (model->kernel_parm.kernel_type != LINEAR) {
    printf("ERROR: model_length_n applies only to linear kernel!\n");
    exit(1);
  }

  dense_w = create_nvector(n_entries);
  clear_nvector(dense_w, n_entries);

  /* support vector slots start at index 1 */
  sv = 1;
  while (sv < model->sv_num) {
    add_list_n_ns(dense_w, model->supvec[sv]->fvec, model->alpha[sv]);
    sv++;
  }

  sparse_w = create_svector_n(dense_w, n_entries, NULL, 1.0);
  sq_norm = sprod_ss(sparse_w, sparse_w);

  free(dense_w);
  free_svector(sparse_w);

  return sqrt(sq_norm);
}
/*
 * Evaluate the kernel selected by kernel_parm->kernel_type on the sparse
 * vectors a and b.
 *
 * For the RBF kernel the squared norms cached in a->twonorm_sq and
 * b->twonorm_sq are used; a negative cached value is treated as "not yet
 * computed" and is filled in (the vectors are modified in that case).
 * Every call increments the global kernel_cache_statistic counter.
 * An unknown kernel type prints an error and terminates with exit(1).
 */
double single_kernel(KERNEL_PARM *kernel_parm, SVECTOR *a, SVECTOR *b)
{
  double arg;

  kernel_cache_statistic++;

  switch (kernel_parm->kernel_type) {
  case LINEAR:
    return sprod_ss(a, b);

  case POLY:
    arg = kernel_parm->coef_lin*sprod_ss(a,b)+kernel_parm->coef_const;
    return pow(arg, (double)kernel_parm->poly_degree);

  case RBF:
    /* lazily compute the cached squared norms */
    if (a->twonorm_sq < 0) {
      a->twonorm_sq = sprod_ss(a, a);
    }
    if (b->twonorm_sq < 0) {
      b->twonorm_sq = sprod_ss(b, b);
    }
    arg = -kernel_parm->rbf_gamma*(a->twonorm_sq-2*sprod_ss(a,b)+b->twonorm_sq);
    return exp(arg);

  case SIGMOID:
    arg = kernel_parm->coef_lin*sprod_ss(a,b)+kernel_parm->coef_const;
    return tanh(arg);

  case CUSTOM: /* custom-kernel supplied in file kernel.h */
    return custom_kernel(kernel_parm, a, b);

  default:
    printf("Error: Unknown kernel function\n");
    exit(1);
  }
}
/*
 * Inner-loop solver: proximal cutting-plane iteration over one shared slack.
 *
 * w        weight vector, indexed 1..sm->sizePsi. Used as the warm start and
 *          overwritten on return with the final stability centre w_b.
 * m .. chunkid (except C, epsilon, MAX_ITER, Cdash)
 *          forwarded unchanged to find_cutting_plane().
 * MAX_ITER upper bound on the number of iterations of the main loop.
 * C        weight of the slack term (margin2 - w.constraint) in the objective
 *          and upper bound passed to the QP solver.
 * epsilon  the loop continues only while expected_descent < -epsilon.
 * Cdash, numChunks
 *          the objective contains the extra term Cdash*margin/numChunks.
 * w_prev   vector whose score on each new constraint is subtracted from
 *          the margin reported by find_cutting_plane().
 *
 * Each iteration appends the current constraint to the working set, grows
 * the Gram matrix, solves a QP with mosek_qp_optimize(), rebuilds w from the
 * resulting alphas and the centre w_b, asks find_cutting_plane() for the next
 * constraint, and then decides between moving the centre ("serious step")
 * and adapting rho ("null step").
 *
 * Returns primal_obj_b, the objective value recorded for the centre w_b.
 */
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, char *tmpdir, char * trainfile, double frac_sim, double Fweight, char *dataset_stats_file, double rho_admm, long isExhaustive, long isLPrelaxation, double Cdash, int datasetStartIdx, int chunkSz, int eid, int chunkid, double *w_prev, int numChunks) {
  // printf("Addr. of w (inside cp_algo) %x\t%x\n",w,sm->w);
  long i,j;
  double xi;
  double *alpha;
  double **G; /* Gram matrix */
  DOC **dXc; /* constraint matrix */
  double *delta; /* rhs of constraints */
  SVECTOR *new_constraint;
  double dual_obj, alphasum;
  int iter, size_active;
  double value;
  int r;
  int *idle; /* for cleaning up */
  double margin;
  double primal_obj;
  double *proximal_rhs;
  double *gammaG0=NULL;
  double min_rho = 0.001;
  double max_rho;
  double serious_counter=0;
  double rho = 1.0; /* temporarily set it to 1 first */
  double expected_descent, primal_obj_b=-1, reg_master_obj;
  int null_step=1;
  double *w_b;
  double kappa=0.1;
  double temp_var;
  double proximal_term, primal_lower_bound;
  double v_k;
  double obj_difference;
  double *cut_error; // cut_error[i] = alpha_{k,i} at current center x_k
  double sigma_k;
  double m2 = 0.2;
  double m3 = 0.9;
  double gTd;
  double last_sigma_k=0;
  double initial_primal_obj;
  int suff_decrease_cond=0;
  double decrease_proportion = 0.2; // start from 0.2 first
  double z_k_norm;
  double last_z_k_norm=0;

  /* w_b is the stability centre; it starts as a copy of the incoming w */
  w_b = create_nvector(sm->sizePsi);
  clear_nvector(w_b,sm->sizePsi);
  /* warm start */
  for (i=1;i<sm->sizePsi+1;i++) {
    w_b[i] = w[i];
  }

  iter = 0;
  size_active = 0;
  xi = 0.0;
  alpha = NULL;
  G = NULL;
  dXc = NULL;
  delta = NULL;
  idle = NULL;

  proximal_rhs = NULL;
  cut_error = NULL;

  printf("ITER 0 \n(before cutting plane) \n");
  double margin2;
  /* find_cutting_plane() reports two quantities through its out-parameters:
     margin and margin2 */
  new_constraint = find_cutting_plane (ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile, frac_sim, Fweight, dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation, &margin2, datasetStartIdx, chunkSz, eid, chunkid);
  value = margin2 - sprod_ns(w, new_constraint);

  margin -= sprod_ns(w_prev, new_constraint); //(Ajay: ONLINE LEARNING) IMPT NOTE --> constant addition to the loss ..
  // model score using w_prev values ('-' is used because the terms are reversed in the code)

  primal_obj_b = 0.5*sprod_nn(w_b,w_b,sm->sizePsi)+C*value + Cdash*margin/numChunks; // Ajay: Change in obj involving both hamming and F1 loss
  primal_obj = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value + Cdash*margin/numChunks; // Ajay: Change in obj involving both hamming and F1 loss;
  primal_lower_bound = 0;
  expected_descent = -primal_obj_b;
  initial_primal_obj = primal_obj_b;

  max_rho = C;

  printf("Running CCCP inner loop solver: \n"); fflush(stdout);

  time_t iter_start, iter_end;

  /* Stop when the centre objective has dropped by decrease_proportion,
     when the expected descent is no longer below -epsilon, or after
     MAX_ITER iterations. */
  while ((!suff_decrease_cond)&&(expected_descent<-epsilon)&&(iter<MAX_ITER)) {
    iter+=1;
    size_active+=1;

    time(&iter_start);

#if (DEBUG_LEVEL>0)
    printf("ITER %d\n", iter);
#endif
    printf("."); fflush(stdout);

    /* add constraint */
    /* All per-constraint arrays grow by one slot per iteration. */
    dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active);
    assert(dXc!=NULL);
    dXc[size_active-1] = (DOC*)malloc(sizeof(DOC));
    dXc[size_active-1]->fvec = new_constraint;
    dXc[size_active-1]->slackid = 1; // only one common slackid (one-slack)
    dXc[size_active-1]->costfactor = 1.0;

    delta = (double*)realloc(delta, sizeof(double)*size_active);
    assert(delta!=NULL);
    delta[size_active-1] = margin2; // Ajay: changing for the formulation combining hamming and F1loss
    alpha = (double*)realloc(alpha, sizeof(double)*size_active);
    assert(alpha!=NULL);
    alpha[size_active-1] = 0.0;
    idle = (int*)realloc(idle, sizeof(int)*size_active);
    assert(idle!=NULL);
    idle[size_active-1] = 0;
    /* proximal point */
    proximal_rhs = (double*)realloc(proximal_rhs, sizeof(double)*size_active);
    assert(proximal_rhs!=NULL);
    cut_error = (double*)realloc(cut_error, sizeof(double)*size_active);
    assert(cut_error!=NULL);
    // note g_i = - new_constraint
    cut_error[size_active-1] = C*(sprod_ns(w_b, new_constraint) - sprod_ns(w, new_constraint));
    cut_error[size_active-1] += (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi));
    cut_error[size_active-1] -= (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi));
    gammaG0 = (double*)realloc(gammaG0, sizeof(double)*size_active);
    assert(gammaG0!=NULL);

    /* update Gram matrix */
    /* Add one row pointer, then widen every row (including the new, NULL
       one) to size_active columns. */
    G = (double**)realloc(G, sizeof(double*)*size_active);
    assert(G!=NULL);
    G[size_active-1] = NULL;
    for (j=0;j<size_active;j++) {
      G[j] = (double*)realloc(G[j], sizeof(double)*size_active);
      assert(G[j]!=NULL);
    }

    /* fill the new row and column symmetrically, then the diagonal entry */
    for (j=0;j<size_active-1;j++) {
      G[size_active-1][j] = sprod_ss(dXc[size_active-1]->fvec, dXc[j]->fvec);
      G[j][size_active-1] = G[size_active-1][j];
    }
    G[size_active-1][size_active-1] = sprod_ss(dXc[size_active-1]->fvec,dXc[size_active-1]->fvec);

    /* update gammaG0 */
    /* gammaG0[i] = <w_b, constraint i>. While the centre has not moved since
       the previous iteration (null_step==1) only the new entry is computed;
       otherwise every entry is recomputed.
       NOTE(review): null_step is set to 0 on a serious step and never set
       back to 1 inside this function, so after the first serious step the
       full recomputation runs every iteration — confirm this is intended. */
    if (null_step==1) {
      gammaG0[size_active-1] = sprod_ns(w_b, dXc[size_active-1]->fvec);
    } else {
      for (i=0;i<size_active;i++) {
        gammaG0[i] = sprod_ns(w_b, dXc[i]->fvec);
      }
    }

    /* update proximal_rhs */
    for (i=0;i<size_active;i++) {
      proximal_rhs[i] = delta[i] - rho/(1+rho)*gammaG0[i];
    }

    /* solve QP to update alpha */
    dual_obj = 0;
    time_t mosek_start, mosek_end;
    time(&mosek_start);
    /* NOTE(review): the status code r is stored but never checked here. */
    r = mosek_qp_optimize(G, proximal_rhs, alpha, (long) size_active, C, &dual_obj,rho);
    time(&mosek_end);

#if(DEBUG_LEVEL == 1)
    print_time(mosek_start, mosek_end, "Mosek solver");
#endif
    /* DEBUG */
    //printf("r: %d\n", r); fflush(stdout);
    /* END DEBUG */

    /* Rebuild w: sum of constraints weighted by alpha/(1+rho) (alphas at or
       below C*ALPHA_THRESHOLD are skipped), then add rho/(1+rho) * w_b.
       z_k_norm is the norm of the first part alone. */
    clear_nvector(w,sm->sizePsi);
    for (j=0;j<size_active;j++) {
      if (alpha[j]>C*ALPHA_THRESHOLD) {
        add_vector_ns(w,dXc[j]->fvec,alpha[j]/(1+rho));
      }
    }

    z_k_norm = sqrt(sprod_nn(w,w,sm->sizePsi));

    add_vector_nn(w, w_b, sm->sizePsi, rho/(1+rho));

    /* detect if step size too small */
    sigma_k = 0;
    alphasum = 0;
    for (j=0;j<size_active;j++) {
      sigma_k += alpha[j]*cut_error[j];
      alphasum+=alpha[j];
    }
    sigma_k/=C;
    /* uses the constraint added this iteration (new_constraint is replaced
       only further below) */
    gTd = -C*(sprod_ns(w,new_constraint) - sprod_ns(w_b,new_constraint));

#if (DEBUG_LEVEL>0)
    for (j=0;j<size_active;j++) {
      printf("alpha[%d]: %.8g, cut_error[%d]: %.8g\n", j, alpha[j], j, cut_error[j]);
    }
    printf("sigma_k: %.8g\n", sigma_k);
    printf("alphasum: %.8g\n", alphasum);
    printf("g^T d: %.8g\n", gTd);
    fflush(stdout);
#endif

    /* update cleanup information */
    /* idle[j] counts consecutive iterations in which constraint j had an
       alpha below the threshold */
    for (j=0;j<size_active;j++) {
      if (alpha[j]<ALPHA_THRESHOLD*C) {
        idle[j]++;
      } else {
        idle[j]=0;
      }
    }

    /* next constraint, evaluated at the freshly computed w */
    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile, frac_sim, Fweight, dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation, &margin2, datasetStartIdx, chunkSz, eid, chunkid);
    // new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile, frac_sim, Fweight, dataset_stats_file, rho);
    value = margin2 - sprod_ns(w, new_constraint);

    margin -= sprod_ns(w_prev, new_constraint); //(Ajay: ONLINE LEARNING) IMPT NOTE --> constant addition to the loss ..
    // model score using w_prev values ('-' is used because the terms are reversed in the code)

    /* print primal objective */
    primal_obj = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value + Cdash*margin/numChunks; // Ajay: Change in obj involving both hamming and F1 loss;

#if (DEBUG_LEVEL>0)
    printf("ITER PRIMAL_OBJ %.4f\n", primal_obj); fflush(stdout);
#endif

    temp_var = sprod_nn(w_b,w_b,sm->sizePsi);
    /* proximal_term = ||w - w_b||^2 */
    proximal_term = 0.0;
    for (i=1;i<sm->sizePsi+1;i++) {
      proximal_term += (w[i]-w_b[i])*(w[i]-w_b[i]);
    }

    reg_master_obj = -dual_obj+0.5*rho*temp_var/(1+rho);
    expected_descent = reg_master_obj - primal_obj_b;

    v_k = (reg_master_obj - proximal_term*rho/2) - primal_obj_b;

    primal_lower_bound = MAX(primal_lower_bound, reg_master_obj - 0.5*rho*(1+rho)*proximal_term);

#if (DEBUG_LEVEL>0)
    printf("ITER REG_MASTER_OBJ: %.4f\n", reg_master_obj);
    printf("ITER EXPECTED_DESCENT: %.4f\n", expected_descent);
    printf("ITER PRIMLA_OBJ_B: %.4f\n", primal_obj_b);
    printf("ITER RHO: %.4f\n", rho);
    printf("ITER ||w-w_b||^2: %.4f\n", proximal_term);
    printf("ITER PRIMAL_LOWER_BOUND: %.4f\n", primal_lower_bound);
    printf("ITER V_K: %.4f\n", v_k);
#endif
    obj_difference = primal_obj - primal_obj_b;

    /* sufficient-decrease test: the new objective must beat the centre's
       objective by at least the fraction kappa of the expected descent */
    if (primal_obj<primal_obj_b+kappa*expected_descent) {
      /* extra condition to be met */
      if ((gTd>m2*v_k)||(rho<min_rho+1E-8)) {
#if (DEBUG_LEVEL>0)
        printf("SERIOUS STEP\n");
#endif
        /* update cut_error */
        /* re-express every cut error relative to the new centre w */
        for (i=0;i<size_active;i++) {
          cut_error[i] -= (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi));
          cut_error[i] -= C*sprod_ns(w_b, dXc[i]->fvec);
          cut_error[i] += (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi));
          cut_error[i] += C*sprod_ns(w, dXc[i]->fvec);
        }
        /* move the centre to w */
        primal_obj_b = primal_obj;
        for (i=1;i<sm->sizePsi+1;i++) {
          w_b[i] = w[i];
        }
        null_step = 0;
        serious_counter++;
      } else {
        /* increase step size */
#if (DEBUG_LEVEL>0)
        printf("NULL STEP: SS(ii) FAILS.\n");
#endif
        serious_counter--;
        /* rho is divided by 10 but never drops below min_rho */
        rho = MAX(rho/10,min_rho);
      }
    } else { /* no sufficient decrease */
      serious_counter--;
      if ((cut_error[size_active-1]>m3*last_sigma_k)&&(fabs(obj_difference)>last_z_k_norm+last_sigma_k)) {
#if (DEBUG_LEVEL>0)
        printf("NULL STEP: NS(ii) FAILS.\n");
#endif
        /* rho is multiplied by 10 but never exceeds max_rho (= C) */
        rho = MIN(10*rho,max_rho);
      }
#if (DEBUG_LEVEL>0)
      else printf("NULL STEP\n");
#endif
    }
    /* update last_sigma_k */
    last_sigma_k = sigma_k;
    last_z_k_norm = z_k_norm;

    /* break away from while loop if more than certain proportional decrease in primal objective */
    if (primal_obj_b/initial_primal_obj<1-decrease_proportion) {
      suff_decrease_cond = 1;
    }

    /* clean up */
    /* every CLEANUP_CHECK iterations resize_cleanup() is given all
       per-constraint arrays and returns the new working-set size */
    if (iter % CLEANUP_CHECK == 0) {
      size_active = resize_cleanup(size_active, idle, alpha, delta, gammaG0, proximal_rhs, G, dXc, cut_error);
    }

    time(&iter_end);

#if (DEBUG_LEVEL==1)
    char msg[20];
    sprintf(msg,"ITER %d",iter);
    print_time(iter_start, iter_end, msg);
#endif
  } // end cutting plane while loop

  printf(" Inner loop optimization finished.\n"); fflush(stdout);

  /* free memory */
  for (j=0;j<size_active;j++) {
    free(G[j]);
    free_example(dXc[j],0);
  }
  free(G);
  free(dXc);
  free(alpha);
  free(delta);
  /* the last constraint found was never added to dXc, so it is freed here */
  free_svector(new_constraint);
  free(idle);
  free(gammaG0);
  free(proximal_rhs);
  free(cut_error);

  /* copy and free */
  /* the caller receives the centre w_b, not the last trial point */
  for (i=1;i<sm->sizePsi+1;i++) {
    w[i] = w_b[i];
  }
  free(w_b);

  return(primal_obj_b);
}
/*
 * Cutting-plane solver that updates w through the dual QP solved by
 * mosek_qp_optimize_dual().
 *
 * w              weight vector (1-based); overwritten in every iteration.
 * m, fycache, ex, sm, sparm, valid_examples
 *                forwarded to find_cutting_plane().
 * MAX_ITER       upper bound on the number of loop iterations.
 * C              bound passed to the QP solver; also scales ALPHA_THRESHOLD.
 * epsilon        the loop runs while the violation `value` of the newest
 *                constraint exceeds threshold + epsilon.
 *
 * Besides the size_active constraint multipliers, the alpha array holds
 * phi1_size+phi2_size extra entries. After each solve these are subtracted
 * from the weights at indices 2*(phi1_size+phi2_size)+1 ..
 * 3*(phi1_size+phi2_size).
 *
 * NOTE(review): lparm, kparm and svm_model are initialised but not used by
 * any call visible in this function.
 */
void cutting_plane_algorithm_dual(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, int *valid_examples) {
  long i,j;
  double *alpha;
  DOC **dXc; // constraint matrix
  double *delta; // rhs of constraints
  SVECTOR *new_constraint;
  int iter, size_active;
  double value;
  double threshold = 0.0;
  double margin;
  double primal_obj, cur_obj;
  double *cur_slack = NULL;
  int mv_iter;
  int *idle = NULL;
  double **G = NULL;
  double **G2 = NULL;
  double **qmatrix = NULL;
  SVECTOR *f;
  int r;

  // set parameters for hideo solver
  LEARN_PARM lparm;
  KERNEL_PARM kparm;
  MODEL *svm_model=NULL;
  lparm.biased_hyperplane = 0;
  lparm.epsilon_crit = MIN(epsilon,0.001);
  lparm.svm_c = C;
  lparm.sharedslack = 1;
  kparm.kernel_type = LINEAR;

  lparm.remove_inconsistent=0;
  lparm.skip_final_opt_check=0;
  lparm.svm_maxqpsize=10;
  lparm.svm_newvarsinqp=0;
  lparm.svm_iter_to_shrink=-9999;
  lparm.maxiter=100000;
  lparm.kernel_cache_size=40;
  lparm.eps = epsilon;
  lparm.transduction_posratio=-1.0;
  lparm.svm_costratio=1.0;
  lparm.svm_costratio_unlab=1.0;
  lparm.svm_unlabbound=1E-5;
  lparm.epsilon_a=1E-10; // changed from 1e-15
  lparm.compute_loo=0;
  lparm.rho=1.0;
  lparm.xa_depth=0;
  strcpy(lparm.alphafile,"");
  kparm.poly_degree=3;
  kparm.rbf_gamma=1.0;
  kparm.coef_lin=1;
  kparm.coef_const=1;
  strcpy(kparm.custom,"empty");

  iter = 0;
  size_active = 0;
  alpha = NULL;
  dXc = NULL;
  delta = NULL;

  //qmatrix = (double **) malloc(sizeof(double *)*10);
  //assert(qmatrix!=NULL);

  printf("Running structural SVM solver: "); fflush(stdout);

  new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
  /* violation of the new constraint at the current w */
  value = margin - sprod_ns(w, new_constraint);

  while((value>threshold+epsilon)&&(iter<MAX_ITER)) {
    iter+=1;
    size_active+=1;

    printf("."); fflush(stdout);

    // add constraint
    dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active);
    assert(dXc!=NULL);
    dXc[size_active-1] = (DOC*)malloc(sizeof(DOC));
    dXc[size_active-1]->fvec = new_constraint;
    dXc[size_active-1]->slackid = 1; // only one common slackid (one-slack)
    dXc[size_active-1]->costfactor = 1.0;

    delta = (double*)realloc(delta, sizeof(double)*size_active);
    assert(delta!=NULL);
    delta[size_active-1] = margin;

    //alpha = (double*)malloc(sizeof(double)*(size_active+(sparm->phi1_size+sparm->phi2_size)));
    //assert(alpha!=NULL);
    //for(j=0; j<(sparm->phi1_size+sparm->phi2_size)+size_active; j++){
    //  alpha[j] = 0.0;
    //}
    /* alpha is freed and reset to NULL at the end of every iteration, so
       this realloc yields a fresh buffer each time.
       NOTE(review): only alpha[size_active-1] is initialised before the
       solver call; the other entries are presumably pure outputs of
       mosek_qp_optimize_dual — confirm. */
    alpha = (double*)realloc(alpha, sizeof(double)*(size_active+(sparm->phi1_size+sparm->phi2_size)));
    assert(alpha!=NULL);
    alpha[size_active-1] = 0.0;

    idle = (int *) realloc(idle, sizeof(int)*size_active);
    assert(idle!=NULL);
    idle[size_active-1] = 0;

    /* One qmatrix row per constraint: the negated weights that the
       constraint has at feature indices 2*(phi1_size+phi2_size)+1 onward,
       as returned by returnWeightAtIndex().
       NOTE(review): the row malloc is unchecked, qmatrix and its rows are
       never freed in this function, and qmatrix is not passed to
       resize_cleanup() below, so after a cleanup its rows may no longer
       line up with dXc — verify against resize_cleanup. */
    qmatrix = (double **) realloc(qmatrix, sizeof(double *)*size_active);
    assert(qmatrix!=NULL);

    qmatrix[size_active-1] = malloc(sizeof(double)*(sparm->phi1_size+sparm->phi2_size));
    for(j = 0; j < (sparm->phi1_size+sparm->phi2_size); j++){
      qmatrix[size_active-1][j] = (-1)*returnWeightAtIndex(dXc[size_active-1]->fvec->words, ((sparm->phi1_size+sparm->phi2_size)*2+j+1));
    }

    // update Gram matrix
    G = (double **) realloc(G, sizeof(double *)*size_active);
    assert(G!=NULL);
    G[size_active-1] = NULL;
    for(j = 0; j < size_active; j++) {
      G[j] = (double *) realloc(G[j], sizeof(double)*size_active);
      assert(G[j]!=NULL);
    }
    /* off-diagonal entries store half of the dot product; the diagonal
       entry below stores the full dot product */
    for(j = 0; j < size_active-1; j++) {
      G[size_active-1][j] = sprod_ss(dXc[size_active-1]->fvec, dXc[j]->fvec);
      G[size_active-1][j] = G[size_active-1][j]/2;
      G[j][size_active-1] = G[size_active-1][j];
    }
    G[size_active-1][size_active-1] = sprod_ss(dXc[size_active-1]->fvec,dXc[size_active-1]->fvec);

    // hack: add a constant to the diagonal to make sure G is PSD
    G[size_active-1][size_active-1] += 1e-6;

    // solve QP to update alpha
    //r = mosek_qp_optimize(G, delta, alpha, (long) size_active, C, &cur_obj, dXc, (sparm->phi1_size+sparm->phi2_size)*2, (sparm->phi1_size+sparm->phi2_size));
    r = mosek_qp_optimize_dual(G, qmatrix, delta, alpha, (long) size_active, (long) (sparm->phi1_size+sparm->phi2_size), C, &cur_obj, 0, 0);

    /* Status 1293..1296 is reported as a possible non-PSD Gram matrix
       (presumably MOSEK's "Q not PSD/NSD" codes — confirm against mosek.h).
       Only 1295 triggers a retry: the whole diagonal is raised so that the
       regularisation grows tenfold, and the QP is solved again.
       NOTE(review): the retry loop has no iteration cap, and codes 1293,
       1294 and 1296 fall through with whatever alpha the solver left. */
    if(r >= 1293 && r <= 1296) {
      printf("r:%d. G might not be psd due to numerical errors.\n",r);
      fflush(stdout);
      //exit(1);
      while(r==1295) {
        printf("r:%d. G might not be psd due to numerical errors. Gram Reg=%0.7f\n",r, sparm->gram_regularization);
        fflush(stdout);
        for(i=0;i<size_active;i++) {
          G[i][i] += 10*sparm->gram_regularization-sparm->gram_regularization;
        }
        sparm->gram_regularization *= 10;
        r = mosek_qp_optimize_dual(G, qmatrix, delta, alpha, (long) size_active, (long) (sparm->phi1_size+sparm->phi2_size), C, &cur_obj, sparm->gram_regularization, sparm->gram_regularization*0.1);
      }
    }
    else if(r) {
      printf("Error %d in mosek_qp_optimize: Check ${MOSEKHOME}/${VERSION}/tools/platform/${PLATFORM}/h/mosek.h\n",r);
      exit(1);
    }

    /* rebuild w from the constraints whose alpha exceeds the threshold and
       maintain the idle counters at the same time */
    clear_nvector(w,sm->sizePsi);
    for (j=0;j<size_active;j++) {
      if (alpha[j]>C*ALPHA_THRESHOLD) {
        add_vector_ns(w,dXc[j]->fvec,alpha[j]);
        idle[j] = 0;
      }
      else
        idle[j]++;
    }

    /* subtract the extra multipliers (stored after the size_active
       constraint multipliers) from the third block of w */
    for(j=0; j<(sparm->phi1_size+sparm->phi2_size);j++){
      if (alpha[size_active+j] > EQUALITY_EPSILON){
        w[j+1+(sparm->phi1_size+sparm->phi2_size)*2] = w[j+1+(sparm->phi1_size+sparm->phi2_size)*2] - alpha[size_active+j];
      }
    }

    /* snap weights with magnitude below EQUALITY_EPSILON to exactly zero */
    for(j=1; j<=(sparm->phi1_size+sparm->phi2_size)*3;j++){
      if((w[j]<EQUALITY_EPSILON) && (w[j]>(-1*EQUALITY_EPSILON))){
        w[j] = 0;
      }
    }

    /* report (but do not fail on) positive weights in the third block */
    for(j=(sparm->phi1_size+sparm->phi2_size)*2+1; j<=(sparm->phi1_size+sparm->phi2_size)*3;j++){
      //assert(w[j] <= 0);
      if(w[j]>0){
        printf("j = %ld, w[j] = %0.6f\n", j, w[j]);
        fflush(stdout);
      }
    }

    /* cur_slack[i] = max(0, delta[i] - <w, constraint i>), where the dot
       product runs over every vector in the constraint's linked list.
       NOTE(review): this realloc result is not checked. */
    cur_slack = (double *) realloc(cur_slack,sizeof(double)*size_active);

    for(i = 0; i < size_active; i++) {
      cur_slack[i] = 0.0;
      for(f = dXc[i]->fvec; f; f = f->next) {
        j = 0;
        while(f->words[j].wnum) {
          cur_slack[i] += w[f->words[j].wnum]*f->words[j].weight;
          j++;
        }
      }
      if(cur_slack[i] >= delta[i])
        cur_slack[i] = 0.0;
      else
        cur_slack[i] = delta[i]-cur_slack[i];
    }

    /* mv_iter = index of the largest slack (the last one on ties); the
       threshold stays 0 while there is only one constraint */
    mv_iter = 0;
    if(size_active > 1) {
      for(j = 0; j < size_active; j++) {
        if(cur_slack[j] >= cur_slack[mv_iter])
          mv_iter = j;
      }
    }

    if(size_active > 1)
      threshold = cur_slack[mv_iter];
    else
      threshold = 0.0;

    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
    value = margin - sprod_ns(w, new_constraint);

    if((iter % CLEANUP_CHECK) == 0) {
      printf("+"); fflush(stdout);
      size_active = resize_cleanup(size_active, &idle, &alpha, &delta, &dXc, &G, &mv_iter);
    }

    /* alpha is rebuilt from scratch in the next iteration */
    free(alpha);
    alpha=NULL;
  } // end cutting plane while loop

  //primal_obj = current_obj_val(ex, fycache, m, sm, sparm, C, valid_examples);

  printf(" Inner loop optimization finished.\n"); fflush(stdout);

  // free memory
  for (j=0;j<size_active;j++) {
    free(G[j]);
    free_example(dXc[j],1);
  }
  free(G);
  free(dXc);
  free(alpha);
  free(delta);
  /* the last constraint found was never added to dXc */
  free_svector(new_constraint);
  free(cur_slack);
  free(idle);
  if (svm_model!=NULL) free_model(svm_model,0);

  //return(primal_obj);
  return;
}
/*
 * Read input examples {(x_1,y_1),...,(x_n,y_n)} from `file`.
 * The type of pattern x and label y has to follow the definition in
 * svm_struct_latent_api_types.h.
 *
 * File layout as consumed here: an integer image count, followed for each
 * image by four whitespace-separated fields
 *     <phi1 file name> <phi2 file name> <id> <label>
 * A label equal to 1 counts as positive, anything else as negative.
 *
 * The returned SAMPLE always contains exactly one EXAMPLE holding all
 * images. For every image the phi1/phi2 feature vectors are loaded and the
 * combined vectors phi1phi2_pos / _neg / _shift are built. Afterwards, for
 * every pair i < j, neighbors[i][j] is set to 1 when the squared distance
 * between the two phi2 vectors is below sparm->pairwise_threshold and to 0
 * otherwise. Entries with j <= i are left unset.
 *
 * Errors (file cannot be opened, malformed content, out of memory) are
 * fatal. The input file is closed before returning.
 */
SAMPLE read_struct_examples(char *file, STRUCT_LEARN_PARM *sparm) {
  SAMPLE sample;
  EXAMPLE *ex;
  SVECTOR *temp = NULL;
  SVECTOR *temp_sub = NULL;
  double vecDistance;
  int i, j;

  // open the file containing candidate bounding box dimensions/labels/featurePath and image label
  FILE *fp = fopen(file, "r");
  if (fp == NULL) {
    printf("Error: Cannot open input file %s\n", file);
    exit(1);
  }

  sample.n = 1;
  sample.examples = (EXAMPLE *) malloc(sample.n*sizeof(EXAMPLE));
  if (!sample.examples) die("Memory error.");
  ex = &sample.examples[0];

  ex->x.n_pos = 0;
  ex->x.n_neg = 0;

  /* the image count drives every allocation below, so it must be valid */
  if (fscanf(fp, "%d", &ex->n_imgs) != 1 || ex->n_imgs < 0) {
    printf("Error: Cannot read number of images from %s\n", file);
    exit(1);
  }

  // Initialise pattern
  ex->x.example_cost = 1;

  ex->x.x_is = (SUB_PATTERN *) malloc(ex->n_imgs*sizeof(SUB_PATTERN));
  if (!ex->x.x_is) die("Memory error.");
  ex->y.labels = (int *) malloc(ex->n_imgs*sizeof(int));
  if (!ex->y.labels) die("Memory error.");

  for (i = 0; i < ex->n_imgs; i++) {
    SUB_PATTERN *img = &ex->x.x_is[i];

    /* NOTE(review): "%s" is unbounded; the capacity of the file-name
       buffers is declared elsewhere — add a field width once it is known. */
    if (fscanf(fp, "%s", img->phi1_file_name) != 1 ||
        fscanf(fp, "%s", img->phi2_file_name) != 1 ||
        fscanf(fp, "%d", &img->id) != 1 ||
        fscanf(fp, "%d", &ex->y.labels[i]) != 1) {
      printf("Error: Malformed entry %d in input file %s\n", i, file);
      exit(1);
    }

    img->phi1 = read_sparse_vector(img->phi1_file_name, img->id, sparm);
    img->phi2 = read_sparse_phi2(img->phi2_file_name, sparm);

    /* phi1phi2_pos = phi1 + (phi2 re-indexed with offset phi1_size) */
    temp = create_svector_with_index(img->phi2->words, "", 1, sparm->phi1_size);
    img->phi1phi2_pos = add_ss(img->phi1, temp);
    free_svector(temp);

    /* the same joint vector re-indexed with one and two block offsets */
    img->phi1phi2_neg = create_svector_with_index(img->phi1phi2_pos->words, "", 1, (sparm->phi1_size+sparm->phi2_size));
    img->phi1phi2_shift = create_svector_with_index(img->phi1phi2_pos->words, "", 1, (sparm->phi1_size+sparm->phi2_size)*2);

    if (ex->y.labels[i] == 1) {
      ex->x.n_pos++;
    } else {
      ex->x.n_neg++;
    }
  }

  /* everything has been read; release the file handle */
  fclose(fp);

  ex->y.n_pos = ex->x.n_pos;
  ex->y.n_neg = ex->x.n_neg;

  ex->x.neighbors = (int **) malloc(ex->n_imgs*sizeof(int*));
  if (!ex->x.neighbors) die("Memory error.");
  ex->x.n_neighbors = 0;

  for (i = 0; i < ex->n_imgs; i++) {
    ex->x.neighbors[i] = (int *) malloc(ex->n_imgs*sizeof(int));
    if (!ex->x.neighbors[i]) die("Memory error.");

    /* only the strict upper triangle (j > i) is filled */
    for (j = (i+1); j < ex->n_imgs; j++) {
      temp_sub = sub_ss(ex->x.x_is[i].phi2, ex->x.x_is[j].phi2);
      vecDistance = sprod_ss(temp_sub, temp_sub);
      free_svector(temp_sub);

      if (vecDistance < sparm->pairwise_threshold) {
        ex->x.neighbors[i][j] = 1;
        ex->x.n_neighbors++;
      } else {
        ex->x.neighbors[i][j] = 0;
      }
    }
  }

  printf("No of neighbors = %d\n", ex->x.n_neighbors);
  fflush(stdout);

  return(sample);
}
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, /*double epsilon,*/ SVECTOR **fycache, EXAMPLE *ex, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm) { long i,j; double *alpha; double **G; /* Gram matrix */ DOC **dXc; /* constraint matrix */ double *delta; /* rhs of constraints */ SVECTOR *new_constraint; double dual_obj/*, alphasum*/; int iter, size_active, no_violation_iter; double value; //int r; //int *idle; /* for cleaning up */ double margin; //double primal_obj; double lower_bound, approx_upper_bound; double *proximal_rhs; //double *gammaG0=NULL; //double min_rho = 0.001; //double max_rho; //double serious_counter=0; //double rho = 1.0; //double expected_descent, primal_obj_b=-1, reg_master_obj; //int null_step=1; //double *w_b; //double kappa=0.01; //double temp_var; //double proximal_term, primal_lower_bound; //double v_k; //double obj_difference; // double *cut_error; // cut_error[i] = alpha_{k,i} at current center x_k //double sigma_k; //double m2 = 0.2; //double m3 = 0.9; //double gTd; //double last_sigma_k=0; //double initial_primal_obj; //int suff_decrease_cond=0; //double decrease_proportion = 0.2; // start from 0.2 first //double z_k_norm; //double last_z_k_norm=0; /* w_b = create_nvector(sm->sizePsi); clear_nvector(w_b,sm->sizePsi); // warm start for (i=1;i<sm->sizePsi+1;i++) { w_b[i] = w[i]; }*/ iter = 0; no_violation_iter = 0; size_active = 0; alpha = NULL; G = NULL; dXc = NULL; delta = NULL; //idle = NULL; proximal_rhs = NULL; //cut_error = NULL; new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm); value = margin - sprod_ns(w, new_constraint); //primal_obj_b = 0.5*sprod_nn(w_b,w_b,sm->sizePsi)+C*value; //primal_obj = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value; //primal_lower_bound = 0; //expected_descent = -primal_obj_b; //initial_primal_obj = primal_obj_b; //max_rho = C; // Non negative weight constraints int nNonNeg = sm->sizePsi - sm->firstNonNegWeightIndex + 1; G = (double**)malloc(sizeof(double*)*nNonNeg); 
for (j=0; j<nNonNeg; j++) { G[j] = (double*)malloc(sizeof(double)*nNonNeg); for (int k=0; k<nNonNeg; k++) { G[j][k] = 0; } G[j][j] = 1.0; } double* alphabeta = NULL; while (/*(!suff_decrease_cond)&&(expected_descent<-epsilon)&&*/(iter<MAX_ITER)&&(no_violation_iter<MAX_INNER_ITER_NO_VIOLATION)) { LearningTracker::NextInnerIteration(); iter+=1; size_active+=1; #if (DEBUG_LEVEL>0) printf("INNER ITER %d\n", iter); #endif /* add constraint */ dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active); assert(dXc!=NULL); dXc[size_active-1] = (DOC*)malloc(sizeof(DOC)); dXc[size_active-1]->fvec = new_constraint; dXc[size_active-1]->slackid = 1; // only one common slackid (one-slack) dXc[size_active-1]->costfactor = 1.0; delta = (double*)realloc(delta, sizeof(double)*size_active); assert(delta!=NULL); delta[size_active-1] = margin; alphabeta = (double*)realloc(alphabeta, sizeof(double)*(size_active+nNonNeg)); assert(alphabeta!=NULL); alphabeta[size_active+nNonNeg-1] = 0.0; /*idle = (int*)realloc(idle, sizeof(int)*size_active); assert(idle!=NULL); idle[size_active-1] = 0;*/ /* proximal point */ proximal_rhs = (double*)realloc(proximal_rhs, sizeof(double)*(size_active+nNonNeg)); assert(proximal_rhs!=NULL); /*cut_error = (double*)realloc(cut_error, sizeof(double)*size_active); assert(cut_error!=NULL); // note g_i = - new_constraint cut_error[size_active-1] = C*(sprod_ns(w_b, new_constraint) - sprod_ns(w, new_constraint)); cut_error[size_active-1] += (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi)); cut_error[size_active-1] -= (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi)); */ /*gammaG0 = (double*)realloc(gammaG0, sizeof(double)*size_active); assert(gammaG0!=NULL);*/ /* update Gram matrix */ G = (double**)realloc(G, sizeof(double*)*(size_active+nNonNeg)); assert(G!=NULL); G[size_active+nNonNeg-1] = NULL; for (j=0; j<size_active+nNonNeg; j++) { G[j] = (double*)realloc(G[j], sizeof(double)*(size_active+nNonNeg)); assert(G[j]!=NULL); } for (j=0; j<size_active-1; j++) { 
G[size_active+nNonNeg-1][j+nNonNeg] = sprod_ss(dXc[size_active-1]->fvec, dXc[j]->fvec); G[j+nNonNeg][size_active+nNonNeg-1] = G[size_active+nNonNeg-1][j+nNonNeg]; } G[size_active+nNonNeg-1][size_active+nNonNeg-1] = sprod_ss(dXc[size_active-1]->fvec,dXc[size_active-1]->fvec); for (j=0; j<nNonNeg; j++) { WORD indicator[2]; indicator[0].wnum = j + sm->firstNonNegWeightIndex; indicator[0].weight = 1.0; indicator[1].wnum = 0; indicator[1].weight = 0.0; SVECTOR* indicator_vec = create_svector(indicator, NULL, 1.0); G[size_active+nNonNeg-1][j] = sprod_ss(dXc[size_active-1]->fvec, indicator_vec); G[j][size_active+nNonNeg-1] = G[size_active+nNonNeg-1][j]; free_svector(indicator_vec); } /* update gammaG0 */ /*if (null_step==1) { gammaG0[size_active-1] = sprod_ns(w_b, dXc[size_active-1]->fvec); } else { for (i=0;i<size_active;i++) { gammaG0[i] = sprod_ns(w_b, dXc[i]->fvec); } }*/ /* update proximal_rhs */ for (i=0; i<size_active; i++) { proximal_rhs[i+nNonNeg] = -delta[i]; //(1+rho) * (rho * gammaG0[i] - (1 + rho) * delta[i]); } for (i=0; i<nNonNeg; i++) { proximal_rhs[i] = 0; //w_b[i + 1]*rho * (1+rho); } /* DEBUG */ /* for (i = 0; i < size_active + nNonNeg; ++i) { printf("G[%d]=", i); for (j = 0; j < size_active + nNonNeg; ++j) { printf("%.4f ", G[i][j]); } printf("\n"); } printf("\n"); for (i = 0; i < size_active + nNonNeg; ++i) printf("proximal_rhs[%d]=%.4f\n", i, proximal_rhs[i]); */ /* solve QP to update alpha */ dual_obj = 0; mosek_qp_optimize(G, proximal_rhs, alphabeta, (long) size_active+nNonNeg, C, &dual_obj, nNonNeg); printf("dual_obj=%.4lf\n", dual_obj); alpha = alphabeta + nNonNeg; clear_nvector(w,sm->sizePsi); for (i = 0; i < nNonNeg; i++) { w[sm->firstNonNegWeightIndex + i] = alphabeta[i];//alphabeta[i]/(1+rho); // add betas } for (j=0; j<size_active; j++) { if (alpha[j]>C*ALPHA_THRESHOLD) { //add_vector_ns(w,dXc[j]->fvec,alpha[j]/(1+rho)); add_vector_ns(w,dXc[j]->fvec,alpha[j]); } } //z_k_norm = sqrt(sprod_nn(w,w,sm->sizePsi)); //add_vector_nn(w, w_b, 
sm->sizePsi, rho/(1+rho)); LearningTracker::ReportWeights(w, sm->sizePsi); /* detect if step size too small */ /* sigma_k = 0; alphasum = 0; for (j=0;j<size_active;j++) { sigma_k += alpha[j]*cut_error[j]; alphasum+=alpha[j]; } sigma_k/=C; gTd = -C*(sprod_ns(w,new_constraint) - sprod_ns(w_b,new_constraint)); #if (DEBUG_LEVEL>0) for (j=0;j<size_active;j++) { printf("alpha[%d]: %.8g, cut_error[%d]: %.8g\n", j, alpha[j], j, cut_error[j]); } printf("sigma_k: %.8g\n", sigma_k); printf("alphasum: %.8g\n", alphasum); printf("g^T d: %.8g\n", gTd); fflush(stdout); #endif */ /* update cleanup information */ /* for (j=0;j<size_active;j++) { if (alpha[j]<ALPHA_THRESHOLD*C) { idle[j]++; } else { idle[j]=0; } } */ // update lower bound double xi = -1e+20; for (i = 0; i < size_active; ++i) { xi = MAX(xi, delta[i] - sprod_ns(w, dXc[i]->fvec)); } lower_bound = 0.5*sprod_nn(w,w,sm->sizePsi)+C*xi; printf("lower_bound=%.4lf\n", lower_bound); assert(fabs(lower_bound + dual_obj) < 1e-6); LearningTracker::ReportLowerBound(lower_bound); // find new constraint new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm); value = margin - sprod_ns(w, new_constraint); double violation = value - xi; if (violation > CUTTING_PLANE_EPS) { printf("New constraint is violated by %.4lf\n", violation); no_violation_iter = 0; } else { ++no_violation_iter; printf("New constraint is underviolated by %.4lf\n", violation); printf("%d more such constraints to stop\n", MAX_INNER_ITER_NO_VIOLATION - no_violation_iter); } // update upper bound approx_upper_bound = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value; printf("approx_upper_bound=%.4lf\n", approx_upper_bound); LearningTracker::ReportUpperBound(approx_upper_bound); /* temp_var = sprod_nn(w_b,w_b,sm->sizePsi); proximal_term = 0.0; for (i=1;i<sm->sizePsi+1;i++) { proximal_term += (w[i]-w_b[i])*(w[i]-w_b[i]); } reg_master_obj = -dual_obj+0.5*rho*temp_var/(1+rho); expected_descent = reg_master_obj - primal_obj_b; v_k = (reg_master_obj - 
proximal_term*rho/2) - primal_obj_b; primal_lower_bound = MAX(primal_lower_bound, reg_master_obj - 0.5*rho*(1+rho)*proximal_term); LearningTracker::ReportLowerBoundValue(reg_master_obj); #if (DEBUG_LEVEL>0) printf("ITER REG_MASTER_OBJ: %.4f\n", reg_master_obj); printf("ITER EXPECTED_DESCENT: %.4f\n", expected_descent); printf("ITER PRIMAL_OBJ_B: %.4f\n", primal_obj_b); printf("ITER RHO: %.4f\n", rho); printf("ITER ||w-w_b||^2: %.4f\n", proximal_term); printf("ITER PRIMAL_LOWER_BOUND: %.4f\n", primal_lower_bound); printf("ITER V_K: %.4f\n", v_k); #endif obj_difference = primal_obj - primal_obj_b; if (primal_obj<primal_obj_b+kappa*expected_descent) { // extra condition to be met if ((gTd>m2*v_k)||(rho<min_rho+1E-8)) { #if (DEBUG_LEVEL>0) printf("SERIOUS STEP\n"); #endif // update cut_error for (i=0;i<size_active;i++) { cut_error[i] -= (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi)); cut_error[i] -= C*sprod_ns(w_b, dXc[i]->fvec); cut_error[i] += (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi)); cut_error[i] += C*sprod_ns(w, dXc[i]->fvec); } primal_obj_b = primal_obj; for (i=1;i<sm->sizePsi+1;i++) { w_b[i] = w[i]; } null_step = 0; serious_counter++; } else { // increase step size #if (DEBUG_LEVEL>0) printf("NULL STEP: SS(ii) FAILS.\n"); #endif serious_counter--; rho = MAX(rho/10,min_rho); } } else { // no sufficient decrease serious_counter--; if ((cut_error[size_active-1]>m3*last_sigma_k)&&(fabs(obj_difference)>last_z_k_norm+last_sigma_k)) { #if (DEBUG_LEVEL>0) printf("NULL STEP: NS(ii) FAILS.\n"); #endif rho = MIN(10*rho,max_rho); } #if (DEBUG_LEVEL>0) else printf("NULL STEP\n"); #endif } // update last_sigma_k last_sigma_k = sigma_k; last_z_k_norm = z_k_norm; // break away from while loop if more than certain proportioal decrease in primal objective if (primal_obj_b/initial_primal_obj<1-decrease_proportion) { suff_decrease_cond = 1; } // clean up if (iter % CLEANUP_CHECK == 0) { size_active = resize_cleanup(size_active, idle, alpha, delta, gammaG0, proximal_rhs, G, 
dXc, cut_error); } */ } // end cutting plane while loop printf("Inner loop optimization finished.\n"); fflush(stdout); /* free memory */ for (j=0; j<size_active; j++) { free(G[j]); free_example(dXc[j],0); } free(G); free(dXc); free(alphabeta); free(delta); free_svector(new_constraint); //free(idle); //free(gammaG0); free(proximal_rhs); //free(cut_error); /* copy and free */ /*for (i=1;i<sm->sizePsi+1;i++) { w[i] = w_b[i]; } free(w_b);*/ //return(primal_obj_b); return lower_bound; }
/*
 * Inner-loop cutting-plane solver with a proximal term around a center w_b.
 *
 * Each iteration adds the most recent cutting plane (obtained from
 * find_cutting_plane) to the active set, extends the Gram matrix, solves the
 * resulting QP with svm_learn_optimization, and then decides between a
 * "serious step" (the center w_b is moved to the new w) and a "null step"
 * (the center is kept and the proximal weight rho may be adapted).
 *
 * Parameters:
 *   w         weight vector, accessed at indices 1..sm->sizePsi.  On entry it
 *             is the warm start; on return it holds the final center w_b.
 *   m         forwarded unchanged to find_cutting_plane.
 *   MAX_ITER  upper bound on the number of loop iterations.
 *   C         regularisation constant; also scales max_rho (100 * C).
 *   epsilon   the loop stops once expected_descent >= -epsilon; also used to
 *             configure the QP solver precision.
 *   fycache, ex, sm, sparm
 *             forwarded to find_cutting_plane; sm->sizePsi gives the
 *             dimensionality of w.
 *
 * Returns primal_obj_b, the primal objective value recorded for the final
 * center w_b.
 *
 * The loop also terminates when primal_obj_b has dropped below
 * (1 - decrease_proportion) times its initial value.
 */
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm)
{
  long i, j;
  double *alpha;
  double **G; /* Gram matrix */
  DOC **dXc; /* constraint matrix */
  double *delta; /* rhs of constraints */
  SVECTOR *new_constraint;
  double dual_obj, alphasum;
  int iter, size_active;
  double value;
  int *idle; /* for cleaning up */
  double margin;
  double primal_obj;
  double *proximal_rhs;
  double *gammaG0 = NULL;
  double min_rho = 0.001;
  double max_rho;
  double serious_counter = 0;
  double rho = 1.0; /* temporarily set it to 1 first */

  double expected_descent, primal_obj_b = -1, reg_master_obj;
  int null_step = 1;
  double *w_b;
  double kappa = 0.1;
  double temp_var;
  double proximal_term, primal_lower_bound;

  double v_k;
  double obj_difference;
  double *cut_error; /* cut_error[i] = alpha_{k,i} at current center x_k */
  double sigma_k;
  double m2 = 0.2;
  double m3 = 0.9;
  double gTd;
  double last_sigma_k = 0;

  double initial_primal_obj;
  int suff_decrease_cond = 0;
  double decrease_proportion = 0.2; /* start from 0.2 first */

  double z_k_norm;
  double last_z_k_norm = 0;

  /* set parameters for hideo solver */
  LEARN_PARM lparm;
  KERNEL_PARM kparm;
  MODEL *svmModel = NULL;
  lparm.biased_hyperplane = 0;
  lparm.epsilon_crit = MIN(epsilon, 0.001);
  lparm.svm_c = C;
  lparm.sharedslack = 1;
  kparm.kernel_type = LINEAR;

  lparm.remove_inconsistent = 0;
  lparm.skip_final_opt_check = 0;
  lparm.svm_maxqpsize = 10;
  lparm.svm_newvarsinqp = 0;
  lparm.svm_iter_to_shrink = -9999;
  lparm.maxiter = 100000;
  lparm.kernel_cache_size = 40;
  lparm.eps = epsilon;
  lparm.transduction_posratio = -1.0;
  lparm.svm_costratio = 1.0;
  lparm.svm_costratio_unlab = 1.0;
  lparm.svm_unlabbound = 1E-5;
  lparm.epsilon_a = 1E-10; /* changed from 1e-15 */
  lparm.compute_loo = 0;
  lparm.rho = 1.0;
  lparm.xa_depth = 0;
  strcpy(lparm.alphafile, "");
  kparm.poly_degree = 3;
  kparm.rbf_gamma = 1.0;
  kparm.coef_lin = 1;
  kparm.coef_const = 1;
  strcpy(kparm.custom, "empty");

  w_b = create_nvector(sm->sizePsi);
  clear_nvector(w_b, sm->sizePsi);
  /* warm start */
  for (i = 1; i < sm->sizePsi + 1; i++) {
    w_b[i] = w[i];
  }

  iter = 0;
  size_active = 0;
  alpha = NULL;
  G = NULL;
  dXc = NULL;
  delta = NULL;
  idle = NULL;

  proximal_rhs = NULL;
  cut_error = NULL;

  new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm);
  value = margin - sprod_ns(w, new_constraint);

  primal_obj_b = 0.5 * sprod_nn(w_b, w_b, sm->sizePsi) + C * value;
  primal_obj = 0.5 * sprod_nn(w, w, sm->sizePsi) + C * value;
  primal_lower_bound = 0;
  expected_descent = -primal_obj_b;
  initial_primal_obj = primal_obj_b;

  max_rho = 100 * C; /* tree-edge loss not within 0-1 */

  printf("Running CCCP inner loop solver: ");
  fflush(stdout);

  while ((!suff_decrease_cond) && (expected_descent < -epsilon) && (iter < MAX_ITER)) {
    iter += 1;
    size_active += 1;

#if (DEBUG_LEVEL > 0)
    printf("ITER %d\n", iter);
#endif
    printf(".");
    fflush(stdout);

    /* add constraint */
    dXc = (DOC **) realloc(dXc, sizeof(DOC *) * size_active);
    assert(dXc != NULL);
    dXc[size_active - 1] = (DOC *) malloc(sizeof(DOC));
    /* checked like the reallocs around it; the next line dereferences it */
    assert(dXc[size_active - 1] != NULL);
    dXc[size_active - 1]->fvec = new_constraint;
    dXc[size_active - 1]->slackid = 1; /* only one common slackid (one-slack) */
    dXc[size_active - 1]->costfactor = 1.0;

    delta = (double *) realloc(delta, sizeof(double) * size_active);
    assert(delta != NULL);
    delta[size_active - 1] = margin;
    alpha = (double *) realloc(alpha, sizeof(double) * size_active);
    assert(alpha != NULL);
    alpha[size_active - 1] = 0.0;
    idle = (int *) realloc(idle, sizeof(int) * size_active);
    assert(idle != NULL);
    idle[size_active - 1] = 0;
    /* proximal point */
    proximal_rhs = (double *) realloc(proximal_rhs, sizeof(double) * size_active);
    assert(proximal_rhs != NULL);
    cut_error = (double *) realloc(cut_error, sizeof(double) * size_active);
    assert(cut_error != NULL);
    /* note g_i = - new_constraint */
    cut_error[size_active - 1] = C * (sprod_ns(w_b, new_constraint) - sprod_ns(w, new_constraint));
    cut_error[size_active - 1] += (primal_obj_b - 0.5 * sprod_nn(w_b, w_b, sm->sizePsi));
    cut_error[size_active - 1] -= (primal_obj - 0.5 * sprod_nn(w, w, sm->sizePsi));

    gammaG0 = (double *) realloc(gammaG0, sizeof(double) * size_active);
    assert(gammaG0 != NULL);

    /* update Gram matrix */
    G = (double **) realloc(G, sizeof(double *) * size_active);
    assert(G != NULL);
    G[size_active - 1] = NULL;
    for (j = 0; j < size_active; j++) {
      G[j] = (double *) realloc(G[j], sizeof(double) * size_active);
      assert(G[j] != NULL);
    }
    for (j = 0; j < size_active - 1; j++) {
      G[size_active - 1][j] = sprod_ss(dXc[size_active - 1]->fvec, dXc[j]->fvec);
      G[j][size_active - 1] = G[size_active - 1][j];
    }
    G[size_active - 1][size_active - 1] = sprod_ss(dXc[size_active - 1]->fvec, dXc[size_active - 1]->fvec);

    /* update gammaG0: only the new entry while the center has never moved,
       otherwise recompute every entry against the current w_b */
    if (null_step == 1) {
      gammaG0[size_active - 1] = sprod_ns(w_b, dXc[size_active - 1]->fvec);
    } else {
      for (i = 0; i < size_active; i++) {
        gammaG0[i] = sprod_ns(w_b, dXc[i]->fvec);
      }
    }

    /* update proximal_rhs */
    for (i = 0; i < size_active; i++) {
      proximal_rhs[i] = (1 + rho) * delta[i] - rho * gammaG0[i];
    }

    /* solve QP to update alpha */
    if (size_active > 1) {
      if (svmModel != NULL)
        free_model(svmModel, 0);
      svmModel = (MODEL *) my_malloc(sizeof(MODEL));
      svm_learn_optimization(dXc, proximal_rhs, size_active, sm->sizePsi, &lparm, &kparm, NULL, svmModel, alpha);
    } else {
      assert(size_active == 1);
      alpha[0] = C;
    }

    clear_nvector(w, sm->sizePsi);
    for (j = 0; j < size_active; j++) {
      if (alpha[j] > C * ALPHA_THRESHOLD) {
        add_vector_ns(w, dXc[j]->fvec, alpha[j] / (1 + rho));
      }
    }

    /* compute dual obj (uses w before the center is mixed in below) */
    dual_obj = +0.5 * (1 + rho) * sprod_nn(w, w, sm->sizePsi);
    for (j = 0; j < size_active; j++) {
      dual_obj -= proximal_rhs[j] / (1 + rho) * alpha[j];
    }

    z_k_norm = sqrt(sprod_nn(w, w, sm->sizePsi));

    add_vector_nn(w, w_b, sm->sizePsi, rho / (1 + rho));

    /* detect if step size too small */
    sigma_k = 0;
    alphasum = 0;
    for (j = 0; j < size_active; j++) {
      sigma_k += alpha[j] * cut_error[j];
      alphasum += alpha[j];
    }
    sigma_k /= C;
    gTd = -C * (sprod_ns(w, new_constraint) - sprod_ns(w_b, new_constraint));

#if (DEBUG_LEVEL > 0)
    for (j = 0; j < size_active; j++) {
      printf("alpha[%d]: %.8g, cut_error[%d]: %.8g\n", j, alpha[j], j, cut_error[j]);
    }
    printf("sigma_k: %.8g\n", sigma_k);
    printf("alphasum: %.8g\n", alphasum);
    printf("g^T d: %.8g\n", gTd);
    fflush(stdout);
#endif

    /* update cleanup information */
    for (j = 0; j < size_active; j++) {
      if (alpha[j] < ALPHA_THRESHOLD * C) {
        idle[j]++;
      } else {
        idle[j] = 0;
      }
    }

    /* the plane found here is added to the active set at the top of the
       next iteration, or freed after the loop if there is none */
    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm);
    value = margin - sprod_ns(w, new_constraint);

    /* print primal objective */
    primal_obj = 0.5 * sprod_nn(w, w, sm->sizePsi) + C * value;

#if (DEBUG_LEVEL > 0)
    printf("ITER PRIMAL_OBJ %.4f\n", primal_obj);
    fflush(stdout);
#endif

    temp_var = sprod_nn(w_b, w_b, sm->sizePsi);
    proximal_term = 0.0;
    for (i = 1; i < sm->sizePsi + 1; i++) {
      proximal_term += (w[i] - w_b[i]) * (w[i] - w_b[i]);
    }

    reg_master_obj = -dual_obj + 0.5 * rho * temp_var / (1 + rho);
    expected_descent = reg_master_obj - primal_obj_b;

    v_k = (reg_master_obj - proximal_term * rho / 2) - primal_obj_b;

    primal_lower_bound = MAX(primal_lower_bound, reg_master_obj - 0.5 * rho * (1 + rho) * proximal_term);

#if (DEBUG_LEVEL > 0)
    printf("ITER REG_MASTER_OBJ: %.4f\n", reg_master_obj);
    printf("ITER EXPECTED_DESCENT: %.4f\n", expected_descent);
    printf("ITER PRIMAL_OBJ_B: %.4f\n", primal_obj_b);
    printf("ITER RHO: %.4f\n", rho);
    printf("ITER ||w-w_b||^2: %.4f\n", proximal_term);
    printf("ITER PRIMAL_LOWER_BOUND: %.4f\n", primal_lower_bound);
    printf("ITER V_K: %.4f\n", v_k);
#endif
    obj_difference = primal_obj - primal_obj_b;

    if (primal_obj < primal_obj_b + kappa * expected_descent) {
      /* extra condition to be met */
      if ((gTd > m2 * v_k) || (rho < min_rho + 1E-8)) {
#if (DEBUG_LEVEL > 0)
        printf("SERIOUS STEP\n");
#endif
        /* update cut_error */
        for (i = 0; i < size_active; i++) {
          cut_error[i] -= (primal_obj_b - 0.5 * sprod_nn(w_b, w_b, sm->sizePsi));
          cut_error[i] -= C * sprod_ns(w_b, dXc[i]->fvec);
          cut_error[i] += (primal_obj - 0.5 * sprod_nn(w, w, sm->sizePsi));
          cut_error[i] += C * sprod_ns(w, dXc[i]->fvec);
        }
        primal_obj_b = primal_obj;
        for (i = 1; i < sm->sizePsi + 1; i++) {
          w_b[i] = w[i];
        }
        null_step = 0;
        serious_counter++;
      } else {
        /* increase step size */
#if (DEBUG_LEVEL > 0)
        printf("NULL STEP: SS(ii) FAILS.\n");
#endif
        serious_counter--;
        rho = MAX(rho / 10, min_rho);
      }
    } else { /* no sufficient decrease */
      serious_counter--;
      if ((cut_error[size_active - 1] > m3 * last_sigma_k) && (fabs(obj_difference) > last_z_k_norm + last_sigma_k)) {
#if (DEBUG_LEVEL > 0)
        printf("NULL STEP: NS(ii) FAILS.\n");
#endif
        rho = MIN(10 * rho, max_rho);
      }
#if (DEBUG_LEVEL > 0)
      else
        printf("NULL STEP\n");
#endif
    }
    /* update last_sigma_k */
    last_sigma_k = sigma_k;
    last_z_k_norm = z_k_norm;

    /* break away from while loop if more than certain proportional decrease in primal objective */
    if (primal_obj_b / initial_primal_obj < 1 - decrease_proportion) {
      suff_decrease_cond = 1;
    }

    /* clean up; resize_cleanup receives the addresses of the arrays and
       returns the new active-set size */
    if (iter % CLEANUP_CHECK == 0) {
      size_active = resize_cleanup(size_active, &idle, &alpha, &delta, &gammaG0, &proximal_rhs, &G, &dXc, &cut_error);
    }

  } /* end cutting plane while loop */

  printf(" Inner loop optimization finished.\n");
  fflush(stdout);

  /* free memory */
  /* NOTE(review): free_example is called with 0 as its second argument, which
     presumably leaves dXc[j]->fvec allocated -- confirm whether the constraint
     vectors are owned elsewhere or are leaked here. */
  for (j = 0; j < size_active; j++) {
    free(G[j]);
    free_example(dXc[j], 0);
  }
  free(G);
  free(dXc);
  free(alpha);
  free(delta);
  free_svector(new_constraint);
  free(idle);
  free(gammaG0);
  free(proximal_rhs);
  free(cut_error);
  /* release the model left over from the last QP solve, using the same call
     the loop uses before each re-allocation */
  if (svmModel != NULL)
    free_model(svmModel, 0);

  /* copy and free */
  for (i = 1; i < sm->sizePsi + 1; i++) {
    w[i] = w_b[i];
  }
  free(w_b);

  return (primal_obj_b);
}
/*
 * Parse one example line of the form
 *     <label> [qid:<n>] [cost:<x>] <feature>:<value> ... [# comment]
 * into doc.
 *
 * line           NUL-terminated input; modified in place (truncated at the
 *                first '#').
 * doc            receives queryid, costfactor, words (terminated by an entry
 *                with wnum == 0), docnum (-1) and twonorm_sq.  doc->words must
 *                already point to storage for at least max_words_doc + 1
 *                entries.
 * label          receives the leading numeric label.
 * numwords       receives the number of entries written to doc->words,
 *                including the terminating entry.
 * max_words_doc  maximum number of tokens processed after the label.
 *
 * Returns 1 on success and 0 when no numeric label can be read (empty line
 * or a first token that is not a number).  Malformed feature tokens,
 * non-positive feature numbers and features that are not strictly increasing
 * print a diagnostic and terminate the process with exit(1).
 */
int parse_document(char *line, DOC *doc, double *label, long int *numwords, long int max_words_doc)
{
  register long wpos, pos;
  long wnum;
  double weight;
  int numread;
  char featurepair[1000], junk[1000];

  doc->queryid = 0;
  doc->costfactor = 1;

  /* cut off comments */
  pos = 0;
  while (line[pos]) {
    if (line[pos] == '#') {
      line[pos] = 0;
    } else {
      pos++;
    }
  }

  wpos = 0;
  /* sscanf yields EOF on an empty line and 0 on a non-numeric first token;
     in both cases *label would be left unset, so report failure */
  if (sscanf(line, "%lf", label) != 1)
    return (0);

  /* step over the label token; the unsigned char cast keeps isspace defined
     for bytes with the high bit set */
  pos = 0;
  while (isspace((unsigned char)line[pos]))
    pos++;
  while (line[pos] && !isspace((unsigned char)line[pos]))
    pos++;

  /* the field width keeps an over-long token from overflowing featurepair;
     junk can never receive more than featurepair holds */
  while (((numread = sscanf(line + pos, "%999s", featurepair)) != EOF) && (wpos < max_words_doc)) {
    /* advance pos past the token that was just copied */
    while (isspace((unsigned char)line[pos]))
      pos++;
    while (line[pos] && !isspace((unsigned char)line[pos]))
      pos++;

    /* a return value one short of the full conversion count means nothing
       trailed the number, i.e. junk stayed empty */
    if (sscanf(featurepair, "qid:%ld%s", &wnum, junk) == 1) {
      /* it is the query id */
      doc->queryid = (long)wnum;
    } else if (sscanf(featurepair, "cost:%lf%s", &weight, junk) == 1) {
      /* it is the example-dependent cost factor */
      doc->costfactor = (double)weight;
    } else if (sscanf(featurepair, "%ld:%lf%s", &wnum, &weight, junk) == 2) {
      /* it is a regular feature */
      if (wnum <= 0) {
        perror("Feature numbers must be larger or equal to 1!!!\n");
        printf("LINE: %s\n", line);
        exit(1);
      }
      if ((wpos > 0) && ((doc->words[wpos - 1]).wnum >= wnum)) {
        perror("Features must be in increasing order!!!\n");
        printf("LINE: %s\n", line);
        exit(1);
      }
      (doc->words[wpos]).wnum = wnum;
      (doc->words[wpos]).weight = (FVAL)weight;
      wpos++;
    } else {
      perror("Cannot parse feature/value pair!!!\n");
      printf("'%s' in LINE: %s\n", featurepair, line);
      exit(1);
    }
  }

  (doc->words[wpos]).wnum = 0; /* terminating entry */
  (*numwords) = wpos + 1;
  doc->docnum = -1;
  doc->twonorm_sq = sprod_ss(doc->words, doc->words);
  return (1);
}
/*
 * Return the index just past the next whitespace-delimited token of s,
 * starting the search at pos.  Leading whitespace is skipped first and the
 * scan never moves beyond the terminating NUL.
 */
static long read_model_skip_token(const char *s, long pos)
{
  while (s[pos] && isspace((unsigned char)s[pos]))
    pos++;
  while (s[pos] && !isspace((unsigned char)s[pos]))
    pos++;
  return pos;
}

/*
 * Read a model file written in the SVM-light text format into model.
 *
 * modelfile  path of the file to read.
 * model      receives the kernel parameters, totwords, totdoc, sv_num, b and,
 *            for i = 1 .. sv_num-1, alpha[i] and supvec[i].  This function
 *            does not allocate model->alpha or model->supvec; they must
 *            already have room for sv_num entries.
 * max_words  maximum number of feature:value pairs kept per support vector.
 * ll         size of the line buffer used for each support-vector line.
 *
 * Terminates the process with exit(1) if the file cannot be opened, the
 * version string does not match VERSION_SVMLIGHT, a header field cannot be
 * parsed, or the file ends before sv_num-1 support-vector lines were read.
 * A malformed feature:value pair is reported and skipped; an unreadable
 * alpha is reported and stored as 0.
 */
void read_model(char *modelfile, MODEL *model, long int max_words, long int ll)
{
  FILE *modelfl;
  long j, i;
  char *line;
  WORD *words;
  register long wpos;
  long wnum, pos;
  double weight;
  char version_buffer[100];
  int numread;

  if (verbosity >= 1) {
    printf("Reading model...");
    fflush(stdout);
  }
  words = (WORD *)my_malloc(sizeof(WORD) * (max_words + 10));
  line = (char *)my_malloc(sizeof(char) * ll);

  if ((modelfl = fopen(modelfile, "r")) == NULL) {
    perror(modelfile);
    exit(1);
  }

  /* bounded read; a failed conversion is treated like a version mismatch
     instead of comparing an uninitialised buffer */
  version_buffer[0] = '\0';
  if ((fscanf(modelfl, "SVM-light Version %99s\n", version_buffer) != 1) || strcmp(version_buffer, VERSION_SVMLIGHT)) {
    perror("Version of model-file does not match version of svm_classify!");
    exit(1);
  }

  /* NOTE(review): the "%[^#]" conversion into kernel_parm.custom has no field
     width; the capacity of that buffer is not visible here -- confirm it and
     add a matching width. */
  if ((fscanf(modelfl, "%ld%*[^\n]\n", &model->kernel_parm.kernel_type) != 1)
      || (fscanf(modelfl, "%ld%*[^\n]\n", &model->kernel_parm.poly_degree) != 1)
      || (fscanf(modelfl, "%lf%*[^\n]\n", &model->kernel_parm.rbf_gamma) != 1)
      || (fscanf(modelfl, "%lf%*[^\n]\n", &model->kernel_parm.coef_lin) != 1)
      || (fscanf(modelfl, "%lf%*[^\n]\n", &model->kernel_parm.coef_const) != 1)
      || (fscanf(modelfl, "%[^#]%*[^\n]\n", model->kernel_parm.custom) != 1)
      || (fscanf(modelfl, "%ld%*[^\n]\n", &model->totwords) != 1)
      || (fscanf(modelfl, "%ld%*[^\n]\n", &model->totdoc) != 1)
      || (fscanf(modelfl, "%ld%*[^\n]\n", &model->sv_num) != 1)
      || (fscanf(modelfl, "%lf%*[^\n]\n", &model->b) != 1)) {
    /* continuing would drive the loop below with an indeterminate sv_num */
    fprintf(stderr, "Error: malformed header in model file %s\n", modelfile);
    exit(1);
  }

  for (i = 1; i < model->sv_num; i++) {
    if (fgets(line, (int)ll, modelfl) == NULL) {
      fprintf(stderr, "Error: model file %s ends before support vector %ld of %ld\n",
              modelfile, (long)i, (long)(model->sv_num - 1));
      exit(1);
    }
    pos = 0;
    wpos = 0;
    if (sscanf(line, "%lf", &model->alpha[i]) != 1) {
      perror("Parsing error while reading model!");
      printf("LINE: %s\n", line);
      model->alpha[i] = 0; /* defined value instead of leaving it unset */
    }
    pos = read_model_skip_token(line, pos); /* step over the alpha token */

    while (((numread = sscanf(line + pos, "%ld:%lf", &wnum, &weight)) != EOF) && (wpos < max_words)) {
      /* always advance, so a bad token cannot be parsed twice */
      pos = read_model_skip_token(line, pos);
      if (numread != 2) {
        perror("Parsing error while reading model!");
        printf("LINE: %s\n", line);
        continue; /* do not store a pair that was not fully converted */
      }
      words[wpos].wnum = wnum;
      words[wpos].weight = (FVAL)weight;
      wpos++;
    }

    /* copy the parsed pairs into a right-sized, 0-terminated word array */
    model->supvec[i] = (DOC *)my_malloc(sizeof(DOC));
    (model->supvec[i])->words = (WORD *)my_malloc(sizeof(WORD) * (wpos + 1));
    for (j = 0; j < wpos; j++) {
      (model->supvec[i])->words[j] = words[j];
    }
    ((model->supvec[i])->words[wpos]).wnum = 0;
    (model->supvec[i])->twonorm_sq = sprod_ss((model->supvec[i])->words, (model->supvec[i])->words);
    (model->supvec[i])->docnum = -1;
  }

  fclose(modelfl);
  free(line);
  free(words);
  if (verbosity >= 1) {
    fprintf(stdout, "OK. (%d support vectors read)\n", (int)(model->sv_num - 1));
  }
}