double alternate_convex_search(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, int *valid_examples, double spl_weight) { long i; int iter = 0, converged, nValid; double last_relaxed_primal_obj = DBL_MAX, relaxed_primal_obj, decrement; int *prev_valid_examples = (int *) malloc(m*sizeof(int)); double *best_w = (double *) malloc((sm->sizePsi+1)*sizeof(double)); for (i=0;i<sm->sizePsi+1;i++) best_w[i] = w[i]; nValid = update_valid_examples(w, m, C, fycache, ex, sm, sparm, valid_examples, spl_weight); //last_relaxed_primal_obj = current_obj_val(ex, fycache, m, sm, sparm, C, valid_examples); if(nValid < m) last_relaxed_primal_obj += (double)(m-nValid)/((double)spl_weight); for (i=0;i<m;i++) { prev_valid_examples[i] = 0; } for (iter=0;;iter++) { nValid = update_valid_examples(w, m, C, fycache, ex, sm, sparm, valid_examples, spl_weight); printf("ACS Iteration %d: number of examples = %d\n",iter,nValid); fflush(stdout); converged = check_acs_convergence(prev_valid_examples,valid_examples,m); if(converged) break; for (i=0;i<sm->sizePsi+1;i++) w[i] = 0.0; if(sparm->solve_dual){ cutting_plane_algorithm_dual(w, m, MAX_ITER, C, epsilon, fycache, ex, sm, sparm, valid_examples); } else{ cutting_plane_algorithm(w, m, MAX_ITER, C, epsilon, fycache, ex, sm, sparm, valid_examples); } /*relaxed_primal_obj = cutting_plane_algorithm(w, m, MAX_ITER, C, epsilon, fycache, ex, sm, sparm, valid_examples); if(nValid < m) relaxed_primal_obj += (double)(m-nValid)/((double)spl_weight); decrement = last_relaxed_primal_obj-relaxed_primal_obj; printf("relaxed primal objective: %.4f\n", relaxed_primal_obj); if (iter) { printf("decrement: %.4f\n", decrement); fflush(stdout); } else { printf("decrement: N/A\n"); fflush(stdout); } if (decrement>=0.0) { for (i=0;i<sm->sizePsi+1;i++) { best_w[i] = w[i]; } } if (decrement <= C*epsilon) { break; } last_relaxed_primal_obj = relaxed_primal_obj;*/ for (i=0;i<sm->sizePsi+1;i++) { best_w[i] = w[i]; } for (i=0;i<m;i++) { prev_valid_examples[i] = valid_examples[i]; } } for (i=0;i<m;i++) { prev_valid_examples[i] = 1; } if (iter) { for (i=0;i<sm->sizePsi+1;i++) { w[i] = best_w[i]; } } double primal_obj; primal_obj = current_obj_val(ex, fycache, m, sm, sparm, C, prev_valid_examples); free(prev_valid_examples); free(best_w); //return; //return(relaxed_primal_obj); return(primal_obj); }
double optimizeMultiVariatePerfMeasure(SAMPLE sample, int datasetStartIdx, int chunkSz, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, double C, double Cdash, double epsilon, int MAX_ITER, LEARN_PARM *learn_parm, char *trainfile, double ***w_iters, int eid, int chunkid, int numChunks, double *zeroes){ int i; time_t time_start, time_end; double decrement; double primal_obj, last_primal_obj; double cooling_eps; double stop_crit; LATENT_VAR *imputed_h = NULL; int dataset_sz = sample.n; SVECTOR **fycache, *diff, *fy; EXAMPLE *ex = sample.examples; /* some training information */ printf("C: %.8g\n", C); printf("Cdash: %.8g\n", Cdash); printf("epsilon: %.8g\n", epsilon); printf("sample.n: %ld\n", dataset_sz); printf("sm->sizePsi: %ld\n", sm->sizePsi); fflush(stdout); /* prepare feature vector cache for correct labels with imputed latent variables */ fycache = (SVECTOR**)malloc(dataset_sz*sizeof(SVECTOR*)); for (i=0;i<dataset_sz;i++) { fy = psi(ex[i].x, ex[i].y, ex[i].h, sm, sparm); diff = add_list_ss(fy); free_svector(fy); fy = diff; fycache[i] = fy; } /* time taken stats */ time(&time_start); /* outer loop: latent variable imputation */ int outer_iter = 0; last_primal_obj = 0; decrement = 0; cooling_eps = 0.5*MAX(C,Cdash)*epsilon; while ((outer_iter<2)||((!stop_crit)&&(outer_iter<MAX_OUTER_ITER))) { printf("OUTER ITER %d\n", outer_iter); fflush(stdout); /* cutting plane algorithm */ time_t cp_start, cp_end; time(&cp_start); /// NOTE : Change of variables (Create 'u' by subtracting w_prev from w) create_u_variables(w_iters, eid, chunkid, numChunks, sm, zeroes); if(chunkid == 0 && eid == 0){ // First Chunk of First Epoch primal_obj = cutting_plane_algorithm(w_iters[eid][chunkid], dataset_sz, MAX_ITER, C, cooling_eps, fycache, ex, sm, sparm, learn_parm->tmpdir, trainfile, learn_parm->frac_sim, learn_parm->Fweight, learn_parm->dataset_stats_file, learn_parm->rho_admm, learn_parm->isExhaustive, learn_parm->isLPrelaxation, Cdash, datasetStartIdx, chunkSz, eid, chunkid, zeroes, numChunks); // pass the zeroes vector } else if(chunkid == 0){ // First chunk of the new Epoch primal_obj = cutting_plane_algorithm(w_iters[eid][chunkid], dataset_sz, MAX_ITER, C, cooling_eps, fycache, ex, sm, sparm, learn_parm->tmpdir, trainfile, learn_parm->frac_sim, learn_parm->Fweight, learn_parm->dataset_stats_file, learn_parm->rho_admm, learn_parm->isExhaustive, learn_parm->isLPrelaxation, Cdash, datasetStartIdx, chunkSz, eid, chunkid, w_iters[eid-1][numChunks-1], numChunks); // Last chunk of previous epoch } else { primal_obj = cutting_plane_algorithm(w_iters[eid][chunkid], dataset_sz, MAX_ITER, C, cooling_eps, fycache, ex, sm, sparm, learn_parm->tmpdir, trainfile, learn_parm->frac_sim, learn_parm->Fweight, learn_parm->dataset_stats_file, learn_parm->rho_admm, learn_parm->isExhaustive, learn_parm->isLPrelaxation, Cdash, datasetStartIdx, chunkSz, eid, chunkid, w_iters[eid][chunkid-1], numChunks); // previous chunk id of current epoch } time(&cp_end); #if(DEBUG_LEVEL==1) char msg[20]; sprintf(msg,"OUTER ITER %d", outer_iter); print_time(cp_start, cp_end, msg); #endif /* compute decrement in objective in this outer iteration */ decrement = last_primal_obj - primal_obj; last_primal_obj = primal_obj; printf("primal objective: %.4f\n", primal_obj); printf("decrement: %.4f\n", decrement); fflush(stdout); stop_crit = (decrement<MAX(C, Cdash)*epsilon)&&(cooling_eps<0.5*MAX(C, Cdash)*epsilon+1E-8); cooling_eps = -decrement*0.01; cooling_eps = MAX(cooling_eps, 0.5*MAX(C,Cdash)*epsilon); printf("cooling_eps: %.8g\n", cooling_eps); /* impute latent variable using updated weight vector */ for(i = 0; i < dataset_sz; i ++) free_latent_var(ex[i].h); if(imputed_h != NULL) free(imputed_h); imputed_h = (LATENT_VAR*)malloc(sizeof(LATENT_VAR) * dataset_sz); infer_latent_variables_all(imputed_h, sm, sparm, dataset_sz, learn_parm->tmpdir, trainfile, datasetStartIdx, chunkSz, eid, chunkid); for (i=0;i<dataset_sz;i++) { // free_latent_var(ex[i].h); // ex[i].h = infer_latent_variables(ex[i].x, ex[i].y, &sm, &sparm); // ILP for Pr (Z | Y_i, X_i) in our case ex[i].h = imputed_h[i]; } /* re-compute feature vector cache */ for (i=0;i<dataset_sz;i++) { free_svector(fycache[i]); fy = psi(ex[i].x, ex[i].y, ex[i].h, &sm, &sparm); diff = add_list_ss(fy); free_svector(fy); fy = diff; fycache[i] = fy; } printf("(OnlineSVM) .. finished outer_iter %d\n",outer_iter); outer_iter++; /// NOTE: Restore the 'w' by adding the current 'u' to w_prev restore_w_variables(w_iters, eid, chunkid, numChunks, sm, zeroes); } // end outer loop time(&time_end); #if (DEBUG_LEVEL==1) print_time(time_start, time_end, "Total time"); #endif for(i=0;i<dataset_sz;i++) { free_svector(fycache[i]); } free(fycache); return primal_obj; }
int main(int argc, char* argv[]) { double *w; /* weight vector */ int outer_iter; long m, i; double C, epsilon; LEARN_PARM learn_parm; KERNEL_PARM kernel_parm; char trainfile[1024]; char modelfile[1024]; int MAX_ITER; /* new struct variables */ SVECTOR **fycache, *diff, *fy; EXAMPLE *ex; SAMPLE sample; STRUCT_LEARN_PARM sparm; STRUCTMODEL sm; //double decrement; double primal_obj;//, last_primal_obj; //double cooling_eps; //double stop_crit; DebugConfiguration::VerbosityLevel = VerbosityLevel::None; /* read input parameters */ my_read_input_parameters(argc, argv, trainfile, modelfile, &learn_parm, &kernel_parm, &sparm); epsilon = learn_parm.eps; C = learn_parm.svm_c; MAX_ITER = learn_parm.maxiter; /* read in examples */ sample = read_struct_examples(trainfile,&sparm); ex = sample.examples; m = sample.n; /* initialization */ init_struct_model(sample,&sm,&sparm,&learn_parm,&kernel_parm); w = sm.w; //w = create_nvector(sm.sizePsi); //clear_nvector(w, sm.sizePsi); //sm.w = w; /* establish link to w, as long as w does not change pointer */ /* some training information */ printf("C: %.8g\n", C); printf("epsilon: %.8g\n", epsilon); printf("sample.n: %ld\n", sample.n); printf("sm.sizePsi: %ld\n", sm.sizePsi); fflush(stdout); /* impute latent variable for first iteration */ init_latent_variables(&sample,&learn_parm,&sm,&sparm); /* prepare feature vector cache for correct labels with imputed latent variables */ fycache = (SVECTOR**)malloc(m*sizeof(SVECTOR*)); for (i=0; i<m; i++) { fy = psi(ex[i].x, ex[i].y, ex[i].h, &sm, &sparm); /* DEBUG */ printf("true_psi[%d]=", i); for (int j = 0; j < sm.sizePsi; ++j) printf("%.4lf ", fy->words[j].weight); printf("\n"); diff = add_list_ss(fy); free_svector(fy); fy = diff; fycache[i] = fy; } /* outer loop: latent variable imputation */ outer_iter = 1; //last_primal_obj = 0; //decrement = 0; //cooling_eps = 0.5*C*epsilon; //while ((outer_iter<=MIN_OUTER_ITER)||((!stop_crit)&&(outer_iter<MAX_OUTER_ITER))) { while (outer_iter<MAX_OUTER_ITER) { LearningTracker::NextOuterIteration(); printf("OUTER ITER %d\n", outer_iter); /* cutting plane algorithm */ primal_obj = cutting_plane_algorithm(w, m, MAX_ITER, C, /*cooling_eps, */fycache, ex, &sm, &sparm); /* compute decrement in objective in this outer iteration */ /* decrement = last_primal_obj - primal_obj; last_primal_obj = primal_obj; printf("primal objective: %.4f\n", primal_obj); printf("decrement: %.4f\n", decrement); fflush(stdout); stop_crit = (decrement<C*epsilon)&&(cooling_eps<0.5*C*epsilon+1E-8); cooling_eps = -decrement*0.01; cooling_eps = MAX(cooling_eps, 0.5*C*epsilon); printf("cooling_eps: %.8g\n", cooling_eps); */ /* print new weights */ printf("W="); for (i = 1; i <= sm.sizePsi; ++i) printf("%.3f ", sm.w[i]); printf("\n"); /* Save model */ char modelfile_tmp[1024]; sprintf(modelfile_tmp, "%s.%d", modelfile, outer_iter); write_struct_model(modelfile_tmp, &sm, &sparm); /* impute latent variable using updated weight vector */ for (i=0; i<m; i++) { free_latent_var(ex[i].h); ex[i].h = infer_latent_variables(ex[i].x, ex[i].y, &sm, &sparm); } /* re-compute feature vector cache */ for (i=0; i<m; i++) { free_svector(fycache[i]); fy = psi(ex[i].x, ex[i].y, ex[i].h, &sm, &sparm); /* DEBUG */ printf("true_psi[%d]=", i); for (int j = 0; j < sm.sizePsi; ++j) printf("%.4lf ", fy->words[j].weight); printf("\n"); diff = add_list_ss(fy); free_svector(fy); fy = diff; fycache[i] = fy; } outer_iter++; } // end outer loop /* write structural model */ write_struct_model(modelfile, &sm, &sparm); // skip testing for the moment /* free memory */ free_struct_sample(sample); free_struct_model(sm, &sparm); for(i=0; i<m; i++) { free_svector(fycache[i]); } free(fycache); return(0); }