Ejemplo n.º 1
0
SVECTOR* add_list_ns(SVECTOR *a) 
     /* Folds the SVECTOR list starting at a into a single SVECTOR.
        Every list element contributes scaled by its own factor. A
        dense scratch array indexed by feature number is used for the
        accumulation, so this suits lists whose largest feature number
        is small compared to the list length. The result is produced
        by create_svector_n with an empty user string and factor 1.0. */
{
    SVECTOR *node;
    SVECTOR *result;
    WORD *entry;
    double *dense;
    long max_wnum=0;

    /* pass 1: largest feature number used anywhere in the list
       (each words array ends at the first entry with wnum==0) */
    for(node=a;node!=NULL;node=node->next) {
      for(entry=node->words;entry->wnum;entry++) {
        if(entry->wnum>max_wnum) {
          max_wnum=entry->wnum;
        }
      }
    }

    /* pass 2: accumulate the factor-weighted elements in a cleared
       dense buffer */
    dense=create_nvector(max_wnum);
    clear_nvector(dense,max_wnum);
    for(node=a;node!=NULL;node=node->next) {
      add_vector_ns(dense,node,node->factor);
    }

    /* hand the dense sum to create_svector_n, then drop the scratch
       buffer */
    result=create_svector_n(dense,max_wnum,"",1.0);
    free(dense);

    return(result);
}
Ejemplo n.º 2
0
void add_weight_vector_to_linear_model(MODEL *model)
     /* Builds the explicit weight vector for a linear model and stores
        it in model->lin_weights. The buffer holds totwords+1 doubles
        and is cleared with clear_vector_n before use. For every entry
        i in 1..sv_num-1, the words of supvec[i]->vectors[0] are added
        with weight alpha[i]; entry 0 is not visited. */
{
  double *weights;
  long sv;

  weights=(double *)my_malloc(sizeof(double)*(model->totwords+1));
  model->lin_weights=weights;
  clear_vector_n(weights,model->totwords);

  sv=1;
  while(sv<model->sv_num) {
    add_vector_ns(weights,(model->supvec[sv]->vectors[0])->words,
                  model->alpha[sv]);
    sv++;
  }
}
Ejemplo n.º 3
0
void add_weight_vector_to_linear_model(MODEL *model)
     /* Builds the explicit weight vector for a linear model and stores
        it in model->lin_weights (totwords+1 doubles, cleared with
        clear_vector_n). For every entry i in 1..sv_num-1, each vector
        on the supvec[i]->fvec list is added, scaled by that vector's
        own factor times alpha[i]; entry 0 is not visited. */
{
  SVECTOR *part;
  long sv;

  model->lin_weights=(double *)my_malloc(sizeof(double)*(model->totwords+1));
  clear_vector_n(model->lin_weights,model->totwords);

  for(sv=1;sv<model->sv_num;sv++) {
    part=(model->supvec[sv])->fvec;
    while(part!=NULL) {
      add_vector_ns(model->lin_weights,part,part->factor*model->alpha[sv]);
      part=part->next;
    }
  }
}
Ejemplo n.º 4
0
void add_list_n_ns(double *vec_n, SVECTOR *vec_s, double faktor)
     /* Walks the SVECTOR list vec_s and adds every element to the
        dense vector vec_n through add_vector_ns. Each element is
        weighted by its own factor multiplied by faktor. */
{
  SVECTOR *node=vec_s;

  while(node!=NULL) {
    double weight=node->factor*faktor;

    add_vector_ns(vec_n,node,weight);
    node=node->next;
  }
}
Ejemplo n.º 5
0
double* add_list_nn(SVECTOR *a, long totwords) 
     /* Sums the SVECTOR list a into a newly created dense vector and
        returns it; the caller receives ownership of the buffer. Each
        list element is weighted by its own factor. Entries
        0..totwords of the result are zeroed before accumulation. */
{
    double *dense;
    SVECTOR *node;
    long idx;

    dense=create_nvector(totwords);

    /* zero the accumulator, index totwords included */
    idx=0;
    while(idx<=totwords) {
      dense[idx]=0;
      idx++;
    }

    /* add every list element with its own factor */
    node=a;
    while(node!=NULL) {
      add_vector_ns(dense,node,node->factor);
      node=node->next;
    }

    return(dense);
}
/*
 * Inner-loop solver that grows a working set of cutting planes and re-solves
 * a QP over that set on every iteration.
 *
 * Per iteration: the most recent constraint returned by find_cutting_plane()
 * is appended to the working set (dXc, delta, alpha, idle, proximal_rhs,
 * cut_error, gammaG0 and the Gram matrix G all grow by one entry),
 * mosek_qp_optimize() updates alpha, and w is rebuilt from the constraints
 * whose alpha exceeds C*ALPHA_THRESHOLD (weight alpha[j]/(1+rho)) plus
 * rho/(1+rho) times the current center w_b.  The candidate w replaces the
 * center only when the objective decreased enough ("serious step");
 * otherwise rho may be adjusted and the center is kept ("null step").
 *
 * The loop stops when the center objective has dropped below
 * (1-decrease_proportion) of its initial value, when expected_descent is no
 * longer below -epsilon, or after MAX_ITER iterations.
 *
 * w          in: starting point, entries 1..sm->sizePsi are copied to w_b;
 *            out: overwritten with the final center w_b.
 * MAX_ITER   upper bound on the number of loop iterations.
 * C          multiplies the violation term in the objective, is passed to
 *            mosek_qp_optimize(), and is the upper limit for rho.
 * epsilon    the loop continues only while expected_descent < -epsilon.
 * Cdash, numChunks
 *            the objective contains the extra term Cdash*margin/numChunks.
 * w_prev     sprod_ns(w_prev, constraint) is subtracted from margin after
 *            every find_cutting_plane() call.
 * sm         supplies sizePsi; also forwarded to find_cutting_plane().
 * m, fycache, ex, sparm, tmpdir, trainfile, frac_sim, Fweight,
 * dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation,
 * datasetStartIdx, chunkSz, eid, chunkid
 *            forwarded unchanged to find_cutting_plane().
 *
 * Returns primal_obj_b, the objective value recorded for the final center.
 */
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex,
		STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, char *tmpdir, char * trainfile, double frac_sim, double Fweight,
		char *dataset_stats_file, double rho_admm, long isExhaustive, long isLPrelaxation, double Cdash, int datasetStartIdx, int chunkSz,
		int eid, int chunkid, double *w_prev, int numChunks) {
  long i,j;
  double *alpha;
  double **G; /* Gram matrix */
  DOC **dXc; /* constraint matrix */
  double *delta; /* rhs of constraints */
  SVECTOR *new_constraint;
  double dual_obj, alphasum;
  int iter, size_active;
  double value;
  int r;
  int *idle; /* for cleaning up */
  double margin;
  double primal_obj;
  double *proximal_rhs;
  double *gammaG0=NULL;
  double min_rho = 0.001;
  double max_rho;
  double serious_counter=0;
  double rho = 1.0; /* temporarily set it to 1 first */

  double expected_descent, primal_obj_b=-1, reg_master_obj;
  int null_step=1;
  double *w_b;
  double kappa=0.1;
  double temp_var;
  double proximal_term, primal_lower_bound;

  double v_k;
  double obj_difference;
  double *cut_error; /* cut_error[i] = alpha_{k,i} at current center x_k */
  double sigma_k;
  double m2 = 0.2;
  double m3 = 0.9;
  double gTd;
  double last_sigma_k=0;

  double initial_primal_obj;
  int suff_decrease_cond=0;
  double decrease_proportion = 0.2; /* start from 0.2 first */

  double z_k_norm;
  double last_z_k_norm=0;

  w_b = create_nvector(sm->sizePsi);
  clear_nvector(w_b,sm->sizePsi);
  /* warm start: the center starts at the caller's w */
  for (i=1;i<sm->sizePsi+1;i++) {
    w_b[i] = w[i];
  }

  iter = 0;
  size_active = 0;
  alpha = NULL;
  G = NULL;
  dXc = NULL;
  delta = NULL;
  idle = NULL;

  proximal_rhs = NULL;
  cut_error = NULL;

  printf("ITER 0 \n(before cutting plane) \n");
  double margin2;
  new_constraint = find_cutting_plane (ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile, frac_sim,
		  Fweight, dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation, &margin2,
		  datasetStartIdx, chunkSz, eid, chunkid);
  value = margin2 - sprod_ns(w, new_constraint);

  /* online learning: constant addition to the loss, i.e. the model score
     under w_prev ('-' is used because the terms are reversed in the code) */
  margin -= sprod_ns(w_prev, new_constraint);

  /* objective combines both the hamming and the F1 loss terms */
  primal_obj_b = 0.5*sprod_nn(w_b,w_b,sm->sizePsi)+C*value + Cdash*margin/numChunks;
  primal_obj = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value + Cdash*margin/numChunks;
  primal_lower_bound = 0;
  expected_descent = -primal_obj_b;
  initial_primal_obj = primal_obj_b;

  max_rho = C;

  printf("Running CCCP inner loop solver: \n"); fflush(stdout);

  time_t iter_start, iter_end;

  while ((!suff_decrease_cond)&&(expected_descent<-epsilon)&&(iter<MAX_ITER)) {
    iter+=1;
    size_active+=1;

    time(&iter_start);

#if (DEBUG_LEVEL>0)
    printf("ITER %d\n", iter);
#endif
    printf("."); fflush(stdout);

    /* add  constraint */
    dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active);
    assert(dXc!=NULL);
    dXc[size_active-1] = (DOC*)malloc(sizeof(DOC));
    /* checked like every other allocation in this loop; the pointer is
       dereferenced on the very next line */
    assert(dXc[size_active-1]!=NULL);
    dXc[size_active-1]->fvec = new_constraint;
    dXc[size_active-1]->slackid = 1; /* only one common slackid (one-slack) */
    dXc[size_active-1]->costfactor = 1.0;

    delta = (double*)realloc(delta, sizeof(double)*size_active);
    assert(delta!=NULL);
    delta[size_active-1] = margin2; /* formulation combining hamming and F1 loss */
    alpha = (double*)realloc(alpha, sizeof(double)*size_active);
    assert(alpha!=NULL);
    alpha[size_active-1] = 0.0;
    idle = (int*)realloc(idle, sizeof(int)*size_active);
    assert(idle!=NULL);
    idle[size_active-1] = 0;
    /* proximal point */
    proximal_rhs = (double*)realloc(proximal_rhs, sizeof(double)*size_active);
    assert(proximal_rhs!=NULL);
    cut_error = (double*)realloc(cut_error, sizeof(double)*size_active);
    assert(cut_error!=NULL);
    /* note g_i = - new_constraint */
    cut_error[size_active-1] = C*(sprod_ns(w_b, new_constraint) - sprod_ns(w, new_constraint));
    cut_error[size_active-1] += (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi));
    cut_error[size_active-1] -= (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi));

    gammaG0 = (double*)realloc(gammaG0, sizeof(double)*size_active);
    assert(gammaG0!=NULL);

    /* update Gram matrix: one more row pointer, every row one entry wider */
    G = (double**)realloc(G, sizeof(double*)*size_active);
    assert(G!=NULL);
    G[size_active-1] = NULL; /* so the realloc below allocates the new row */
    for (j=0;j<size_active;j++) {
      G[j] = (double*)realloc(G[j], sizeof(double)*size_active);
      assert(G[j]!=NULL);
    }
    for (j=0;j<size_active-1;j++) {
      G[size_active-1][j] = sprod_ss(dXc[size_active-1]->fvec, dXc[j]->fvec);
      G[j][size_active-1] = G[size_active-1][j];
    }
    G[size_active-1][size_active-1] = sprod_ss(dXc[size_active-1]->fvec,dXc[size_active-1]->fvec);


    /* update gammaG0: while the center has never moved only the new entry
       is needed, afterwards every entry is recomputed against w_b */
    if (null_step==1) {
      gammaG0[size_active-1] = sprod_ns(w_b, dXc[size_active-1]->fvec);
    } else {
      for (i=0;i<size_active;i++) {
        gammaG0[i] = sprod_ns(w_b, dXc[i]->fvec);
      }
    }

    /* update proximal_rhs */
    for (i=0;i<size_active;i++) {
      proximal_rhs[i] = delta[i] - rho/(1+rho)*gammaG0[i];
    }


    /* solve QP to update alpha */
    dual_obj = 0;
    time_t mosek_start, mosek_end;
    time(&mosek_start);
    /* NOTE(review): the solver's return code r is stored but never
       inspected in this function */
    r = mosek_qp_optimize(G, proximal_rhs, alpha, (long) size_active, C, &dual_obj,rho);
    time(&mosek_end);
#if(DEBUG_LEVEL == 1)
    print_time(mosek_start, mosek_end, "Mosek solver");
#endif

    /* rebuild w from the constraints with non-negligible alpha */
    clear_nvector(w,sm->sizePsi);
    for (j=0;j<size_active;j++) {
      if (alpha[j]>C*ALPHA_THRESHOLD) {
        add_vector_ns(w,dXc[j]->fvec,alpha[j]/(1+rho));
      }
    }

    /* norm is taken before the center contribution is added */
    z_k_norm = sqrt(sprod_nn(w,w,sm->sizePsi));

    add_vector_nn(w, w_b, sm->sizePsi, rho/(1+rho));


    /* detect if step size too small */
    sigma_k = 0;
    alphasum = 0;
    for (j=0;j<size_active;j++) {
      sigma_k += alpha[j]*cut_error[j];
      alphasum+=alpha[j];
    }
    sigma_k/=C;
    gTd = -C*(sprod_ns(w,new_constraint) - sprod_ns(w_b,new_constraint));

#if (DEBUG_LEVEL>0)
    for (j=0;j<size_active;j++) {
      printf("alpha[%d]: %.8g, cut_error[%d]: %.8g\n", j, alpha[j], j, cut_error[j]);
    }
    printf("sigma_k: %.8g\n", sigma_k);
    printf("alphasum: %.8g\n", alphasum);
    printf("g^T d: %.8g\n", gTd);
    fflush(stdout);
#endif


    /* update cleanup information */
    for (j=0;j<size_active;j++) {
      if (alpha[j]<ALPHA_THRESHOLD*C) {
        idle[j]++;
      } else {
        idle[j]=0;
      }
    }

    /* the previous constraint is now owned by dXc; fetch the next one */
    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile,
		  frac_sim, Fweight, dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation,
		  &margin2, datasetStartIdx, chunkSz, eid, chunkid);
    value = margin2 - sprod_ns(w, new_constraint);

    /* online learning: constant addition to the loss, i.e. the model score
       under w_prev ('-' is used because the terms are reversed in the code) */
    margin -= sprod_ns(w_prev, new_constraint);

    /* primal objective at the candidate w (hamming and F1 loss combined) */
    primal_obj = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value + Cdash*margin/numChunks;

#if (DEBUG_LEVEL>0)
    printf("ITER PRIMAL_OBJ %.4f\n", primal_obj); fflush(stdout);
#endif


    temp_var = sprod_nn(w_b,w_b,sm->sizePsi);
    /* squared distance between the candidate and the center */
    proximal_term = 0.0;
    for (i=1;i<sm->sizePsi+1;i++) {
      proximal_term += (w[i]-w_b[i])*(w[i]-w_b[i]);
    }

    reg_master_obj = -dual_obj+0.5*rho*temp_var/(1+rho);
    expected_descent = reg_master_obj - primal_obj_b;

    v_k = (reg_master_obj - proximal_term*rho/2) - primal_obj_b;

    primal_lower_bound = MAX(primal_lower_bound, reg_master_obj - 0.5*rho*(1+rho)*proximal_term);

#if (DEBUG_LEVEL>0)
    printf("ITER REG_MASTER_OBJ: %.4f\n", reg_master_obj);
    printf("ITER EXPECTED_DESCENT: %.4f\n", expected_descent);
    printf("ITER PRIMLA_OBJ_B: %.4f\n", primal_obj_b);
    printf("ITER RHO: %.4f\n", rho);
    printf("ITER ||w-w_b||^2: %.4f\n", proximal_term);
    printf("ITER PRIMAL_LOWER_BOUND: %.4f\n", primal_lower_bound);
    printf("ITER V_K: %.4f\n", v_k);
#endif
    obj_difference = primal_obj - primal_obj_b;


    if (primal_obj<primal_obj_b+kappa*expected_descent) {
      /* extra condition to be met */
      if ((gTd>m2*v_k)||(rho<min_rho+1E-8)) {
#if (DEBUG_LEVEL>0)
        printf("SERIOUS STEP\n");
#endif
        /* update cut_error: re-express every entry relative to the new
           center before w_b and primal_obj_b are overwritten */
        for (i=0;i<size_active;i++) {
          cut_error[i] -= (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi));
          cut_error[i] -= C*sprod_ns(w_b, dXc[i]->fvec);
          cut_error[i] += (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi));
          cut_error[i] += C*sprod_ns(w, dXc[i]->fvec);
        }
        primal_obj_b = primal_obj;
        for (i=1;i<sm->sizePsi+1;i++) {
          w_b[i] = w[i];
        }
        null_step = 0;
        serious_counter++;
      } else {
        /* increase step size */
#if (DEBUG_LEVEL>0)
        printf("NULL STEP: SS(ii) FAILS.\n");
#endif
        serious_counter--;
        rho = MAX(rho/10,min_rho);
      }
    } else { /* no sufficient decrease */
      serious_counter--;
      if ((cut_error[size_active-1]>m3*last_sigma_k)&&(fabs(obj_difference)>last_z_k_norm+last_sigma_k)) {
#if (DEBUG_LEVEL>0)
        printf("NULL STEP: NS(ii) FAILS.\n");
#endif
        rho = MIN(10*rho,max_rho);
      }
#if (DEBUG_LEVEL>0)
      else printf("NULL STEP\n");
#endif
    }
    /* update last_sigma_k */
    last_sigma_k = sigma_k;
    last_z_k_norm = z_k_norm;


    /* break away from while loop if more than certain proportional decrease in primal objective */
    if (primal_obj_b/initial_primal_obj<1-decrease_proportion) {
      suff_decrease_cond = 1;
    }

    /* clean up */
    if (iter % CLEANUP_CHECK == 0) {
      size_active = resize_cleanup(size_active, idle, alpha, delta, gammaG0, proximal_rhs, G, dXc, cut_error);
    }

    time(&iter_end);

#if (DEBUG_LEVEL==1)
    char msg[20];
    /* bounded write: never overruns msg */
    snprintf(msg,sizeof(msg),"ITER %d",iter);
    print_time(iter_start, iter_end, msg);
#endif
  } /* end cutting plane while loop */

  printf(" Inner loop optimization finished.\n"); fflush(stdout);

  /* free memory */
  /* NOTE(review): free_example() is called with 0 here; whether that also
     releases dXc[j]->fvec depends on free_example(), which is not visible
     in this file section - confirm the constraints are not leaked */
  for (j=0;j<size_active;j++) {
    free(G[j]);
    free_example(dXc[j],0);
  }
  free(G);
  free(dXc);
  free(alpha);
  free(delta);
  free_svector(new_constraint); /* last constraint was never added to dXc */
  free(idle);
  free(gammaG0);
  free(proximal_rhs);
  free(cut_error);

  /* copy and free: the caller receives the final center, not the last candidate */
  for (i=1;i<sm->sizePsi+1;i++) {
    w[i] = w_b[i];
  }
  free(w_b);

  return(primal_obj_b);

}
Ejemplo n.º 7
0
/*
 * Cutting-plane loop that solves its QP through mosek_qp_optimize_dual().
 *
 * Per iteration: the most recent constraint from find_cutting_plane() is
 * appended to the working set (dXc, delta, idle, qmatrix, Gram matrix G),
 * the QP is solved for alpha, and w is rebuilt from the constraints whose
 * alpha exceeds C*ALPHA_THRESHOLD.  With n_phi = sparm->phi1_size +
 * sparm->phi2_size, the entries alpha[size_active .. size_active+n_phi-1]
 * are then subtracted from w[2*n_phi+1 .. 3*n_phi] when they exceed
 * EQUALITY_EPSILON, and entries of w[1 .. 3*n_phi] with magnitude below
 * EQUALITY_EPSILON are set to 0.
 *
 * The loop runs while the newest constraint satisfies
 * margin - w*constraint > threshold + epsilon and iter < MAX_ITER, where
 * threshold is the largest slack among the working-set constraints (0.0
 * while the set holds a single constraint).
 *
 * w          out: overwritten in every iteration (entries 1..sm->sizePsi are
 *            cleared first); read once before the loop via sprod_ns().
 * MAX_ITER   upper bound on the number of loop iterations.
 * C          passed to the QP solver; C*ALPHA_THRESHOLD is the alpha cutoff.
 * epsilon    tolerance in the loop condition.
 * sparm      supplies phi1_size, phi2_size and gram_regularization; the
 *            latter is multiplied by 10 each time the solver returns 1295.
 * m, fycache, ex, sm, valid_examples
 *            forwarded to find_cutting_plane().
 *
 * Exits the process if the solver returns a non-zero code outside 1293..1296.
 */
void cutting_plane_algorithm_dual(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex,
                                  STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, int *valid_examples) {
  long i,j;
  long n_phi;               /* sparm->phi1_size+sparm->phi2_size, re-read every iteration */
  double *alpha = NULL;
  DOC **dXc = NULL;         /* constraint matrix */
  double *delta = NULL;     /* rhs of constraints */
  SVECTOR *new_constraint;
  int iter = 0, size_active = 0;
  double value;
  double threshold = 0.0;
  double margin;
  double cur_obj;
  double *cur_slack = NULL;
  int mv_iter;
  int *idle = NULL;
  double **G = NULL;
  double **qmatrix = NULL;
  long qrows = 0;           /* number of rows currently owned by qmatrix */
  SVECTOR *f;
  int r;

  printf("Running structural SVM solver: "); fflush(stdout);
  new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
  value = margin - sprod_ns(w, new_constraint);
  while((value>threshold+epsilon)&&(iter<MAX_ITER)) {
    iter+=1;
    size_active+=1;
    n_phi = sparm->phi1_size+sparm->phi2_size;

    printf("."); fflush(stdout);

    /* add  constraint */
    dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active);
    assert(dXc!=NULL);
    dXc[size_active-1] = (DOC*)malloc(sizeof(DOC));
    assert(dXc[size_active-1]!=NULL);
    dXc[size_active-1]->fvec = new_constraint;
    dXc[size_active-1]->slackid = 1; /* only one common slackid (one-slack) */
    dXc[size_active-1]->costfactor = 1.0;

    delta = (double*)realloc(delta, sizeof(double)*size_active);
    assert(delta!=NULL);
    delta[size_active-1] = margin;

    /* alpha is NULL here on every iteration (it is released at the bottom
       of the loop), so this is a fresh allocation.  All entries are zeroed
       because the tail alpha[size_active+j] is read back after the solve. */
    alpha = (double*)realloc(alpha, sizeof(double)*(size_active+n_phi));
    assert(alpha!=NULL);
    for(j = 0; j < size_active+n_phi; j++) {
      alpha[j] = 0.0;
    }

    idle = (int *) realloc(idle, sizeof(int)*size_active);
    assert(idle!=NULL);
    idle[size_active-1] = 0;

    /* After a cleanup size_active may have shrunk: release the rows that
       the realloc below would cut off or that the new row would overwrite. */
    for(j = size_active-1; j < qrows; j++) {
      free(qmatrix[j]);
    }
    qmatrix = (double **) realloc(qmatrix, sizeof(double *)*size_active);
    assert(qmatrix!=NULL);

    qmatrix[size_active-1] = (double *) malloc(sizeof(double)*n_phi);
    assert(n_phi<=0 || qmatrix[size_active-1]!=NULL);
    qrows = size_active;
    /* NOTE(review): resize_cleanup() below is not handed qmatrix, so after a
       cleanup row i of qmatrix presumably no longer matches dXc[i] - verify */
    for(j = 0; j < n_phi; j++){
      qmatrix[size_active-1][j] = (-1)*returnWeightAtIndex(dXc[size_active-1]->fvec->words, (n_phi*2+j+1));
    }

    /* update Gram matrix: one more row pointer, every row one entry wider */
    G = (double **) realloc(G, sizeof(double *)*size_active);
    assert(G!=NULL);
    G[size_active-1] = NULL; /* so the realloc below allocates the new row */
    for(j = 0; j < size_active; j++) {
      G[j] = (double *) realloc(G[j], sizeof(double)*size_active);
      assert(G[j]!=NULL);
    }

    /* off-diagonal entries are stored halved, the diagonal is not */
    for(j = 0; j < size_active-1; j++) {
      G[size_active-1][j] = sprod_ss(dXc[size_active-1]->fvec, dXc[j]->fvec);
      G[size_active-1][j] = G[size_active-1][j]/2;
      G[j][size_active-1]  = G[size_active-1][j];
    }
    G[size_active-1][size_active-1] = sprod_ss(dXc[size_active-1]->fvec,dXc[size_active-1]->fvec);

    /* hack: add a constant to the diagonal to make sure G is PSD */
    G[size_active-1][size_active-1] += 1e-6;

    /* solve QP to update alpha */
    r = mosek_qp_optimize_dual(G, qmatrix, delta, alpha, (long) size_active, (long) n_phi, C, &cur_obj, 0, 0);

    if(r >= 1293 && r <= 1296)
    {
      printf("r:%d. G might not be psd due to numerical errors.\n",r);
      fflush(stdout);
      /* retry with a ten times larger diagonal regularizer until the solver
         stops returning 1295 */
      while(r==1295) {
        printf("r:%d. G might not be psd due to numerical errors. Gram Reg=%0.7f\n",r, sparm->gram_regularization);
        fflush(stdout);
        for(i=0;i<size_active;i++) {
          G[i][i] += 10*sparm->gram_regularization-sparm->gram_regularization;
        }
        sparm->gram_regularization *= 10;
        r = mosek_qp_optimize_dual(G, qmatrix, delta, alpha, (long) size_active, (long) n_phi, C, &cur_obj, sparm->gram_regularization, sparm->gram_regularization*0.1);
      }
    }
    else if(r)
    {
      printf("Error %d in mosek_qp_optimize: Check ${MOSEKHOME}/${VERSION}/tools/platform/${PLATFORM}/h/mosek.h\n",r);
      exit(1);
    }

    /* rebuild w from the constraints with non-negligible alpha and keep
       the idle counters current */
    clear_nvector(w,sm->sizePsi);
    for (j=0;j<size_active;j++) {
      if (alpha[j]>C*ALPHA_THRESHOLD) {
        add_vector_ns(w,dXc[j]->fvec,alpha[j]);
        idle[j] = 0;
      }
      else
        idle[j]++;
    }
    /* the trailing n_phi alpha entries act on the third n_phi-sized block of w */
    for(j=0; j<n_phi;j++){
      if (alpha[size_active+j] > EQUALITY_EPSILON){
        w[j+1+n_phi*2] = w[j+1+n_phi*2] - alpha[size_active+j];
      }
    }

    /* flush near-zero weights to exactly zero */
    for(j=1; j<=n_phi*3;j++){
      if((w[j]<EQUALITY_EPSILON) && (w[j]>(-1*EQUALITY_EPSILON))){
        w[j] = 0;
      }
    }

    /* report (but do not abort on) positive weights in the third block */
    for(j=n_phi*2+1; j<=n_phi*3;j++){
      if(w[j]>0){
        printf("j = %ld, w[j] = %0.6f\n", j, w[j]);
        fflush(stdout);
      }
    }

    cur_slack = (double *) realloc(cur_slack,sizeof(double)*size_active);
    assert(cur_slack!=NULL);

    /* slack of constraint i: max(0, delta[i] - w*dXc[i]) */
    for(i = 0; i < size_active; i++) {
      cur_slack[i] = 0.0;
      for(f = dXc[i]->fvec; f; f = f->next) {
        j = 0;
        while(f->words[j].wnum) {
          cur_slack[i] += w[f->words[j].wnum]*f->words[j].weight;
          j++;
        }
      }
      if(cur_slack[i] >= delta[i])
        cur_slack[i] = 0.0;
      else
        cur_slack[i] = delta[i]-cur_slack[i];
    }

    /* the largest slack becomes the threshold the next constraint must beat */
    mv_iter = 0;
    if(size_active > 1) {
      for(j = 0; j < size_active; j++) {
        if(cur_slack[j] >= cur_slack[mv_iter])
          mv_iter = j;
      }
      threshold = cur_slack[mv_iter];
    } else {
      threshold = 0.0;
    }

    /* the previous constraint is now owned by dXc; fetch the next one */
    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
    value = margin - sprod_ns(w, new_constraint);

    if((iter % CLEANUP_CHECK) == 0)
    {
      printf("+"); fflush(stdout);
      size_active = resize_cleanup(size_active, &idle, &alpha, &delta, &dXc, &G, &mv_iter);
    }

    free(alpha);
    alpha=NULL;

  } /* end cutting plane while loop */

  printf(" Inner loop optimization finished.\n"); fflush(stdout);

  /* free memory */
  for (j=0;j<size_active;j++) {
    free(G[j]);
    free_example(dXc[j],1);
  }
  for (j=0;j<qrows;j++) {
    free(qmatrix[j]);
  }
  free(qmatrix);
  free(G);
  free(dXc);
  free(alpha);
  free(delta);
  free_svector(new_constraint); /* last constraint was never added to dXc */
  free(cur_slack);
  free(idle);

  return;
}
Ejemplo n.º 8
0
void add_list_n_ns(double *vec_n, SVECTOR *vec_s, double faktor)
     /* Adds every element of the SVECTOR list vec_s to the dense
        vector vec_n via add_vector_ns, weighting each element by its
        own factor times faktor. */
{
  SVECTOR *cur=vec_s;

  while(cur) {
    add_vector_ns(vec_n,cur,cur->factor*faktor);
    cur=cur->next;
  }
}
/*
 * Cutting-plane inner loop with an extra block of QP variables for the
 * weights from sm->firstNonNegWeightIndex up to sm->sizePsi.
 *
 * The QP variable vector alphabeta is laid out as
 *   [ beta: nNonNeg entries | alpha: one entry per cutting plane ]
 * with nNonNeg = sm->sizePsi - sm->firstNonNegWeightIndex + 1.  G starts as
 * the nNonNeg x nNonNeg identity and gains one row and column per cutting
 * plane; the linear term is 0 for the beta block and -delta[i] for plane i.
 *
 * After each solve, w is cleared, w[firstNonNegWeightIndex+i] is set to
 * beta[i], and every plane with alpha above C*ALPHA_THRESHOLD is added with
 * weight alpha[j].
 *
 * The loop ends after MAX_ITER iterations, or after
 * MAX_INNER_ITER_NO_VIOLATION consecutive iterations in which the newly
 * found constraint is violated by no more than CUTTING_PLANE_EPS.
 *
 * The proximal / step-size bookkeeping (w_b, rho, cut_error, idle cleanup)
 * that used to sit here as commented-out code has been removed.
 *
 * w          read once before the loop, then overwritten every iteration.
 * MAX_ITER   upper bound on the number of loop iterations.
 * C          multiplies the slack in the reported bounds, is passed to
 *            mosek_qp_optimize(), and scales the alpha cutoff.
 * m, fycache, ex, sm, sparm
 *            forwarded to find_cutting_plane(); sm also supplies sizePsi and
 *            firstNonNegWeightIndex.
 *
 * Returns the lower bound 0.5*w*w + C*xi computed in the last iteration,
 * or 0.0 if the loop body never ran.
 */
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, /*double epsilon,*/ SVECTOR **fycache, EXAMPLE *ex, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm) {
    long i,j;
    double *alpha;
    double **G; /* Gram matrix */
    DOC **dXc; /* constraint matrix */
    double *delta; /* rhs of constraints */
    SVECTOR *new_constraint;
    double dual_obj;
    int iter, size_active, no_violation_iter;
    double value;
    double margin;
    /* initialised so that the return value is defined even when the loop
       below performs no iteration */
    double lower_bound = 0.0, approx_upper_bound;
    double *proximal_rhs;

    iter = 0;
    no_violation_iter = 0;
    size_active = 0;
    alpha = NULL;
    G = NULL;
    dXc = NULL;
    delta = NULL;

    proximal_rhs = NULL;

    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm);
    value = margin - sprod_ns(w, new_constraint);

    // Non negative weight constraints: identity block for the beta variables
    int nNonNeg = sm->sizePsi - sm->firstNonNegWeightIndex + 1;
    G = (double**)malloc(sizeof(double*)*nNonNeg);
    assert(nNonNeg<=0 || G!=NULL);
    for (j=0; j<nNonNeg; j++) {
        G[j] = (double*)malloc(sizeof(double)*nNonNeg);
        assert(G[j]!=NULL);
        for (int k=0; k<nNonNeg; k++) {
            G[j][k] = 0;
        }

        G[j][j] = 1.0;
    }
    double* alphabeta = NULL;

    while ((iter<MAX_ITER)&&(no_violation_iter<MAX_INNER_ITER_NO_VIOLATION)) {
        LearningTracker::NextInnerIteration();
        iter+=1;
        size_active+=1;

#if (DEBUG_LEVEL>0)
        printf("INNER ITER %d\n", iter);
#endif

        /* add  constraint */
        dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active);
        assert(dXc!=NULL);
        dXc[size_active-1] = (DOC*)malloc(sizeof(DOC));
        assert(dXc[size_active-1]!=NULL);
        dXc[size_active-1]->fvec = new_constraint;
        dXc[size_active-1]->slackid = 1; // only one common slackid (one-slack)
        dXc[size_active-1]->costfactor = 1.0;

        delta = (double*)realloc(delta, sizeof(double)*size_active);
        assert(delta!=NULL);
        delta[size_active-1] = margin;

        alphabeta = (double*)realloc(alphabeta, sizeof(double)*(size_active+nNonNeg));
        assert(alphabeta!=NULL);
        if (size_active == 1) {
            /* first allocation: give the beta block a defined start value
               before the buffer is handed to the solver */
            for (i=0; i<nNonNeg; i++) {
                alphabeta[i] = 0.0;
            }
        }
        alphabeta[size_active+nNonNeg-1] = 0.0;

        /* linear term of the QP */
        proximal_rhs = (double*)realloc(proximal_rhs, sizeof(double)*(size_active+nNonNeg));
        assert(proximal_rhs!=NULL);

        /* update Gram matrix: one more row pointer, every row one entry wider */
        G = (double**)realloc(G, sizeof(double*)*(size_active+nNonNeg));
        assert(G!=NULL);
        G[size_active+nNonNeg-1] = NULL; /* so the realloc below allocates the new row */
        for (j=0; j<size_active+nNonNeg; j++) {
            G[j] = (double*)realloc(G[j], sizeof(double)*(size_active+nNonNeg));
            assert(G[j]!=NULL);
        }
        /* plane-vs-plane products */
        for (j=0; j<size_active-1; j++) {
            G[size_active+nNonNeg-1][j+nNonNeg] = sprod_ss(dXc[size_active-1]->fvec, dXc[j]->fvec);
            G[j+nNonNeg][size_active+nNonNeg-1] = G[size_active+nNonNeg-1][j+nNonNeg];
        }
        G[size_active+nNonNeg-1][size_active+nNonNeg-1] = sprod_ss(dXc[size_active-1]->fvec,dXc[size_active-1]->fvec);

        /* plane-vs-beta products: product of the new plane with a
           one-entry vector for feature j+firstNonNegWeightIndex */
        for (j=0; j<nNonNeg; j++) {
            WORD indicator[2];
            indicator[0].wnum = j + sm->firstNonNegWeightIndex;
            indicator[0].weight = 1.0;
            indicator[1].wnum = 0;   /* terminator entry */
            indicator[1].weight = 0.0;
            SVECTOR* indicator_vec = create_svector(indicator, NULL, 1.0);
            G[size_active+nNonNeg-1][j] = sprod_ss(dXc[size_active-1]->fvec, indicator_vec);
            G[j][size_active+nNonNeg-1] = G[size_active+nNonNeg-1][j];
            free_svector(indicator_vec);
        }

        /* update proximal_rhs */
        for (i=0; i<size_active; i++) {
            proximal_rhs[i+nNonNeg] = -delta[i];
        }
        for (i=0; i<nNonNeg; i++) {
            proximal_rhs[i] = 0;
        }

        /* solve QP to update alpha */
        dual_obj = 0;
        mosek_qp_optimize(G, proximal_rhs, alphabeta, (long) size_active+nNonNeg, C, &dual_obj, nNonNeg);
        printf("dual_obj=%.4lf\n", dual_obj);

        alpha = alphabeta + nNonNeg; /* view onto the per-plane part */

        clear_nvector(w,sm->sizePsi);
        for (i = 0; i < nNonNeg; i++) {
            w[sm->firstNonNegWeightIndex + i] = alphabeta[i];  // add betas
        }
        for (j=0; j<size_active; j++) {
            if (alpha[j]>C*ALPHA_THRESHOLD) {
                add_vector_ns(w,dXc[j]->fvec,alpha[j]);
            }
        }

        LearningTracker::ReportWeights(w, sm->sizePsi);

        // update lower bound: xi is the largest violation inside the working set
        double xi = -1e+20;
        for (i = 0; i < size_active; ++i) {
            xi = MAX(xi, delta[i] - sprod_ns(w, dXc[i]->fvec));
        }
        lower_bound = 0.5*sprod_nn(w,w,sm->sizePsi)+C*xi;
        printf("lower_bound=%.4lf\n", lower_bound);
        assert(fabs(lower_bound + dual_obj) < 1e-6);
        LearningTracker::ReportLowerBound(lower_bound);

        // find new constraint (the previous one is now owned by dXc)
        new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm);
        value = margin - sprod_ns(w, new_constraint);
        double violation = value - xi;
        if (violation > CUTTING_PLANE_EPS) {
            printf("New constraint is violated by %.4lf\n", violation);
            no_violation_iter = 0;
        } else {
            ++no_violation_iter;
            printf("New constraint is underviolated by %.4lf\n", violation);
            printf("%d more such constraints to stop\n", MAX_INNER_ITER_NO_VIOLATION - no_violation_iter);
        }

        // update upper bound
        approx_upper_bound = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value;
        printf("approx_upper_bound=%.4lf\n", approx_upper_bound);
        LearningTracker::ReportUpperBound(approx_upper_bound);

    } // end cutting plane while loop

    printf("Inner loop optimization finished.\n");
    fflush(stdout);

    /* free memory */
    /* G owns size_active+nNonNeg rows (identity block plus one per plane) */
    for (j=0; j<size_active+nNonNeg; j++) {
        free(G[j]);
    }
    /* NOTE(review): free_example() is called with 0 here; whether that also
       releases dXc[j]->fvec depends on free_example(), which is not visible
       in this file section - confirm the constraints are not leaked */
    for (j=0; j<size_active; j++) {
        free_example(dXc[j],0);
    }
    free(G);
    free(dXc);
    free(alphabeta); /* alpha points into this buffer and is not freed separately */
    free(delta);
    free_svector(new_constraint); /* last constraint was never added to dXc */
    free(proximal_rhs);

    return lower_bound;
}
Ejemplo n.º 10
0
/*
 * Inner-loop solver: a cutting-plane method with a proximal term (weight rho)
 * around a "center" w_b.
 *
 * Each iteration
 *   1. appends the most recently found cutting plane to the active set
 *      (dXc / delta / G and the per-constraint bookkeeping arrays),
 *   2. solves the QP over the active set with svm_learn_optimization()
 *      (or sets alpha[0] = C directly when there is only one constraint),
 *   3. rebuilds the trial point w from alpha and the center w_b,
 *   4. asks find_cutting_plane() for a new constraint at w and evaluates the
 *      primal objective there,
 *   5. either accepts w as the new center ("serious step") or keeps w_b
 *      ("null step"), adapting rho within [min_rho, max_rho],
 *   6. every CLEANUP_CHECK iterations lets resize_cleanup() shrink the
 *      active set.
 *
 * The loop ends when the primal objective at the center has dropped by more
 * than decrease_proportion relative to its initial value, when
 * expected_descent >= -epsilon, or after MAX_ITER iterations.
 *
 * Parameters:
 *   w         weight vector, read and written at indices 1..sm->sizePsi.
 *             Used as warm start on entry; overwritten with the final
 *             center w_b on return.
 *   m         forwarded to find_cutting_plane().
 *   MAX_ITER  upper bound on the number of iterations.
 *   C         multiplier of the slack term in the objective; also passed to
 *             the QP solver as svm_c.
 *   epsilon   stopping tolerance on expected_descent; also used for the QP
 *             solver settings (eps, and as a cap for epsilon_crit).
 *   fycache, ex, sparm
 *             forwarded to find_cutting_plane().
 *   sm        supplies sizePsi and is forwarded to find_cutting_plane().
 *
 * Returns the primal objective value recorded for the final center w_b.
 *
 * Allocation failures are handled with assert(), as elsewhere in this
 * function, so they are only detected when assertions are enabled.
 */
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache,
                               EXAMPLE *ex, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm) {
    long i, j;
    double *alpha;
    double **G; /* Gram matrix */
    DOC **dXc; /* constraint matrix */
    double *delta; /* rhs of constraints */
    SVECTOR *new_constraint;
    double dual_obj, alphasum;
    int iter, size_active;
    double value;
    int *idle; /* for cleaning up */
    double margin;
    double primal_obj;
    double *proximal_rhs;
    double *gammaG0 = NULL;
    double min_rho = 0.001;
    double max_rho;
    double serious_counter = 0; /* bookkeeping only: updated below but never read */
    double rho = 1.0; /* temporarily set it to 1 first */

    double expected_descent, primal_obj_b = -1, reg_master_obj;
    int null_step = 1;
    double *w_b;
    double kappa = 0.1;
    double temp_var;
    double proximal_term, primal_lower_bound;

    double v_k;
    double obj_difference;
    double *cut_error; /* cut_error[i] = alpha_{k,i} at current center x_k */
    double sigma_k;
    double m2 = 0.2;
    double m3 = 0.9;
    double gTd;
    double last_sigma_k = 0;

    double initial_primal_obj;
    int suff_decrease_cond = 0;
    double decrease_proportion = 0.2; /* start from 0.2 first */

    double z_k_norm;
    double last_z_k_norm = 0;


    /* set parameters for hideo solver */
    LEARN_PARM lparm;
    KERNEL_PARM kparm;
    MODEL *svmModel = NULL;
    lparm.biased_hyperplane = 0;
    lparm.epsilon_crit = MIN(epsilon, 0.001);
    lparm.svm_c = C;
    lparm.sharedslack = 1;
    kparm.kernel_type = LINEAR;

    lparm.remove_inconsistent = 0;
    lparm.skip_final_opt_check = 0;
    lparm.svm_maxqpsize = 10;
    lparm.svm_newvarsinqp = 0;
    lparm.svm_iter_to_shrink = -9999;
    lparm.maxiter = 100000;
    lparm.kernel_cache_size = 40;
    lparm.eps = epsilon;
    lparm.transduction_posratio = -1.0;
    lparm.svm_costratio = 1.0;
    lparm.svm_costratio_unlab = 1.0;
    lparm.svm_unlabbound = 1E-5;
    lparm.epsilon_a = 1E-10;  /* changed from 1e-15 */
    lparm.compute_loo = 0;
    lparm.rho = 1.0;
    lparm.xa_depth = 0;
    strcpy(lparm.alphafile, "");
    kparm.poly_degree = 3;
    kparm.rbf_gamma = 1.0;
    kparm.coef_lin = 1;
    kparm.coef_const = 1;
    strcpy(kparm.custom, "empty");


    w_b = create_nvector(sm->sizePsi);
    clear_nvector(w_b, sm->sizePsi);
    /* warm start: the initial center is the caller's w */
    for (i = 1; i < sm->sizePsi + 1; i++) {
        w_b[i] = w[i];
    }

    iter = 0;
    size_active = 0;
    alpha = NULL;
    G = NULL;
    dXc = NULL;
    delta = NULL;
    idle = NULL;

    proximal_rhs = NULL;
    cut_error = NULL;

    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm);
    value = margin - sprod_ns(w, new_constraint);

    primal_obj_b = 0.5 * sprod_nn(w_b, w_b, sm->sizePsi) + C * value;
    primal_obj = 0.5 * sprod_nn(w, w, sm->sizePsi) + C * value;
    primal_lower_bound = 0;
    expected_descent = -primal_obj_b;
    initial_primal_obj = primal_obj_b;

    max_rho = 100 * C; /* tree-edge loss not within 0-1 */

    printf("Running CCCP inner loop solver: ");
    fflush(stdout);

    while ((!suff_decrease_cond) && (expected_descent < -epsilon) && (iter < MAX_ITER)) {
        iter += 1;
        size_active += 1;

#if (DEBUG_LEVEL > 0)
        printf("ITER %d\n", iter);
#endif
        printf(".");
        fflush(stdout);


        /* add  constraint */
        dXc = (DOC **) realloc(dXc, sizeof(DOC *) * size_active);
        assert(dXc != NULL);
        dXc[size_active - 1] = (DOC *) malloc(sizeof(DOC));
        /* checked like the reallocs around it; the result is dereferenced next */
        assert(dXc[size_active - 1] != NULL);
        dXc[size_active - 1]->fvec = new_constraint;
        dXc[size_active - 1]->slackid = 1; /* only one common slackid (one-slack) */
        dXc[size_active - 1]->costfactor = 1.0;

        delta = (double *) realloc(delta, sizeof(double) * size_active);
        assert(delta != NULL);
        delta[size_active - 1] = margin;
        alpha = (double *) realloc(alpha, sizeof(double) * size_active);
        assert(alpha != NULL);
        alpha[size_active - 1] = 0.0;
        idle = (int *) realloc(idle, sizeof(int) * size_active);
        assert(idle != NULL);
        idle[size_active - 1] = 0;
        /* proximal point */
        proximal_rhs = (double *) realloc(proximal_rhs, sizeof(double) * size_active);
        assert(proximal_rhs != NULL);
        cut_error = (double *) realloc(cut_error, sizeof(double) * size_active);
        assert(cut_error != NULL);
        /* note g_i = - new_constraint */
        cut_error[size_active - 1] = C * (sprod_ns(w_b, new_constraint) - sprod_ns(w, new_constraint));
        cut_error[size_active - 1] += (primal_obj_b - 0.5 * sprod_nn(w_b, w_b, sm->sizePsi));
        cut_error[size_active - 1] -= (primal_obj - 0.5 * sprod_nn(w, w, sm->sizePsi));

        gammaG0 = (double *) realloc(gammaG0, sizeof(double) * size_active);
        assert(gammaG0 != NULL);

        /* update Gram matrix: grow every row by one column, then fill the
           new row/column symmetrically */
        G = (double **) realloc(G, sizeof(double *) * size_active);
        assert(G != NULL);
        G[size_active - 1] = NULL; /* so the realloc below allocates the new row */
        for (j = 0; j < size_active; j++) {
            G[j] = (double *) realloc(G[j], sizeof(double) * size_active);
            assert(G[j] != NULL);
        }
        for (j = 0; j < size_active - 1; j++) {
            G[size_active - 1][j] = sprod_ss(dXc[size_active - 1]->fvec, dXc[j]->fvec);
            G[j][size_active - 1] = G[size_active - 1][j];
        }
        G[size_active - 1][size_active - 1] = sprod_ss(dXc[size_active - 1]->fvec, dXc[size_active - 1]->fvec);


        /* update gammaG0: while the center has never moved only the new
           entry is needed; afterwards all entries are recomputed against
           the current center */
        if (null_step == 1) {
            gammaG0[size_active - 1] = sprod_ns(w_b, dXc[size_active - 1]->fvec);
        } else {
            for (i = 0; i < size_active; i++) {
                gammaG0[i] = sprod_ns(w_b, dXc[i]->fvec);
            }
        }

        /* update proximal_rhs */
        for (i = 0; i < size_active; i++) {
            proximal_rhs[i] = (1 + rho) * delta[i] - rho * gammaG0[i];
        }


        /* solve QP to update alpha */
        if (size_active > 1) {
            /* drop the model of the previous iteration before building a new one */
            if (svmModel != NULL) free_model(svmModel, 0);
            svmModel = (MODEL *) my_malloc(sizeof(MODEL));
            svm_learn_optimization(dXc, proximal_rhs, size_active, sm->sizePsi, &lparm, &kparm, NULL, svmModel, alpha);
        } else {
            assert(size_active == 1);
            alpha[0] = C;
        }

        /* rebuild w from the constraints whose alpha is above the threshold */
        clear_nvector(w, sm->sizePsi);
        for (j = 0; j < size_active; j++) {
            if (alpha[j] > C * ALPHA_THRESHOLD) {
                add_vector_ns(w, dXc[j]->fvec, alpha[j] / (1 + rho));
            }
        }

        /* compute dual obj */
        dual_obj = +0.5 * (1 + rho) * sprod_nn(w, w, sm->sizePsi);
        for (j = 0; j < size_active; j++) {
            dual_obj -= proximal_rhs[j] / (1 + rho) * alpha[j];
        }

        /* norm is taken before the center contribution is added to w */
        z_k_norm = sqrt(sprod_nn(w, w, sm->sizePsi));

        add_vector_nn(w, w_b, sm->sizePsi, rho / (1 + rho));


        /* detect if step size too small */
        sigma_k = 0;
        alphasum = 0;
        for (j = 0; j < size_active; j++) {
            sigma_k += alpha[j] * cut_error[j];
            alphasum += alpha[j];
        }
        sigma_k /= C;
        gTd = -C * (sprod_ns(w, new_constraint) - sprod_ns(w_b, new_constraint));

#if (DEBUG_LEVEL > 0)
        for (j=0;j<size_active;j++) {
          printf("alpha[%d]: %.8g, cut_error[%d]: %.8g\n", j, alpha[j], j, cut_error[j]);
        }
        printf("sigma_k: %.8g\n", sigma_k);
        printf("alphasum: %.8g\n", alphasum);
        printf("g^T d: %.8g\n", gTd);
        fflush(stdout);
#endif


        /* update cleanup information: count consecutive iterations in which
           a constraint's alpha stayed below the threshold */
        for (j = 0; j < size_active; j++) {
            if (alpha[j] < ALPHA_THRESHOLD * C) {
                idle[j]++;
            } else {
                idle[j] = 0;
            }
        }

        /* the previous new_constraint is now referenced by dXc[size_active-1] */
        new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm);
        value = margin - sprod_ns(w, new_constraint);

        /* primal objective at the trial point */
        primal_obj = 0.5 * sprod_nn(w, w, sm->sizePsi) + C * value;

#if (DEBUG_LEVEL > 0)
        printf("ITER PRIMAL_OBJ %.4f\n", primal_obj); fflush(stdout);
#endif


        temp_var = sprod_nn(w_b, w_b, sm->sizePsi);
        proximal_term = 0.0;
        for (i = 1; i < sm->sizePsi + 1; i++) {
            proximal_term += (w[i] - w_b[i]) * (w[i] - w_b[i]);
        }

        reg_master_obj = -dual_obj + 0.5 * rho * temp_var / (1 + rho);
        expected_descent = reg_master_obj - primal_obj_b;

        v_k = (reg_master_obj - proximal_term * rho / 2) - primal_obj_b;

        primal_lower_bound = MAX(primal_lower_bound, reg_master_obj - 0.5 * rho * (1 + rho) * proximal_term);

#if (DEBUG_LEVEL > 0)
        printf("ITER REG_MASTER_OBJ: %.4f\n", reg_master_obj);
        printf("ITER EXPECTED_DESCENT: %.4f\n", expected_descent);
        printf("ITER PRIMLA_OBJ_B: %.4f\n", primal_obj_b);
        printf("ITER RHO: %.4f\n", rho);
        printf("ITER ||w-w_b||^2: %.4f\n", proximal_term);
        printf("ITER PRIMAL_LOWER_BOUND: %.4f\n", primal_lower_bound);
        printf("ITER V_K: %.4f\n", v_k);
#endif
        obj_difference = primal_obj - primal_obj_b;

        if (primal_obj < primal_obj_b + kappa * expected_descent) {
            /* extra condition to be met */
            if ((gTd > m2 * v_k) || (rho < min_rho + 1E-8)) {
                /* These two terms do not depend on the constraint index, so
                   they are computed once instead of once per active
                   constraint. */
                double center_offset = primal_obj_b - 0.5 * sprod_nn(w_b, w_b, sm->sizePsi);
                double trial_offset = primal_obj - 0.5 * sprod_nn(w, w, sm->sizePsi);
#if (DEBUG_LEVEL > 0)
                printf("SERIOUS STEP\n");
#endif
                /* update cut_error: re-express every error relative to the
                   new center (same order of updates as before) */
                for (i = 0; i < size_active; i++) {
                    cut_error[i] -= center_offset;
                    cut_error[i] -= C * sprod_ns(w_b, dXc[i]->fvec);
                    cut_error[i] += trial_offset;
                    cut_error[i] += C * sprod_ns(w, dXc[i]->fvec);
                }
                primal_obj_b = primal_obj;
                for (i = 1; i < sm->sizePsi + 1; i++) {
                    w_b[i] = w[i];
                }
                null_step = 0;
                serious_counter++;
            } else {
                /* decrease in objective was sufficient but the extra test
                   failed: relax the proximal weight */
#if (DEBUG_LEVEL > 0)
                printf("NULL STEP: SS(ii) FAILS.\n");
#endif
                serious_counter--;
                rho = MAX(rho / 10, min_rho);
            }
        } else { /* no sufficient decrease */
            serious_counter--;

            if ((cut_error[size_active - 1] > m3 * last_sigma_k) &&
                (fabs(obj_difference) > last_z_k_norm + last_sigma_k)) {
#if (DEBUG_LEVEL > 0)
                printf("NULL STEP: NS(ii) FAILS.\n");
#endif
                rho = MIN(10 * rho, max_rho);
            }
#if (DEBUG_LEVEL > 0)
            else printf("NULL STEP\n");
#endif
        }
        /* update last_sigma_k */
        last_sigma_k = sigma_k;
        last_z_k_norm = z_k_norm;


        /* break away from while loop if more than a certain proportional decrease in primal objective */
        if (primal_obj_b / initial_primal_obj < 1 - decrease_proportion) {
            suff_decrease_cond = 1;
        }

        /* clean up: the arrays are passed by address, so resize_cleanup may
           replace them; it returns the new number of active constraints */
        if (iter % CLEANUP_CHECK == 0) {
            size_active = resize_cleanup(size_active, &idle, &alpha, &delta, &gammaG0, &proximal_rhs, &G, &dXc,
                                         &cut_error);
        }


    } /* end cutting plane while loop */

    printf(" Inner loop optimization finished.\n");
    fflush(stdout);

    /* free memory */
    /* NOTE(review): free_example() is called with deep=0; if that leaves
       dXc[j]->fvec allocated, the active constraint vectors are leaked here.
       Confirm against free_example() and resize_cleanup() before changing. */
    for (j = 0; j < size_active; j++) {
        free(G[j]);
        free_example(dXc[j], 0);
    }
    free(G);
    free(dXc);
    free(alpha);
    free(delta);
    free_svector(new_constraint); /* last cutting plane, never added to dXc */
    free(idle);
    free(gammaG0);
    free(proximal_rhs);
    free(cut_error);
    /* release the QP model of the last iteration, using the same call that
       the loop uses before re-allocating it */
    if (svmModel != NULL) free_model(svmModel, 0);

    /* copy and free: hand the final center back to the caller */
    for (i = 1; i < sm->sizePsi + 1; i++) {
        w[i] = w_b[i];
    }
    free(w_b);

    return (primal_obj_b);

}