コード例 #1
0
/*
 * Cutting-plane loop whose restricted QP is solved by mosek_qp_optimize_dual.
 *
 * Each iteration
 *   1. appends the most recently found cutting plane (new_constraint/margin,
 *      produced by find_cutting_plane) to the active set dXc/delta and grows
 *      the Gram matrix G and the auxiliary matrix qmatrix by one row,
 *   2. calls mosek_qp_optimize_dual to obtain alpha,
 *   3. rebuilds w from alpha and the active constraints,
 *   4. recomputes the slack of every active constraint; the largest slack
 *      becomes the stopping threshold,
 *   5. asks find_cutting_plane for the next constraint.
 * The loop ends when  margin - <w, new_constraint> <= threshold + epsilon
 * or after MAX_ITER iterations.
 *
 * Parameters
 *   w              weight vector, overwritten in place (cleared with
 *                  clear_nvector(w, sm->sizePsi) and rebuilt every iteration;
 *                  entries 1 .. 3*(phi1_size+phi2_size) are also accessed
 *                  directly)
 *   m, fycache, ex, valid_examples
 *                  forwarded unchanged to find_cutting_plane
 *   MAX_ITER       upper bound on the number of cutting-plane iterations
 *   C              passed to the QP solver; also scales ALPHA_THRESHOLD
 *   epsilon        tolerance used in the stopping test
 *   sm             supplies sizePsi
 *   sparm          supplies phi1_size, phi2_size and gram_regularization
 *                  (gram_regularization is multiplied by 10 in place on each
 *                  retry after solver code 1295)
 *
 * The function returns nothing; the result is the final content of w.
 * The process exits with status 1 if the solver returns a non-zero code
 * outside 1293..1296.
 */
void cutting_plane_algorithm_dual(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex, 
															STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, int *valid_examples) {
  long i, j;
  double *alpha = NULL;
  DOC **dXc = NULL;      // constraint matrix
  double *delta = NULL;  // rhs of constraints
  SVECTOR *new_constraint;
  int iter = 0;
  int size_active = 0;
  double value;
  double threshold = 0.0;
  double margin;
  double cur_obj;
  double *cur_slack = NULL;
  int mv_iter;
  int *idle = NULL;
  double **G = NULL;
  double **qmatrix = NULL;
  SVECTOR *f;
  int r;
  // number of columns in every qmatrix row and number of extra alpha entries
  const long n_phi = (long)(sparm->phi1_size + sparm->phi2_size);

  printf("Running structural SVM solver: "); fflush(stdout);
  new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
  value = margin - sprod_ns(w, new_constraint);
  while((value>threshold+epsilon)&&(iter<MAX_ITER)) {
    iter+=1;
    size_active+=1;

    printf("."); fflush(stdout);

    // add constraint
    dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active);
    assert(dXc!=NULL);
    dXc[size_active-1] = (DOC*)malloc(sizeof(DOC));
    assert(dXc[size_active-1]!=NULL);
    dXc[size_active-1]->fvec = new_constraint;
    dXc[size_active-1]->slackid = 1; // only one common slackid (one-slack)
    dXc[size_active-1]->costfactor = 1.0;

    delta = (double*)realloc(delta, sizeof(double)*size_active);
    assert(delta!=NULL);
    delta[size_active-1] = margin;

    // alpha is released at the bottom of every iteration, so this realloc
    // always yields a fresh buffer: zero all of it instead of handing
    // partially uninitialised memory to the solver.
    alpha = (double*)realloc(alpha, sizeof(double)*(size_active+n_phi));
    assert(alpha!=NULL);
    for(j = 0; j < size_active+n_phi; j++) {
      alpha[j] = 0.0;
    }

    idle = (int *) realloc(idle, sizeof(int)*size_active);
    assert(idle!=NULL);
    idle[size_active-1] = 0;

    // new qmatrix row: negated weights of the new constraint at feature
    // indices 2*n_phi+1 .. 3*n_phi
    qmatrix = (double **) realloc(qmatrix, sizeof(double *)*size_active);
    assert(qmatrix!=NULL);
    qmatrix[size_active-1] = (double *) malloc(sizeof(double)*n_phi);
    assert(n_phi==0 || qmatrix[size_active-1]!=NULL);
    for(j = 0; j < n_phi; j++){
      qmatrix[size_active-1][j] = (-1)*returnWeightAtIndex(dXc[size_active-1]->fvec->words, (n_phi*2+j+1));
    }

    // update Gram matrix
    G = (double **) realloc(G, sizeof(double *)*size_active);
    assert(G!=NULL);
    G[size_active-1] = NULL;
    for(j = 0; j < size_active; j++) {
      G[j] = (double *) realloc(G[j], sizeof(double)*size_active);
      assert(G[j]!=NULL);
    }

    // NOTE(review): off-diagonal entries are halved while the diagonal is
    // not; kept exactly as found -- confirm this matches what
    // mosek_qp_optimize_dual expects.
    for(j = 0; j < size_active-1; j++) {
      G[size_active-1][j] = sprod_ss(dXc[size_active-1]->fvec, dXc[j]->fvec);
      G[size_active-1][j] = G[size_active-1][j]/2;
      G[j][size_active-1]  = G[size_active-1][j];
    }
    G[size_active-1][size_active-1] = sprod_ss(dXc[size_active-1]->fvec,dXc[size_active-1]->fvec);

    // hack: add a constant to the diagonal to make sure G is PSD
    G[size_active-1][size_active-1] += 1e-6;

    // solve QP to update alpha
    r = mosek_qp_optimize_dual(G, qmatrix, delta, alpha, (long) size_active, n_phi, C, &cur_obj, 0, 0);

    // 1293..1296 are treated as "G not PSD" (presumably the MOSEK Q-not-PSD/NSD
    // response codes -- confirm against mosek.h). Only 1295 triggers a retry
    // with a larger diagonal regularisation.
    // NOTE(review): if sparm->gram_regularization is 0 the retry changes
    // nothing and this loop cannot make progress.
    if(r >= 1293 && r <= 1296)
    {
      printf("r:%d. G might not be psd due to numerical errors.\n",r);
      fflush(stdout);
      while(r==1295) {
        printf("r:%d. G might not be psd due to numerical errors. Gram Reg=%0.7f\n",r, sparm->gram_regularization);
        fflush(stdout);
        for(i=0;i<size_active;i++) {
          G[i][i] += 10*sparm->gram_regularization-sparm->gram_regularization;
        }
        sparm->gram_regularization *= 10;
        r = mosek_qp_optimize_dual(G, qmatrix, delta, alpha, (long) size_active, n_phi, C, &cur_obj, sparm->gram_regularization, sparm->gram_regularization*0.1);
      }
    }
    else if(r)
    {
      printf("Error %d in mosek_qp_optimize: Check ${MOSEKHOME}/${VERSION}/tools/platform/${PLATFORM}/h/mosek.h\n",r);
      exit(1);
    }

    // rebuild w from the constraints with non-negligible alpha and track how
    // long each constraint has been inactive
    clear_nvector(w,sm->sizePsi);
    for (j=0;j<size_active;j++) {
      if (alpha[j]>C*ALPHA_THRESHOLD) {
        add_vector_ns(w,dXc[j]->fvec,alpha[j]);
        idle[j] = 0;
      }
      else
        idle[j]++;
    }
    // the trailing n_phi alpha entries are subtracted from w[2*n_phi+1 .. 3*n_phi]
    for(j=0; j<n_phi; j++){
      if (alpha[size_active+j] > EQUALITY_EPSILON){
        w[j+1+n_phi*2] = w[j+1+n_phi*2] - alpha[size_active+j];
      }
    }

    // snap near-zero weights to exactly zero
    for(j=1; j<=n_phi*3; j++){
      if((w[j]<EQUALITY_EPSILON) && (w[j]>(-1*EQUALITY_EPSILON))){
        w[j] = 0;
      }
    }

    // report (but do not correct) positive weights in the last third
    for(j=n_phi*2+1; j<=n_phi*3; j++){
      if(w[j]>0){
        printf("j = %ld, w[j] = %0.6f\n", j, w[j]);
        fflush(stdout);
      }
    }

    // slack of every active constraint under the new w
    cur_slack = (double *) realloc(cur_slack,sizeof(double)*size_active);
    assert(cur_slack!=NULL);

    for(i = 0; i < size_active; i++) {
      cur_slack[i] = 0.0;
      for(f = dXc[i]->fvec; f; f = f->next) {
        j = 0;
        while(f->words[j].wnum) {
          cur_slack[i] += w[f->words[j].wnum]*f->words[j].weight;
          j++;
        }
      }
      if(cur_slack[i] >= delta[i])
        cur_slack[i] = 0.0;
      else
        cur_slack[i] = delta[i]-cur_slack[i];
    }

    // index of the most violated active constraint (last one wins on ties)
    mv_iter = 0;
    if(size_active > 1) {
      for(j = 0; j < size_active; j++) {
        if(cur_slack[j] >= cur_slack[mv_iter])
          mv_iter = j;
      }
    }

    if(size_active > 1)
      threshold = cur_slack[mv_iter];
    else
      threshold = 0.0;

    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
    value = margin - sprod_ns(w, new_constraint);

    if((iter % CLEANUP_CHECK) == 0)
    {
      int rows_before = size_active;

      printf("+"); fflush(stdout);
      size_active = resize_cleanup(size_active, &idle, &alpha, &delta, &dXc, &G, &mv_iter);

      // resize_cleanup is not given qmatrix, so if it removed or moved
      // constraints the rows would no longer correspond to dXc. Every row is
      // a pure function of dXc[i], so rebuild them (a no-op when nothing
      // changed) and release rows that are no longer needed.
      for(i = 0; i < rows_before; i++) {
        free(qmatrix[i]);
        qmatrix[i] = NULL;
      }
      for(i = 0; i < size_active && i < rows_before; i++) {
        qmatrix[i] = (double *) malloc(sizeof(double)*n_phi);
        assert(n_phi==0 || qmatrix[i]!=NULL);
        for(j = 0; j < n_phi; j++){
          qmatrix[i][j] = (-1)*returnWeightAtIndex(dXc[i]->fvec->words, (n_phi*2+j+1));
        }
      }
    }

    free(alpha);
    alpha=NULL;

  } // end cutting plane while loop

  printf(" Inner loop optimization finished.\n"); fflush(stdout);

  // free memory
  for (j=0;j<size_active;j++) {
    free(G[j]);
    free(qmatrix[j]);
    free_example(dXc[j],1);
  }
  free(G);
  free(qmatrix);
  free(dXc);
  free(alpha);
  free(delta);
  free_svector(new_constraint);
  free(cur_slack);
  free(idle);

  return;
}
コード例 #2
0
/*
 * Proximal cutting-plane loop whose restricted QP is solved by
 * mosek_qp_optimize.
 *
 * w_b holds the current centre (initialised from w). Each iteration adds the
 * latest cutting plane to the active set, solves the QP for alpha, forms a
 * candidate w, evaluates the objective at that candidate, and then either
 * accepts the candidate as the new centre ("serious step") or keeps the old
 * centre and adjusts the proximal weight rho ("null step").
 *
 * The loop stops when expected_descent >= -epsilon, when iter reaches
 * MAX_ITER, or when primal_obj_b has dropped below
 * (1 - decrease_proportion) * initial_primal_obj.
 *
 * Parameters
 *   w         in: starting weights; out: overwritten with the final centre w_b
 *   C, Cdash  weights of the two loss terms in the objective
 *             (C*value + Cdash*margin/numChunks)
 *   w_prev    weights whose score on each new constraint is subtracted from
 *             margin
 *   numChunks divisor of the Cdash term
 *   m, fycache, ex, sm, sparm, tmpdir, trainfile, frac_sim, Fweight,
 *   dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation,
 *   datasetStartIdx, chunkSz, eid, chunkid
 *             forwarded unchanged to find_cutting_plane
 *
 * Returns primal_obj_b, the objective value recorded at the last accepted
 * centre.
 */
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex,
		STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, char *tmpdir, char * trainfile, double frac_sim, double Fweight,
		char *dataset_stats_file, double rho_admm, long isExhaustive, long isLPrelaxation, double Cdash, int datasetStartIdx, int chunkSz,
		int eid, int chunkid, double *w_prev, int numChunks) {
  long i,j;
  double xi;
  double *alpha;
  double **G; /* Gram matrix */
  DOC **dXc; /* constraint matrix */
  double *delta; /* rhs of constraints */
  SVECTOR *new_constraint;
  double dual_obj, alphasum;
  int iter, size_active;
  double value;
  int r;
  int *idle; /* for cleaning up */
  double margin;
  double primal_obj;
  double *proximal_rhs;
  double *gammaG0=NULL;
  double min_rho = 0.001;
  double max_rho;
  double serious_counter=0;
  double rho = 1.0; /* temporarily set it to 1 first */

  double expected_descent, primal_obj_b=-1, reg_master_obj;
  int null_step=1;
  double *w_b;
  double kappa=0.1;
  double temp_var;
  double proximal_term, primal_lower_bound;

  double v_k;
  double obj_difference;
  double *cut_error; // cut_error[i] = alpha_{k,i} at current center x_k
  double sigma_k;
  double m2 = 0.2;
  double m3 = 0.9;
  double gTd;
  double last_sigma_k=0;

  double initial_primal_obj;
  int suff_decrease_cond=0;
  double decrease_proportion = 0.2; // start from 0.2 first

  double z_k_norm;
  double last_z_k_norm=0;

  w_b = create_nvector(sm->sizePsi);
  clear_nvector(w_b,sm->sizePsi);
  /* warm start */
  for (i=1;i<sm->sizePsi+1;i++) {
    w_b[i] = w[i];
  }

  iter = 0;
  size_active = 0;
  xi = 0.0;
  alpha = NULL;
  G = NULL;
  dXc = NULL;
  delta = NULL;
  idle = NULL;

  proximal_rhs = NULL;
  cut_error = NULL;

  printf("ITER 0 \n(before cutting plane) \n");
  double margin2;
  new_constraint = find_cutting_plane (ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile, frac_sim,
		  Fweight, dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation, &margin2,
		  datasetStartIdx, chunkSz, eid, chunkid);
  value = margin2 - sprod_ns(w, new_constraint);

  /* (Ajay: ONLINE LEARNING) constant addition to the loss: model score using
     w_prev values ('-' is used because the terms are reversed in the code) */
  margin -= sprod_ns(w_prev, new_constraint);

  /* objective combines both the hamming and the F1 loss */
  primal_obj_b = 0.5*sprod_nn(w_b,w_b,sm->sizePsi)+C*value + Cdash*margin/numChunks;
  primal_obj = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value + Cdash*margin/numChunks;
  primal_lower_bound = 0;
  expected_descent = -primal_obj_b;
  initial_primal_obj = primal_obj_b;

  max_rho = C;

  printf("Running CCCP inner loop solver: \n"); fflush(stdout);

  time_t iter_start, iter_end;

  while ((!suff_decrease_cond)&&(expected_descent<-epsilon)&&(iter<MAX_ITER)) {
    iter+=1;
    size_active+=1;

    time(&iter_start);

#if (DEBUG_LEVEL>0)
    printf("ITER %d\n", iter);
#endif
    printf("."); fflush(stdout);

    /* add  constraint */
    dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active);
    assert(dXc!=NULL);
    dXc[size_active-1] = (DOC*)malloc(sizeof(DOC));
    /* checked like every other allocation here; it is dereferenced right below */
    assert(dXc[size_active-1]!=NULL);
    dXc[size_active-1]->fvec = new_constraint;
    dXc[size_active-1]->slackid = 1; // only one common slackid (one-slack)
    dXc[size_active-1]->costfactor = 1.0;

    delta = (double*)realloc(delta, sizeof(double)*size_active);
    assert(delta!=NULL);
    delta[size_active-1] = margin2; // formulation combining hamming and F1 loss
    alpha = (double*)realloc(alpha, sizeof(double)*size_active);
    assert(alpha!=NULL);
    alpha[size_active-1] = 0.0;
    idle = (int*)realloc(idle, sizeof(int)*size_active);
    assert(idle!=NULL);
    idle[size_active-1] = 0;
    /* proximal point */
    proximal_rhs = (double*)realloc(proximal_rhs, sizeof(double)*size_active);
    assert(proximal_rhs!=NULL);
    cut_error = (double*)realloc(cut_error, sizeof(double)*size_active);
    assert(cut_error!=NULL);
    // note g_i = - new_constraint
    cut_error[size_active-1] = C*(sprod_ns(w_b, new_constraint) - sprod_ns(w, new_constraint));
    cut_error[size_active-1] += (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi));
    cut_error[size_active-1] -= (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi));

    gammaG0 = (double*)realloc(gammaG0, sizeof(double)*size_active);
    assert(gammaG0!=NULL);

    /* update Gram matrix: grow every row by one column, then fill the new
       row/column symmetrically */
    G = (double**)realloc(G, sizeof(double*)*size_active);
    assert(G!=NULL);
    G[size_active-1] = NULL;
    for (j=0;j<size_active;j++) {
      G[j] = (double*)realloc(G[j], sizeof(double)*size_active);
      assert(G[j]!=NULL);
    }
    for (j=0;j<size_active-1;j++) {
      G[size_active-1][j] = sprod_ss(dXc[size_active-1]->fvec, dXc[j]->fvec);
      G[j][size_active-1] = G[size_active-1][j];
    }
    G[size_active-1][size_active-1] = sprod_ss(dXc[size_active-1]->fvec,dXc[size_active-1]->fvec);

    /* update gammaG0: only the new entry while the centre w_b is unchanged,
       all entries once a serious step has moved it */
    if (null_step==1) {
      gammaG0[size_active-1] = sprod_ns(w_b, dXc[size_active-1]->fvec);
    } else {
      for (i=0;i<size_active;i++) {
        gammaG0[i] = sprod_ns(w_b, dXc[i]->fvec);
      }
    }

    /* update proximal_rhs */
    for (i=0;i<size_active;i++) {
      proximal_rhs[i] = delta[i] - rho/(1+rho)*gammaG0[i];
    }

    /* solve QP to update alpha */
    /* NOTE(review): the solver's return code r is stored but never inspected,
       so a failed solve goes unnoticed -- confirm whether that is intended. */
    dual_obj = 0;
    time_t mosek_start, mosek_end;
    time(&mosek_start);
    r = mosek_qp_optimize(G, proximal_rhs, alpha, (long) size_active, C, &dual_obj,rho);
    time(&mosek_end);
#if(DEBUG_LEVEL == 1)
    print_time(mosek_start, mosek_end, "Mosek solver");
#endif

    /* candidate w = sum_j alpha_j/(1+rho) * constraint_j  +  rho/(1+rho) * w_b;
       z_k_norm is the norm of the first term alone */
    clear_nvector(w,sm->sizePsi);
    for (j=0;j<size_active;j++) {
      if (alpha[j]>C*ALPHA_THRESHOLD) {
        add_vector_ns(w,dXc[j]->fvec,alpha[j]/(1+rho));
      }
    }

    z_k_norm = sqrt(sprod_nn(w,w,sm->sizePsi));

    add_vector_nn(w, w_b, sm->sizePsi, rho/(1+rho));

    /* detect if step size too small */
    sigma_k = 0;
    alphasum = 0;
    for (j=0;j<size_active;j++) {
      sigma_k += alpha[j]*cut_error[j];
      alphasum+=alpha[j];
    }
    sigma_k/=C;
    /* new_constraint is still the plane added at the top of this iteration */
    gTd = -C*(sprod_ns(w,new_constraint) - sprod_ns(w_b,new_constraint));

#if (DEBUG_LEVEL>0)
    for (j=0;j<size_active;j++) {
      printf("alpha[%d]: %.8g, cut_error[%d]: %.8g\n", j, alpha[j], j, cut_error[j]);
    }
    printf("sigma_k: %.8g\n", sigma_k);
    printf("alphasum: %.8g\n", alphasum);
    printf("g^T d: %.8g\n", gTd);
    fflush(stdout);
#endif

    /* update cleanup information */
    for (j=0;j<size_active;j++) {
      if (alpha[j]<ALPHA_THRESHOLD*C) {
        idle[j]++;
      } else {
        idle[j]=0;
      }
    }

    /* next cutting plane, evaluated at the candidate w */
    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile,
		  frac_sim, Fweight, dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation,
		  &margin2, datasetStartIdx, chunkSz, eid, chunkid);
    value = margin2 - sprod_ns(w, new_constraint);

    /* (Ajay: ONLINE LEARNING) constant addition to the loss: model score using
       w_prev values ('-' is used because the terms are reversed in the code) */
    margin -= sprod_ns(w_prev, new_constraint);

    /* primal objective at the candidate (both hamming and F1 loss) */
    primal_obj = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value + Cdash*margin/numChunks;

#if (DEBUG_LEVEL>0)
    printf("ITER PRIMAL_OBJ %.4f\n", primal_obj); fflush(stdout);
#endif

    temp_var = sprod_nn(w_b,w_b,sm->sizePsi);
    /* proximal_term = ||w - w_b||^2 */
    proximal_term = 0.0;
    for (i=1;i<sm->sizePsi+1;i++) {
      proximal_term += (w[i]-w_b[i])*(w[i]-w_b[i]);
    }

    reg_master_obj = -dual_obj+0.5*rho*temp_var/(1+rho);
    expected_descent = reg_master_obj - primal_obj_b;

    v_k = (reg_master_obj - proximal_term*rho/2) - primal_obj_b;

    primal_lower_bound = MAX(primal_lower_bound, reg_master_obj - 0.5*rho*(1+rho)*proximal_term);

#if (DEBUG_LEVEL>0)
    printf("ITER REG_MASTER_OBJ: %.4f\n", reg_master_obj);
    printf("ITER EXPECTED_DESCENT: %.4f\n", expected_descent);
    printf("ITER PRIMLA_OBJ_B: %.4f\n", primal_obj_b);
    printf("ITER RHO: %.4f\n", rho);
    printf("ITER ||w-w_b||^2: %.4f\n", proximal_term);
    printf("ITER PRIMAL_LOWER_BOUND: %.4f\n", primal_lower_bound);
    printf("ITER V_K: %.4f\n", v_k);
#endif
    obj_difference = primal_obj - primal_obj_b;

    if (primal_obj<primal_obj_b+kappa*expected_descent) {
      /* extra condition to be met */
      if ((gTd>m2*v_k)||(rho<min_rho+1E-8)) {
#if (DEBUG_LEVEL>0)
        printf("SERIOUS STEP\n");
#endif
        /* update cut_error: re-express every entry relative to the new centre */
        for (i=0;i<size_active;i++) {
          cut_error[i] -= (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi));
          cut_error[i] -= C*sprod_ns(w_b, dXc[i]->fvec);
          cut_error[i] += (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi));
          cut_error[i] += C*sprod_ns(w, dXc[i]->fvec);
        }
        primal_obj_b = primal_obj;
        for (i=1;i<sm->sizePsi+1;i++) {
          w_b[i] = w[i];
        }
        null_step = 0;
        serious_counter++;
      } else {
        /* increase step size */
#if (DEBUG_LEVEL>0)
        printf("NULL STEP: SS(ii) FAILS.\n");
#endif
        serious_counter--;
        rho = MAX(rho/10,min_rho);
      }
    } else { /* no sufficient decrease */
      serious_counter--;
      if ((cut_error[size_active-1]>m3*last_sigma_k)&&(fabs(obj_difference)>last_z_k_norm+last_sigma_k)) {
#if (DEBUG_LEVEL>0)
        printf("NULL STEP: NS(ii) FAILS.\n");
#endif
        rho = MIN(10*rho,max_rho);
      }
#if (DEBUG_LEVEL>0)
      else printf("NULL STEP\n");
#endif
    }
    /* update last_sigma_k */
    last_sigma_k = sigma_k;
    last_z_k_norm = z_k_norm;

    /* break away from while loop if more than certain proportional decrease in primal objective */
    if (primal_obj_b/initial_primal_obj<1-decrease_proportion) {
      suff_decrease_cond = 1;
    }

    /* clean up */
    /* NOTE(review): the arrays are passed by value, so any reallocation done
       inside resize_cleanup would not be visible here -- verify against its
       definition. */
    if (iter % CLEANUP_CHECK == 0) {
      size_active = resize_cleanup(size_active, idle, alpha, delta, gammaG0, proximal_rhs, G, dXc, cut_error);
    }

    time(&iter_end);

#if (DEBUG_LEVEL==1)
    char msg[20];
    /* bounded formatting so the label can never overrun msg */
    snprintf(msg, sizeof msg, "ITER %d", iter);
    print_time(iter_start, iter_end, msg);
#endif
  } // end cutting plane while loop

  printf(" Inner loop optimization finished.\n"); fflush(stdout);

  /* free memory */
  /* NOTE(review): free_example is called with 0 here; if that means "do not
     free fvec" the constraint vectors held in dXc are never released --
     confirm against free_example's definition. */
  for (j=0;j<size_active;j++) {
    free(G[j]);
    free_example(dXc[j],0);
  }
  free(G);
  free(dXc);
  free(alpha);
  free(delta);
  free_svector(new_constraint);
  free(idle);
  free(gammaG0);
  free(proximal_rhs);
  free(cut_error);

  /* copy and free */
  for (i=1;i<sm->sizePsi+1;i++) {
    w[i] = w_b[i];
  }
  free(w_b);

  return(primal_obj_b);

}
コード例 #3
0
/*
 * Proximal cutting-plane loop whose restricted QP is solved with
 * svm_learn_optimization.
 *
 * w_b holds the current centre (initialised from w). Each iteration adds the
 * latest cutting plane to the active set, solves for alpha, forms a candidate
 * w, evaluates the objective at that candidate, and then either accepts the
 * candidate as the new centre ("serious step") or keeps the old centre and
 * adjusts the proximal weight rho ("null step").
 *
 * The loop stops when expected_descent >= -epsilon, when iter reaches
 * MAX_ITER, or when primal_obj_b has dropped below
 * (1 - decrease_proportion) * initial_primal_obj.
 *
 * Parameters
 *   w        in: starting weights; out: overwritten with the final centre w_b
 *   m, fycache, ex, sm, sparm
 *            forwarded unchanged to find_cutting_plane; sm also supplies
 *            sizePsi
 *   MAX_ITER upper bound on the number of iterations
 *   C        weight of the loss term; also stored in lparm.svm_c
 *   epsilon  stopping tolerance; also used for lparm.eps / epsilon_crit
 *
 * Returns primal_obj_b, the objective value recorded at the last accepted
 * centre.
 */
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache,
                               EXAMPLE *ex, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm) {
    long i, j;
    double xi;
    double *alpha;
    double **G; /* Gram matrix */
    DOC **dXc; /* constraint matrix */
    double *delta; /* rhs of constraints */
    SVECTOR *new_constraint;
    double dual_obj, alphasum;
    int iter, size_active;
    double value;
    int r;
    int *idle; /* for cleaning up */
    double margin;
    double primal_obj;
    double *proximal_rhs;
    double *gammaG0 = NULL;
    double min_rho = 0.001;
    double max_rho;
    double serious_counter = 0;
    double rho = 1.0; /* temporarily set it to 1 first */

    double expected_descent, primal_obj_b = -1, reg_master_obj;
    int null_step = 1;
    double *w_b;
    double kappa = 0.1;
    double temp_var;
    double proximal_term, primal_lower_bound;

    double v_k;
    double obj_difference;
    double *cut_error; // cut_error[i] = alpha_{k,i} at current center x_k
    double sigma_k;
    double m2 = 0.2;
    double m3 = 0.9;
    double gTd;
    double last_sigma_k = 0;

    double initial_primal_obj;
    int suff_decrease_cond = 0;
    double decrease_proportion = 0.2; // start from 0.2 first

    double z_k_norm;
    double last_z_k_norm = 0;


    /* set parameters for hideo solver */
    LEARN_PARM lparm;
    KERNEL_PARM kparm;
    MODEL *svmModel = NULL;
    lparm.biased_hyperplane = 0;
    lparm.epsilon_crit = MIN(epsilon, 0.001);
    lparm.svm_c = C;
    lparm.sharedslack = 1;
    kparm.kernel_type = LINEAR;

    lparm.remove_inconsistent = 0;
    lparm.skip_final_opt_check = 0;
    lparm.svm_maxqpsize = 10;
    lparm.svm_newvarsinqp = 0;
    lparm.svm_iter_to_shrink = -9999;
    lparm.maxiter = 100000;
    lparm.kernel_cache_size = 40;
    lparm.eps = epsilon;
    lparm.transduction_posratio = -1.0;
    lparm.svm_costratio = 1.0;
    lparm.svm_costratio_unlab = 1.0;
    lparm.svm_unlabbound = 1E-5;
    lparm.epsilon_a = 1E-10;  /* changed from 1e-15 */
    lparm.compute_loo = 0;
    lparm.rho = 1.0;
    lparm.xa_depth = 0;
    strcpy(lparm.alphafile, "");
    kparm.poly_degree = 3;
    kparm.rbf_gamma = 1.0;
    kparm.coef_lin = 1;
    kparm.coef_const = 1;
    strcpy(kparm.custom, "empty");


    w_b = create_nvector(sm->sizePsi);
    clear_nvector(w_b, sm->sizePsi);
    /* warm start */
    for (i = 1; i < sm->sizePsi + 1; i++) {
        w_b[i] = w[i];
    }

    iter = 0;
    size_active = 0;
    xi = 0.0;
    alpha = NULL;
    G = NULL;
    dXc = NULL;
    delta = NULL;
    idle = NULL;

    proximal_rhs = NULL;
    cut_error = NULL;

    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm);
    value = margin - sprod_ns(w, new_constraint);

    primal_obj_b = 0.5 * sprod_nn(w_b, w_b, sm->sizePsi) + C * value;
    primal_obj = 0.5 * sprod_nn(w, w, sm->sizePsi) + C * value;
    primal_lower_bound = 0;
    expected_descent = -primal_obj_b;
    initial_primal_obj = primal_obj_b;

    max_rho = 100 * C; // tree-edge loss not within 0-1

    printf("Running CCCP inner loop solver: ");
    fflush(stdout);

    while ((!suff_decrease_cond) && (expected_descent < -epsilon) && (iter < MAX_ITER)) {
        iter += 1;
        size_active += 1;

#if (DEBUG_LEVEL > 0)
        printf("ITER %d\n", iter);
#endif
        printf(".");
        fflush(stdout);


        /* add  constraint */
        dXc = (DOC **) realloc(dXc, sizeof(DOC *) * size_active);
        assert(dXc != NULL);
        dXc[size_active - 1] = (DOC *) malloc(sizeof(DOC));
        /* checked like every other allocation here; it is dereferenced right below */
        assert(dXc[size_active - 1] != NULL);
        dXc[size_active - 1]->fvec = new_constraint;
        dXc[size_active - 1]->slackid = 1; // only one common slackid (one-slack)
        dXc[size_active - 1]->costfactor = 1.0;

        delta = (double *) realloc(delta, sizeof(double) * size_active);
        assert(delta != NULL);
        delta[size_active - 1] = margin;
        alpha = (double *) realloc(alpha, sizeof(double) * size_active);
        assert(alpha != NULL);
        alpha[size_active - 1] = 0.0;
        idle = (int *) realloc(idle, sizeof(int) * size_active);
        assert(idle != NULL);
        idle[size_active - 1] = 0;
        /* proximal point */
        proximal_rhs = (double *) realloc(proximal_rhs, sizeof(double) * size_active);
        assert(proximal_rhs != NULL);
        cut_error = (double *) realloc(cut_error, sizeof(double) * size_active);
        assert(cut_error != NULL);
        // note g_i = - new_constraint
        cut_error[size_active - 1] = C * (sprod_ns(w_b, new_constraint) - sprod_ns(w, new_constraint));
        cut_error[size_active - 1] += (primal_obj_b - 0.5 * sprod_nn(w_b, w_b, sm->sizePsi));
        cut_error[size_active - 1] -= (primal_obj - 0.5 * sprod_nn(w, w, sm->sizePsi));

        gammaG0 = (double *) realloc(gammaG0, sizeof(double) * size_active);
        assert(gammaG0 != NULL);

        /* update Gram matrix: grow every row by one column, then fill the
           new row/column symmetrically */
        G = (double **) realloc(G, sizeof(double *) * size_active);
        assert(G != NULL);
        G[size_active - 1] = NULL;
        for (j = 0; j < size_active; j++) {
            G[j] = (double *) realloc(G[j], sizeof(double) * size_active);
            assert(G[j] != NULL);
        }
        for (j = 0; j < size_active - 1; j++) {
            G[size_active - 1][j] = sprod_ss(dXc[size_active - 1]->fvec, dXc[j]->fvec);
            G[j][size_active - 1] = G[size_active - 1][j];
        }
        G[size_active - 1][size_active - 1] = sprod_ss(dXc[size_active - 1]->fvec, dXc[size_active - 1]->fvec);


        /* update gammaG0: only the new entry while the centre w_b is
           unchanged, all entries once a serious step has moved it */
        if (null_step == 1) {
            gammaG0[size_active - 1] = sprod_ns(w_b, dXc[size_active - 1]->fvec);
        } else {
            for (i = 0; i < size_active; i++) {
                gammaG0[i] = sprod_ns(w_b, dXc[i]->fvec);
            }
        }

        /* update proximal_rhs */
        for (i = 0; i < size_active; i++) {
            proximal_rhs[i] = (1 + rho) * delta[i] - rho * gammaG0[i];
        }


        /* solve QP to update alpha; with a single constraint the solver is
           skipped and alpha[0] is set to C directly */
        if (size_active > 1) {
            if (svmModel != NULL) free_model(svmModel, 0);
            svmModel = (MODEL *) my_malloc(sizeof(MODEL));
            svm_learn_optimization(dXc, proximal_rhs, size_active, sm->sizePsi, &lparm, &kparm, NULL, svmModel, alpha);
        } else {
            assert(size_active == 1);
            alpha[0] = C;
        }

        /* w temporarily holds sum_j alpha_j/(1+rho) * constraint_j; the
           dual objective and z_k_norm are computed from that partial sum
           before the w_b term is added */
        clear_nvector(w, sm->sizePsi);
        for (j = 0; j < size_active; j++) {
            if (alpha[j] > C * ALPHA_THRESHOLD) {
                add_vector_ns(w, dXc[j]->fvec, alpha[j] / (1 + rho));
            }
        }

        /* compute dual obj */
        dual_obj = +0.5 * (1 + rho) * sprod_nn(w, w, sm->sizePsi);
        for (j = 0; j < size_active; j++) {
            dual_obj -= proximal_rhs[j] / (1 + rho) * alpha[j];
        }

        z_k_norm = sqrt(sprod_nn(w, w, sm->sizePsi));

        add_vector_nn(w, w_b, sm->sizePsi, rho / (1 + rho));


        /* detect if step size too small */
        sigma_k = 0;
        alphasum = 0;
        for (j = 0; j < size_active; j++) {
            sigma_k += alpha[j] * cut_error[j];
            alphasum += alpha[j];
        }
        sigma_k /= C;
        /* new_constraint is still the plane added at the top of this iteration */
        gTd = -C * (sprod_ns(w, new_constraint) - sprod_ns(w_b, new_constraint));

#if (DEBUG_LEVEL > 0)
        for (j=0;j<size_active;j++) {
          printf("alpha[%d]: %.8g, cut_error[%d]: %.8g\n", j, alpha[j], j, cut_error[j]);
        }
        printf("sigma_k: %.8g\n", sigma_k);
        printf("alphasum: %.8g\n", alphasum);
        printf("g^T d: %.8g\n", gTd);
        fflush(stdout);
#endif


        /* update cleanup information */
        for (j = 0; j < size_active; j++) {
            if (alpha[j] < ALPHA_THRESHOLD * C) {
                idle[j]++;
            } else {
                idle[j] = 0;
            }
        }

        /* next cutting plane, evaluated at the candidate w */
        new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm);
        value = margin - sprod_ns(w, new_constraint);

        /* primal objective at the candidate */
        primal_obj = 0.5 * sprod_nn(w, w, sm->sizePsi) + C * value;

#if (DEBUG_LEVEL > 0)
        printf("ITER PRIMAL_OBJ %.4f\n", primal_obj); fflush(stdout);
#endif


        temp_var = sprod_nn(w_b, w_b, sm->sizePsi);
        /* proximal_term = ||w - w_b||^2 */
        proximal_term = 0.0;
        for (i = 1; i < sm->sizePsi + 1; i++) {
            proximal_term += (w[i] - w_b[i]) * (w[i] - w_b[i]);
        }

        reg_master_obj = -dual_obj + 0.5 * rho * temp_var / (1 + rho);
        expected_descent = reg_master_obj - primal_obj_b;

        v_k = (reg_master_obj - proximal_term * rho / 2) - primal_obj_b;

        primal_lower_bound = MAX(primal_lower_bound, reg_master_obj - 0.5 * rho * (1 + rho) * proximal_term);

#if (DEBUG_LEVEL > 0)
        printf("ITER REG_MASTER_OBJ: %.4f\n", reg_master_obj);
        printf("ITER EXPECTED_DESCENT: %.4f\n", expected_descent);
        printf("ITER PRIMLA_OBJ_B: %.4f\n", primal_obj_b);
        printf("ITER RHO: %.4f\n", rho);
        printf("ITER ||w-w_b||^2: %.4f\n", proximal_term);
        printf("ITER PRIMAL_LOWER_BOUND: %.4f\n", primal_lower_bound);
        printf("ITER V_K: %.4f\n", v_k);
#endif
        obj_difference = primal_obj - primal_obj_b;

        if (primal_obj < primal_obj_b + kappa * expected_descent) {
            /* extra condition to be met */
            if ((gTd > m2 * v_k) || (rho < min_rho + 1E-8)) {
#if (DEBUG_LEVEL > 0)
                printf("SERIOUS STEP\n");
#endif
                /* update cut_error: re-express every entry relative to the new centre */
                for (i = 0; i < size_active; i++) {
                    cut_error[i] -= (primal_obj_b - 0.5 * sprod_nn(w_b, w_b, sm->sizePsi));
                    cut_error[i] -= C * sprod_ns(w_b, dXc[i]->fvec);
                    cut_error[i] += (primal_obj - 0.5 * sprod_nn(w, w, sm->sizePsi));
                    cut_error[i] += C * sprod_ns(w, dXc[i]->fvec);
                }
                primal_obj_b = primal_obj;
                for (i = 1; i < sm->sizePsi + 1; i++) {
                    w_b[i] = w[i];
                }
                null_step = 0;
                serious_counter++;
            } else {
                /* increase step size */
#if (DEBUG_LEVEL > 0)
                printf("NULL STEP: SS(ii) FAILS.\n");
#endif
                serious_counter--;
                rho = MAX(rho / 10, min_rho);
            }
        } else { /* no sufficient decrease */
            serious_counter--;

            if ((cut_error[size_active - 1] > m3 * last_sigma_k) &&
                (fabs(obj_difference) > last_z_k_norm + last_sigma_k)) {
#if (DEBUG_LEVEL > 0)
                printf("NULL STEP: NS(ii) FAILS.\n");
#endif
                rho = MIN(10 * rho, max_rho);
            }
#if (DEBUG_LEVEL > 0)
            else printf("NULL STEP\n");
#endif
        }
        /* update last_sigma_k */
        last_sigma_k = sigma_k;
        last_z_k_norm = z_k_norm;


        /* break away from while loop if more than certain proportional decrease in primal objective */
        if (primal_obj_b / initial_primal_obj < 1 - decrease_proportion) {
            suff_decrease_cond = 1;
        }

        /* clean up: arrays are passed by address so resize_cleanup can
           replace them */
        if (iter % CLEANUP_CHECK == 0) {
            size_active = resize_cleanup(size_active, &idle, &alpha, &delta, &gammaG0, &proximal_rhs, &G, &dXc,
                                         &cut_error);
        }


    } // end cutting plane while loop

    printf(" Inner loop optimization finished.\n");
    fflush(stdout);

    /* free memory */
    /* NOTE(review): free_example is called with 0 here; if that means "do not
       free fvec" the constraint vectors held in dXc are never released --
       confirm against free_example's definition. */
    for (j = 0; j < size_active; j++) {
        free(G[j]);
        free_example(dXc[j], 0);
    }
    free(G);
    free(dXc);
    free(alpha);
    free(delta);
    free_svector(new_constraint);
    free(idle);
    free(gammaG0);
    free(proximal_rhs);
    free(cut_error);
    /* the loop only releases the model of the previous iteration; release
       the last one here, in the same way, so it is not leaked */
    if (svmModel != NULL) free_model(svmModel, 0);

    /* copy and free */
    for (i = 1; i < sm->sizePsi + 1; i++) {
        w[i] = w_b[i];
    }
    free(w_b);

    return (primal_obj_b);

}