double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, /*double epsilon,*/ SVECTOR **fycache, EXAMPLE *ex, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm) {
    long i,j;
    double *alpha;
    double **G; /* Gram matrix */
    DOC **dXc; /* constraint matrix */
    double *delta; /* rhs of constraints */
    SVECTOR *new_constraint;
    double dual_obj/*, alphasum*/;
    int iter, size_active, no_violation_iter;
    double value;
    //int r;
    //int *idle; /* for cleaning up */
    double margin;
    //double primal_obj;
    double lower_bound, approx_upper_bound;
    double *proximal_rhs;
    //double *gammaG0=NULL;
    //double min_rho = 0.001;
    //double max_rho;
    //double serious_counter=0;
    //double rho = 1.0;

    //double expected_descent, primal_obj_b=-1, reg_master_obj;
    //int null_step=1;
    //double *w_b;
    //double kappa=0.01;
    //double temp_var;
    //double proximal_term, primal_lower_bound;

    //double v_k;
    //double obj_difference;
    // double *cut_error; // cut_error[i] = alpha_{k,i} at current center x_k
    //double sigma_k;
    //double m2 = 0.2;
    //double m3 = 0.9;
    //double gTd;
    //double last_sigma_k=0;

    //double initial_primal_obj;
    //int suff_decrease_cond=0;
    //double decrease_proportion = 0.2; // start from 0.2 first

    //double z_k_norm;
    //double last_z_k_norm=0;


    /*
    w_b = create_nvector(sm->sizePsi);
    clear_nvector(w_b,sm->sizePsi);
    // warm start
    for (i=1;i<sm->sizePsi+1;i++) {
      w_b[i] = w[i];
    }*/

    iter = 0;
    no_violation_iter = 0;
    size_active = 0;
    alpha = NULL;
    G = NULL;
    dXc = NULL;
    delta = NULL;
    //idle = NULL;

    proximal_rhs = NULL;
    //cut_error = NULL;

    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm);
    value = margin - sprod_ns(w, new_constraint);

    //primal_obj_b = 0.5*sprod_nn(w_b,w_b,sm->sizePsi)+C*value;
    //primal_obj = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value;
    //primal_lower_bound = 0;
    //expected_descent = -primal_obj_b;
    //initial_primal_obj = primal_obj_b;

    //max_rho = C;

    // Non negative weight constraints
    int nNonNeg = sm->sizePsi - sm->firstNonNegWeightIndex + 1;
    G = (double**)malloc(sizeof(double*)*nNonNeg);
    for (j=0; j<nNonNeg; j++) {
        G[j] = (double*)malloc(sizeof(double)*nNonNeg);
        for (int k=0; k<nNonNeg; k++) {
            G[j][k] = 0;
        }

        G[j][j] = 1.0;
    }
    double* alphabeta = NULL;

    while (/*(!suff_decrease_cond)&&(expected_descent<-epsilon)&&*/(iter<MAX_ITER)&&(no_violation_iter<MAX_INNER_ITER_NO_VIOLATION)) {
        LearningTracker::NextInnerIteration();
        iter+=1;
        size_active+=1;

#if (DEBUG_LEVEL>0)
        printf("INNER ITER %d\n", iter);
#endif

        /* add  constraint */
        dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active);
        assert(dXc!=NULL);
        dXc[size_active-1] = (DOC*)malloc(sizeof(DOC));
        dXc[size_active-1]->fvec = new_constraint;
        dXc[size_active-1]->slackid = 1; // only one common slackid (one-slack)
        dXc[size_active-1]->costfactor = 1.0;

        delta = (double*)realloc(delta, sizeof(double)*size_active);
        assert(delta!=NULL);
        delta[size_active-1] = margin;

        alphabeta = (double*)realloc(alphabeta, sizeof(double)*(size_active+nNonNeg));
        assert(alphabeta!=NULL);
        alphabeta[size_active+nNonNeg-1] = 0.0;

        /*idle = (int*)realloc(idle, sizeof(int)*size_active);
        assert(idle!=NULL);
        idle[size_active-1] = 0;*/

        /* proximal point */
        proximal_rhs = (double*)realloc(proximal_rhs, sizeof(double)*(size_active+nNonNeg));
        assert(proximal_rhs!=NULL);

        /*cut_error = (double*)realloc(cut_error, sizeof(double)*size_active);
        assert(cut_error!=NULL);
        // note g_i = - new_constraint
        cut_error[size_active-1] = C*(sprod_ns(w_b, new_constraint) - sprod_ns(w, new_constraint));
        cut_error[size_active-1] += (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi));
        cut_error[size_active-1] -= (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi)); */

        /*gammaG0 = (double*)realloc(gammaG0, sizeof(double)*size_active);
        assert(gammaG0!=NULL);*/

        /* update Gram matrix */
        G = (double**)realloc(G, sizeof(double*)*(size_active+nNonNeg));
        assert(G!=NULL);
        G[size_active+nNonNeg-1] = NULL;
        for (j=0; j<size_active+nNonNeg; j++) {
            G[j] = (double*)realloc(G[j], sizeof(double)*(size_active+nNonNeg));
            assert(G[j]!=NULL);
        }
        for (j=0; j<size_active-1; j++) {
            G[size_active+nNonNeg-1][j+nNonNeg] = sprod_ss(dXc[size_active-1]->fvec, dXc[j]->fvec);
            G[j+nNonNeg][size_active+nNonNeg-1] = G[size_active+nNonNeg-1][j+nNonNeg];
        }
        G[size_active+nNonNeg-1][size_active+nNonNeg-1] = sprod_ss(dXc[size_active-1]->fvec,dXc[size_active-1]->fvec);

        for (j=0; j<nNonNeg; j++) {
            WORD indicator[2];
            indicator[0].wnum = j + sm->firstNonNegWeightIndex;
            indicator[0].weight = 1.0;
            indicator[1].wnum = 0;
            indicator[1].weight = 0.0;
            SVECTOR* indicator_vec = create_svector(indicator, NULL, 1.0);
            G[size_active+nNonNeg-1][j] = sprod_ss(dXc[size_active-1]->fvec, indicator_vec);
            G[j][size_active+nNonNeg-1] = G[size_active+nNonNeg-1][j];
            free_svector(indicator_vec);
        }

        /* update gammaG0 */
        /*if (null_step==1) {
          gammaG0[size_active-1] = sprod_ns(w_b, dXc[size_active-1]->fvec);
        } else {
          for (i=0;i<size_active;i++) {
            gammaG0[i] = sprod_ns(w_b, dXc[i]->fvec);
          }
        }*/

        /* update proximal_rhs */
        for (i=0; i<size_active; i++) {
            proximal_rhs[i+nNonNeg] = -delta[i]; //(1+rho) * (rho * gammaG0[i] - (1 + rho) * delta[i]);
        }
        for (i=0; i<nNonNeg; i++) {
            proximal_rhs[i] = 0; //w_b[i + 1]*rho * (1+rho);
        }

        /* DEBUG */
        /*
        for (i = 0; i < size_active + nNonNeg; ++i) {
        	printf("G[%d]=", i);
        	for (j = 0; j < size_active + nNonNeg; ++j) {
        		printf("%.4f ", G[i][j]);
        	}
        	printf("\n");
        }
        printf("\n");
        for (i = 0; i < size_active + nNonNeg; ++i)
        	printf("proximal_rhs[%d]=%.4f\n", i, proximal_rhs[i]);
        */

        /* solve QP to update alpha */
        dual_obj = 0;
        mosek_qp_optimize(G, proximal_rhs, alphabeta, (long) size_active+nNonNeg, C, &dual_obj, nNonNeg);
        printf("dual_obj=%.4lf\n", dual_obj);

        alpha = alphabeta + nNonNeg;

        clear_nvector(w,sm->sizePsi);
        for (i = 0; i < nNonNeg; i++) {
            w[sm->firstNonNegWeightIndex + i] = alphabeta[i];//alphabeta[i]/(1+rho);  // add betas
        }
        for (j=0; j<size_active; j++) {
            if (alpha[j]>C*ALPHA_THRESHOLD) {
                //add_vector_ns(w,dXc[j]->fvec,alpha[j]/(1+rho));
                add_vector_ns(w,dXc[j]->fvec,alpha[j]);
            }
        }

        //z_k_norm = sqrt(sprod_nn(w,w,sm->sizePsi));

        //add_vector_nn(w, w_b, sm->sizePsi, rho/(1+rho));

        LearningTracker::ReportWeights(w, sm->sizePsi);

        /* detect if step size too small */
        /*
        sigma_k = 0;
        alphasum = 0;
        for (j=0;j<size_active;j++) {
          sigma_k += alpha[j]*cut_error[j];
          alphasum+=alpha[j];
        }
        sigma_k/=C;
        gTd = -C*(sprod_ns(w,new_constraint) - sprod_ns(w_b,new_constraint));

        #if (DEBUG_LEVEL>0)
        for (j=0;j<size_active;j++) {
          printf("alpha[%d]: %.8g, cut_error[%d]: %.8g\n", j, alpha[j], j, cut_error[j]);
        }
        printf("sigma_k: %.8g\n", sigma_k);
        printf("alphasum: %.8g\n", alphasum);
        printf("g^T d: %.8g\n", gTd);
        fflush(stdout);
        #endif
        */

        /* update cleanup information */
        /*
        for (j=0;j<size_active;j++) {
          if (alpha[j]<ALPHA_THRESHOLD*C) {
        idle[j]++;
          } else {
            idle[j]=0;
          }
        }
        */

        // update lower bound
        double xi = -1e+20;
        for (i = 0; i < size_active; ++i) {
            xi = MAX(xi, delta[i] - sprod_ns(w, dXc[i]->fvec));
        }
        lower_bound = 0.5*sprod_nn(w,w,sm->sizePsi)+C*xi;
        printf("lower_bound=%.4lf\n", lower_bound);
        assert(fabs(lower_bound + dual_obj) < 1e-6);
        LearningTracker::ReportLowerBound(lower_bound);

        // find new constraint
        new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm);
        value = margin - sprod_ns(w, new_constraint);
        double violation = value - xi;
        if (violation > CUTTING_PLANE_EPS) {
            printf("New constraint is violated by %.4lf\n", violation);
            no_violation_iter = 0;
        } else {
            ++no_violation_iter;
            printf("New constraint is underviolated by %.4lf\n", violation);
            printf("%d more such constraints to stop\n", MAX_INNER_ITER_NO_VIOLATION - no_violation_iter);
        }

        // update upper bound
        approx_upper_bound = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value;
        printf("approx_upper_bound=%.4lf\n", approx_upper_bound);
        LearningTracker::ReportUpperBound(approx_upper_bound);

        /*
        temp_var = sprod_nn(w_b,w_b,sm->sizePsi);
        proximal_term = 0.0;
        for (i=1;i<sm->sizePsi+1;i++) {
          proximal_term += (w[i]-w_b[i])*(w[i]-w_b[i]);
        }

        reg_master_obj = -dual_obj+0.5*rho*temp_var/(1+rho);
        expected_descent = reg_master_obj - primal_obj_b;

        v_k = (reg_master_obj - proximal_term*rho/2) - primal_obj_b;

        primal_lower_bound = MAX(primal_lower_bound, reg_master_obj - 0.5*rho*(1+rho)*proximal_term);
        LearningTracker::ReportLowerBoundValue(reg_master_obj);

        #if (DEBUG_LEVEL>0)
        printf("ITER REG_MASTER_OBJ: %.4f\n", reg_master_obj);
        printf("ITER EXPECTED_DESCENT: %.4f\n", expected_descent);
        printf("ITER PRIMAL_OBJ_B: %.4f\n", primal_obj_b);
        printf("ITER RHO: %.4f\n", rho);
        printf("ITER ||w-w_b||^2: %.4f\n", proximal_term);
        printf("ITER PRIMAL_LOWER_BOUND: %.4f\n", primal_lower_bound);
        printf("ITER V_K: %.4f\n", v_k);
        #endif
        obj_difference = primal_obj - primal_obj_b;

        if (primal_obj<primal_obj_b+kappa*expected_descent) {
          // extra condition to be met
          if ((gTd>m2*v_k)||(rho<min_rho+1E-8)) {
        #if (DEBUG_LEVEL>0)
        printf("SERIOUS STEP\n");
        #endif
        // update cut_error
        for (i=0;i<size_active;i++) {
          cut_error[i] -= (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi));
          cut_error[i] -= C*sprod_ns(w_b, dXc[i]->fvec);
          cut_error[i] += (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi));
          cut_error[i] += C*sprod_ns(w, dXc[i]->fvec);
        }
        primal_obj_b = primal_obj;
        for (i=1;i<sm->sizePsi+1;i++) {
          w_b[i] = w[i];
        }
        null_step = 0;
        serious_counter++;
          } else {
        // increase step size
        #if (DEBUG_LEVEL>0)
        printf("NULL STEP: SS(ii) FAILS.\n");
        #endif
        serious_counter--;
        rho = MAX(rho/10,min_rho);
          }
        } else { // no sufficient decrease
          serious_counter--;
          if ((cut_error[size_active-1]>m3*last_sigma_k)&&(fabs(obj_difference)>last_z_k_norm+last_sigma_k)) {
        #if (DEBUG_LEVEL>0)
        printf("NULL STEP: NS(ii) FAILS.\n");
        #endif
        rho = MIN(10*rho,max_rho);
          }
        #if (DEBUG_LEVEL>0)
          else printf("NULL STEP\n");
        #endif
        }
        // update last_sigma_k
        last_sigma_k = sigma_k;
        last_z_k_norm = z_k_norm;


        // break away from while loop if more than certain proportioal decrease in primal objective
        if (primal_obj_b/initial_primal_obj<1-decrease_proportion) {
          suff_decrease_cond = 1;
        }

        // clean up
        if (iter % CLEANUP_CHECK == 0) {
          size_active = resize_cleanup(size_active, idle, alpha, delta, gammaG0, proximal_rhs, G, dXc, cut_error);
        }
        */

    } // end cutting plane while loop

    printf("Inner loop optimization finished.\n");
    fflush(stdout);

    /* free memory */
    for (j=0; j<size_active; j++) {
        free(G[j]);
        free_example(dXc[j],0);
    }
    free(G);
    free(dXc);
    free(alphabeta);
    free(delta);
    free_svector(new_constraint);
    //free(idle);
    //free(gammaG0);
    free(proximal_rhs);
    //free(cut_error);

    /* copy and free */
    /*for (i=1;i<sm->sizePsi+1;i++) {
      w[i] = w_b[i];
    }
    free(w_b);*/

    //return(primal_obj_b);
    return lower_bound;
}
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex,
		STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, char *tmpdir, char * trainfile, double frac_sim, double Fweight,
		char *dataset_stats_file, double rho_admm, long isExhaustive, long isLPrelaxation, double Cdash, int datasetStartIdx, int chunkSz,
		int eid, int chunkid, double *w_prev, int numChunks) {
//	  printf("Addr. of w (inside cp_algo) %x\t%x\n",w,sm->w);
  long i,j;
  double xi;
  double *alpha;
  double **G; /* Gram matrix */
  DOC **dXc; /* constraint matrix */
  double *delta; /* rhs of constraints */
  SVECTOR *new_constraint;
  double dual_obj, alphasum;
  int iter, size_active; 
  double value;
  int r;
  int *idle; /* for cleaning up */
  double margin;
  double primal_obj;
  double *proximal_rhs;
  double *gammaG0=NULL;
  double min_rho = 0.001;
  double max_rho;
  double serious_counter=0;
  double rho = 1.0; /* temporarily set it to 1 first */

  double expected_descent, primal_obj_b=-1, reg_master_obj;
  int null_step=1;
  double *w_b;
  double kappa=0.1;
  double temp_var;
  double proximal_term, primal_lower_bound;

  double v_k; 
  double obj_difference; 
  double *cut_error; // cut_error[i] = alpha_{k,i} at current center x_k
  double sigma_k; 
  double m2 = 0.2;
  double m3 = 0.9;
  double gTd; 
  double last_sigma_k=0; 

  double initial_primal_obj;
  int suff_decrease_cond=0;
  double decrease_proportion = 0.2; // start from 0.2 first 

  double z_k_norm;
  double last_z_k_norm=0;

  w_b = create_nvector(sm->sizePsi);
  clear_nvector(w_b,sm->sizePsi);
  /* warm start */
  for (i=1;i<sm->sizePsi+1;i++) {
    w_b[i] = w[i];
  }

  iter = 0;
  size_active = 0;
  xi = 0.0;
  alpha = NULL;
  G = NULL;
  dXc = NULL;
  delta = NULL;
  idle = NULL;

  proximal_rhs = NULL;
  cut_error = NULL; 

  printf("ITER 0 \n(before cutting plane) \n");
  double margin2;
  new_constraint = find_cutting_plane (ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile, frac_sim,
		  Fweight, dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation, &margin2,
		  datasetStartIdx, chunkSz, eid, chunkid);
  value = margin2 - sprod_ns(w, new_constraint);

  margin -= sprod_ns(w_prev, new_constraint); //(Ajay: ONLINE LEARNING) IMPT NOTE --> constant addition to the loss ..
  	  	  	  	  	  	  	  	  	  	  	  // model score using w_prev values ('-' is used because the terms are reversed in the code)
	
  primal_obj_b = 0.5*sprod_nn(w_b,w_b,sm->sizePsi)+C*value + Cdash*margin/numChunks; // Ajay: Change in obj involing both hamming and F1 loss
  primal_obj = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value + Cdash*margin/numChunks; // Ajay: Change in obj involing both hamming and F1 loss;
  primal_lower_bound = 0;
  expected_descent = -primal_obj_b;
  initial_primal_obj = primal_obj_b; 

  max_rho = C; 

  printf("Running CCCP inner loop solver: \n"); fflush(stdout);

  time_t iter_start, iter_end;

  while ((!suff_decrease_cond)&&(expected_descent<-epsilon)&&(iter<MAX_ITER)) { 
    iter+=1;
    size_active+=1;

    time(&iter_start);

#if (DEBUG_LEVEL>0)
    printf("ITER %d\n", iter); 
#endif
    printf("."); fflush(stdout); 

    /* add  constraint */
    dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active);
    assert(dXc!=NULL);
    dXc[size_active-1] = (DOC*)malloc(sizeof(DOC));
    dXc[size_active-1]->fvec = new_constraint; 
    dXc[size_active-1]->slackid = 1; // only one common slackid (one-slack)
    dXc[size_active-1]->costfactor = 1.0;

    delta = (double*)realloc(delta, sizeof(double)*size_active);
    assert(delta!=NULL);
    delta[size_active-1] = margin2; // Ajay: changing for the formulation combining hamming and F1loss
    alpha = (double*)realloc(alpha, sizeof(double)*size_active);
    assert(alpha!=NULL);
    alpha[size_active-1] = 0.0;
    idle = (int*)realloc(idle, sizeof(int)*size_active);
    assert(idle!=NULL); 
    idle[size_active-1] = 0;
    /* proximal point */
    proximal_rhs = (double*)realloc(proximal_rhs, sizeof(double)*size_active);
    assert(proximal_rhs!=NULL); 
    cut_error = (double*)realloc(cut_error, sizeof(double)*size_active); 
    assert(cut_error!=NULL); 
    // note g_i = - new_constraint
    cut_error[size_active-1] = C*(sprod_ns(w_b, new_constraint) - sprod_ns(w, new_constraint)); 
    cut_error[size_active-1] += (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi)); 
    cut_error[size_active-1] -= (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi)); 

    gammaG0 = (double*)realloc(gammaG0, sizeof(double)*size_active);
    assert(gammaG0!=NULL);
      
    /* update Gram matrix */
    G = (double**)realloc(G, sizeof(double*)*size_active);
    assert(G!=NULL);
    G[size_active-1] = NULL;
    for (j=0;j<size_active;j++) {
      G[j] = (double*)realloc(G[j], sizeof(double)*size_active);
      assert(G[j]!=NULL);
    }
    for (j=0;j<size_active-1;j++) {
      G[size_active-1][j] = sprod_ss(dXc[size_active-1]->fvec, dXc[j]->fvec);
      G[j][size_active-1] = G[size_active-1][j];
    }
    G[size_active-1][size_active-1] = sprod_ss(dXc[size_active-1]->fvec,dXc[size_active-1]->fvec);

	
    /* update gammaG0 */
    if (null_step==1) {
      gammaG0[size_active-1] = sprod_ns(w_b, dXc[size_active-1]->fvec);
    } else {
      for (i=0;i<size_active;i++) {
	gammaG0[i] = sprod_ns(w_b, dXc[i]->fvec); 
      }
    }

     /* update proximal_rhs */
    for (i=0;i<size_active;i++) {
      proximal_rhs[i] = delta[i] - rho/(1+rho)*gammaG0[i];
    }


    /* solve QP to update alpha */
    dual_obj = 0; 
    time_t mosek_start, mosek_end;
    time(&mosek_start);
    r = mosek_qp_optimize(G, proximal_rhs, alpha, (long) size_active, C, &dual_obj,rho);
    time(&mosek_end);
#if(DEBUG_LEVEL == 1)
    print_time(mosek_start, mosek_end, "Mosek solver");
#endif
    /* DEBUG */
    //printf("r: %d\n", r); fflush(stdout);
    /* END DEBUG */

    clear_nvector(w,sm->sizePsi);
    for (j=0;j<size_active;j++) {
      if (alpha[j]>C*ALPHA_THRESHOLD) {
	add_vector_ns(w,dXc[j]->fvec,alpha[j]/(1+rho));
      }
    }

    z_k_norm = sqrt(sprod_nn(w,w,sm->sizePsi)); 

    add_vector_nn(w, w_b, sm->sizePsi, rho/(1+rho));

    
    /* detect if step size too small */
    sigma_k = 0; 
    alphasum = 0; 
    for (j=0;j<size_active;j++) {
      sigma_k += alpha[j]*cut_error[j]; 
      alphasum+=alpha[j]; 
    }
    sigma_k/=C; 
    gTd = -C*(sprod_ns(w,new_constraint) - sprod_ns(w_b,new_constraint));

#if (DEBUG_LEVEL>0)
    for (j=0;j<size_active;j++) {
      printf("alpha[%d]: %.8g, cut_error[%d]: %.8g\n", j, alpha[j], j, cut_error[j]);
    }
    printf("sigma_k: %.8g\n", sigma_k); 
    printf("alphasum: %.8g\n", alphasum);
    printf("g^T d: %.8g\n", gTd); 
    fflush(stdout); 
#endif


    /* update cleanup information */
    for (j=0;j<size_active;j++) {
      if (alpha[j]<ALPHA_THRESHOLD*C) {
	idle[j]++;
      } else {
        idle[j]=0;
      }
    }

  new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile,
		  frac_sim, Fweight, dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation,
		  &margin2, datasetStartIdx, chunkSz, eid, chunkid);
 //   new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile, frac_sim, Fweight, dataset_stats_file, rho);
    value = margin2 - sprod_ns(w, new_constraint);

    margin -= sprod_ns(w_prev, new_constraint); //(Ajay: ONLINE LEARNING) IMPT NOTE --> constant addition to the loss ..
    	  	  	  	  	  	  	  	  	  	  	  // model score using w_prev values ('-' is used because the terms are reversed in the code)

    /* print primal objective */
    primal_obj = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value + Cdash*margin/numChunks; // Ajay: Change in obj involing both hamming and F1 loss;
     
#if (DEBUG_LEVEL>0)
    printf("ITER PRIMAL_OBJ %.4f\n", primal_obj); fflush(stdout);
#endif
    
 
    temp_var = sprod_nn(w_b,w_b,sm->sizePsi); 
    proximal_term = 0.0;
    for (i=1;i<sm->sizePsi+1;i++) {
      proximal_term += (w[i]-w_b[i])*(w[i]-w_b[i]);
    }
    
    reg_master_obj = -dual_obj+0.5*rho*temp_var/(1+rho);
    expected_descent = reg_master_obj - primal_obj_b;

    v_k = (reg_master_obj - proximal_term*rho/2) - primal_obj_b; 

    primal_lower_bound = MAX(primal_lower_bound, reg_master_obj - 0.5*rho*(1+rho)*proximal_term);

#if (DEBUG_LEVEL>0)
    printf("ITER REG_MASTER_OBJ: %.4f\n", reg_master_obj);
    printf("ITER EXPECTED_DESCENT: %.4f\n", expected_descent);
    printf("ITER PRIMLA_OBJ_B: %.4f\n", primal_obj_b);
    printf("ITER RHO: %.4f\n", rho);
    printf("ITER ||w-w_b||^2: %.4f\n", proximal_term);
    printf("ITER PRIMAL_LOWER_BOUND: %.4f\n", primal_lower_bound);
    printf("ITER V_K: %.4f\n", v_k); 
#endif
    obj_difference = primal_obj - primal_obj_b; 


    if (primal_obj<primal_obj_b+kappa*expected_descent) {
      /* extra condition to be met */
      if ((gTd>m2*v_k)||(rho<min_rho+1E-8)) {
#if (DEBUG_LEVEL>0)
	printf("SERIOUS STEP\n");
#endif
	/* update cut_error */
	for (i=0;i<size_active;i++) {
	  cut_error[i] -= (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi)); 
	  cut_error[i] -= C*sprod_ns(w_b, dXc[i]->fvec); 
	  cut_error[i] += (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi));
	  cut_error[i] += C*sprod_ns(w, dXc[i]->fvec); 
	}
	primal_obj_b = primal_obj;
	for (i=1;i<sm->sizePsi+1;i++) {
	  w_b[i] = w[i];
	}
	null_step = 0;
	serious_counter++;	
      } else {
	/* increase step size */
#if (DEBUG_LEVEL>0)
	printf("NULL STEP: SS(ii) FAILS.\n");
#endif
	serious_counter--; 
	rho = MAX(rho/10,min_rho);
      }
    } else { /* no sufficient decrease */
      serious_counter--; 
      if ((cut_error[size_active-1]>m3*last_sigma_k)&&(fabs(obj_difference)>last_z_k_norm+last_sigma_k)) {
#if (DEBUG_LEVEL>0)
	printf("NULL STEP: NS(ii) FAILS.\n");
#endif
	rho = MIN(10*rho,max_rho);
      } 
#if (DEBUG_LEVEL>0)
      else printf("NULL STEP\n");
#endif
    }
    /* update last_sigma_k */
    last_sigma_k = sigma_k; 
    last_z_k_norm = z_k_norm; 


    /* break away from while loop if more than certain proportioal decrease in primal objective */
    if (primal_obj_b/initial_primal_obj<1-decrease_proportion) {
      suff_decrease_cond = 1; 
    }

    /* clean up */
    if (iter % CLEANUP_CHECK == 0) {
      size_active = resize_cleanup(size_active, idle, alpha, delta, gammaG0, proximal_rhs, G, dXc, cut_error);
    }

	time(&iter_end);

#if (DEBUG_LEVEL==1)
	char msg[20];
	sprintf(msg,"ITER %d",iter);
    print_time(iter_start, iter_end, msg);
#endif
  } // end cutting plane while loop 

  printf(" Inner loop optimization finished.\n"); fflush(stdout); 
      
  /* free memory */
  for (j=0;j<size_active;j++) {
    free(G[j]);
    free_example(dXc[j],0);	
  }
  free(G);
  free(dXc);
  free(alpha);
  free(delta);
  free_svector(new_constraint);
  free(idle);
  free(gammaG0);
  free(proximal_rhs);
  free(cut_error); 

  /* copy and free */
  for (i=1;i<sm->sizePsi+1;i++) {
    w[i] = w_b[i];
  }
  free(w_b);

  return(primal_obj_b);

}
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache,
                               EXAMPLE *ex, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm) {
    long i, j;
    double xi;
    double *alpha;
    double **G; /* Gram matrix */
    DOC **dXc; /* constraint matrix */
    double *delta; /* rhs of constraints */
    SVECTOR *new_constraint;
    double dual_obj, alphasum;
    int iter, size_active;
    double value;
    int r;
    int *idle; /* for cleaning up */
    double margin;
    double primal_obj;
    double *proximal_rhs;
    double *gammaG0 = NULL;
    double min_rho = 0.001;
    double max_rho;
    double serious_counter = 0;
    double rho = 1.0; /* temporarily set it to 1 first */

    double expected_descent, primal_obj_b = -1, reg_master_obj;
    int null_step = 1;
    double *w_b;
    double kappa = 0.1;
    double temp_var;
    double proximal_term, primal_lower_bound;

    double v_k;
    double obj_difference;
    double *cut_error; // cut_error[i] = alpha_{k,i} at current center x_k
    double sigma_k;
    double m2 = 0.2;
    double m3 = 0.9;
    double gTd;
    double last_sigma_k = 0;

    double initial_primal_obj;
    int suff_decrease_cond = 0;
    double decrease_proportion = 0.2; // start from 0.2 first

    double z_k_norm;
    double last_z_k_norm = 0;


    /* set parameters for hideo solver */
    LEARN_PARM lparm;
    KERNEL_PARM kparm;
    MODEL *svmModel = NULL;
    lparm.biased_hyperplane = 0;
    lparm.epsilon_crit = MIN(epsilon, 0.001);
    lparm.svm_c = C;
    lparm.sharedslack = 1;
    kparm.kernel_type = LINEAR;

    lparm.remove_inconsistent = 0;
    lparm.skip_final_opt_check = 0;
    lparm.svm_maxqpsize = 10;
    lparm.svm_newvarsinqp = 0;
    lparm.svm_iter_to_shrink = -9999;
    lparm.maxiter = 100000;
    lparm.kernel_cache_size = 40;
    lparm.eps = epsilon;
    lparm.transduction_posratio = -1.0;
    lparm.svm_costratio = 1.0;
    lparm.svm_costratio_unlab = 1.0;
    lparm.svm_unlabbound = 1E-5;
    lparm.epsilon_a = 1E-10;  /* changed from 1e-15 */
    lparm.compute_loo = 0;
    lparm.rho = 1.0;
    lparm.xa_depth = 0;
    strcpy(lparm.alphafile, "");
    kparm.poly_degree = 3;
    kparm.rbf_gamma = 1.0;
    kparm.coef_lin = 1;
    kparm.coef_const = 1;
    strcpy(kparm.custom, "empty");


    w_b = create_nvector(sm->sizePsi);
    clear_nvector(w_b, sm->sizePsi);
    /* warm start */
    for (i = 1; i < sm->sizePsi + 1; i++) {
        w_b[i] = w[i];
    }

    iter = 0;
    size_active = 0;
    xi = 0.0;
    alpha = NULL;
    G = NULL;
    dXc = NULL;
    delta = NULL;
    idle = NULL;

    proximal_rhs = NULL;
    cut_error = NULL;

    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm);
    value = margin - sprod_ns(w, new_constraint);

    primal_obj_b = 0.5 * sprod_nn(w_b, w_b, sm->sizePsi) + C * value;
    primal_obj = 0.5 * sprod_nn(w, w, sm->sizePsi) + C * value;
    primal_lower_bound = 0;
    expected_descent = -primal_obj_b;
    initial_primal_obj = primal_obj_b;

    //max_rho = C;
    max_rho = 100 * C; // tree-edge loss not within 0-1

    printf("Running CCCP inner loop solver: ");
    fflush(stdout);

    while ((!suff_decrease_cond) && (expected_descent < -epsilon) && (iter < MAX_ITER)) {
        iter += 1;
        size_active += 1;

#if (DEBUG_LEVEL > 0)
        printf("ITER %d\n", iter);
#endif
        printf(".");
        fflush(stdout);


        /* add  constraint */
        dXc = (DOC **) realloc(dXc, sizeof(DOC *) * size_active);
        assert(dXc != NULL);
        dXc[size_active - 1] = (DOC *) malloc(sizeof(DOC));
        dXc[size_active - 1]->fvec = new_constraint;
        dXc[size_active - 1]->slackid = 1; // only one common slackid (one-slack)
        dXc[size_active - 1]->costfactor = 1.0;

        delta = (double *) realloc(delta, sizeof(double) * size_active);
        assert(delta != NULL);
        delta[size_active - 1] = margin;
        alpha = (double *) realloc(alpha, sizeof(double) * size_active);
        assert(alpha != NULL);
        alpha[size_active - 1] = 0.0;
        idle = (int *) realloc(idle, sizeof(int) * size_active);
        assert(idle != NULL);
        idle[size_active - 1] = 0;
        /* proximal point */
        proximal_rhs = (double *) realloc(proximal_rhs, sizeof(double) * size_active);
        assert(proximal_rhs != NULL);
        cut_error = (double *) realloc(cut_error, sizeof(double) * size_active);
        assert(cut_error != NULL);
        // note g_i = - new_constraint
        cut_error[size_active - 1] = C * (sprod_ns(w_b, new_constraint) - sprod_ns(w, new_constraint));
        cut_error[size_active - 1] += (primal_obj_b - 0.5 * sprod_nn(w_b, w_b, sm->sizePsi));
        cut_error[size_active - 1] -= (primal_obj - 0.5 * sprod_nn(w, w, sm->sizePsi));

        gammaG0 = (double *) realloc(gammaG0, sizeof(double) * size_active);
        assert(gammaG0 != NULL);

        /* update Gram matrix */
        G = (double **) realloc(G, sizeof(double *) * size_active);
        assert(G != NULL);
        G[size_active - 1] = NULL;
        for (j = 0; j < size_active; j++) {
            G[j] = (double *) realloc(G[j], sizeof(double) * size_active);
            assert(G[j] != NULL);
        }
        for (j = 0; j < size_active - 1; j++) {
            G[size_active - 1][j] = sprod_ss(dXc[size_active - 1]->fvec, dXc[j]->fvec);
            G[j][size_active - 1] = G[size_active - 1][j];
        }
        G[size_active - 1][size_active - 1] = sprod_ss(dXc[size_active - 1]->fvec, dXc[size_active - 1]->fvec);


        /* update gammaG0 */
        if (null_step == 1) {
            gammaG0[size_active - 1] = sprod_ns(w_b, dXc[size_active - 1]->fvec);
        } else {
            for (i = 0; i < size_active; i++) {
                gammaG0[i] = sprod_ns(w_b, dXc[i]->fvec);
            }
        }

        /* update proximal_rhs */
        for (i = 0; i < size_active; i++) {
            proximal_rhs[i] = (1 + rho) * delta[i] - rho * gammaG0[i];
        }


        /* solve QP to update alpha */
        //dual_obj = 0;
        //r = mosek_qp_optimize(G, proximal_rhs, alpha, (long) size_active, C, &dual_obj,rho);
        if (size_active > 1) {
            if (svmModel != NULL) free_model(svmModel, 0);
            svmModel = (MODEL *) my_malloc(sizeof(MODEL));
            svm_learn_optimization(dXc, proximal_rhs, size_active, sm->sizePsi, &lparm, &kparm, NULL, svmModel, alpha);
        } else {
            assert(size_active == 1);
            alpha[0] = C;
        }
        /* DEBUG */
        //printf("r: %d\n", r); fflush(stdout);
        /* END DEBUG */

        clear_nvector(w, sm->sizePsi);
        for (j = 0; j < size_active; j++) {
            if (alpha[j] > C * ALPHA_THRESHOLD) {
                add_vector_ns(w, dXc[j]->fvec, alpha[j] / (1 + rho));
            }
        }

        /* compute dual obj */
        dual_obj = +0.5 * (1 + rho) * sprod_nn(w, w, sm->sizePsi);
        for (j = 0; j < size_active; j++) {
            dual_obj -= proximal_rhs[j] / (1 + rho) * alpha[j];
        }

        z_k_norm = sqrt(sprod_nn(w, w, sm->sizePsi));

        add_vector_nn(w, w_b, sm->sizePsi, rho / (1 + rho));


        /* detect if step size too small */
        sigma_k = 0;
        alphasum = 0;
        for (j = 0; j < size_active; j++) {
            sigma_k += alpha[j] * cut_error[j];
            alphasum += alpha[j];
        }
        sigma_k /= C;
        gTd = -C * (sprod_ns(w, new_constraint) - sprod_ns(w_b, new_constraint));

#if (DEBUG_LEVEL > 0)
        for (j=0;j<size_active;j++) {
          printf("alpha[%d]: %.8g, cut_error[%d]: %.8g\n", j, alpha[j], j, cut_error[j]);
        }
        printf("sigma_k: %.8g\n", sigma_k);
        printf("alphasum: %.8g\n", alphasum);
        printf("g^T d: %.8g\n", gTd);
        fflush(stdout);
#endif


        /* update cleanup information */
        for (j = 0; j < size_active; j++) {
            if (alpha[j] < ALPHA_THRESHOLD * C) {
                idle[j]++;
            } else {
                idle[j] = 0;
            }
        }

        new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm);
        value = margin - sprod_ns(w, new_constraint);

        /* print primal objective */
        primal_obj = 0.5 * sprod_nn(w, w, sm->sizePsi) + C * value;

#if (DEBUG_LEVEL > 0)
        printf("ITER PRIMAL_OBJ %.4f\n", primal_obj); fflush(stdout);
#endif


        temp_var = sprod_nn(w_b, w_b, sm->sizePsi);
        proximal_term = 0.0;
        for (i = 1; i < sm->sizePsi + 1; i++) {
            proximal_term += (w[i] - w_b[i]) * (w[i] - w_b[i]);
        }

        reg_master_obj = -dual_obj + 0.5 * rho * temp_var / (1 + rho);
        expected_descent = reg_master_obj - primal_obj_b;

        v_k = (reg_master_obj - proximal_term * rho / 2) - primal_obj_b;

        primal_lower_bound = MAX(primal_lower_bound, reg_master_obj - 0.5 * rho * (1 + rho) * proximal_term);

#if (DEBUG_LEVEL > 0)
        printf("ITER REG_MASTER_OBJ: %.4f\n", reg_master_obj);
        printf("ITER EXPECTED_DESCENT: %.4f\n", expected_descent);
        printf("ITER PRIMLA_OBJ_B: %.4f\n", primal_obj_b);
        printf("ITER RHO: %.4f\n", rho);
        printf("ITER ||w-w_b||^2: %.4f\n", proximal_term);
        printf("ITER PRIMAL_LOWER_BOUND: %.4f\n", primal_lower_bound);
        printf("ITER V_K: %.4f\n", v_k);
#endif
        obj_difference = primal_obj - primal_obj_b;

        if (primal_obj < primal_obj_b + kappa * expected_descent) {
            /* extra condition to be met */
            if ((gTd > m2 * v_k) || (rho < min_rho + 1E-8)) {
#if (DEBUG_LEVEL > 0)
                printf("SERIOUS STEP\n");
#endif
                /* update cut_error */
                for (i = 0; i < size_active; i++) {
                    cut_error[i] -= (primal_obj_b - 0.5 * sprod_nn(w_b, w_b, sm->sizePsi));
                    cut_error[i] -= C * sprod_ns(w_b, dXc[i]->fvec);
                    cut_error[i] += (primal_obj - 0.5 * sprod_nn(w, w, sm->sizePsi));
                    cut_error[i] += C * sprod_ns(w, dXc[i]->fvec);
                }
                primal_obj_b = primal_obj;
                for (i = 1; i < sm->sizePsi + 1; i++) {
                    w_b[i] = w[i];
                }
                null_step = 0;
                serious_counter++;
            } else {
                /* increase step size */
#if (DEBUG_LEVEL > 0)
                printf("NULL STEP: SS(ii) FAILS.\n");
#endif
                serious_counter--;
                rho = MAX(rho / 10, min_rho);
            }
        } else { /* no sufficient decrease */
            serious_counter--;

            if ((cut_error[size_active - 1] > m3 * last_sigma_k) &&
                (fabs(obj_difference) > last_z_k_norm + last_sigma_k)) {
#if (DEBUG_LEVEL > 0)
                printf("NULL STEP: NS(ii) FAILS.\n");
#endif
                rho = MIN(10 * rho, max_rho);
            }
#if (DEBUG_LEVEL > 0)
            else printf("NULL STEP\n");
#endif
        }
        /* update last_sigma_k */
        last_sigma_k = sigma_k;
        last_z_k_norm = z_k_norm;


        /* break away from while loop if more than certain proportioal decrease in primal objective */
        if (primal_obj_b / initial_primal_obj < 1 - decrease_proportion) {
            suff_decrease_cond = 1;
        }

        /* clean up */
        if (iter % CLEANUP_CHECK == 0) {
            //size_active = resize_cleanup(size_active, idle, alpha, delta, gammaG0, proximal_rhs, G, dXc, cut_error);
            size_active = resize_cleanup(size_active, &idle, &alpha, &delta, &gammaG0, &proximal_rhs, &G, &dXc,
                                         &cut_error);
        }


    } // end cutting plane while loop

    printf(" Inner loop optimization finished.\n");
    fflush(stdout);

    /* free memory */
    for (j = 0; j < size_active; j++) {
        free(G[j]);
        free_example(dXc[j], 0);
    }
    free(G);
    free(dXc);
    free(alpha);
    free(delta);
    free_svector(new_constraint);
    free(idle);
    free(gammaG0);
    free(proximal_rhs);
    free(cut_error);

    /* copy and free */
    for (i = 1; i < sm->sizePsi + 1; i++) {
        w[i] = w_b[i];
    }
    free(w_b);

    return (primal_obj_b);

}