Ejemplo n.º 1
0
CFLOAT single_kernel(KERNEL_PARM *kernel_parm, SVECTOR *a, SVECTOR *b)
     /* Evaluate the kernel selected by kernel_parm->kernel_type on the
        vector pair (a,b) and return the value as CFLOAT.  Every call
        increments the kernel_cache_statistic counter.  A kernel type
        outside 0..4 is treated as a fatal error. */
{
  kernel_cache_statistic++;

  /* 0: linear kernel, plain sparse dot product */
  if(kernel_parm->kernel_type == 0) {
    return((CFLOAT)sprod_ss(a,b));
  }

  /* 1: polynomial kernel, (coef_lin*<a,b> + coef_const)^poly_degree */
  if(kernel_parm->kernel_type == 1) {
    return((CFLOAT)pow(kernel_parm->coef_lin*sprod_ss(a,b)+kernel_parm->coef_const,
                       (double)kernel_parm->poly_degree));
  }

  /* 2: radial basis function; the squared distance is assembled from
        the squared norms stored in the vectors and the dot product */
  if(kernel_parm->kernel_type == 2) {
    return((CFLOAT)exp(-kernel_parm->rbf_gamma*(a->twonorm_sq-2*sprod_ss(a,b)+b->twonorm_sq)));
  }

  /* 3: sigmoid kernel, tanh(coef_lin*<a,b> + coef_const) */
  if(kernel_parm->kernel_type == 3) {
    return((CFLOAT)tanh(kernel_parm->coef_lin*sprod_ss(a,b)+kernel_parm->coef_const));
  }

  /* 4: user supplied kernel (see kernel.h) */
  if(kernel_parm->kernel_type == 4) {
    return((CFLOAT)custom_kernel(kernel_parm,a,b));
  }

  /* anything else is unsupported */
#ifdef MATLAB_MEX
  mexErrMsgTxt(ERR005);
#else
  printf("Error: Unknown kernel function\n");
  exit(1);
#endif
}
Ejemplo n.º 2
0
SVECTOR *create_svector(WORD *words,char *userdefined,double factor)
     /* Allocate a new SVECTOR holding private copies of its inputs.

        words        feature array terminated by an entry whose wnum is 0;
                     the terminator is copied as well.
        userdefined  NUL-terminated string copied into the vector.  A NULL
                     pointer is accepted and stored as an empty string, so
                     vec->userdefined is always a valid C string.
        factor       stored unchanged in vec->factor.

        Returns the new vector with kernel_id 0, next NULL and twonorm_sq
        set to sprod_ss(vec,vec).  All memory comes from my_malloc. */
{
  SVECTOR *vec;
  long    fnum,i;

  /* number of WORD entries including the wnum==0 terminator */
  fnum=0;
  while(words[fnum].wnum) {
    fnum++;
  }
  fnum++;
  vec = (SVECTOR *)my_malloc(sizeof(SVECTOR));
  vec->words = (WORD *)my_malloc(sizeof(WORD)*(fnum));
  for(i=0;i<fnum;i++) { 
      vec->words[i]=words[i];
  }

  /* length of the user string; a NULL pointer counts as length 0 */
  fnum=0;
  if(userdefined) {
    while(userdefined[fnum]) {
      fnum++;
    }
  }
  vec->userdefined = (char *)my_malloc(sizeof(char)*(fnum+1));
  for(i=0;i<fnum;i++) { 
      vec->userdefined[i]=userdefined[i];
  }
  vec->userdefined[fnum]=0;

  vec->kernel_id=0;
  vec->next=NULL;
  vec->factor=factor;

  /* computed last so that sprod_ss never sees a partially initialised
     vector */
  vec->twonorm_sq=sprod_ss(vec,vec);
  return(vec);
}
Ejemplo n.º 3
0
CFLOAT kernel(KERNEL_PARM *kernel_parm, DOC *a, DOC *b)
     /* Evaluate the kernel selected by kernel_parm->kernel_type on the
        documents a and b.  Each call increments kernel_cache_statistic.
        An unknown kernel type prints an error and terminates. */
{
  CFLOAT k_ab;

  kernel_cache_statistic++;

  switch(kernel_parm->kernel_type) {
    case 0:
      /* linear: dot product of the two word lists */
      k_ab=(CFLOAT)sprod_ss(a->words,b->words);
      break;
    case 1:
      /* polynomial */
      k_ab=(CFLOAT)pow(kernel_parm->coef_lin*sprod_ss(a->words,b->words)+kernel_parm->coef_const,
                       (double)kernel_parm->poly_degree);
      break;
    case 2:
      /* radial basis function, using the squared norms stored in the docs */
      k_ab=(CFLOAT)exp(-kernel_parm->rbf_gamma*(a->twonorm_sq-2*sprod_ss(a->words,b->words)+b->twonorm_sq));
      break;
    case 3:
      /* sigmoid neural net */
      k_ab=(CFLOAT)tanh(kernel_parm->coef_lin*sprod_ss(a->words,b->words)+kernel_parm->coef_const);
      break;
    case 4:
      /* custom kernel supplied in file kernel.h */
      k_ab=(CFLOAT)custom_kernel(kernel_parm,a,b);
      break;
    default:
      printf("Error: Unknown kernel function\n");
      exit(1);
  }

  return(k_ab);
}
Ejemplo n.º 4
0
double sprod_i(DOC *a, DOC *b, int i, int j){
       /* Dot product between vector i of document a and vector j of
          document b (compatibility with standard svm-light).
          Returns 0 when either document holds no vectors; terminates
          the program when one of the two selected vectors is NULL. */

       /* nothing to multiply unless both documents carry vectors */
       if(a->num_of_vectors<=0 || b->num_of_vectors<=0){
             return 0;
       }

       /* both selected slots must be populated */
       if(a->vectors[i]==NULL || b->vectors[j]==NULL){
             printf("ERROR: first vector not defined (with a traditional kernel it must be defined)\n");
             exit(-1);
       }

       return sprod_ss(a->vectors[i]->words,b->vectors[j]->words);
}
Ejemplo n.º 5
0
double model_length_n(MODEL *model)
     /* Return the Euclidean length of the weight vector of a linear
        model.  The weights are accumulated in a dense vector of
        model->totwords+1 entries from the support vectors with indices
        1..sv_num-1, scaled by their alpha values.  Terminates the
        program for any non-linear kernel. */
{
  long     sv;
  long     dim=model->totwords+1;
  double   norm_sq;
  double   *dense_w;
  SVECTOR  *sparse_w;

  if(model->kernel_parm.kernel_type != LINEAR) {
    printf("ERROR: model_length_n applies only to linear kernel!\n");
    exit(1);
  }

  /* dense accumulator, zeroed before use */
  dense_w=create_nvector(dim);
  clear_nvector(dense_w,dim);

  /* support vector indices start at 1 */
  sv=1;
  while(sv<model->sv_num) {
    add_list_n_ns(dense_w,model->supvec[sv]->fvec,model->alpha[sv]);
    sv++;
  }

  /* convert to sparse form so that sprod_ss yields the squared norm */
  sparse_w=create_svector_n(dense_w,dim,NULL,1.0);
  norm_sq=sprod_ss(sparse_w,sparse_w);

  free(dense_w);
  free_svector(sparse_w);
  return(sqrt(norm_sq));
}
Ejemplo n.º 6
0
double single_kernel(KERNEL_PARM *kernel_parm, SVECTOR *a, SVECTOR *b)
     /* Evaluate the kernel selected by kernel_parm->kernel_type on the
        vector pair (a,b).  Each call increments kernel_cache_statistic.
        For the RBF kernel a negative twonorm_sq in a or b is replaced
        by the freshly computed squared norm, i.e. the arguments may be
        modified.  An unknown kernel type prints an error and
        terminates. */
{
  kernel_cache_statistic++;

  if(kernel_parm->kernel_type == LINEAR) {
    /* plain dot product */
    return(sprod_ss(a,b));
  }
  else if(kernel_parm->kernel_type == POLY) {
    /* (coef_lin*<a,b> + coef_const)^poly_degree */
    return(pow(kernel_parm->coef_lin*sprod_ss(a,b)+kernel_parm->coef_const,
               (double)kernel_parm->poly_degree));
  }
  else if(kernel_parm->kernel_type == RBF) {
    /* fill in squared norms that are still marked as missing (<0) */
    if(a->twonorm_sq<0) {
      a->twonorm_sq=sprod_ss(a,a);
    }
    if(b->twonorm_sq<0) {
      b->twonorm_sq=sprod_ss(b,b);
    }
    return(exp(-kernel_parm->rbf_gamma*(a->twonorm_sq-2*sprod_ss(a,b)+b->twonorm_sq)));
  }
  else if(kernel_parm->kernel_type == SIGMOID) {
    /* tanh(coef_lin*<a,b> + coef_const) */
    return(tanh(kernel_parm->coef_lin*sprod_ss(a,b)+kernel_parm->coef_const));
  }
  else if(kernel_parm->kernel_type == CUSTOM) {
    /* custom kernel supplied in file kernel.h */
    return(custom_kernel(kernel_parm,a,b));
  }
  else {
    printf("Error: Unknown kernel function\n");
    exit(1);
  }
}
/*
 * Inner cutting-plane solver with a proximal term (the serious/null step
 * logic and the adaptive rho look like a proximal bundle method -- the
 * exact variant is not documented here).
 *
 * w        weight vector, accessed at indices 1..sm->sizePsi.  On entry it
 *          is the warm start; it is used as scratch space for the trial
 *          point during the loop and on return holds the last accepted
 *          center w_b.
 * m .. chunkid  (m, fycache, ex, sm, sparm, tmpdir, trainfile, frac_sim,
 *          Fweight, dataset_stats_file, rho_admm, isExhaustive,
 *          isLPrelaxation, datasetStartIdx, chunkSz, eid, chunkid) are
 *          only forwarded to find_cutting_plane(); sm->sizePsi is also
 *          read here.
 * MAX_ITER upper bound on the number of loop iterations.
 * C        weight of the constraint violation term; also the cap max_rho.
 * epsilon  the loop stops once expected_descent >= -epsilon.
 * Cdash, numChunks  the term Cdash*margin/numChunks is added to the
 *          primal objective.
 * w_prev   vector whose product with each new constraint is subtracted
 *          from margin.
 *
 * Returns primal_obj_b, the primal objective at the last accepted center.
 *
 * NOTE(review): the status r returned by mosek_qp_optimize() is stored but
 * never inspected.
 * NOTE(review): xi and primal_lower_bound are assigned but never read
 * outside debug output; serious_counter is only incremented/decremented.
 */
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex,
		STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, char *tmpdir, char * trainfile, double frac_sim, double Fweight,
		char *dataset_stats_file, double rho_admm, long isExhaustive, long isLPrelaxation, double Cdash, int datasetStartIdx, int chunkSz,
		int eid, int chunkid, double *w_prev, int numChunks) {
//	  printf("Addr. of w (inside cp_algo) %x\t%x\n",w,sm->w);
  long i,j;
  double xi;
  double *alpha;
  double **G; /* Gram matrix */
  DOC **dXc; /* constraint matrix */
  double *delta; /* rhs of constraints */
  SVECTOR *new_constraint;
  double dual_obj, alphasum;
  int iter, size_active; 
  double value;
  int r;
  int *idle; /* for cleaning up */
  double margin;
  double primal_obj;
  double *proximal_rhs;
  double *gammaG0=NULL;
  double min_rho = 0.001;
  double max_rho;
  double serious_counter=0;
  double rho = 1.0; /* temporarily set it to 1 first */

  double expected_descent, primal_obj_b=-1, reg_master_obj;
  int null_step=1;
  double *w_b;
  double kappa=0.1;
  double temp_var;
  double proximal_term, primal_lower_bound;

  double v_k; 
  double obj_difference; 
  double *cut_error; // cut_error[i] = alpha_{k,i} at current center x_k
  double sigma_k; 
  double m2 = 0.2;
  double m3 = 0.9;
  double gTd; 
  double last_sigma_k=0; 

  double initial_primal_obj;
  int suff_decrease_cond=0;
  double decrease_proportion = 0.2; // start from 0.2 first 

  double z_k_norm;
  double last_z_k_norm=0;

  /* w_b is the current center; it starts as a copy of w */
  w_b = create_nvector(sm->sizePsi);
  clear_nvector(w_b,sm->sizePsi);
  /* warm start */
  for (i=1;i<sm->sizePsi+1;i++) {
    w_b[i] = w[i];
  }

  iter = 0;
  size_active = 0;
  xi = 0.0;
  alpha = NULL;
  G = NULL;
  dXc = NULL;
  delta = NULL;
  idle = NULL;

  proximal_rhs = NULL;
  cut_error = NULL; 

  printf("ITER 0 \n(before cutting plane) \n");
  /* find_cutting_plane writes both margin and margin2; margin2 feeds the
     constraint right-hand side, margin the Cdash term */
  double margin2;
  new_constraint = find_cutting_plane (ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile, frac_sim,
		  Fweight, dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation, &margin2,
		  datasetStartIdx, chunkSz, eid, chunkid);
  value = margin2 - sprod_ns(w, new_constraint);

  margin -= sprod_ns(w_prev, new_constraint); //(Ajay: ONLINE LEARNING) IMPT NOTE --> constant addition to the loss ..
  	  	  	  	  	  	  	  	  	  	  	  // model score using w_prev values ('-' is used because the terms are reversed in the code)
	
  /* objective at the center and at w; identical here because w_b == w */
  primal_obj_b = 0.5*sprod_nn(w_b,w_b,sm->sizePsi)+C*value + Cdash*margin/numChunks; // Ajay: Change in obj involving both hamming and F1 loss
  primal_obj = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value + Cdash*margin/numChunks; // Ajay: Change in obj involving both hamming and F1 loss
  primal_lower_bound = 0;
  expected_descent = -primal_obj_b;
  initial_primal_obj = primal_obj_b; 

  max_rho = C; 

  printf("Running CCCP inner loop solver: \n"); fflush(stdout);

  time_t iter_start, iter_end;

  /* iterate until enough proportional decrease was achieved, the expected
     descent is no longer below -epsilon, or MAX_ITER is reached */
  while ((!suff_decrease_cond)&&(expected_descent<-epsilon)&&(iter<MAX_ITER)) { 
    iter+=1;
    size_active+=1;

    time(&iter_start);

#if (DEBUG_LEVEL>0)
    printf("ITER %d\n", iter); 
#endif
    printf("."); fflush(stdout); 

    /* add  constraint */
    /* every per-constraint array below is grown by one slot */
    dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active);
    assert(dXc!=NULL);
    /* NOTE(review): unlike the realloc calls, this malloc result is not checked */
    dXc[size_active-1] = (DOC*)malloc(sizeof(DOC));
    dXc[size_active-1]->fvec = new_constraint; 
    dXc[size_active-1]->slackid = 1; // only one common slackid (one-slack)
    dXc[size_active-1]->costfactor = 1.0;

    delta = (double*)realloc(delta, sizeof(double)*size_active);
    assert(delta!=NULL);
    delta[size_active-1] = margin2; // Ajay: changing for the formulation combining hamming and F1loss
    alpha = (double*)realloc(alpha, sizeof(double)*size_active);
    assert(alpha!=NULL);
    alpha[size_active-1] = 0.0;
    idle = (int*)realloc(idle, sizeof(int)*size_active);
    assert(idle!=NULL); 
    idle[size_active-1] = 0;
    /* proximal point */
    proximal_rhs = (double*)realloc(proximal_rhs, sizeof(double)*size_active);
    assert(proximal_rhs!=NULL); 
    cut_error = (double*)realloc(cut_error, sizeof(double)*size_active); 
    assert(cut_error!=NULL); 
    // note g_i = - new_constraint
    cut_error[size_active-1] = C*(sprod_ns(w_b, new_constraint) - sprod_ns(w, new_constraint)); 
    cut_error[size_active-1] += (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi)); 
    cut_error[size_active-1] -= (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi)); 

    gammaG0 = (double*)realloc(gammaG0, sizeof(double)*size_active);
    assert(gammaG0!=NULL);
      
    /* update Gram matrix */
    /* grow G to size_active x size_active; the new row pointer is set to
       NULL first so that realloc allocates it */
    G = (double**)realloc(G, sizeof(double*)*size_active);
    assert(G!=NULL);
    G[size_active-1] = NULL;
    for (j=0;j<size_active;j++) {
      G[j] = (double*)realloc(G[j], sizeof(double)*size_active);
      assert(G[j]!=NULL);
    }
    /* fill the new row and column symmetrically, then the diagonal */
    for (j=0;j<size_active-1;j++) {
      G[size_active-1][j] = sprod_ss(dXc[size_active-1]->fvec, dXc[j]->fvec);
      G[j][size_active-1] = G[size_active-1][j];
    }
    G[size_active-1][size_active-1] = sprod_ss(dXc[size_active-1]->fvec,dXc[size_active-1]->fvec);

	
    /* update gammaG0 */
    /* gammaG0[i] = <w_b, constraint i>; after a serious step w_b changed,
       so every entry is recomputed, otherwise only the new one */
    if (null_step==1) {
      gammaG0[size_active-1] = sprod_ns(w_b, dXc[size_active-1]->fvec);
    } else {
      for (i=0;i<size_active;i++) {
	gammaG0[i] = sprod_ns(w_b, dXc[i]->fvec); 
      }
    }

     /* update proximal_rhs */
    for (i=0;i<size_active;i++) {
      proximal_rhs[i] = delta[i] - rho/(1+rho)*gammaG0[i];
    }


    /* solve QP to update alpha */
    dual_obj = 0; 
    time_t mosek_start, mosek_end;
    time(&mosek_start);
    r = mosek_qp_optimize(G, proximal_rhs, alpha, (long) size_active, C, &dual_obj,rho);
    time(&mosek_end);
#if(DEBUG_LEVEL == 1)
    print_time(mosek_start, mosek_end, "Mosek solver");
#endif
    /* DEBUG */
    //printf("r: %d\n", r); fflush(stdout);
    /* END DEBUG */

    /* rebuild the trial point in w: first the alpha-weighted sum of the
       active constraints scaled by 1/(1+rho) ... */
    clear_nvector(w,sm->sizePsi);
    for (j=0;j<size_active;j++) {
      if (alpha[j]>C*ALPHA_THRESHOLD) {
	add_vector_ns(w,dXc[j]->fvec,alpha[j]/(1+rho));
      }
    }

    /* norm of that sum, taken before the center is mixed in */
    z_k_norm = sqrt(sprod_nn(w,w,sm->sizePsi)); 

    /* ... then add rho/(1+rho) times the center w_b */
    add_vector_nn(w, w_b, sm->sizePsi, rho/(1+rho));

    
    /* detect if step size too small */
    sigma_k = 0; 
    alphasum = 0; 
    for (j=0;j<size_active;j++) {
      sigma_k += alpha[j]*cut_error[j]; 
      alphasum+=alpha[j]; 
    }
    sigma_k/=C; 
    /* new_constraint still refers to the constraint added this iteration */
    gTd = -C*(sprod_ns(w,new_constraint) - sprod_ns(w_b,new_constraint));

#if (DEBUG_LEVEL>0)
    for (j=0;j<size_active;j++) {
      printf("alpha[%d]: %.8g, cut_error[%d]: %.8g\n", j, alpha[j], j, cut_error[j]);
    }
    printf("sigma_k: %.8g\n", sigma_k); 
    printf("alphasum: %.8g\n", alphasum);
    printf("g^T d: %.8g\n", gTd); 
    fflush(stdout); 
#endif


    /* update cleanup information */
    /* idle[j] counts consecutive iterations with alpha[j] below threshold */
    for (j=0;j<size_active;j++) {
      if (alpha[j]<ALPHA_THRESHOLD*C) {
	idle[j]++;
      } else {
        idle[j]=0;
      }
    }

  /* next cutting plane at the trial point; it is added to dXc at the top
     of the next iteration, or freed after the loop */
  new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile,
		  frac_sim, Fweight, dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation,
		  &margin2, datasetStartIdx, chunkSz, eid, chunkid);
 //   new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile, frac_sim, Fweight, dataset_stats_file, rho);
    value = margin2 - sprod_ns(w, new_constraint);

    margin -= sprod_ns(w_prev, new_constraint); //(Ajay: ONLINE LEARNING) IMPT NOTE --> constant addition to the loss ..
    	  	  	  	  	  	  	  	  	  	  	  // model score using w_prev values ('-' is used because the terms are reversed in the code)

    /* print primal objective */
    primal_obj = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value + Cdash*margin/numChunks; // Ajay: Change in obj involving both hamming and F1 loss
     
#if (DEBUG_LEVEL>0)
    printf("ITER PRIMAL_OBJ %.4f\n", primal_obj); fflush(stdout);
#endif
    
 
    /* temp_var = ||w_b||^2, proximal_term = ||w - w_b||^2 */
    temp_var = sprod_nn(w_b,w_b,sm->sizePsi); 
    proximal_term = 0.0;
    for (i=1;i<sm->sizePsi+1;i++) {
      proximal_term += (w[i]-w_b[i])*(w[i]-w_b[i]);
    }
    
    reg_master_obj = -dual_obj+0.5*rho*temp_var/(1+rho);
    expected_descent = reg_master_obj - primal_obj_b;

    v_k = (reg_master_obj - proximal_term*rho/2) - primal_obj_b; 

    primal_lower_bound = MAX(primal_lower_bound, reg_master_obj - 0.5*rho*(1+rho)*proximal_term);

#if (DEBUG_LEVEL>0)
    printf("ITER REG_MASTER_OBJ: %.4f\n", reg_master_obj);
    printf("ITER EXPECTED_DESCENT: %.4f\n", expected_descent);
    printf("ITER PRIMLA_OBJ_B: %.4f\n", primal_obj_b);
    printf("ITER RHO: %.4f\n", rho);
    printf("ITER ||w-w_b||^2: %.4f\n", proximal_term);
    printf("ITER PRIMAL_LOWER_BOUND: %.4f\n", primal_lower_bound);
    printf("ITER V_K: %.4f\n", v_k); 
#endif
    obj_difference = primal_obj - primal_obj_b; 


    /* sufficient decrease test: the trial objective must beat the center
       by at least the fraction kappa of the expected descent */
    if (primal_obj<primal_obj_b+kappa*expected_descent) {
      /* extra condition to be met */
      if ((gTd>m2*v_k)||(rho<min_rho+1E-8)) {
#if (DEBUG_LEVEL>0)
	printf("SERIOUS STEP\n");
#endif
	/* update cut_error */
	/* re-express every cut error relative to the new center w */
	for (i=0;i<size_active;i++) {
	  cut_error[i] -= (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi)); 
	  cut_error[i] -= C*sprod_ns(w_b, dXc[i]->fvec); 
	  cut_error[i] += (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi));
	  cut_error[i] += C*sprod_ns(w, dXc[i]->fvec); 
	}
	/* accept the trial point as the new center */
	primal_obj_b = primal_obj;
	for (i=1;i<sm->sizePsi+1;i++) {
	  w_b[i] = w[i];
	}
	null_step = 0;
	serious_counter++;	
      } else {
	/* increase step size */
	/* rho is divided by 10, but never below min_rho */
#if (DEBUG_LEVEL>0)
	printf("NULL STEP: SS(ii) FAILS.\n");
#endif
	serious_counter--; 
	rho = MAX(rho/10,min_rho);
      }
    } else { /* no sufficient decrease */
      serious_counter--; 
      /* rho is multiplied by 10, but never above max_rho */
      if ((cut_error[size_active-1]>m3*last_sigma_k)&&(fabs(obj_difference)>last_z_k_norm+last_sigma_k)) {
#if (DEBUG_LEVEL>0)
	printf("NULL STEP: NS(ii) FAILS.\n");
#endif
	rho = MIN(10*rho,max_rho);
      } 
#if (DEBUG_LEVEL>0)
      else printf("NULL STEP\n");
#endif
    }
    /* update last_sigma_k */
    last_sigma_k = sigma_k; 
    last_z_k_norm = z_k_norm; 


    /* break away from while loop if more than a certain proportional decrease in primal objective */
    /* NOTE(review): initial_primal_obj is used as a divisor without a zero check */
    if (primal_obj_b/initial_primal_obj<1-decrease_proportion) {
      suff_decrease_cond = 1; 
    }

    /* clean up */
    /* every CLEANUP_CHECK iterations resize_cleanup() returns a new
       size_active; presumably it drops long-idle constraints -- confirm
       against its definition */
    if (iter % CLEANUP_CHECK == 0) {
      size_active = resize_cleanup(size_active, idle, alpha, delta, gammaG0, proximal_rhs, G, dXc, cut_error);
    }

	time(&iter_end);

#if (DEBUG_LEVEL==1)
	char msg[20];
	sprintf(msg,"ITER %d",iter);
    print_time(iter_start, iter_end, msg);
#endif
  } // end cutting plane while loop 

  printf(" Inner loop optimization finished.\n"); fflush(stdout); 
      
  /* free memory */
  /* NOTE(review): free_example is called with 0 here while
     cutting_plane_algorithm_dual in this file passes 1; if 0 means a
     shallow free, the fvec of every stored constraint is leaked --
     confirm against free_example() */
  for (j=0;j<size_active;j++) {
    free(G[j]);
    free_example(dXc[j],0);	
  }
  free(G);
  free(dXc);
  free(alpha);
  free(delta);
  /* the last cutting plane was never stored in dXc, so it is freed here */
  free_svector(new_constraint);
  free(idle);
  free(gammaG0);
  free(proximal_rhs);
  free(cut_error); 

  /* copy and free */
  /* the caller receives the last accepted center, not the last trial point */
  for (i=1;i<sm->sizePsi+1;i++) {
    w[i] = w_b[i];
  }
  free(w_b);

  return(primal_obj_b);

}
Ejemplo n.º 8
0
void cutting_plane_algorithm_dual(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex, 
															STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, int *valid_examples) {
  long i,j;
  double *alpha;
  DOC **dXc; // constraint matrix 
  double *delta; // rhs of constraints 
  SVECTOR *new_constraint;
  int iter, size_active; 
  double value;
	double threshold = 0.0;
  double margin;
  double primal_obj, cur_obj;
	double *cur_slack = NULL;
	int mv_iter;
	int *idle = NULL;
	double **G = NULL;
	double **G2 = NULL;
	double **qmatrix = NULL;
	SVECTOR *f;
	int r;

  // set parameters for hideo solver 
  LEARN_PARM lparm;
  KERNEL_PARM kparm;
  MODEL *svm_model=NULL;
  lparm.biased_hyperplane = 0;
  lparm.epsilon_crit = MIN(epsilon,0.001);
  lparm.svm_c = C;
  lparm.sharedslack = 1;
  kparm.kernel_type = LINEAR;

  lparm.remove_inconsistent=0;
  lparm.skip_final_opt_check=0;
  lparm.svm_maxqpsize=10;
  lparm.svm_newvarsinqp=0;
  lparm.svm_iter_to_shrink=-9999;
  lparm.maxiter=100000;
  lparm.kernel_cache_size=40;
  lparm.eps = epsilon; 
  lparm.transduction_posratio=-1.0;
  lparm.svm_costratio=1.0;
  lparm.svm_costratio_unlab=1.0;
  lparm.svm_unlabbound=1E-5;
  lparm.epsilon_a=1E-10;  // changed from 1e-15 
  lparm.compute_loo=0;
  lparm.rho=1.0;
  lparm.xa_depth=0;
  strcpy(lparm.alphafile,"");
  kparm.poly_degree=3;
  kparm.rbf_gamma=1.0;
  kparm.coef_lin=1;
  kparm.coef_const=1;
  strcpy(kparm.custom,"empty");
 
  iter = 0;
  size_active = 0;
  alpha = NULL;
  dXc = NULL;
  delta = NULL;

  //qmatrix = (double **) malloc(sizeof(double *)*10);
  //assert(qmatrix!=NULL);

  printf("Running structural SVM solver: "); fflush(stdout); 
	new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
 	value = margin - sprod_ns(w, new_constraint);
	while((value>threshold+epsilon)&&(iter<MAX_ITER)) {
		iter+=1;
		size_active+=1;

		printf("."); fflush(stdout); 


	    // add  constraint 
	  	dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active);
	   	assert(dXc!=NULL);
	   	dXc[size_active-1] = (DOC*)malloc(sizeof(DOC));
	   	dXc[size_active-1]->fvec = new_constraint; 
	   	dXc[size_active-1]->slackid = 1; // only one common slackid (one-slack)
	   	dXc[size_active-1]->costfactor = 1.0;


	   	delta = (double*)realloc(delta, sizeof(double)*size_active);
	   	assert(delta!=NULL);
	   	delta[size_active-1] = margin;

	   	//alpha = (double*)malloc(sizeof(double)*(size_active+(sparm->phi1_size+sparm->phi2_size)));
	   	//assert(alpha!=NULL);
   		//for(j=0; j<(sparm->phi1_size+sparm->phi2_size)+size_active; j++){
   		//	alpha[j] = 0.0;
   		//}
   		alpha = (double*)realloc(alpha, sizeof(double)*(size_active+(sparm->phi1_size+sparm->phi2_size)));
	   	assert(alpha!=NULL);
	   	alpha[size_active-1] = 0.0;

		idle = (int *) realloc(idle, sizeof(int)*size_active);
		assert(idle!=NULL);
		idle[size_active-1] = 0;

		
		qmatrix = (double **) realloc(qmatrix, sizeof(double *)*size_active);
  		assert(qmatrix!=NULL);

		qmatrix[size_active-1] = malloc(sizeof(double)*(sparm->phi1_size+sparm->phi2_size));
		for(j = 0; j < (sparm->phi1_size+sparm->phi2_size); j++){
			qmatrix[size_active-1][j] = (-1)*returnWeightAtIndex(dXc[size_active-1]->fvec->words, ((sparm->phi1_size+sparm->phi2_size)*2+j+1));
		}

		// update Gram matrix 
		G = (double **) realloc(G, sizeof(double *)*size_active);
		assert(G!=NULL);
		G[size_active-1] = NULL;
		for(j = 0; j < size_active; j++) {
			G[j] = (double *) realloc(G[j], sizeof(double)*size_active);
			assert(G[j]!=NULL);
		}

		for(j = 0; j < size_active-1; j++) {
			G[size_active-1][j] = sprod_ss(dXc[size_active-1]->fvec, dXc[j]->fvec);
			G[size_active-1][j] = G[size_active-1][j]/2;
			G[j][size_active-1]  = G[size_active-1][j];
		}
		G[size_active-1][size_active-1] = sprod_ss(dXc[size_active-1]->fvec,dXc[size_active-1]->fvec);

		// hack: add a constant to the diagonal to make sure G is PSD 
		G[size_active-1][size_active-1] += 1e-6;

	   	// solve QP to update alpha 
		//r = mosek_qp_optimize(G, delta, alpha, (long) size_active, C, &cur_obj, dXc, (sparm->phi1_size+sparm->phi2_size)*2, (sparm->phi1_size+sparm->phi2_size));
		r = mosek_qp_optimize_dual(G, qmatrix, delta, alpha, (long) size_active, (long) (sparm->phi1_size+sparm->phi2_size), C, &cur_obj, 0, 0);
	    
		if(r >= 1293 && r <= 1296)
		{
			printf("r:%d. G might not be psd due to numerical errors.\n",r);
			fflush(stdout);
			//exit(1);
			while(r==1295) {
				printf("r:%d. G might not be psd due to numerical errors. Gram Reg=%0.7f\n",r, sparm->gram_regularization);
				fflush(stdout);
				for(i=0;i<size_active;i++) {
					G[i][i] += 10*sparm->gram_regularization-sparm->gram_regularization;
				}
				sparm->gram_regularization *= 10;
				r = mosek_qp_optimize_dual(G, qmatrix, delta, alpha, (long) size_active, (long) (sparm->phi1_size+sparm->phi2_size), C, &cur_obj, sparm->gram_regularization, sparm->gram_regularization*0.1);
			}
		}
		else if(r)
		{
			printf("Error %d in mosek_qp_optimize: Check ${MOSEKHOME}/${VERSION}/tools/platform/${PLATFORM}/h/mosek.h\n",r);
			exit(1);
		}

	   	clear_nvector(w,sm->sizePsi);
	   	for (j=0;j<size_active;j++) {
	     	if (alpha[j]>C*ALPHA_THRESHOLD) {
					add_vector_ns(w,dXc[j]->fvec,alpha[j]);
					idle[j] = 0;
	     	}
			else
				idle[j]++;
	   	}
	   	for(j=0; j<(sparm->phi1_size+sparm->phi2_size);j++){
	   		if (alpha[size_active+j] > EQUALITY_EPSILON){
	   			w[j+1+(sparm->phi1_size+sparm->phi2_size)*2] = w[j+1+(sparm->phi1_size+sparm->phi2_size)*2] - alpha[size_active+j];
	   		}	   		
	   	}

	   	for(j=1; j<=(sparm->phi1_size+sparm->phi2_size)*3;j++){
	   		if((w[j]<EQUALITY_EPSILON) && (w[j]>(-1*EQUALITY_EPSILON))){
	   			w[j] = 0;
	   		}
	   	}	   

	   	for(j=(sparm->phi1_size+sparm->phi2_size)*2+1; j<=(sparm->phi1_size+sparm->phi2_size)*3;j++){
	   		//assert(w[j] <= 0);
	   		if(w[j]>0){
	   			printf("j = %ld, w[j] = %0.6f\n", j, w[j]);
	   			fflush(stdout);
	   		}
	   		
	   	}	

		cur_slack = (double *) realloc(cur_slack,sizeof(double)*size_active);

		for(i = 0; i < size_active; i++) {
			cur_slack[i] = 0.0;
			for(f = dXc[i]->fvec; f; f = f->next) {
				j = 0;
				while(f->words[j].wnum) {
					cur_slack[i] += w[f->words[j].wnum]*f->words[j].weight;
					j++;
				}
			}
			if(cur_slack[i] >= delta[i])
				cur_slack[i] = 0.0;
			else
				cur_slack[i] = delta[i]-cur_slack[i];
		}

		mv_iter = 0;
		if(size_active > 1) {
			for(j = 0; j < size_active; j++) {
				if(cur_slack[j] >= cur_slack[mv_iter])
					mv_iter = j;
			}
		}

		if(size_active > 1)
			threshold = cur_slack[mv_iter];
		else
			threshold = 0.0;

 		new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
   		value = margin - sprod_ns(w, new_constraint);

		if((iter % CLEANUP_CHECK) == 0)
		{
			printf("+"); fflush(stdout);
			size_active = resize_cleanup(size_active, &idle, &alpha, &delta, &dXc, &G, &mv_iter);
		}

		free(alpha);
		alpha=NULL;

 	} // end cutting plane while loop 

	//primal_obj = current_obj_val(ex, fycache, m, sm, sparm, C, valid_examples);

  printf(" Inner loop optimization finished.\n"); fflush(stdout); 
      
  // free memory
  for (j=0;j<size_active;j++) {
		free(G[j]);
    free_example(dXc[j],1);	
  }
	free(G);
  free(dXc);
  free(alpha);
  free(delta);
  free_svector(new_constraint);
	free(cur_slack);
	free(idle);
  if (svm_model!=NULL) free_model(svm_model,0);

  //return(primal_obj);
  return;
}
Ejemplo n.º 9
0
SAMPLE read_struct_examples(char *file, STRUCT_LEARN_PARM *sparm) {
/*
  Read input examples {(x_1,y_1),...,(x_n,y_n)} from file.
  The type of pattern x and label y has to follow the definition in 
  svm_struct_latent_api_types.h.  

  File layout as consumed below: one integer (number of images), then for
  every image four whitespace separated tokens: phi1 file name, phi2 file
  name, integer id, integer label.

  The returned sample always holds exactly one example.  For every image
  the feature vectors phi1, phi2 and the three combined vectors
  (phi1phi2_pos/_neg/_shift) are built, labels equal to 1 are counted as
  positive and all others as negative, and neighbors[i][j] (j>i) is set to
  1 when the squared distance between phi2 of image i and image j is below
  sparm->pairwise_threshold.  All other neighbor entries are 0.

  Terminates the program when the file cannot be opened, is malformed or
  truncated, or when memory runs out.
*/
  SAMPLE sample;
  EXAMPLE *ex;
  SUB_PATTERN *sub;
  SVECTOR *temp=NULL;
  SVECTOR *temp_sub=NULL;
  double vecDistance;
  int i, j;

  // open the file containing candidate bounding box dimensions/labels/featurePath and image label
  FILE *fp = fopen(file, "r");
  if(fp==NULL){
      printf("Error: Cannot open input file %s\n",file);
      exit(1);
  }

  sample.n = 1;  
  sample.examples = (EXAMPLE *) malloc(sample.n*sizeof(EXAMPLE));
  if(!sample.examples) die("Memory error.");
  ex = &sample.examples[0];
  ex->x.n_pos = 0;
  ex->x.n_neg = 0;

  // a missing or negative count would otherwise size the allocations
  // below with garbage
  if(fscanf(fp,"%d", &ex->n_imgs) != 1 || ex->n_imgs < 0){
      printf("Error: Cannot read the number of images from %s\n",file);
      exit(1);
  }
    
  // Initialise pattern 
  ex->x.example_cost = 1;

  ex->x.x_is = (SUB_PATTERN *) malloc(ex->n_imgs*sizeof(SUB_PATTERN));
  if(!ex->x.x_is) die("Memory error.");
  ex->y.labels = (int *) malloc(ex->n_imgs*sizeof(int));
  if(!ex->y.labels) die("Memory error.");

  for(i = 0; i < ex->n_imgs; i++){  
      sub = &ex->x.x_is[i];

      // NOTE(review): "%s" has no field width, so an over-long file name
      // in the input overruns the destination buffer; bound it once the
      // declared size of phi1_file_name/phi2_file_name is confirmed
      if(fscanf(fp,"%s",sub->phi1_file_name) != 1 ||
         fscanf(fp,"%s",sub->phi2_file_name) != 1 ||
         fscanf(fp,"%d",&sub->id) != 1 ||
         fscanf(fp,"%d",&ex->y.labels[i]) != 1){
          printf("Error: Malformed or truncated record %d in input file %s\n",i,file);
          exit(1);
      }

      sub->phi1 = read_sparse_vector(sub->phi1_file_name, sub->id, sparm);
      sub->phi2 = read_sparse_phi2(sub->phi2_file_name, sparm);

      // phi2 re-indexed with offset phi1_size, then added to phi1
      temp = create_svector_with_index(sub->phi2->words, "", 1, sparm->phi1_size);
      sub->phi1phi2_pos = add_ss(sub->phi1, temp);
      free_svector(temp);

      // the same combined vector re-indexed with one and two further
      // offsets of (phi1_size+phi2_size)
      sub->phi1phi2_neg = create_svector_with_index(sub->phi1phi2_pos->words, "", 1, (sparm->phi1_size+sparm->phi2_size));
      sub->phi1phi2_shift = create_svector_with_index(sub->phi1phi2_pos->words, "", 1, (sparm->phi1_size+sparm->phi2_size)*2);

      if(ex->y.labels[i] == 1) {
          ex->x.n_pos++;
      } 
      else{
          ex->x.n_neg++;
      }
  }
  // everything has been read; release the handle
  fclose(fp);

  ex->y.n_pos = ex->x.n_pos;
  ex->y.n_neg = ex->x.n_neg;

  ex->x.neighbors = (int **) malloc(ex->n_imgs*sizeof(int*));
  if(!ex->x.neighbors) die("Memory error.");
  ex->x.n_neighbors=0;
  for (i = 0; i < ex->n_imgs; i++){
      // calloc so that the entries with j<=i, which the loop below never
      // writes, are a defined 0 instead of indeterminate
      ex->x.neighbors[i] = (int *) calloc(ex->n_imgs, sizeof(int));
      if(!ex->x.neighbors[i]) die("Memory error.");
      for (j=(i+1); j < ex->n_imgs; j++){
          temp_sub = sub_ss(ex->x.x_is[i].phi2, ex->x.x_is[j].phi2);
          vecDistance = sprod_ss(temp_sub, temp_sub);
          free_svector(temp_sub);
          if(vecDistance < sparm->pairwise_threshold){
            ex->x.neighbors[i][j]=1;
            ex->x.n_neighbors++;
          }
          else{
            ex->x.neighbors[i][j]=0;
          }
      }
  }
  printf("No of neighbors = %d\n",ex->x.n_neighbors);
  fflush(stdout);

  return(sample);
}
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, /*double epsilon,*/ SVECTOR **fycache, EXAMPLE *ex, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm) {
    long i,j;
    double *alpha;
    double **G; /* Gram matrix */
    DOC **dXc; /* constraint matrix */
    double *delta; /* rhs of constraints */
    SVECTOR *new_constraint;
    double dual_obj/*, alphasum*/;
    int iter, size_active, no_violation_iter;
    double value;
    //int r;
    //int *idle; /* for cleaning up */
    double margin;
    //double primal_obj;
    double lower_bound, approx_upper_bound;
    double *proximal_rhs;
    //double *gammaG0=NULL;
    //double min_rho = 0.001;
    //double max_rho;
    //double serious_counter=0;
    //double rho = 1.0;

    //double expected_descent, primal_obj_b=-1, reg_master_obj;
    //int null_step=1;
    //double *w_b;
    //double kappa=0.01;
    //double temp_var;
    //double proximal_term, primal_lower_bound;

    //double v_k;
    //double obj_difference;
    // double *cut_error; // cut_error[i] = alpha_{k,i} at current center x_k
    //double sigma_k;
    //double m2 = 0.2;
    //double m3 = 0.9;
    //double gTd;
    //double last_sigma_k=0;

    //double initial_primal_obj;
    //int suff_decrease_cond=0;
    //double decrease_proportion = 0.2; // start from 0.2 first

    //double z_k_norm;
    //double last_z_k_norm=0;


    /*
    w_b = create_nvector(sm->sizePsi);
    clear_nvector(w_b,sm->sizePsi);
    // warm start
    for (i=1;i<sm->sizePsi+1;i++) {
      w_b[i] = w[i];
    }*/

    iter = 0;
    no_violation_iter = 0;
    size_active = 0;
    alpha = NULL;
    G = NULL;
    dXc = NULL;
    delta = NULL;
    //idle = NULL;

    proximal_rhs = NULL;
    //cut_error = NULL;

    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm);
    value = margin - sprod_ns(w, new_constraint);

    //primal_obj_b = 0.5*sprod_nn(w_b,w_b,sm->sizePsi)+C*value;
    //primal_obj = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value;
    //primal_lower_bound = 0;
    //expected_descent = -primal_obj_b;
    //initial_primal_obj = primal_obj_b;

    //max_rho = C;

    // Non negative weight constraints
    int nNonNeg = sm->sizePsi - sm->firstNonNegWeightIndex + 1;
    G = (double**)malloc(sizeof(double*)*nNonNeg);
    for (j=0; j<nNonNeg; j++) {
        G[j] = (double*)malloc(sizeof(double)*nNonNeg);
        for (int k=0; k<nNonNeg; k++) {
            G[j][k] = 0;
        }

        G[j][j] = 1.0;
    }
    double* alphabeta = NULL;

    while (/*(!suff_decrease_cond)&&(expected_descent<-epsilon)&&*/(iter<MAX_ITER)&&(no_violation_iter<MAX_INNER_ITER_NO_VIOLATION)) {
        LearningTracker::NextInnerIteration();
        iter+=1;
        size_active+=1;

#if (DEBUG_LEVEL>0)
        printf("INNER ITER %d\n", iter);
#endif

        /* add  constraint */
        dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active);
        assert(dXc!=NULL);
        dXc[size_active-1] = (DOC*)malloc(sizeof(DOC));
        dXc[size_active-1]->fvec = new_constraint;
        dXc[size_active-1]->slackid = 1; // only one common slackid (one-slack)
        dXc[size_active-1]->costfactor = 1.0;

        delta = (double*)realloc(delta, sizeof(double)*size_active);
        assert(delta!=NULL);
        delta[size_active-1] = margin;

        alphabeta = (double*)realloc(alphabeta, sizeof(double)*(size_active+nNonNeg));
        assert(alphabeta!=NULL);
        alphabeta[size_active+nNonNeg-1] = 0.0;

        /*idle = (int*)realloc(idle, sizeof(int)*size_active);
        assert(idle!=NULL);
        idle[size_active-1] = 0;*/

        /* proximal point */
        proximal_rhs = (double*)realloc(proximal_rhs, sizeof(double)*(size_active+nNonNeg));
        assert(proximal_rhs!=NULL);

        /*cut_error = (double*)realloc(cut_error, sizeof(double)*size_active);
        assert(cut_error!=NULL);
        // note g_i = - new_constraint
        cut_error[size_active-1] = C*(sprod_ns(w_b, new_constraint) - sprod_ns(w, new_constraint));
        cut_error[size_active-1] += (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi));
        cut_error[size_active-1] -= (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi)); */

        /*gammaG0 = (double*)realloc(gammaG0, sizeof(double)*size_active);
        assert(gammaG0!=NULL);*/

        /* update Gram matrix */
        G = (double**)realloc(G, sizeof(double*)*(size_active+nNonNeg));
        assert(G!=NULL);
        G[size_active+nNonNeg-1] = NULL;
        for (j=0; j<size_active+nNonNeg; j++) {
            G[j] = (double*)realloc(G[j], sizeof(double)*(size_active+nNonNeg));
            assert(G[j]!=NULL);
        }
        for (j=0; j<size_active-1; j++) {
            G[size_active+nNonNeg-1][j+nNonNeg] = sprod_ss(dXc[size_active-1]->fvec, dXc[j]->fvec);
            G[j+nNonNeg][size_active+nNonNeg-1] = G[size_active+nNonNeg-1][j+nNonNeg];
        }
        G[size_active+nNonNeg-1][size_active+nNonNeg-1] = sprod_ss(dXc[size_active-1]->fvec,dXc[size_active-1]->fvec);

        for (j=0; j<nNonNeg; j++) {
            WORD indicator[2];
            indicator[0].wnum = j + sm->firstNonNegWeightIndex;
            indicator[0].weight = 1.0;
            indicator[1].wnum = 0;
            indicator[1].weight = 0.0;
            SVECTOR* indicator_vec = create_svector(indicator, NULL, 1.0);
            G[size_active+nNonNeg-1][j] = sprod_ss(dXc[size_active-1]->fvec, indicator_vec);
            G[j][size_active+nNonNeg-1] = G[size_active+nNonNeg-1][j];
            free_svector(indicator_vec);
        }

        /* update gammaG0 */
        /*if (null_step==1) {
          gammaG0[size_active-1] = sprod_ns(w_b, dXc[size_active-1]->fvec);
        } else {
          for (i=0;i<size_active;i++) {
            gammaG0[i] = sprod_ns(w_b, dXc[i]->fvec);
          }
        }*/

        /* update proximal_rhs */
        for (i=0; i<size_active; i++) {
            proximal_rhs[i+nNonNeg] = -delta[i]; //(1+rho) * (rho * gammaG0[i] - (1 + rho) * delta[i]);
        }
        for (i=0; i<nNonNeg; i++) {
            proximal_rhs[i] = 0; //w_b[i + 1]*rho * (1+rho);
        }

        /* DEBUG */
        /*
        for (i = 0; i < size_active + nNonNeg; ++i) {
        	printf("G[%d]=", i);
        	for (j = 0; j < size_active + nNonNeg; ++j) {
        		printf("%.4f ", G[i][j]);
        	}
        	printf("\n");
        }
        printf("\n");
        for (i = 0; i < size_active + nNonNeg; ++i)
        	printf("proximal_rhs[%d]=%.4f\n", i, proximal_rhs[i]);
        */

        /* solve QP to update alpha */
        dual_obj = 0;
        mosek_qp_optimize(G, proximal_rhs, alphabeta, (long) size_active+nNonNeg, C, &dual_obj, nNonNeg);
        printf("dual_obj=%.4lf\n", dual_obj);

        alpha = alphabeta + nNonNeg;

        clear_nvector(w,sm->sizePsi);
        for (i = 0; i < nNonNeg; i++) {
            w[sm->firstNonNegWeightIndex + i] = alphabeta[i];//alphabeta[i]/(1+rho);  // add betas
        }
        for (j=0; j<size_active; j++) {
            if (alpha[j]>C*ALPHA_THRESHOLD) {
                //add_vector_ns(w,dXc[j]->fvec,alpha[j]/(1+rho));
                add_vector_ns(w,dXc[j]->fvec,alpha[j]);
            }
        }

        //z_k_norm = sqrt(sprod_nn(w,w,sm->sizePsi));

        //add_vector_nn(w, w_b, sm->sizePsi, rho/(1+rho));

        LearningTracker::ReportWeights(w, sm->sizePsi);

        /* detect if step size too small */
        /*
        sigma_k = 0;
        alphasum = 0;
        for (j=0;j<size_active;j++) {
          sigma_k += alpha[j]*cut_error[j];
          alphasum+=alpha[j];
        }
        sigma_k/=C;
        gTd = -C*(sprod_ns(w,new_constraint) - sprod_ns(w_b,new_constraint));

        #if (DEBUG_LEVEL>0)
        for (j=0;j<size_active;j++) {
          printf("alpha[%d]: %.8g, cut_error[%d]: %.8g\n", j, alpha[j], j, cut_error[j]);
        }
        printf("sigma_k: %.8g\n", sigma_k);
        printf("alphasum: %.8g\n", alphasum);
        printf("g^T d: %.8g\n", gTd);
        fflush(stdout);
        #endif
        */

        /* update cleanup information */
        /*
        for (j=0;j<size_active;j++) {
          if (alpha[j]<ALPHA_THRESHOLD*C) {
        idle[j]++;
          } else {
            idle[j]=0;
          }
        }
        */

        // update lower bound
        double xi = -1e+20;
        for (i = 0; i < size_active; ++i) {
            xi = MAX(xi, delta[i] - sprod_ns(w, dXc[i]->fvec));
        }
        lower_bound = 0.5*sprod_nn(w,w,sm->sizePsi)+C*xi;
        printf("lower_bound=%.4lf\n", lower_bound);
        assert(fabs(lower_bound + dual_obj) < 1e-6);
        LearningTracker::ReportLowerBound(lower_bound);

        // find new constraint
        new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm);
        value = margin - sprod_ns(w, new_constraint);
        double violation = value - xi;
        if (violation > CUTTING_PLANE_EPS) {
            printf("New constraint is violated by %.4lf\n", violation);
            no_violation_iter = 0;
        } else {
            ++no_violation_iter;
            printf("New constraint is underviolated by %.4lf\n", violation);
            printf("%d more such constraints to stop\n", MAX_INNER_ITER_NO_VIOLATION - no_violation_iter);
        }

        // update upper bound
        approx_upper_bound = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value;
        printf("approx_upper_bound=%.4lf\n", approx_upper_bound);
        LearningTracker::ReportUpperBound(approx_upper_bound);

        /*
        temp_var = sprod_nn(w_b,w_b,sm->sizePsi);
        proximal_term = 0.0;
        for (i=1;i<sm->sizePsi+1;i++) {
          proximal_term += (w[i]-w_b[i])*(w[i]-w_b[i]);
        }

        reg_master_obj = -dual_obj+0.5*rho*temp_var/(1+rho);
        expected_descent = reg_master_obj - primal_obj_b;

        v_k = (reg_master_obj - proximal_term*rho/2) - primal_obj_b;

        primal_lower_bound = MAX(primal_lower_bound, reg_master_obj - 0.5*rho*(1+rho)*proximal_term);
        LearningTracker::ReportLowerBoundValue(reg_master_obj);

        #if (DEBUG_LEVEL>0)
        printf("ITER REG_MASTER_OBJ: %.4f\n", reg_master_obj);
        printf("ITER EXPECTED_DESCENT: %.4f\n", expected_descent);
        printf("ITER PRIMAL_OBJ_B: %.4f\n", primal_obj_b);
        printf("ITER RHO: %.4f\n", rho);
        printf("ITER ||w-w_b||^2: %.4f\n", proximal_term);
        printf("ITER PRIMAL_LOWER_BOUND: %.4f\n", primal_lower_bound);
        printf("ITER V_K: %.4f\n", v_k);
        #endif
        obj_difference = primal_obj - primal_obj_b;

        if (primal_obj<primal_obj_b+kappa*expected_descent) {
          // extra condition to be met
          if ((gTd>m2*v_k)||(rho<min_rho+1E-8)) {
        #if (DEBUG_LEVEL>0)
        printf("SERIOUS STEP\n");
        #endif
        // update cut_error
        for (i=0;i<size_active;i++) {
          cut_error[i] -= (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi));
          cut_error[i] -= C*sprod_ns(w_b, dXc[i]->fvec);
          cut_error[i] += (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi));
          cut_error[i] += C*sprod_ns(w, dXc[i]->fvec);
        }
        primal_obj_b = primal_obj;
        for (i=1;i<sm->sizePsi+1;i++) {
          w_b[i] = w[i];
        }
        null_step = 0;
        serious_counter++;
          } else {
        // increase step size
        #if (DEBUG_LEVEL>0)
        printf("NULL STEP: SS(ii) FAILS.\n");
        #endif
        serious_counter--;
        rho = MAX(rho/10,min_rho);
          }
        } else { // no sufficient decrease
          serious_counter--;
          if ((cut_error[size_active-1]>m3*last_sigma_k)&&(fabs(obj_difference)>last_z_k_norm+last_sigma_k)) {
        #if (DEBUG_LEVEL>0)
        printf("NULL STEP: NS(ii) FAILS.\n");
        #endif
        rho = MIN(10*rho,max_rho);
          }
        #if (DEBUG_LEVEL>0)
          else printf("NULL STEP\n");
        #endif
        }
        // update last_sigma_k
        last_sigma_k = sigma_k;
        last_z_k_norm = z_k_norm;


        // break away from while loop if more than certain proportioal decrease in primal objective
        if (primal_obj_b/initial_primal_obj<1-decrease_proportion) {
          suff_decrease_cond = 1;
        }

        // clean up
        if (iter % CLEANUP_CHECK == 0) {
          size_active = resize_cleanup(size_active, idle, alpha, delta, gammaG0, proximal_rhs, G, dXc, cut_error);
        }
        */

    } // end cutting plane while loop

    printf("Inner loop optimization finished.\n");
    fflush(stdout);

    /* free memory */
    for (j=0; j<size_active; j++) {
        free(G[j]);
        free_example(dXc[j],0);
    }
    free(G);
    free(dXc);
    free(alphabeta);
    free(delta);
    free_svector(new_constraint);
    //free(idle);
    //free(gammaG0);
    free(proximal_rhs);
    //free(cut_error);

    /* copy and free */
    /*for (i=1;i<sm->sizePsi+1;i++) {
      w[i] = w_b[i];
    }
    free(w_b);*/

    //return(primal_obj_b);
    return lower_bound;
}
Ejemplo n.º 11
0
/*
 * Inner-loop cutting-plane solver with a proximal term (announced on stdout as
 * the "CCCP inner loop solver").
 *
 * Each iteration appends the constraint returned by find_cutting_plane() to the
 * active set, extends the Gram matrix, solves the resulting QP for the dual
 * variables alpha with svm_learn_optimization(), rebuilds w from the active
 * constraints, and then decides between a "serious step" (the stability centre
 * w_b is moved to w) and a "null step" (w_b is kept and rho may be adapted).
 *
 * Parameters:
 *   w         weight vector, indexed 1..sm->sizePsi; read as the warm start and
 *             overwritten with the final stability centre w_b before returning.
 *   m         forwarded unchanged to find_cutting_plane().
 *   MAX_ITER  upper bound on the number of iterations of the main loop.
 *   C         regularisation constant; also bounds rho from above (100*C).
 *   epsilon   the loop stops once expected_descent >= -epsilon; also used to
 *             configure the QP solver tolerances.
 *   fycache, ex, sm, sparm
 *             forwarded unchanged to find_cutting_plane(); sm->sizePsi gives
 *             the dimensionality of w.
 *
 * Returns the objective value primal_obj_b recorded at the stability centre.
 *
 * All working arrays and the QP model are released before returning.
 * NOTE(review): constraints are released with free_example(..., 0); whether
 * that also releases the attached feature vectors depends on free_example,
 * which is not visible here -- confirm.
 */
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache,
                               EXAMPLE *ex, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm) {
    long i, j;
    double *alpha;
    double **G; /* Gram matrix */
    DOC **dXc; /* constraint matrix */
    double *delta; /* rhs of constraints */
    SVECTOR *new_constraint;
    double dual_obj, alphasum;
    int iter, size_active;
    double value;
    int *idle; /* for cleaning up */
    double margin;
    double primal_obj;
    double *proximal_rhs;
    double *gammaG0 = NULL;
    double min_rho = 0.001;
    double max_rho;
    double serious_counter = 0;
    double rho = 1.0; /* temporarily set it to 1 first */

    double expected_descent, primal_obj_b = -1, reg_master_obj;
    int null_step = 1;
    double *w_b;
    double kappa = 0.1;
    double temp_var;
    double proximal_term, primal_lower_bound;

    double v_k;
    double obj_difference;
    double *cut_error; // cut_error[i] = alpha_{k,i} at current center x_k
    double sigma_k;
    double m2 = 0.2;
    double m3 = 0.9;
    double gTd;
    double last_sigma_k = 0;

    double initial_primal_obj;
    int suff_decrease_cond = 0;
    double decrease_proportion = 0.2; // start from 0.2 first

    double z_k_norm;
    double last_z_k_norm = 0;


    /* set parameters for hideo solver */
    LEARN_PARM lparm;
    KERNEL_PARM kparm;
    MODEL *svmModel = NULL;
    lparm.biased_hyperplane = 0;
    lparm.epsilon_crit = MIN(epsilon, 0.001);
    lparm.svm_c = C;
    lparm.sharedslack = 1;
    kparm.kernel_type = LINEAR;

    lparm.remove_inconsistent = 0;
    lparm.skip_final_opt_check = 0;
    lparm.svm_maxqpsize = 10;
    lparm.svm_newvarsinqp = 0;
    lparm.svm_iter_to_shrink = -9999;
    lparm.maxiter = 100000;
    lparm.kernel_cache_size = 40;
    lparm.eps = epsilon;
    lparm.transduction_posratio = -1.0;
    lparm.svm_costratio = 1.0;
    lparm.svm_costratio_unlab = 1.0;
    lparm.svm_unlabbound = 1E-5;
    lparm.epsilon_a = 1E-10;  /* changed from 1e-15 */
    lparm.compute_loo = 0;
    lparm.rho = 1.0;
    lparm.xa_depth = 0;
    strcpy(lparm.alphafile, "");
    kparm.poly_degree = 3;
    kparm.rbf_gamma = 1.0;
    kparm.coef_lin = 1;
    kparm.coef_const = 1;
    strcpy(kparm.custom, "empty");


    w_b = create_nvector(sm->sizePsi);
    clear_nvector(w_b, sm->sizePsi);
    /* warm start: the stability centre begins at the caller's w */
    for (i = 1; i < sm->sizePsi + 1; i++) {
        w_b[i] = w[i];
    }

    iter = 0;
    size_active = 0;
    alpha = NULL;
    G = NULL;
    dXc = NULL;
    delta = NULL;
    idle = NULL;

    proximal_rhs = NULL;
    cut_error = NULL;

    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm);
    value = margin - sprod_ns(w, new_constraint);

    primal_obj_b = 0.5 * sprod_nn(w_b, w_b, sm->sizePsi) + C * value;
    primal_obj = 0.5 * sprod_nn(w, w, sm->sizePsi) + C * value;
    primal_lower_bound = 0;
    expected_descent = -primal_obj_b;
    initial_primal_obj = primal_obj_b;

    //max_rho = C;
    max_rho = 100 * C; // tree-edge loss not within 0-1

    printf("Running CCCP inner loop solver: ");
    fflush(stdout);

    while ((!suff_decrease_cond) && (expected_descent < -epsilon) && (iter < MAX_ITER)) {
        iter += 1;
        size_active += 1;

#if (DEBUG_LEVEL > 0)
        printf("ITER %d\n", iter);
#endif
        printf(".");
        fflush(stdout);


        /* add  constraint */
        dXc = (DOC **) realloc(dXc, sizeof(DOC *) * size_active);
        assert(dXc != NULL);
        dXc[size_active - 1] = (DOC *) malloc(sizeof(DOC));
        dXc[size_active - 1]->fvec = new_constraint;
        dXc[size_active - 1]->slackid = 1; // only one common slackid (one-slack)
        dXc[size_active - 1]->costfactor = 1.0;

        delta = (double *) realloc(delta, sizeof(double) * size_active);
        assert(delta != NULL);
        delta[size_active - 1] = margin;
        alpha = (double *) realloc(alpha, sizeof(double) * size_active);
        assert(alpha != NULL);
        alpha[size_active - 1] = 0.0;
        idle = (int *) realloc(idle, sizeof(int) * size_active);
        assert(idle != NULL);
        idle[size_active - 1] = 0;
        /* proximal point */
        proximal_rhs = (double *) realloc(proximal_rhs, sizeof(double) * size_active);
        assert(proximal_rhs != NULL);
        cut_error = (double *) realloc(cut_error, sizeof(double) * size_active);
        assert(cut_error != NULL);
        // note g_i = - new_constraint
        cut_error[size_active - 1] = C * (sprod_ns(w_b, new_constraint) - sprod_ns(w, new_constraint));
        cut_error[size_active - 1] += (primal_obj_b - 0.5 * sprod_nn(w_b, w_b, sm->sizePsi));
        cut_error[size_active - 1] -= (primal_obj - 0.5 * sprod_nn(w, w, sm->sizePsi));

        gammaG0 = (double *) realloc(gammaG0, sizeof(double) * size_active);
        assert(gammaG0 != NULL);

        /* update Gram matrix: grow every row by one column and add one row */
        G = (double **) realloc(G, sizeof(double *) * size_active);
        assert(G != NULL);
        G[size_active - 1] = NULL; /* so the realloc below allocates the new row */
        for (j = 0; j < size_active; j++) {
            G[j] = (double *) realloc(G[j], sizeof(double) * size_active);
            assert(G[j] != NULL);
        }
        for (j = 0; j < size_active - 1; j++) {
            G[size_active - 1][j] = sprod_ss(dXc[size_active - 1]->fvec, dXc[j]->fvec);
            G[j][size_active - 1] = G[size_active - 1][j];
        }
        G[size_active - 1][size_active - 1] = sprod_ss(dXc[size_active - 1]->fvec, dXc[size_active - 1]->fvec);


        /* update gammaG0: after a null step w_b is unchanged, so only the new
           entry is needed; after a serious step every entry is recomputed */
        if (null_step == 1) {
            gammaG0[size_active - 1] = sprod_ns(w_b, dXc[size_active - 1]->fvec);
        } else {
            for (i = 0; i < size_active; i++) {
                gammaG0[i] = sprod_ns(w_b, dXc[i]->fvec);
            }
        }

        /* update proximal_rhs */
        for (i = 0; i < size_active; i++) {
            proximal_rhs[i] = (1 + rho) * delta[i] - rho * gammaG0[i];
        }


        /* solve QP to update alpha */
        if (size_active > 1) {
            if (svmModel != NULL) free_model(svmModel, 0);
            svmModel = (MODEL *) my_malloc(sizeof(MODEL));
            svm_learn_optimization(dXc, proximal_rhs, size_active, sm->sizePsi, &lparm, &kparm, NULL, svmModel, alpha);
        } else {
            /* a single constraint needs no solver call */
            assert(size_active == 1);
            alpha[0] = C;
        }

        clear_nvector(w, sm->sizePsi);
        for (j = 0; j < size_active; j++) {
            if (alpha[j] > C * ALPHA_THRESHOLD) {
                add_vector_ns(w, dXc[j]->fvec, alpha[j] / (1 + rho));
            }
        }

        /* compute dual obj */
        dual_obj = +0.5 * (1 + rho) * sprod_nn(w, w, sm->sizePsi);
        for (j = 0; j < size_active; j++) {
            dual_obj -= proximal_rhs[j] / (1 + rho) * alpha[j];
        }

        z_k_norm = sqrt(sprod_nn(w, w, sm->sizePsi));

        add_vector_nn(w, w_b, sm->sizePsi, rho / (1 + rho));


        /* detect if step size too small */
        sigma_k = 0;
        alphasum = 0;
        for (j = 0; j < size_active; j++) {
            sigma_k += alpha[j] * cut_error[j];
            alphasum += alpha[j];
        }
        sigma_k /= C;
        gTd = -C * (sprod_ns(w, new_constraint) - sprod_ns(w_b, new_constraint));

#if (DEBUG_LEVEL > 0)
        for (j=0;j<size_active;j++) {
          printf("alpha[%d]: %.8g, cut_error[%d]: %.8g\n", j, alpha[j], j, cut_error[j]);
        }
        printf("sigma_k: %.8g\n", sigma_k);
        printf("alphasum: %.8g\n", alphasum);
        printf("g^T d: %.8g\n", gTd);
        fflush(stdout);
#endif


        /* update cleanup information */
        for (j = 0; j < size_active; j++) {
            if (alpha[j] < ALPHA_THRESHOLD * C) {
                idle[j]++;
            } else {
                idle[j] = 0;
            }
        }

        new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm);
        value = margin - sprod_ns(w, new_constraint);

        /* print primal objective */
        primal_obj = 0.5 * sprod_nn(w, w, sm->sizePsi) + C * value;

#if (DEBUG_LEVEL > 0)
        printf("ITER PRIMAL_OBJ %.4f\n", primal_obj); fflush(stdout);
#endif


        temp_var = sprod_nn(w_b, w_b, sm->sizePsi);
        proximal_term = 0.0;
        for (i = 1; i < sm->sizePsi + 1; i++) {
            proximal_term += (w[i] - w_b[i]) * (w[i] - w_b[i]);
        }

        reg_master_obj = -dual_obj + 0.5 * rho * temp_var / (1 + rho);
        expected_descent = reg_master_obj - primal_obj_b;

        v_k = (reg_master_obj - proximal_term * rho / 2) - primal_obj_b;

        primal_lower_bound = MAX(primal_lower_bound, reg_master_obj - 0.5 * rho * (1 + rho) * proximal_term);

#if (DEBUG_LEVEL > 0)
        printf("ITER REG_MASTER_OBJ: %.4f\n", reg_master_obj);
        printf("ITER EXPECTED_DESCENT: %.4f\n", expected_descent);
        printf("ITER PRIMLA_OBJ_B: %.4f\n", primal_obj_b);
        printf("ITER RHO: %.4f\n", rho);
        printf("ITER ||w-w_b||^2: %.4f\n", proximal_term);
        printf("ITER PRIMAL_LOWER_BOUND: %.4f\n", primal_lower_bound);
        printf("ITER V_K: %.4f\n", v_k);
#endif
        obj_difference = primal_obj - primal_obj_b;

        if (primal_obj < primal_obj_b + kappa * expected_descent) {
            /* extra condition to be met */
            if ((gTd > m2 * v_k) || (rho < min_rho + 1E-8)) {
#if (DEBUG_LEVEL > 0)
                printf("SERIOUS STEP\n");
#endif
                /* update cut_error relative to the new centre */
                for (i = 0; i < size_active; i++) {
                    cut_error[i] -= (primal_obj_b - 0.5 * sprod_nn(w_b, w_b, sm->sizePsi));
                    cut_error[i] -= C * sprod_ns(w_b, dXc[i]->fvec);
                    cut_error[i] += (primal_obj - 0.5 * sprod_nn(w, w, sm->sizePsi));
                    cut_error[i] += C * sprod_ns(w, dXc[i]->fvec);
                }
                primal_obj_b = primal_obj;
                for (i = 1; i < sm->sizePsi + 1; i++) {
                    w_b[i] = w[i];
                }
                null_step = 0;
                serious_counter++;
            } else {
                /* increase step size */
#if (DEBUG_LEVEL > 0)
                printf("NULL STEP: SS(ii) FAILS.\n");
#endif
                serious_counter--;
                rho = MAX(rho / 10, min_rho);
            }
        } else { /* no sufficient decrease */
            serious_counter--;

            if ((cut_error[size_active - 1] > m3 * last_sigma_k) &&
                (fabs(obj_difference) > last_z_k_norm + last_sigma_k)) {
#if (DEBUG_LEVEL > 0)
                printf("NULL STEP: NS(ii) FAILS.\n");
#endif
                rho = MIN(10 * rho, max_rho);
            }
#if (DEBUG_LEVEL > 0)
            else printf("NULL STEP\n");
#endif
        }
        /* update last_sigma_k */
        last_sigma_k = sigma_k;
        last_z_k_norm = z_k_norm;


        /* break away from while loop if more than certain proportional decrease in primal objective */
        if (primal_obj_b / initial_primal_obj < 1 - decrease_proportion) {
            suff_decrease_cond = 1;
        }

        /* clean up: resize_cleanup receives the arrays by address and
           returns the new active-set size */
        if (iter % CLEANUP_CHECK == 0) {
            size_active = resize_cleanup(size_active, &idle, &alpha, &delta, &gammaG0, &proximal_rhs, &G, &dXc,
                                         &cut_error);
        }


    } // end cutting plane while loop

    printf(" Inner loop optimization finished.\n");
    fflush(stdout);

    /* free memory */
    /* the model from the final QP solve is otherwise never released */
    if (svmModel != NULL) free_model(svmModel, 0);
    for (j = 0; j < size_active; j++) {
        free(G[j]);
        free_example(dXc[j], 0);
    }
    free(G);
    free(dXc);
    free(alpha);
    free(delta);
    free_svector(new_constraint);
    free(idle);
    free(gammaG0);
    free(proximal_rhs);
    free(cut_error);

    /* copy and free: the caller receives the stability centre, not the last iterate */
    for (i = 1; i < sm->sizePsi + 1; i++) {
        w[i] = w_b[i];
    }
    free(w_b);

    return (primal_obj_b);

}
Ejemplo n.º 12
0
/*
 * Parse one text line of the form
 *     <label> [qid:<n>] [cost:<x>] <feature>:<value> ...  [# comment]
 * into doc and label.
 *
 * line           NUL-terminated input; modified in place (the first '#' is
 *                replaced by NUL to strip the comment).
 * doc            receives queryid, costfactor, the words[] entries, docnum
 *                (set to -1) and twonorm_sq. doc->words must already be
 *                allocated by the caller.
 *                NOTE(review): a terminating entry is written at index wpos,
 *                which can equal max_words_doc -- the caller presumably
 *                allocates at least one spare slot; confirm.
 * label          receives the leading numeric label.
 * numwords       receives the number of words stored plus one (terminator).
 * max_words_doc  maximum number of feature entries that will be stored.
 *
 * Returns 0 if the line is empty after comment removal (sscanf reports EOF
 * for the label), 1 otherwise. Malformed feature tokens, feature numbers
 * below 1 and features out of increasing order terminate the program.
 */
int parse_document(char *line, DOC *doc, double *label, 
		   long int *numwords, long int max_words_doc)
{
  register long wpos,pos;
  long wnum;
  double weight;
  int numread;
  char featurepair[1000],junk[1000];

  doc->queryid=0;
  doc->costfactor=1;

  pos=0;
  while(line[pos]) {      /* cut off comments */
    if(line[pos] == '#') {
      line[pos]=0;
    }
    else {
      pos++;
    }
  }
  wpos=0;
  if(sscanf(line,"%lf",label) == EOF) return(0);
  /* step over the label token; the cast through unsigned char keeps
     isspace() defined for bytes >= 0x80 on platforms with signed char */
  pos=0;
  while(isspace((unsigned char)line[pos])) pos++;
  while((!isspace((unsigned char)line[pos])) && line[pos]) pos++;
  /* field widths keep over-long tokens from overflowing the stack buffers */
  while(((numread=sscanf(line+pos,"%999s",featurepair)) != EOF) && 
	(wpos<max_words_doc)) {
    /* printf("%s\n",featurepair); */
    /* advance pos past the token that was just read into featurepair */
    while(isspace((unsigned char)line[pos])) pos++;
    while((!isspace((unsigned char)line[pos])) && line[pos]) pos++;
    /* a result of exactly 1 (resp. 2) means nothing trailed the number */
    if(sscanf(featurepair,"qid:%ld%999s",&wnum,junk)==1) {
      /* it is the query id */
      doc->queryid=(long)wnum;
    }
    else if(sscanf(featurepair,"cost:%lf%999s",&weight,junk)==1) {
      /* it is the example-dependent cost factor */
      doc->costfactor=(double)weight;
    }
    else if(sscanf(featurepair,"%ld:%lf%999s",&wnum,&weight,junk)==2) {
      /* it is a regular feature */
      if(wnum<=0) { 
	perror ("Feature numbers must be larger or equal to 1!!!\n"); 
	printf("LINE: %s\n",line);
	exit (1); 
      }
      if((wpos>0) && ((doc->words[wpos-1]).wnum >= wnum)) { 
	perror ("Features must be in increasing order!!!\n"); 
	printf("LINE: %s\n",line);
	exit (1); 
      }
      (doc->words[wpos]).wnum=wnum;
      (doc->words[wpos]).weight=(FVAL)weight; 
      wpos++;
    }
    else {
      perror ("Cannot parse feature/value pair!!!\n"); 
      printf("'%s' in LINE: %s\n",featurepair,line);
      exit (1); 
    }
  }
  (doc->words[wpos]).wnum=0;   /* wnum==0 terminates the word list */
  (*numwords)=wpos+1;
  doc->docnum=-1;
  doc->twonorm_sq=sprod_ss(doc->words,doc->words);
  return(1);
}
Ejemplo n.º 13
0
/*
 * Read an SVM-light model file into model.
 *
 * modelfile  path of the model file; the program exits if it cannot be
 *            opened or if its version string differs from VERSION_SVMLIGHT.
 * model      receives the kernel parameters, totwords, totdoc, sv_num, b,
 *            and, for i = 1 .. sv_num-1, alpha[i] and a newly allocated
 *            supvec[i]. model->alpha and model->supvec are written through
 *            but not allocated here, so the caller must provide them.
 * max_words  maximum number of feature entries stored per support vector.
 * ll         size of the line buffer used for each support-vector line.
 *
 * A truncated file or a malformed support-vector line terminates the
 * program rather than storing indeterminate values in the model.
 * NOTE(review): the "%[^#]" read into kernel_parm.custom is unbounded; the
 * size of that field is not visible here, so no width was added -- confirm.
 */
void read_model(char *modelfile, MODEL *model, long int max_words, long int ll)
{
  FILE *modelfl;
  long j,i;
  char *line;
  WORD *words;
  register long wpos;
  long wnum,pos;
  double weight;
  char version_buffer[100];
  int numread;

  if(verbosity>=1) {
    printf("Reading model..."); fflush(stdout);
  }
  words = (WORD *)my_malloc(sizeof(WORD)*(max_words+10));
  line = (char *)my_malloc(sizeof(char)*ll);

  if ((modelfl = fopen (modelfile, "r")) == NULL)
  { perror (modelfile); exit (1); }

  /* start empty so a failed read falls into the mismatch branch below;
     the field width keeps a long token from overflowing version_buffer */
  version_buffer[0]=0;
  fscanf(modelfl,"SVM-light Version %99s\n",version_buffer);
  if(strcmp(version_buffer,VERSION_SVMLIGHT)) {
    perror ("Version of model-file does not match version of svm_classify!"); 
    exit (1); 
  }
  /* each header line is "<value> # description"; the description is skipped */
  fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.kernel_type);  
  fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.poly_degree);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.rbf_gamma);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_lin);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_const);
  fscanf(modelfl,"%[^#]%*[^\n]\n", model->kernel_parm.custom);

  fscanf(modelfl,"%ld%*[^\n]\n", &model->totwords);
  fscanf(modelfl,"%ld%*[^\n]\n", &model->totdoc);
  fscanf(modelfl,"%ld%*[^\n]\n", &model->sv_num);
  fscanf(modelfl,"%lf%*[^\n]\n", &model->b);

  /* support vectors occupy indices 1 .. sv_num-1 */
  for(i=1;i<model->sv_num;i++) {
    if(fgets(line,(int)ll,modelfl) == NULL) {
      perror("Unexpected end of model file!");
      exit(1);
    }
    pos=0;
    wpos=0;
    if(sscanf(line,"%lf",&model->alpha[i]) != 1) {
      perror("Parsing error while reading model!");
      printf("LINE: %s\n",line);
      exit(1);
    }
    /* step over the alpha token without running past the terminator */
    while(isspace((unsigned char)line[pos])) pos++;
    while(line[pos] && !isspace((unsigned char)line[pos])) pos++;
    while(((numread=sscanf(line+pos,"%ld:%lf",&wnum,&weight)) != EOF) 
	  && (wpos<max_words)) {
      if(numread != 2) {
	/* wnum/weight are not valid here, so do not store them */
	perror("Parsing error while reading model!");
	printf("LINE: %s\n",line);
	exit(1);
      }
      /* step over the feature token that was just parsed */
      while(isspace((unsigned char)line[pos])) pos++;
      while(line[pos] && !isspace((unsigned char)line[pos])) pos++;
      words[wpos].wnum=wnum;
      words[wpos].weight=(FVAL)weight; 
      wpos++;
    } 
    model->supvec[i] = (DOC *)my_malloc(sizeof(DOC));
    (model->supvec[i])->words = (WORD *)my_malloc(sizeof(WORD)*(wpos+1));
    for(j=0;j<wpos;j++) {
      (model->supvec[i])->words[j]=words[j]; 
    }
    ((model->supvec[i])->words[wpos]).wnum=0;   /* wnum==0 terminates the list */
    (model->supvec[i])->twonorm_sq = sprod_ss((model->supvec[i])->words,
					      (model->supvec[i])->words);
    (model->supvec[i])->docnum = -1;
  }
  fclose(modelfl);
  free(line);
  free(words);
  if(verbosity>=1) {
    fprintf(stdout, "OK. (%d support vectors read)\n",(int)(model->sv_num-1));
  }
}