コード例 #1
0
ファイル: svm_struct_learn.cpp プロジェクト: aa755/cfg3d
MATRIX *update_kernel_matrix(MATRIX *matrix, int newpos, CONSTSET *cset, 
			     KERNEL_PARM *kparm) 
     /* Gives the constraint stored at position newpos the smallest
	kernelid that no other constraint of cset holds, enlarges the
	kernel matrix when that id does not fit, and writes the kernel
	values between the new constraint and every constraint of cset
	(itself included) symmetrically into the matrix. Returns the
	matrix pointer, which may differ from the one passed in. */
{
  int pos,top_id,fresh_id;
  double *taken;
  double k;

  /* largest kernelid held by any constraint other than newpos */
  top_id=0;
  for(pos=0;pos<cset->m;pos++) {
    if(pos == newpos) 
      continue;
    top_id=MAX(top_id,cset->lhs[pos]->kernelid);
  }

  /* flag every id in use, then take the first unflagged one; the
     flag vector is sized top_id+2 so an unflagged slot exists */
  taken=create_nvector(top_id+2);
  clear_nvector(taken,top_id+2);
  for(pos=0;pos<cset->m;pos++) {
    if(pos == newpos) 
      continue;
    taken[cset->lhs[pos]->kernelid]=1;
  }
  fresh_id=0;
  while(taken[fresh_id])
    fresh_id++;
  free_nvector(taken);
  cset->lhs[newpos]->kernelid=fresh_id;

  /* grow the matrix (with slack of 50) when it is missing or the
     largest id no longer fits */
  top_id=MAX(top_id,fresh_id);
  if(!matrix)
    matrix=realloc_matrix(matrix,top_id+50,top_id+50);
  else if(top_id>=matrix->m)
    matrix=realloc_matrix(matrix,top_id+50,top_id+50);

  /* fill row and column fresh_id */
  pos=0;
  while(pos<cset->m) {
    k=kernel(kparm,cset->lhs[newpos],cset->lhs[pos]);
    matrix->element[fresh_id][cset->lhs[pos]->kernelid]=k;
    matrix->element[cset->lhs[pos]->kernelid][fresh_id]=k;
    pos++;
  }
  return(matrix);
}
コード例 #2
0
SVECTOR* add_list_ns(SVECTOR *a) 
     /* Sums the SVECTORs of list a, each weighted by its own factor,
	into a dense buffer and returns the sum as a newly created
	SVECTOR (factor 1.0, empty userdefined string). Intended for
	lists with many elements but few distinct features. */
{
    SVECTOR *node,*result;
    register WORD *entry;
    long maxfeat=0;
    double *dense;

    /* largest feature number over the whole list; each words array
       is scanned until an entry with wnum==0 is reached */
    for(node=a;node;node=node->next) {
      for(entry=node->words;entry->wnum;entry++) {
	if(entry->wnum > maxfeat) 
	  maxfeat=entry->wnum;
      }
    }

    /* accumulate the weighted vectors densely */
    dense=create_nvector(maxfeat);
    clear_nvector(dense,maxfeat);
    node=a;
    while(node) {
      add_vector_ns(dense,node,node->factor);
      node=node->next;
    }

    /* convert to sparse form and drop the dense buffer */
    result=create_svector_n(dense,maxfeat,"",1.0);
    free(dense);

    return(result);
}
コード例 #3
0
void add_weight_vector_to_linear_model(MODEL *model)
     /* Builds a dense weight vector of model->totwords features as the
	alpha- and factor-weighted sum of the feature vectors of support
	vectors 1..sv_num-1 and stores it in model->lin_weights. */
{
  long sv;
  SVECTOR *part;
  double *weights;

  weights=create_nvector(model->totwords);
  model->lin_weights=weights;
  clear_nvector(weights,model->totwords);

  /* index 0 of supvec/alpha is not visited */
  sv=1;
  while(sv<model->sv_num) {
    part=(model->supvec[sv])->fvec;
    while(part) {
      add_vector_ns(weights,part,part->factor*model->alpha[sv]);
      part=part->next;
    }
    sv++;
  }
}
コード例 #4
0
double *prod_matrix_nvector(MATRIX *A, double *v)
/* Computes w=A*v for matrix A and column vector v (sizes are assumed
   to match). Entry w[r] is the dot product of row r of A, taken over
   A->m columns, with v. The result is a new vector obtained from
   create_nvector(A->n); the caller owns it. */
{
  int row,col;
  double acc;
  double *w=create_nvector(A->n);

  row=0;
  while (row<A->n) {
    acc=0.0;
    for (col=0;col<A->m;col++)
      acc+=v[col]*A->element[row][col];
    w[row]=acc;
    row++;
  }

  return(w);
}
コード例 #5
0
double *prod_nvector_matrix(double *v, MATRIX *A)
/* Computes w^T=v^T*A for column vector v and matrix A (sizes are
   assumed to match). Entry w[c] is the dot product of v with column c
   of A, taken over A->n rows. The result is a new vector obtained from
   create_nvector(A->m); the caller owns it. */
{
  int row,col;
  double acc;
  double *w=create_nvector(A->m);

  col=0;
  while (col<A->m) {
    acc=0.0;
    for (row=0;row<A->n;row++)
      acc+=v[row]*A->element[row][col];
    w[col]=acc;
    col++;
  }

  return(w);
}
コード例 #6
0
double* add_list_nn(SVECTOR *a, long totwords) 
     /* Sums the SVECTORs of list a, each weighted by its own factor,
	and returns the result as a dense vector obtained from
	create_nvector(totwords). Intended for lists with many elements
	but few distinct features. The caller owns the result. */
{
    SVECTOR *node=a;
    long k=0;
    double *dense=create_nvector(totwords);

    /* zero entries 0..totwords inclusive; NOTE(review): this writes
       index totwords, so it presumes create_nvector(n) provides at
       least n+1 entries -- confirm against its definition */
    while(k<=totwords) {
      dense[k]=0;
      k++;
    }

    while(node) {
      add_vector_ns(dense,node,node->factor);
      node=node->next;
    }

    return(dense);
}
コード例 #7
0
ファイル: svm_common.c プロジェクト: a061105/ConvexLatentSVM
double model_length_n(MODEL *model) 
     /* Returns the Euclidean length of the weight vector of a linear
	model. The vector is accumulated densely from support vectors
	1..sv_num-1 weighted by alpha, converted to a sparse vector, and
	its self inner product is square-rooted. Prints an error and
	exits with status 1 when the kernel is not LINEAR. */
{
  long     sv;
  long     dim=model->totwords+1;
  double   sqnorm;
  double   *dense;
  SVECTOR  *sparse;

  if(model->kernel_parm.kernel_type != LINEAR) {
    printf("ERROR: model_length_n applies only to linear kernel!\n");
    exit(1);
  }

  /* dense accumulation of alpha-weighted support vectors */
  dense=create_nvector(dim);
  clear_nvector(dense,dim);
  sv=1;
  while(sv<model->sv_num) {
    add_list_n_ns(dense,model->supvec[sv]->fvec,model->alpha[sv]);
    sv++;
  }

  /* squared norm via the sparse inner product */
  sparse=create_svector_n(dense,dim,NULL,1.0);
  sqnorm=sprod_ss(sparse,sparse);

  free(dense);
  free_svector(sparse);
  return(sqrt(sqnorm));
}
コード例 #8
0
double *find_indep_subset_of_matrix(MATRIX *A, double epsilon)
/* For a positive-semidefinite symmetric matrix A[0..n-1][0..n-1], finds
   a subset of rows/columns that is linearly independent by running a
   Cholesky-style factorisation A = L * L^T on a copy of A (A itself is
   left untouched; only its upper triangle is read as input data).
   A diagonal value that is not larger than epsilon is set to zero, and
   the column below a zero diagonal is zeroed as well. Returns a new
   vector holding the diagonal of L: non-zero entries mark the members
   of the independent subset. Prints an error and exits with status 1
   when A is not square. */
{
  int row,col,k,dim;
  double acc,diag,*indep;
  MATRIX *L;
  
  if(A->m != A->n) {
    printf("ERROR: Matrix not quadratic. Cannot compute Cholesky!\n");
    exit(1);
  }
  dim=A->n;
  L=copy_matrix(A);

  for (row=0;row<dim;row++) {
    /* diagonal entry: subtract squares of the already computed part
       of this row (k runs downwards), clamp, take the root */
    acc=L->element[row][row];
    for (k=row-1;k>=0;k--) 
      acc -= L->element[row][k]*L->element[row][k];
    if (acc <= epsilon) 
      acc=0;
    L->element[row][row]=sqrt(acc);
    diag=L->element[row][row];

    /* entries below the diagonal in column `row` */
    for (col=row+1;col<dim;col++) {
      acc=L->element[row][col];
      for (k=row-1;k>=0;k--) 
	acc -= L->element[row][k]*L->element[col][k];
      if (diag == 0)
	L->element[col][row]=0;
      else
	L->element[col][row]=acc/diag;
    }
  }

  /* copy out the diagonal */
  indep=create_nvector(dim);
  row=0;
  while (row<dim) {
    indep[row]=L->element[row][row];
    row++;
  }

  free_matrix(L);
  return(indep);
}
コード例 #9
0
int main(int argc, char* argv[]) {

  /*
   * Driver for the online (chunked, multi-epoch) training run.
   *
   * Steps: read parameters and the training sample, initialise the
   * structural model, allocate one weight vector per (epoch, chunk),
   * run an external Java helper that splits the training file into
   * chunks, load every chunk, call optimizeMultiVariatePerfMeasure()
   * for each chunk of each epoch, and finally write the model with
   * write_struct_model_online().
   *
   * Returns 0 on success. Exits with status 1 when the epoch/chunk
   * counts are not positive, when an allocation fails, or when a
   * generated command line / chunk path does not fit its buffer.
   */

  printf("Runs with F1 loss in the loss-augmented objective .. only positive data .. with weighting of Fscores .. no regions file");

  double C, epsilon, Cdash;
  LEARN_PARM learn_parm;
  KERNEL_PARM kernel_parm;
  char trainfile[1024];
  char modelfile[1024];
  int MAX_ITER;

  SAMPLE sample;
  STRUCT_LEARN_PARM sparm;
  STRUCTMODEL sm;

  /* read input parameters */
  my_read_input_parameters(argc, argv, trainfile, modelfile, &learn_parm, &kernel_parm, &sparm);

  epsilon = learn_parm.eps;
  C = learn_parm.svm_c;
  Cdash = learn_parm.Cdash;
  MAX_ITER = learn_parm.maxiter;

  /* read in examples */
  sample = read_struct_examples(trainfile,&sparm);

  /* initialization */
  init_struct_model(sample,&sm,&sparm,&learn_parm,&kernel_parm);

  /* No single weight vector w is allocated here; the per-step vectors
     in w_iters (below) take its place. */

  /* all-zero vector handed to every optimisation call */
  double *zeroes = create_nvector(sm.sizePsi);
  clear_nvector(zeroes, sm.sizePsi);

  time_t time_start_full, time_end_full;
  int eid, totalEpochs = learn_parm.totalEpochs;
  int chunkid, numChunks = learn_parm.numChunks;
  double primal_obj_sum;
  double primal_obj = 0.0; /* defined even before the first chunk is processed */
  char chunk_trainfile[1024];

  /* The counts size the allocations below and numChunks is used as a
     divisor, so reject non-positive values up front. */
  if (totalEpochs < 1 || numChunks < 1) {
    fprintf(stderr, "ERROR: totalEpochs (%d) and numChunks (%d) must both be at least 1\n",
            totalEpochs, numChunks);
    exit(1);
  }

  SAMPLE *chunk_dataset = malloc((size_t)numChunks * sizeof *chunk_dataset);
  if (chunk_dataset == NULL) {
    fprintf(stderr, "ERROR: out of memory allocating %d chunk datasets\n", numChunks);
    exit(1);
  }

  /**
   * If we have 'k' chunks and do 'n' epochs, the weight is updated after
   * processing each chunk, giving 'k' weight vectors per epoch and
   * 'k*n' weight vectors in total: w_iters[epoch][chunk].
   */
  // --------------------------------------------------------------------------------------------------------------------------------
  double ***w_iters = malloc((size_t)totalEpochs * sizeof *w_iters);
  if (w_iters == NULL) {
    fprintf(stderr, "ERROR: out of memory allocating weight table for %d epochs\n", totalEpochs);
    exit(1);
  }
  for (eid = 0; eid < totalEpochs; eid++) {
    w_iters[eid] = malloc((size_t)numChunks * sizeof *w_iters[eid]);
    if (w_iters[eid] == NULL) {
      fprintf(stderr, "ERROR: out of memory allocating weight row for epoch %d\n", eid);
      exit(1);
    }
  }
  printf("--3: After 2nd malloc \n");
  for (eid = 0; eid < totalEpochs; eid++) {
    for (chunkid = 0; chunkid < numChunks; chunkid++) {
      w_iters[eid][chunkid] = create_nvector(sm.sizePsi);
      clear_nvector(w_iters[eid][chunkid], sm.sizePsi);
    }
  }
  sm.w_iters = w_iters;
  printf("(ONLINE SVM) Completed the memory alloc for the parameters\n");
  // --------------------------------------------------------------------------------------------------------------------------------

  /**
   * Divide the dataset (X,Y) into 'k' chunks / sub-datasets
   * (X_1,Y_1) ... (X_k,Y_k) by running the Java helper.
   */
  // --------------------------------------------------------------------------------------------------------------------------------
  printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX Changed .... Calling Java to split dataset\n");
  /* Build the command with a bounded write; trainfile alone may be up
     to 1023 characters, so the buffer is sized well above that.
     NOTE(review): trainfile is inserted into a shell command unquoted;
     a path with spaces or shell metacharacters will be misinterpreted
     by system(). Left as is to keep the command format unchanged. */
  char cmd[2048];
  int cmd_len = snprintf(cmd, sizeof cmd,
                         "java -Xmx1G -cp java/bin:java/lib/* "
                         " javaHelpers.splitDataset "
                         "%s %d ",
                         trainfile, numChunks);
  if (cmd_len < 0 || (size_t)cmd_len >= sizeof cmd) {
    fprintf(stderr, "ERROR: dataset split command does not fit in %lu bytes\n",
            (unsigned long)sizeof cmd);
    exit(1);
  }

  printf("Executing cmd : %s\n", cmd);fflush(stdout);
  int split_status = system(cmd);
  if (split_status != 0) {
    /* keep going as before, but do not hide the failure */
    fprintf(stderr, "WARNING: dataset split command returned status %d\n", split_status);
  }
  // --------------------------------------------------------------------------------------------------------------------------------

  for (chunkid = 0; chunkid < numChunks; chunkid++)
  {
    // NOTE: Name hard-coded according to the convention used to create chunked files
    int path_len = snprintf(chunk_trainfile, sizeof chunk_trainfile,
                            "%s.chunks/chunk.%d", trainfile, chunkid);
    if (path_len < 0 || (size_t)path_len >= sizeof chunk_trainfile) {
      fprintf(stderr, "ERROR: chunk file name for chunk %d does not fit in %lu bytes\n",
              chunkid, (unsigned long)sizeof chunk_trainfile);
      exit(1);
    }
    printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX Changed .... Reading chunked dataset\n");
    printf("Chunk trainfile : %s\n",chunk_trainfile);
    chunk_dataset[chunkid] = read_struct_examples_chunk(chunk_trainfile);
  }

  time(&time_start_full);
  for (eid = 0; eid < totalEpochs; eid++)
  {
    printf("(ONLINE LEARNING) : EPOCH %d\n",eid);
    primal_obj_sum = 0.0;
    for (chunkid = 0; chunkid < numChunks; chunkid++) // chunkid runs from 0 to numChunks-1
    {
      /* Every chunk covers sz examples of the full sample; the last
         chunk additionally takes the remainder. */
      int sz = sample.n / numChunks;
      int datasetStartIdx = (chunkid) * sz;
      int chunkSz = (numChunks-1 == chunkid) ? (sample.n - ((numChunks-1)*sz) ) : (sz);

      primal_obj = optimizeMultiVariatePerfMeasure(chunk_dataset[chunkid], datasetStartIdx, chunkSz,
          &sm, &sparm, C, Cdash, epsilon, MAX_ITER, &learn_parm, trainfile, w_iters, eid, chunkid, numChunks, zeroes);

      printf("(ONLINE LEARNING) : FINISHED PROCESSING CHUNK (PSEUDO-DATAPOINT) %d of %d\n",chunkid+1, numChunks);
      primal_obj_sum += primal_obj;
      printf("(OnlineSVM) : Processed pseudo-datapoint -- primal objective sum: %.4f\n", primal_obj_sum);
    }

    // After the completion of one epoch, warm start the next epoch with the
    // weight vector seen at the end of the last chunk of this epoch:
    // w_iters[eid+1][0] <- w_iters[eid][numChunks-1]
    if (eid + 1 < totalEpochs) {
      copy_vector(w_iters[eid+1][0], w_iters[eid][numChunks-1], sm.sizePsi);
      printf("(ONLINE LEARNING) : WARM START ACROSS EPOCHS ..... DONE....\n");
    }

    printf("(OnlineSVM) : EPOCH COMPLETE -- primal objective: %.4f\n", primal_obj);
    printf("(ONLINE LEARNING) : EPOCH %d DONE! .....\n",eid);
  }

  time(&time_end_full);
  /* Array sized by its initialiser: the label is 39 characters long and
     must not be squeezed into a smaller fixed-size buffer. */
  char msg[] = "(ONLINE LEARNING) : Total Time Taken : ";
  print_time(time_start_full, time_end_full, msg);

  printf("(ONLINE LEARNING) Reached here\n");
  /* write structural model */
  write_struct_model_online(modelfile, &sm, &sparm, totalEpochs, numChunks);
  // skip testing for the moment
  printf("(ONLINE LEARNING) Complete dumping\n");

  /* free memory */ //TODO: Need to change this ...
  free_struct_sample(sample);
  free_struct_model(sm, &sparm);
  /* Only the chunk array itself is released here. NOTE(review): the
     contents of the chunk samples, zeroes and the w_iters table are
     not freed because their ownership (e.g. via sm.w_iters inside
     free_struct_model) is not visible from this function. */
  free(chunk_dataset);

  return(0);

}
コード例 #10
0
/*
 * Inner-loop solver: repeatedly adds the cutting plane returned by
 * find_cutting_plane() to a working set, re-solves a QP over that set with
 * mosek_qp_optimize(), rebuilds w from the QP solution and then decides
 * between a "serious step" (the center w_b is moved to w) and a "null step"
 * (only the proximal weight rho is adjusted).
 *
 * Parameters as used in this function:
 *   w          dense weight vector, in/out. Entries 1..sm->sizePsi are read
 *              as the warm start and overwritten with the final center w_b
 *              before returning.
 *   MAX_ITER   upper bound on the number of loop iterations.
 *   C          multiplies `value` in the objective, scales the alpha
 *              threshold, is the upper limit for rho, and is passed to the
 *              QP solver.
 *   epsilon    the loop continues only while expected_descent < -epsilon.
 *   sm         sm->sizePsi is the length used for all dense vectors here.
 *   Cdash, numChunks
 *              the objective contains the term Cdash*margin/numChunks.
 *   w_prev     dense vector; its product with each new constraint is
 *              subtracted from margin.
 *   m, fycache, ex, sparm, tmpdir, trainfile, frac_sim, Fweight,
 *   dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation,
 *   datasetStartIdx, chunkSz, eid, chunkid
 *              not interpreted here; forwarded to find_cutting_plane()
 *              (sm is forwarded as well).
 *
 * Returns primal_obj_b, the objective value recorded for the center w_b.
 *
 * NOTE(review): every realloc() result below is validated only with
 * assert(), which performs no check when NDEBUG is defined.
 */
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex,
		STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, char *tmpdir, char * trainfile, double frac_sim, double Fweight,
		char *dataset_stats_file, double rho_admm, long isExhaustive, long isLPrelaxation, double Cdash, int datasetStartIdx, int chunkSz,
		int eid, int chunkid, double *w_prev, int numChunks) {
//	  printf("Addr. of w (inside cp_algo) %x\t%x\n",w,sm->w);
  long i,j;
  double xi; /* set to 0.0 below and not read afterwards */
  double *alpha; /* QP solution, one entry per active constraint */
  double **G; /* Gram matrix */
  DOC **dXc; /* constraint matrix */
  double *delta; /* rhs of constraints */
  SVECTOR *new_constraint;
  double dual_obj, alphasum;
  int iter, size_active; 
  double value;
  int r; /* return code of the QP solver; stored but not inspected */
  int *idle; /* for cleaning up */
  double margin;
  double primal_obj;
  double *proximal_rhs; /* linear term handed to the QP solver */
  double *gammaG0=NULL; /* gammaG0[i] = <w_b, constraint i> */
  double min_rho = 0.001;
  double max_rho;
  double serious_counter=0; /* incremented on serious steps, decremented otherwise; not read */
  double rho = 1.0; /* temporarily set it to 1 first */

  double expected_descent, primal_obj_b=-1, reg_master_obj;
  int null_step=1; /* stays 1 until the first serious step */
  double *w_b; /* current center */
  double kappa=0.1; /* fraction of expected_descent required for sufficient decrease */
  double temp_var;
  double proximal_term, primal_lower_bound;

  double v_k; 
  double obj_difference; 
  double *cut_error; // cut_error[i] = alpha_{k,i} at current center x_k
  double sigma_k; 
  double m2 = 0.2; /* factor in the extra serious-step test gTd > m2*v_k */
  double m3 = 0.9; /* factor in the null-step test on cut_error */
  double gTd; 
  double last_sigma_k=0; 

  double initial_primal_obj;
  int suff_decrease_cond=0;
  double decrease_proportion = 0.2; // start from 0.2 first 

  double z_k_norm;
  double last_z_k_norm=0;

  w_b = create_nvector(sm->sizePsi);
  clear_nvector(w_b,sm->sizePsi);
  /* warm start */
  for (i=1;i<sm->sizePsi+1;i++) {
    w_b[i] = w[i];
  }

  iter = 0;
  size_active = 0;
  xi = 0.0;
  /* all working-set arrays start empty and are grown with realloc() */
  alpha = NULL;
  G = NULL;
  dXc = NULL;
  delta = NULL;
  idle = NULL;

  proximal_rhs = NULL;
  cut_error = NULL; 

  printf("ITER 0 \n(before cutting plane) \n");
  double margin2;
  /* first cutting plane; find_cutting_plane also fills margin and margin2 */
  new_constraint = find_cutting_plane (ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile, frac_sim,
		  Fweight, dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation, &margin2,
		  datasetStartIdx, chunkSz, eid, chunkid);
  value = margin2 - sprod_ns(w, new_constraint);

  margin -= sprod_ns(w_prev, new_constraint); //(Ajay: ONLINE LEARNING) IMPT NOTE --> constant addition to the loss ..
                                              // model score using w_prev values ('-' is used because the terms are reversed in the code)
	
  primal_obj_b = 0.5*sprod_nn(w_b,w_b,sm->sizePsi)+C*value + Cdash*margin/numChunks; // Ajay: Change in obj involving both hamming and F1 loss
  primal_obj = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value + Cdash*margin/numChunks; // Ajay: Change in obj involving both hamming and F1 loss;
  primal_lower_bound = 0;
  expected_descent = -primal_obj_b;
  initial_primal_obj = primal_obj_b; 

  max_rho = C; 

  printf("Running CCCP inner loop solver: \n"); fflush(stdout);

  time_t iter_start, iter_end;

  /* stop on sufficient relative decrease, small expected descent, or the iteration cap */
  while ((!suff_decrease_cond)&&(expected_descent<-epsilon)&&(iter<MAX_ITER)) { 
    iter+=1;
    size_active+=1;

    time(&iter_start);

#if (DEBUG_LEVEL>0)
    printf("ITER %d\n", iter); 
#endif
    printf("."); fflush(stdout); 

    /* add  constraint */
    dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active);
    assert(dXc!=NULL);
    dXc[size_active-1] = (DOC*)malloc(sizeof(DOC));
    dXc[size_active-1]->fvec = new_constraint; 
    dXc[size_active-1]->slackid = 1; // only one common slackid (one-slack)
    dXc[size_active-1]->costfactor = 1.0;

    delta = (double*)realloc(delta, sizeof(double)*size_active);
    assert(delta!=NULL);
    delta[size_active-1] = margin2; // Ajay: changing for the formulation combining hamming and F1loss
    alpha = (double*)realloc(alpha, sizeof(double)*size_active);
    assert(alpha!=NULL);
    alpha[size_active-1] = 0.0;
    idle = (int*)realloc(idle, sizeof(int)*size_active);
    assert(idle!=NULL); 
    idle[size_active-1] = 0;
    /* proximal point */
    proximal_rhs = (double*)realloc(proximal_rhs, sizeof(double)*size_active);
    assert(proximal_rhs!=NULL); 
    cut_error = (double*)realloc(cut_error, sizeof(double)*size_active); 
    assert(cut_error!=NULL); 
    // note g_i = - new_constraint
    cut_error[size_active-1] = C*(sprod_ns(w_b, new_constraint) - sprod_ns(w, new_constraint)); 
    cut_error[size_active-1] += (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi)); 
    cut_error[size_active-1] -= (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi)); 

    gammaG0 = (double*)realloc(gammaG0, sizeof(double)*size_active);
    assert(gammaG0!=NULL);
      
    /* update Gram matrix */
    G = (double**)realloc(G, sizeof(double*)*size_active);
    assert(G!=NULL);
    G[size_active-1] = NULL; /* so the realloc below allocates the new row from scratch */
    for (j=0;j<size_active;j++) {
      G[j] = (double*)realloc(G[j], sizeof(double)*size_active);
      assert(G[j]!=NULL);
    }
    /* fill the new row and column symmetrically, then the new diagonal entry */
    for (j=0;j<size_active-1;j++) {
      G[size_active-1][j] = sprod_ss(dXc[size_active-1]->fvec, dXc[j]->fvec);
      G[j][size_active-1] = G[size_active-1][j];
    }
    G[size_active-1][size_active-1] = sprod_ss(dXc[size_active-1]->fvec,dXc[size_active-1]->fvec);

	
    /* update gammaG0 */
    /* while w_b has not changed (no serious step yet) only the new entry is needed */
    if (null_step==1) {
      gammaG0[size_active-1] = sprod_ns(w_b, dXc[size_active-1]->fvec);
    } else {
      for (i=0;i<size_active;i++) {
	gammaG0[i] = sprod_ns(w_b, dXc[i]->fvec); 
      }
    }

     /* update proximal_rhs */
    for (i=0;i<size_active;i++) {
      proximal_rhs[i] = delta[i] - rho/(1+rho)*gammaG0[i];
    }


    /* solve QP to update alpha */
    dual_obj = 0; 
    time_t mosek_start, mosek_end;
    time(&mosek_start);
    r = mosek_qp_optimize(G, proximal_rhs, alpha, (long) size_active, C, &dual_obj,rho);
    time(&mosek_end);
#if(DEBUG_LEVEL == 1)
    print_time(mosek_start, mosek_end, "Mosek solver");
#endif
    /* DEBUG */
    //printf("r: %d\n", r); fflush(stdout);
    /* END DEBUG */

    /* rebuild w: sum of constraints with alpha above the threshold, scaled by 1/(1+rho) ... */
    clear_nvector(w,sm->sizePsi);
    for (j=0;j<size_active;j++) {
      if (alpha[j]>C*ALPHA_THRESHOLD) {
	add_vector_ns(w,dXc[j]->fvec,alpha[j]/(1+rho));
      }
    }

    /* norm of the constraint part only, taken before the center is added */
    z_k_norm = sqrt(sprod_nn(w,w,sm->sizePsi)); 

    /* ... plus the center w_b with weight rho/(1+rho) */
    add_vector_nn(w, w_b, sm->sizePsi, rho/(1+rho));

    
    /* detect if step size too small */
    sigma_k = 0; 
    alphasum = 0; 
    for (j=0;j<size_active;j++) {
      sigma_k += alpha[j]*cut_error[j]; 
      alphasum+=alpha[j]; 
    }
    sigma_k/=C; 
    /* new_constraint is still the plane added at the top of this iteration */
    gTd = -C*(sprod_ns(w,new_constraint) - sprod_ns(w_b,new_constraint));

#if (DEBUG_LEVEL>0)
    for (j=0;j<size_active;j++) {
      printf("alpha[%d]: %.8g, cut_error[%d]: %.8g\n", j, alpha[j], j, cut_error[j]);
    }
    printf("sigma_k: %.8g\n", sigma_k); 
    printf("alphasum: %.8g\n", alphasum);
    printf("g^T d: %.8g\n", gTd); 
    fflush(stdout); 
#endif


    /* update cleanup information */
    /* idle[j] counts consecutive iterations in which alpha[j] stayed below the threshold */
    for (j=0;j<size_active;j++) {
      if (alpha[j]<ALPHA_THRESHOLD*C) {
	idle[j]++;
      } else {
        idle[j]=0;
      }
    }

  /* next cutting plane; it is appended to the working set at the top of the next iteration */
  new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile,
		  frac_sim, Fweight, dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation,
		  &margin2, datasetStartIdx, chunkSz, eid, chunkid);
 //   new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile, frac_sim, Fweight, dataset_stats_file, rho);
    value = margin2 - sprod_ns(w, new_constraint);

    margin -= sprod_ns(w_prev, new_constraint); //(Ajay: ONLINE LEARNING) IMPT NOTE --> constant addition to the loss ..
                                                // model score using w_prev values ('-' is used because the terms are reversed in the code)

    /* print primal objective */
    primal_obj = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value + Cdash*margin/numChunks; // Ajay: Change in obj involving both hamming and F1 loss;
     
#if (DEBUG_LEVEL>0)
    printf("ITER PRIMAL_OBJ %.4f\n", primal_obj); fflush(stdout);
#endif
    
 
    temp_var = sprod_nn(w_b,w_b,sm->sizePsi); 
    /* squared distance between w and the center, over entries 1..sizePsi */
    proximal_term = 0.0;
    for (i=1;i<sm->sizePsi+1;i++) {
      proximal_term += (w[i]-w_b[i])*(w[i]-w_b[i]);
    }
    
    reg_master_obj = -dual_obj+0.5*rho*temp_var/(1+rho);
    expected_descent = reg_master_obj - primal_obj_b;

    v_k = (reg_master_obj - proximal_term*rho/2) - primal_obj_b; 

    /* primal_lower_bound is only reported in the debug output below */
    primal_lower_bound = MAX(primal_lower_bound, reg_master_obj - 0.5*rho*(1+rho)*proximal_term);

#if (DEBUG_LEVEL>0)
    printf("ITER REG_MASTER_OBJ: %.4f\n", reg_master_obj);
    printf("ITER EXPECTED_DESCENT: %.4f\n", expected_descent);
    printf("ITER PRIMLA_OBJ_B: %.4f\n", primal_obj_b);
    printf("ITER RHO: %.4f\n", rho);
    printf("ITER ||w-w_b||^2: %.4f\n", proximal_term);
    printf("ITER PRIMAL_LOWER_BOUND: %.4f\n", primal_lower_bound);
    printf("ITER V_K: %.4f\n", v_k); 
#endif
    obj_difference = primal_obj - primal_obj_b; 


    /* sufficient decrease: new objective beats the center by at least kappa*expected_descent */
    if (primal_obj<primal_obj_b+kappa*expected_descent) {
      /* extra condition to be met */
      if ((gTd>m2*v_k)||(rho<min_rho+1E-8)) {
#if (DEBUG_LEVEL>0)
	printf("SERIOUS STEP\n");
#endif
	/* update cut_error */
	/* re-express every cut error relative to the new center w (old center terms out, new terms in) */
	for (i=0;i<size_active;i++) {
	  cut_error[i] -= (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi)); 
	  cut_error[i] -= C*sprod_ns(w_b, dXc[i]->fvec); 
	  cut_error[i] += (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi));
	  cut_error[i] += C*sprod_ns(w, dXc[i]->fvec); 
	}
	/* move the center to w */
	primal_obj_b = primal_obj;
	for (i=1;i<sm->sizePsi+1;i++) {
	  w_b[i] = w[i];
	}
	null_step = 0;
	serious_counter++;	
      } else {
	/* increase step size */
#if (DEBUG_LEVEL>0)
	printf("NULL STEP: SS(ii) FAILS.\n");
#endif
	serious_counter--; 
	rho = MAX(rho/10,min_rho); /* rho shrinks by 10x, bounded below by min_rho */
      }
    } else { /* no sufficient decrease */
      serious_counter--; 
      if ((cut_error[size_active-1]>m3*last_sigma_k)&&(fabs(obj_difference)>last_z_k_norm+last_sigma_k)) {
#if (DEBUG_LEVEL>0)
	printf("NULL STEP: NS(ii) FAILS.\n");
#endif
	rho = MIN(10*rho,max_rho); /* rho grows by 10x, bounded above by max_rho (= C) */
      } 
#if (DEBUG_LEVEL>0)
      else printf("NULL STEP\n");
#endif
    }
    /* update last_sigma_k */
    last_sigma_k = sigma_k; 
    last_z_k_norm = z_k_norm; 


    /* break away from while loop if more than certain proportional decrease in primal objective */
    /* NOTE(review): divides by initial_primal_obj without checking it for zero */
    if (primal_obj_b/initial_primal_obj<1-decrease_proportion) {
      suff_decrease_cond = 1; 
    }

    /* clean up */
    /* every CLEANUP_CHECK iterations resize_cleanup returns the new working-set size */
    if (iter % CLEANUP_CHECK == 0) {
      size_active = resize_cleanup(size_active, idle, alpha, delta, gammaG0, proximal_rhs, G, dXc, cut_error);
    }

	time(&iter_end);

#if (DEBUG_LEVEL==1)
	char msg[20];
	sprintf(msg,"ITER %d",iter);
    print_time(iter_start, iter_end, msg);
#endif
  } // end cutting plane while loop 

  printf(" Inner loop optimization finished.\n"); fflush(stdout); 
      
  /* free memory */
  /* NOTE(review): free_example is called with 0 as second argument; whether
     that also releases dXc[j]->fvec is not visible here -- confirm */
  for (j=0;j<size_active;j++) {
    free(G[j]);
    free_example(dXc[j],0);	
  }
  free(G);
  free(dXc);
  free(alpha);
  free(delta);
  /* the most recent cutting plane was never added to dXc, so it is released separately */
  free_svector(new_constraint);
  free(idle);
  free(gammaG0);
  free(proximal_rhs);
  free(cut_error); 

  /* copy and free */
  /* hand the final center back to the caller through w */
  for (i=1;i<sm->sizePsi+1;i++) {
    w[i] = w_b[i];
  }
  free(w_b);

  return(primal_obj_b);

}
コード例 #11
0
ファイル: svm_struct_learn.cpp プロジェクト: aa755/cfg3d
void svm_learn_struct_joint(SAMPLE sample, STRUCT_LEARN_PARM *sparm,
			    LEARN_PARM *lparm, KERNEL_PARM *kparm, 
			    STRUCTMODEL *sm, int alg_type)
{
  int         i,j;
  int         numIt=0;
  long        argmax_count=0;
  long        totconstraints=0;
  long        kernel_type_org;
  double      epsilon,epsilon_cached;
  double      lhsXw,rhs_i;
  double      rhs=0;
  double      slack,ceps;
  double      dualitygap,modellength,alphasum;
  long        sizePsi;
  double      *alpha=NULL;
  long        *alphahist=NULL,optcount=0;
  CONSTSET    cset;
  SVECTOR     *diff=NULL;
  double      *lhs_n=NULL;
  SVECTOR     *fy, *fydelta, **fycache, *lhs;
  MODEL       *svmModel=NULL;
  DOC         *doc;

  long        n=sample.n;
  EXAMPLE     *ex=sample.examples;
  double      rt_total=0,rt_opt=0,rt_init=0,rt_psi=0,rt_viol=0,rt_kernel=0;
  double      rt_cacheupdate=0,rt_cacheconst=0,rt_cacheadd=0,rt_cachesum=0;
  double      rt1=0,rt2=0;
  long        progress;

  /*
  SVECTOR     ***fydelta_cache=NULL;
  double      **loss_cache=NULL;
  int         cache_size=0;
  */
  CCACHE      *ccache=NULL;
  int         cached_constraint;
  double      viol,viol_est,epsilon_est=0;
  long        uptr=0;
  long        *randmapping=NULL;
  long        batch_size=n;

  rt1=get_runtime();

  if(sparm->batch_size<100)
    batch_size=sparm->batch_size*n/100.0;

  init_struct_model(sample,sm,sparm,lparm,kparm); 
  sizePsi=sm->sizePsi+1;          /* sm must contain size of psi on return */

  if(sparm->slack_norm == 1) {
    lparm->svm_c=sparm->C;          /* set upper bound C */
    lparm->sharedslack=1;
  }
  else if(sparm->slack_norm == 2) {
    printf("ERROR: The joint algorithm does not apply to L2 slack norm!"); 
    fflush(stdout);
    exit(0); 
  }
  else {
    printf("ERROR: Slack norm must be L1 or L2!"); fflush(stdout);
    exit(0);
  }


  lparm->biased_hyperplane=0;     /* set threshold to zero */
  epsilon=100.0;                  /* start with low precision and
				     increase later */
  epsilon_cached=epsilon;         /* epsilon to use for iterations
				     using constraints constructed
				     from the constraint cache */

  cset=init_struct_constraints(sample, sm, sparm);
  if(cset.m > 0) {
    alpha=(double *)realloc(alpha,sizeof(double)*cset.m);
    alphahist=(long *)realloc(alphahist,sizeof(long)*cset.m);
    for(i=0; i<cset.m; i++) {
      alpha[i]=0;
      alphahist[i]=-1; /* -1 makes sure these constraints are never removed */
    }
  }
  kparm->gram_matrix=NULL;
  if((alg_type == ONESLACK_DUAL_ALG) || (alg_type == ONESLACK_DUAL_CACHE_ALG))
    kparm->gram_matrix=init_kernel_matrix(&cset,kparm);

  /* set initial model and slack variables */
  svmModel=(MODEL *)my_malloc(sizeof(MODEL));
  lparm->epsilon_crit=epsilon;
  svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi,
			 lparm,kparm,NULL,svmModel,alpha);
  add_weight_vector_to_linear_model(svmModel);
  sm->svm_model=svmModel;
  sm->w=svmModel->lin_weights; /* short cut to weight vector */

  /* create a cache of the feature vectors for the correct labels */
  fycache=(SVECTOR **)my_malloc(n*sizeof(SVECTOR *));
  for(i=0;i<n;i++) {
    if(USE_FYCACHE) {
      fy=psi(ex[i].x,ex[i].y,sm,sparm);
      if(kparm->kernel_type == LINEAR_KERNEL) { /* store difference vector directly */
	diff=add_list_sort_ss_r(fy,COMPACT_ROUNDING_THRESH); 
	free_svector(fy);
	fy=diff;
      }
    }
    else
      fy=NULL;
    fycache[i]=fy;
  }

  /* initialize the constraint cache */
  if(alg_type == ONESLACK_DUAL_CACHE_ALG) {
    ccache=create_constraint_cache(sample,sparm,sm);
    /* NOTE:  */
    for(i=0;i<n;i++) 
      if(loss(ex[i].y,ex[i].y,sparm) != 0) {
	printf("ERROR: Loss function returns non-zero value loss(y_%d,y_%d)\n",i,i);
	printf("       W4 algorithm assumes that loss(y_i,y_i)=0 for all i.\n");
	exit(1);
      }
  }
  
  if(kparm->kernel_type == LINEAR_KERNEL)
    lhs_n=create_nvector(sm->sizePsi);

  /* randomize order or training examples */
  if(batch_size<n)
    randmapping=random_order(n);

  rt_init+=MAX(get_runtime()-rt1,0);
  rt_total+=rt_init;

    /*****************/
   /*** main loop ***/
  /*****************/
  do { /* iteratively find and add constraints to working set */

      if(struct_verbosity>=1) { 
	printf("Iter %i: ",++numIt); 
	fflush(stdout);
      }
      
      rt1=get_runtime();

      /**** compute current slack ****/
      alphasum=0;
      for(j=0;(j<cset.m);j++) 
	  alphasum+=alpha[j];
      for(j=0,slack=-1;(j<cset.m) && (slack==-1);j++)  
	if(alpha[j] > alphasum/cset.m)
	  slack=MAX(0,cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
      slack=MAX(0,slack);

      rt_total+=MAX(get_runtime()-rt1,0);

      /**** find a violated joint constraint ****/
      lhs=NULL;
      rhs=0;
      if(alg_type == ONESLACK_DUAL_CACHE_ALG) {
	rt1=get_runtime();
	/* Compute violation of constraints in cache for current w */
	if(struct_verbosity>=2) rt2=get_runtime();
	update_constraint_cache_for_model(ccache, svmModel);
	if(struct_verbosity>=2) rt_cacheupdate+=MAX(get_runtime()-rt2,0);
	/* Is there is a sufficiently violated constraint in cache? */
	viol=compute_violation_of_constraint_in_cache(ccache,epsilon_est/2);
	if(viol-slack > MAX(epsilon_est/10,sparm->epsilon)) { 
	  /* There is a sufficiently violated constraint in cache, so
	     use this constraint in this iteration. */
	  if(struct_verbosity>=2) rt2=get_runtime();
	  viol=find_most_violated_joint_constraint_in_cache(ccache,
					       epsilon_est/2,lhs_n,&lhs,&rhs);
	  if(struct_verbosity>=2) rt_cacheconst+=MAX(get_runtime()-rt2,0);
	  cached_constraint=1;
	}
	else {
	  /* There is no sufficiently violated constraint in cache, so
	     update cache by computing most violated constraint
	     explicitly for batch_size examples. */
	  viol_est=0;
	  progress=0;
	  viol=compute_violation_of_constraint_in_cache(ccache,0);
	  for(j=0;(j<batch_size) || ((j<n)&&(viol-slack<sparm->epsilon));j++) {
	    if(struct_verbosity>=1) 
	      print_percent_progress(&progress,n,10,".");
	    uptr=uptr % n;
	    if(randmapping) 
	      i=randmapping[uptr];
	    else
	      i=uptr;
	    /* find most violating fydelta=fy-fybar and rhs for example i */
	    find_most_violated_constraint(&fydelta,&rhs_i,&ex[i],
					  fycache[i],n,sm,sparm,
					  &rt_viol,&rt_psi,&argmax_count);
	    /* add current fy-fybar and loss to cache */
	    if(struct_verbosity>=2) rt2=get_runtime();
	    viol+=add_constraint_to_constraint_cache(ccache,sm->svm_model,
			     i,fydelta,rhs_i,0.0001*sparm->epsilon/n,
			     sparm->ccache_size,&rt_cachesum);
	    if(struct_verbosity>=2) rt_cacheadd+=MAX(get_runtime()-rt2,0);
	    viol_est+=ccache->constlist[i]->viol;
	    uptr++;
	  }
	  cached_constraint=(j<n);
	  if(struct_verbosity>=2) rt2=get_runtime();
	  if(cached_constraint)
	    viol=find_most_violated_joint_constraint_in_cache(ccache,
					       epsilon_est/2,lhs_n,&lhs,&rhs);
	  else
	    viol=find_most_violated_joint_constraint_in_cache(ccache,0,lhs_n,
							 &lhs,&rhs);
	  if(struct_verbosity>=2) rt_cacheconst+=MAX(get_runtime()-rt2,0);
	  viol_est*=((double)n/j);
	  epsilon_est=(1-(double)j/n)*epsilon_est+(double)j/n*(viol_est-slack);
	  if((struct_verbosity >= 1) && (j!=n))
	    printf("(upd=%5.1f%%,eps^=%.4f,eps*=%.4f)",
		   100.0*j/n,viol_est-slack,epsilon_est);
	}
	lhsXw=rhs-viol;

	rt_total+=MAX(get_runtime()-rt1,0);
      }
      else { 
	/* do not use constraint from cache */
	rt1=get_runtime();
	cached_constraint=0;
	if(kparm->kernel_type == LINEAR_KERNEL)
	  clear_nvector(lhs_n,sm->sizePsi);
	progress=0;
	rt_total+=MAX(get_runtime()-rt1,0);

	for(i=0; i<n; i++) {
	  rt1=get_runtime();

	  if(struct_verbosity>=1) 
	    print_percent_progress(&progress,n,10,".");

	  /* compute most violating fydelta=fy-fybar and rhs for example i */
	  find_most_violated_constraint(&fydelta,&rhs_i,&ex[i],fycache[i],n,
				      sm,sparm,&rt_viol,&rt_psi,&argmax_count);
	  /* add current fy-fybar to lhs of constraint */
	  if(kparm->kernel_type == LINEAR_KERNEL) {
	    add_list_n_ns(lhs_n,fydelta,1.0); /* add fy-fybar to sum */
	    free_svector(fydelta);
	  }
	  else {
	    append_svector_list(fydelta,lhs); /* add fy-fybar to vector list */
	    lhs=fydelta;
	  }
	  rhs+=rhs_i;                         /* add loss to rhs */
	  
	  rt_total+=MAX(get_runtime()-rt1,0);

	} /* end of example loop */

	rt1=get_runtime();

	/* create sparse vector from dense sum */
	if(kparm->kernel_type == LINEAR_KERNEL)
	  lhs=create_svector_n_r(lhs_n,sm->sizePsi,NULL,1.0,
				 COMPACT_ROUNDING_THRESH);
	doc=create_example(cset.m,0,1,1,lhs);
	lhsXw=classify_example(svmModel,doc);
	free_example(doc,0);
	viol=rhs-lhsXw;

	rt_total+=MAX(get_runtime()-rt1,0);

      } /* end of finding most violated joint constraint */

      rt1=get_runtime();

      /**** if `error', then add constraint and recompute QP ****/
      if(slack > (rhs-lhsXw+0.000001)) {
	printf("\nWARNING: Slack of most violated constraint is smaller than slack of working\n");
	printf("         set! There is probably a bug in 'find_most_violated_constraint_*'.\n");
	printf("slack=%f, newslack=%f\n",slack,rhs-lhsXw);
	/* exit(1); */
      }
      ceps=MAX(0,rhs-lhsXw-slack);
      if((ceps > sparm->epsilon) || cached_constraint) { 
	/**** resize constraint matrix and add new constraint ****/
	cset.lhs=(DOC **)realloc(cset.lhs,sizeof(DOC *)*(cset.m+1));
	cset.lhs[cset.m]=create_example(cset.m,0,1,1,lhs);
	cset.rhs=(double *)realloc(cset.rhs,sizeof(double)*(cset.m+1));
	cset.rhs[cset.m]=rhs;
	alpha=(double *)realloc(alpha,sizeof(double)*(cset.m+1));
	alpha[cset.m]=0;
	alphahist=(long *)realloc(alphahist,sizeof(long)*(cset.m+1));
	alphahist[cset.m]=optcount;
	cset.m++;
	totconstraints++;
	if((alg_type == ONESLACK_DUAL_ALG) 
	   || (alg_type == ONESLACK_DUAL_CACHE_ALG)) {
	  if(struct_verbosity>=2) rt2=get_runtime();
	  kparm->gram_matrix=update_kernel_matrix(kparm->gram_matrix,cset.m-1,
						  &cset,kparm);
	  if(struct_verbosity>=2) rt_kernel+=MAX(get_runtime()-rt2,0);
	}
	
	/**** get new QP solution ****/
	if(struct_verbosity>=1) {
	  printf("*");fflush(stdout);
	}
	if(struct_verbosity>=2) rt2=get_runtime();
	/* set svm precision so that higher than eps of most violated constr */
	if(cached_constraint) {
	  epsilon_cached=MIN(epsilon_cached,ceps); 
	  lparm->epsilon_crit=epsilon_cached/2; 
	}
	else {
	  epsilon=MIN(epsilon,ceps); /* best eps so far */
	  lparm->epsilon_crit=epsilon/2; 
	  epsilon_cached=epsilon;
	}
	free_model(svmModel,0);
	svmModel=(MODEL *)my_malloc(sizeof(MODEL));
	/* Run the QP solver on cset. */
	kernel_type_org=kparm->kernel_type;
	if((alg_type == ONESLACK_DUAL_ALG) 
	   || (alg_type == ONESLACK_DUAL_CACHE_ALG))
	  kparm->kernel_type=GRAM; /* use kernel stored in kparm */
	svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi,
			       lparm,kparm,NULL,svmModel,alpha);
	kparm->kernel_type=kernel_type_org; 
	svmModel->kernel_parm.kernel_type=kernel_type_org;
	/* Always add weight vector, in case part of the kernel is
	   linear. If not, ignore the weight vector since its
	   content is bogus. */
	add_weight_vector_to_linear_model(svmModel);
	sm->svm_model=svmModel;
	sm->w=svmModel->lin_weights; /* short cut to weight vector */
	optcount++;
	/* keep track of when each constraint was last
	   active. constraints marked with -1 are not updated */
	for(j=0;j<cset.m;j++) 
	  if((alphahist[j]>-1) && (alpha[j] != 0))  
	    alphahist[j]=optcount;
	if(struct_verbosity>=2) rt_opt+=MAX(get_runtime()-rt2,0);
	
	/* Check if some of the linear constraints have not been
	   active in a while. Those constraints are then removed to
	   avoid bloating the working set beyond necessity. */
	if(struct_verbosity>=3)
	  printf("Reducing working set...");fflush(stdout);
	remove_inactive_constraints(&cset,alpha,optcount,alphahist,50);
	if(struct_verbosity>=3)
	  printf("done. ");
      }
      else {
	free_svector(lhs);
      }

      if(struct_verbosity>=1)
	printf("(NumConst=%d, SV=%ld, CEps=%.4f, QPEps=%.4f)\n",cset.m,
	       svmModel->sv_num-1,ceps,svmModel->maxdiff);

      rt_total+=MAX(get_runtime()-rt1,0);

  } while(finalize_iteration(ceps,cached_constraint,sample,sm,cset,alpha,sparm)|| cached_constraint || (ceps > sparm->epsilon) );

  // originally like below ... finalize_iteration was not called because of short-circuit evaluation
//  } while(cached_constraint || (ceps > sparm->epsilon) || 
//	  finalize_iteration(ceps,cached_constraint,sample,sm,cset,alpha,sparm)
//	 );
  
  if(struct_verbosity>=1) {
    printf("Final epsilon on KKT-Conditions: %.5f\n",
	   MAX(svmModel->maxdiff,ceps));

    slack=0;
    for(j=0;j<cset.m;j++) 
      slack=MAX(slack,
		cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
    alphasum=0;
    for(i=0; i<cset.m; i++)  
      alphasum+=alpha[i]*cset.rhs[i];
    if(kparm->kernel_type == LINEAR_KERNEL)
      modellength=model_length_n(svmModel);
    else
      modellength=model_length_s(svmModel);
    dualitygap=(0.5*modellength*modellength+sparm->C*viol)
               -(alphasum-0.5*modellength*modellength);
    
    printf("Upper bound on duality gap: %.5f\n", dualitygap);
    printf("Dual objective value: dval=%.5f\n",
	    alphasum-0.5*modellength*modellength);
    printf("Primal objective value: pval=%.5f\n",
	    0.5*modellength*modellength+sparm->C*viol);
    printf("Total number of constraints in final working set: %i (of %i)\n",(int)cset.m,(int)totconstraints);
    printf("Number of iterations: %d\n",numIt);
    printf("Number of calls to 'find_most_violated_constraint': %ld\n",argmax_count);
    printf("Number of SV: %ld \n",svmModel->sv_num-1);
    printf("Norm of weight vector: |w|=%.5f\n",modellength);
    printf("Value of slack variable (on working set): xi=%.5f\n",slack);
    printf("Value of slack variable (global): xi=%.5f\n",viol);
    printf("Norm of longest difference vector: ||Psi(x,y)-Psi(x,ybar)||=%.5f\n",
	   length_of_longest_document_vector(cset.lhs,cset.m,kparm));
    if(struct_verbosity>=2) 
      printf("Runtime in cpu-seconds: %.2f (%.2f%% for QP, %.2f%% for kernel, %.2f%% for Argmax, %.2f%% for Psi, %.2f%% for init, %.2f%% for cache update, %.2f%% for cache const, %.2f%% for cache add (incl. %.2f%% for sum))\n",
	   rt_total/100.0, (100.0*rt_opt)/rt_total, (100.0*rt_kernel)/rt_total,
	   (100.0*rt_viol)/rt_total, (100.0*rt_psi)/rt_total, 
	   (100.0*rt_init)/rt_total,(100.0*rt_cacheupdate)/rt_total,
	   (100.0*rt_cacheconst)/rt_total,(100.0*rt_cacheadd)/rt_total,
	   (100.0*rt_cachesum)/rt_total);
    else if(struct_verbosity==1) 
      printf("Runtime in cpu-seconds: %.2f\n",rt_total/100.0);
  }
  if(ccache) {
    long cnum=0;
    CCACHEELEM *celem;
    for(i=0;i<n;i++) 
      for(celem=ccache->constlist[i];celem;celem=celem->next) 
	cnum++;
    printf("Final number of constraints in cache: %ld\n",cnum);
  }
  if(struct_verbosity>=4)
    printW(sm->w,sizePsi,n,lparm->svm_c);

  if(svmModel) {
    sm->svm_model=copy_model(svmModel);
    sm->w=sm->svm_model->lin_weights; /* short cut to weight vector */
    free_model(svmModel,0);
  }

  print_struct_learning_stats(sample,sm,cset,alpha,sparm);

  if(lhs_n)
    free_nvector(lhs_n);
  if(ccache)    
    free_constraint_cache(ccache);
  for(i=0;i<n;i++)
    if(fycache[i])
      free_svector(fycache[i]);
  free(fycache);
  free(alpha); 
  free(alphahist); 
  free(cset.rhs); 
  for(i=0;i<cset.m;i++) 
    free_example(cset.lhs[i],1);
  free(cset.lhs);
  if(kparm->gram_matrix)
    free_matrix(kparm->gram_matrix);
}
コード例 #12
0
/*
 * Training driver for the self-paced-learning variant.
 *
 * Reads the command-line parameters and the training examples, caches the
 * joint feature vector psi(x,y) of every training example, runs
 * alternate_convex_search() once and writes the resulting structural model
 * to modelfile.
 *
 * Returns 0 on success and 1 if one of the per-example arrays cannot be
 * allocated.
 */
int main(int argc, char* argv[]) {

  double *w; /* weight vector */
  long m, i;
  double C, epsilon;
  LEARN_PARM learn_parm;
  KERNEL_PARM kernel_parm;
  char trainfile[1024];
  char modelfile[1024];
  int MAX_ITER;
  /* new struct variables */
  SVECTOR **fycache, *diff, *fy;
  EXAMPLE *ex;
  SAMPLE alldata;
  SAMPLE sample;
  SAMPLE val;
  STRUCT_LEARN_PARM sparm;
  STRUCTMODEL sm;

  double primal_obj;

  /* self-paced learning variables */
  double init_spl_weight;
  double spl_weight;
  double spl_factor;
  int *valid_examples;


  /* read input parameters */
  my_read_input_parameters(argc, argv, trainfile, modelfile, &learn_parm, &kernel_parm, &sparm,
                           &init_spl_weight, &spl_factor);

  epsilon = learn_parm.eps;
  C = learn_parm.svm_c;
  MAX_ITER = learn_parm.maxiter;

  /* read in examples */
  alldata = read_struct_examples(trainfile,&sparm);
  /* The training fraction is fixed at 1.0, so ntrain equals alldata.n and
     the train/validation split below is not taken; `val` is only touched
     under the same (ntrain < alldata.n) condition further down. */
  int ntrain = (int) round(1.0*alldata.n); /* no validation set */
  if(ntrain < alldata.n)
  {
    long *perm = randperm(alldata.n);
    sample = generate_train_set(alldata, perm, ntrain);
    val = generate_validation_set(alldata, perm, ntrain);
    free(perm);
  }
  else
  {
    sample = alldata;
  }
  ex = sample.examples;
  m = sample.n;

  /* initialization */
  init_struct_model(alldata,&sm,&sparm,&learn_parm,&kernel_parm);

  w = create_nvector(sm.sizePsi);
  clear_nvector(w, sm.sizePsi);
  sm.w = w; /* establish link to w, as long as w does not change pointer */

  /* some training information */
  printf("C: %.8g\n", C);
  printf("spl weight: %.8g\n",init_spl_weight);
  printf("epsilon: %.8g\n", epsilon);
  printf("sample.n: %d\n", sample.n);
  printf("sm.sizePsi: %ld\n", sm.sizePsi); fflush(stdout);


  /* prepare feature vector cache for correct labels with imputed latent variables */
  fycache = (SVECTOR**)malloc(m*sizeof(SVECTOR*));
  /* malloc(0) may legitimately return NULL, so only treat NULL as a failure
     when there is at least one example to store */
  if ((fycache == NULL) && (m > 0)) {
    printf("ERROR: out of memory allocating feature vector cache\n");
    fflush(stdout);
    return(1);
  }
  for (i=0;i<m;i++) {
    /* collapse the vector list returned by psi() into a single vector */
    fy = psi(ex[i].x, ex[i].y, &sm, &sparm);
    diff = add_list_ss(fy);
    free_svector(fy);
    fy = diff;
    fycache[i] = fy;
  }

  /* learn initial weight vector using all training examples */
  valid_examples = (int *) malloc(m*sizeof(int));
  if ((valid_examples == NULL) && (m > 0)) {
    printf("ERROR: out of memory allocating valid_examples\n");
    fflush(stdout);
    return(1);
  }

  /* initializations */
  spl_weight = init_spl_weight;

  /* solve biconvex self-paced learning problem */
  primal_obj = alternate_convex_search(w, m, MAX_ITER, C, epsilon, fycache, ex, &sm, &sparm, valid_examples, spl_weight);
  printf("primal objective: %.4f\n", primal_obj);
  fflush(stdout);

  /* errors for validation set */
  if(ntrain < alldata.n) {
    double cur_loss = compute_current_loss(val,&sm,&sparm);
    printf("CURRENT LOSS: %f\n",cur_loss);
  }


  /* write structural model */
  write_struct_model(modelfile, &sm, &sparm);
  // skip testing for the moment

  /* free memory */
  free_struct_sample(alldata);
  /* alldata is a by-value struct, so alldata.n is still readable here */
  if(ntrain < alldata.n)
  {
    free(sample.examples);
    free(val.examples);
  }
  free_struct_model(sm, &sparm);
  for(i=0;i<m;i++) {
    free_svector(fycache[i]);
  }
  free(fycache);

  free(valid_examples);

  return(0);

}
コード例 #13
0
/*
 * Training driver for the latent-variable structural SVM.
 *
 * Reads parameters and training examples, then alternates between
 *   (1) solving for the weight vector with cutting_plane_algorithm() and
 *   (2) re-imputing the latent variable h of every example with
 *       infer_latent_variables() and rebuilding the psi(x,y,h) cache,
 * until the stopping criterion holds or MAX_OUTER_ITER is reached.
 * At least two outer iterations are always executed. The final model is
 * written to modelfile.
 *
 * Returns 0 on success and 1 if the feature vector cache cannot be allocated.
 */
int main(int argc, char *argv[]) {

    double *w; /* weight vector */
    int outer_iter;
    long m, i;
    double C, epsilon;
    LEARN_PARM learn_parm;
    KERNEL_PARM kernel_parm;
    char trainfile[1024];
    char modelfile[1024];
    int MAX_ITER;
    /* new struct variables */
    SVECTOR **fycache, *diff, *fy;
    EXAMPLE *ex;
    SAMPLE sample;
    STRUCT_LEARN_PARM sparm;
    STRUCTMODEL sm;

    double decrement;
    double primal_obj, last_primal_obj;
    double cooling_eps;
    /* boolean flag; initialised so the loop condition never reads an
       indeterminate value, independent of short-circuit evaluation */
    int stop_crit = 0;


    /* read input parameters */
    my_read_input_parameters(argc, argv, trainfile, modelfile, &learn_parm, &kernel_parm, &sparm);

    epsilon = learn_parm.eps;
    C = learn_parm.svm_c;
    MAX_ITER = learn_parm.maxiter;

    /* read in examples */
    sample = read_struct_examples(trainfile, &sparm);
    ex = sample.examples;
    m = sample.n;

    /* initialization */
    init_struct_model(sample, &sm, &sparm, &learn_parm, &kernel_parm);

    w = create_nvector(sm.sizePsi);
    clear_nvector(w, sm.sizePsi);
    sm.w = w; /* establish link to w, as long as w does not change pointer */

    /* some training information */
    printf("C: %.8g\n", C);
    printf("epsilon: %.8g\n", epsilon);
    printf("sample.n: %ld\n", sample.n);
    printf("sm.sizePsi: %ld\n", sm.sizePsi);
    fflush(stdout);


    /* impute latent variable for first iteration */
    //init_latent_variables(&sample,&learn_parm,&sm,&sparm);

    /* prepare feature vector cache for correct labels with imputed latent variables */
    fycache = (SVECTOR **) malloc(m * sizeof(SVECTOR *));
    /* malloc(0) may legitimately return NULL, so only treat NULL as a
       failure when there is at least one example to store */
    if ((fycache == NULL) && (m > 0)) {
        printf("ERROR: out of memory allocating feature vector cache\n");
        fflush(stdout);
        return (1);
    }
    for (i = 0; i < m; i++) {
        /* collapse the vector list returned by psi() into a single vector */
        fy = psi(ex[i].x, ex[i].y, ex[i].h, &sm, &sparm);
        diff = add_list_ss(fy);
        free_svector(fy);
        fy = diff;
        fycache[i] = fy;
    }

    /* outer loop: latent variable imputation */
    outer_iter = 0;
    last_primal_obj = 0;
    decrement = 0;
    cooling_eps = 0.5 * C * epsilon;
    while ((outer_iter < 2) || ((!stop_crit) && (outer_iter < MAX_OUTER_ITER))) {
        printf("OUTER ITER %d\n", outer_iter);
        /* cutting plane algorithm */
        primal_obj = cutting_plane_algorithm(w, m, MAX_ITER, C, cooling_eps, fycache, ex, &sm, &sparm);

        /* compute decrement in objective in this outer iteration */
        decrement = last_primal_obj - primal_obj;
        last_primal_obj = primal_obj;
        printf("primal objective: %.4f\n", primal_obj);
        printf("decrement: %.4f\n", decrement);
        fflush(stdout);

        /* stop once the objective moved by less than C*epsilon while the
           inner solver was already running at its tightest tolerance */
        stop_crit = (decrement < C * epsilon) && (cooling_eps < 0.5 * C * epsilon + 1E-8);

        /* tolerance for the next inner solve: 1% of the (negated)
           decrement, but never below 0.5*C*epsilon */
        cooling_eps = -decrement * 0.01;
        cooling_eps = MAX(cooling_eps, 0.5 * C * epsilon);
        printf("cooling_eps: %.8g\n", cooling_eps);


        /* impute latent variable using updated weight vector */
        for (i = 0; i < m; i++) {
            free_latent_var(ex[i].h);
            ex[i].h = infer_latent_variables(ex[i].x, ex[i].y, &sm, &sparm);
        }
        /* re-compute feature vector cache */
        for (i = 0; i < m; i++) {
            free_svector(fycache[i]);
            fy = psi(ex[i].x, ex[i].y, ex[i].h, &sm, &sparm);
            diff = add_list_ss(fy);
            free_svector(fy);
            fy = diff;
            fycache[i] = fy;
        }


        outer_iter++;
    } // end outer loop


    /* write structural model */
    write_struct_model(modelfile, &sm, &sparm);
    // skip testing for the moment

    /* free memory */
    free_struct_sample(sample);
    free_struct_model(sm, &sparm);
    for (i = 0; i < m; i++) {
        free_svector(fycache[i]);
    }
    free(fycache);

    return (0);

}
コード例 #14
0
/*
 * Inner-loop solver: cutting-plane method with a proximal term around a
 * stability center w_b.
 *
 * Each iteration appends the cutting plane returned by find_cutting_plane()
 * to the working set (dXc/delta), extends the Gram matrix G, solves the
 * resulting QP with svm_learn_optimization() and rebuilds w from the dual
 * variables alpha. Depending on the achieved decrease, the step is either
 * accepted (w_b := w, "serious step") or rejected ("null step"), and the
 * proximal weight rho is adapted within [min_rho, max_rho].
 *
 * Parameters:
 *   w        in/out weight vector; entries 1..sm->sizePsi are read as the
 *            warm start and overwritten with the final center w_b
 *   m        number of examples, forwarded to find_cutting_plane()
 *   MAX_ITER upper bound on the number of iterations
 *   C        regularisation constant
 *   epsilon  tolerance: the loop stops once expected_descent >= -epsilon
 *   fycache, ex, sm, sparm  forwarded to find_cutting_plane()
 *
 * Returns primal_obj_b, the primal objective recorded for the final center.
 */
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache,
                               EXAMPLE *ex, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm) {
    long i, j;
    double *alpha;
    double **G; /* Gram matrix */
    DOC **dXc; /* constraint matrix */
    double *delta; /* rhs of constraints */
    SVECTOR *new_constraint;
    double dual_obj, alphasum;
    int iter, size_active;
    double value;
    int *idle; /* for cleaning up */
    double margin;
    double primal_obj;
    double *proximal_rhs;
    double *gammaG0 = NULL;
    double min_rho = 0.001;
    double max_rho;
    double serious_counter = 0;
    double rho = 1.0; /* temporarily set it to 1 first */

    double expected_descent, primal_obj_b = -1, reg_master_obj;
    int null_step = 1;
    double *w_b;
    double kappa = 0.1;
    double temp_var;
    double proximal_term, primal_lower_bound;

    double v_k;
    double obj_difference;
    double *cut_error; // cut_error[i] = alpha_{k,i} at current center x_k
    double sigma_k;
    double m2 = 0.2;
    double m3 = 0.9;
    double gTd;
    double last_sigma_k = 0;

    double initial_primal_obj;
    int suff_decrease_cond = 0;
    double decrease_proportion = 0.2; // start from 0.2 first

    double z_k_norm;
    double last_z_k_norm = 0;


    /* set parameters for hideo solver */
    LEARN_PARM lparm;
    KERNEL_PARM kparm;
    MODEL *svmModel = NULL;
    lparm.biased_hyperplane = 0;
    lparm.epsilon_crit = MIN(epsilon, 0.001);
    lparm.svm_c = C;
    lparm.sharedslack = 1;
    kparm.kernel_type = LINEAR;

    lparm.remove_inconsistent = 0;
    lparm.skip_final_opt_check = 0;
    lparm.svm_maxqpsize = 10;
    lparm.svm_newvarsinqp = 0;
    lparm.svm_iter_to_shrink = -9999;
    lparm.maxiter = 100000;
    lparm.kernel_cache_size = 40;
    lparm.eps = epsilon;
    lparm.transduction_posratio = -1.0;
    lparm.svm_costratio = 1.0;
    lparm.svm_costratio_unlab = 1.0;
    lparm.svm_unlabbound = 1E-5;
    lparm.epsilon_a = 1E-10;  /* changed from 1e-15 */
    lparm.compute_loo = 0;
    lparm.rho = 1.0;
    lparm.xa_depth = 0;
    strcpy(lparm.alphafile, "");
    kparm.poly_degree = 3;
    kparm.rbf_gamma = 1.0;
    kparm.coef_lin = 1;
    kparm.coef_const = 1;
    strcpy(kparm.custom, "empty");


    w_b = create_nvector(sm->sizePsi);
    clear_nvector(w_b, sm->sizePsi);
    /* warm start: the center starts at the caller's w (entries 1..sizePsi) */
    for (i = 1; i < sm->sizePsi + 1; i++) {
        w_b[i] = w[i];
    }

    iter = 0;
    size_active = 0;
    alpha = NULL;
    G = NULL;
    dXc = NULL;
    delta = NULL;
    idle = NULL;

    proximal_rhs = NULL;
    cut_error = NULL;

    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm);
    value = margin - sprod_ns(w, new_constraint);

    primal_obj_b = 0.5 * sprod_nn(w_b, w_b, sm->sizePsi) + C * value;
    primal_obj = 0.5 * sprod_nn(w, w, sm->sizePsi) + C * value;
    primal_lower_bound = 0;
    expected_descent = -primal_obj_b;
    initial_primal_obj = primal_obj_b;

    /* max_rho was originally C; enlarged because the
       tree-edge loss is not within 0-1 */
    max_rho = 100 * C;

    printf("Running CCCP inner loop solver: ");
    fflush(stdout);

    while ((!suff_decrease_cond) && (expected_descent < -epsilon) && (iter < MAX_ITER)) {
        iter += 1;
        size_active += 1;

#if (DEBUG_LEVEL > 0)
        printf("ITER %d\n", iter);
#endif
        printf(".");
        fflush(stdout);


        /* add  constraint: grow every per-constraint array by one slot */
        dXc = (DOC **) realloc(dXc, sizeof(DOC *) * size_active);
        assert(dXc != NULL);
        dXc[size_active - 1] = (DOC *) malloc(sizeof(DOC));
        assert(dXc[size_active - 1] != NULL);
        dXc[size_active - 1]->fvec = new_constraint;
        dXc[size_active - 1]->slackid = 1; // only one common slackid (one-slack)
        dXc[size_active - 1]->costfactor = 1.0;

        delta = (double *) realloc(delta, sizeof(double) * size_active);
        assert(delta != NULL);
        delta[size_active - 1] = margin;
        alpha = (double *) realloc(alpha, sizeof(double) * size_active);
        assert(alpha != NULL);
        alpha[size_active - 1] = 0.0;
        idle = (int *) realloc(idle, sizeof(int) * size_active);
        assert(idle != NULL);
        idle[size_active - 1] = 0;
        /* proximal point */
        proximal_rhs = (double *) realloc(proximal_rhs, sizeof(double) * size_active);
        assert(proximal_rhs != NULL);
        cut_error = (double *) realloc(cut_error, sizeof(double) * size_active);
        assert(cut_error != NULL);
        // note g_i = - new_constraint
        cut_error[size_active - 1] = C * (sprod_ns(w_b, new_constraint) - sprod_ns(w, new_constraint));
        cut_error[size_active - 1] += (primal_obj_b - 0.5 * sprod_nn(w_b, w_b, sm->sizePsi));
        cut_error[size_active - 1] -= (primal_obj - 0.5 * sprod_nn(w, w, sm->sizePsi));

        gammaG0 = (double *) realloc(gammaG0, sizeof(double) * size_active);
        assert(gammaG0 != NULL);

        /* update Gram matrix: add one row, widen every row by one column,
           then fill the new row/column symmetrically */
        G = (double **) realloc(G, sizeof(double *) * size_active);
        assert(G != NULL);
        G[size_active - 1] = NULL;
        for (j = 0; j < size_active; j++) {
            G[j] = (double *) realloc(G[j], sizeof(double) * size_active);
            assert(G[j] != NULL);
        }
        for (j = 0; j < size_active - 1; j++) {
            G[size_active - 1][j] = sprod_ss(dXc[size_active - 1]->fvec, dXc[j]->fvec);
            G[j][size_active - 1] = G[size_active - 1][j];
        }
        G[size_active - 1][size_active - 1] = sprod_ss(dXc[size_active - 1]->fvec, dXc[size_active - 1]->fvec);


        /* update gammaG0: after a null step the center is unchanged, so only
           the new entry is needed; after a serious step w_b moved and every
           entry is recomputed */
        if (null_step == 1) {
            gammaG0[size_active - 1] = sprod_ns(w_b, dXc[size_active - 1]->fvec);
        } else {
            for (i = 0; i < size_active; i++) {
                gammaG0[i] = sprod_ns(w_b, dXc[i]->fvec);
            }
        }

        /* update proximal_rhs */
        for (i = 0; i < size_active; i++) {
            proximal_rhs[i] = (1 + rho) * delta[i] - rho * gammaG0[i];
        }


        /* solve QP to update alpha; with a single constraint the solver is
           skipped and alpha[0] is set to C directly */
        if (size_active > 1) {
            if (svmModel != NULL) free_model(svmModel, 0);
            svmModel = (MODEL *) my_malloc(sizeof(MODEL));
            svm_learn_optimization(dXc, proximal_rhs, size_active, sm->sizePsi, &lparm, &kparm, NULL, svmModel, alpha);
        } else {
            assert(size_active == 1);
            alpha[0] = C;
        }

        /* rebuild w from the constraints with non-negligible alpha */
        clear_nvector(w, sm->sizePsi);
        for (j = 0; j < size_active; j++) {
            if (alpha[j] > C * ALPHA_THRESHOLD) {
                add_vector_ns(w, dXc[j]->fvec, alpha[j] / (1 + rho));
            }
        }

        /* compute dual obj */
        dual_obj = +0.5 * (1 + rho) * sprod_nn(w, w, sm->sizePsi);
        for (j = 0; j < size_active; j++) {
            dual_obj -= proximal_rhs[j] / (1 + rho) * alpha[j];
        }

        z_k_norm = sqrt(sprod_nn(w, w, sm->sizePsi));

        add_vector_nn(w, w_b, sm->sizePsi, rho / (1 + rho));


        /* detect if step size too small */
        sigma_k = 0;
        alphasum = 0;
        for (j = 0; j < size_active; j++) {
            sigma_k += alpha[j] * cut_error[j];
            alphasum += alpha[j];
        }
        sigma_k /= C;
        gTd = -C * (sprod_ns(w, new_constraint) - sprod_ns(w_b, new_constraint));

#if (DEBUG_LEVEL > 0)
        for (j=0;j<size_active;j++) {
          printf("alpha[%d]: %.8g, cut_error[%d]: %.8g\n", j, alpha[j], j, cut_error[j]);
        }
        printf("sigma_k: %.8g\n", sigma_k);
        printf("alphasum: %.8g\n", alphasum);
        printf("g^T d: %.8g\n", gTd);
        fflush(stdout);
#endif


        /* update cleanup information: count consecutive iterations in which
           a constraint's alpha stayed below the threshold */
        for (j = 0; j < size_active; j++) {
            if (alpha[j] < ALPHA_THRESHOLD * C) {
                idle[j]++;
            } else {
                idle[j] = 0;
            }
        }

        /* the previous new_constraint is now referenced by dXc; fetch the
           next cutting plane at the updated w */
        new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm);
        value = margin - sprod_ns(w, new_constraint);

        /* print primal objective */
        primal_obj = 0.5 * sprod_nn(w, w, sm->sizePsi) + C * value;

#if (DEBUG_LEVEL > 0)
        printf("ITER PRIMAL_OBJ %.4f\n", primal_obj); fflush(stdout);
#endif


        temp_var = sprod_nn(w_b, w_b, sm->sizePsi);
        /* proximal_term = ||w - w_b||^2 over entries 1..sizePsi */
        proximal_term = 0.0;
        for (i = 1; i < sm->sizePsi + 1; i++) {
            proximal_term += (w[i] - w_b[i]) * (w[i] - w_b[i]);
        }

        reg_master_obj = -dual_obj + 0.5 * rho * temp_var / (1 + rho);
        expected_descent = reg_master_obj - primal_obj_b;

        v_k = (reg_master_obj - proximal_term * rho / 2) - primal_obj_b;

        primal_lower_bound = MAX(primal_lower_bound, reg_master_obj - 0.5 * rho * (1 + rho) * proximal_term);

#if (DEBUG_LEVEL > 0)
        printf("ITER REG_MASTER_OBJ: %.4f\n", reg_master_obj);
        printf("ITER EXPECTED_DESCENT: %.4f\n", expected_descent);
        printf("ITER PRIMLA_OBJ_B: %.4f\n", primal_obj_b);
        printf("ITER RHO: %.4f\n", rho);
        printf("ITER ||w-w_b||^2: %.4f\n", proximal_term);
        printf("ITER PRIMAL_LOWER_BOUND: %.4f\n", primal_lower_bound);
        printf("ITER V_K: %.4f\n", v_k);
#endif
        obj_difference = primal_obj - primal_obj_b;

        if (primal_obj < primal_obj_b + kappa * expected_descent) {
            /* extra condition to be met */
            if ((gTd > m2 * v_k) || (rho < min_rho + 1E-8)) {
#if (DEBUG_LEVEL > 0)
                printf("SERIOUS STEP\n");
#endif
                /* update cut_error: re-express every cut relative to the
                   new center w before w_b is overwritten */
                for (i = 0; i < size_active; i++) {
                    cut_error[i] -= (primal_obj_b - 0.5 * sprod_nn(w_b, w_b, sm->sizePsi));
                    cut_error[i] -= C * sprod_ns(w_b, dXc[i]->fvec);
                    cut_error[i] += (primal_obj - 0.5 * sprod_nn(w, w, sm->sizePsi));
                    cut_error[i] += C * sprod_ns(w, dXc[i]->fvec);
                }
                primal_obj_b = primal_obj;
                for (i = 1; i < sm->sizePsi + 1; i++) {
                    w_b[i] = w[i];
                }
                null_step = 0;
                serious_counter++;
            } else {
                /* increase step size */
#if (DEBUG_LEVEL > 0)
                printf("NULL STEP: SS(ii) FAILS.\n");
#endif
                serious_counter--;
                rho = MAX(rho / 10, min_rho);
            }
        } else { /* no sufficient decrease */
            serious_counter--;

            if ((cut_error[size_active - 1] > m3 * last_sigma_k) &&
                (fabs(obj_difference) > last_z_k_norm + last_sigma_k)) {
#if (DEBUG_LEVEL > 0)
                printf("NULL STEP: NS(ii) FAILS.\n");
#endif
                rho = MIN(10 * rho, max_rho);
            }
#if (DEBUG_LEVEL > 0)
            else printf("NULL STEP\n");
#endif
        }
        /* update last_sigma_k */
        last_sigma_k = sigma_k;
        last_z_k_norm = z_k_norm;


        /* break away from while loop if more than certain proportional decrease in primal objective */
        if (primal_obj_b / initial_primal_obj < 1 - decrease_proportion) {
            suff_decrease_cond = 1;
        }

        /* clean up: resize_cleanup may shrink the working set and
           reallocate the arrays, hence the pointers-to-pointers */
        if (iter % CLEANUP_CHECK == 0) {
            size_active = resize_cleanup(size_active, &idle, &alpha, &delta, &gammaG0, &proximal_rhs, &G, &dXc,
                                         &cut_error);
        }


    } // end cutting plane while loop

    printf(" Inner loop optimization finished.\n");
    fflush(stdout);

    /* free memory */
    /* NOTE(review): free_example is called with deep=0 here; confirm that
       the constraint vectors stored in dXc[j]->fvec are released somewhere,
       otherwise they are leaked. */
    for (j = 0; j < size_active; j++) {
        free(G[j]);
        free_example(dXc[j], 0);
    }
    free(G);
    free(dXc);
    free(alpha);
    free(delta);
    free_svector(new_constraint);
    free(idle);
    free(gammaG0);
    free(proximal_rhs);
    free(cut_error);
    /* release the model of the last QP solve; inside the loop a model is
       only freed right before the next one is allocated, so the final one
       would otherwise be leaked on every call */
    if (svmModel != NULL) free_model(svmModel, 0);

    /* copy and free: hand the final center back to the caller through w */
    for (i = 1; i < sm->sizePsi + 1; i++) {
        w[i] = w_b[i];
    }
    free(w_b);

    return (primal_obj_b);

}
コード例 #15
0
void svm_learn_struct_joint(SAMPLE sample, STRUCT_LEARN_PARM *sparm,
			    LEARN_PARM *lparm, KERNEL_PARM *kparm, 
			    STRUCTMODEL *sm, int alg_type)
{
  int         i,j;
  int         numIt=0;
  long        argmax_count=0;
  long        totconstraints=0;
  long        kernel_type_org;
  double      epsilon,epsilon_cached;
  double      lossval,factor,dist;
  double      margin=0;
  double      slack, slacksum, ceps;
  double      dualitygap,modellength,alphasum;
  long        sizePsi;
  double      *alpha=NULL;
  long        *alphahist=NULL,optcount=0;
  CONSTSET    cset;
  SVECTOR     *diff=NULL;
  double      *diff_n=NULL;
  SVECTOR     *fy, *fybar, *f, **fycache, *lhs;
  MODEL       *svmModel=NULL;
  LABEL       ybar;
  DOC         *doc;

  long        n=sample.n;
  EXAMPLE     *ex=sample.examples;
  double      rt_total=0,rt_opt=0,rt_init=0,rt_psi=0,rt_viol=0,rt_kernel=0;
  double      rt1,rt2;
  double      progress,progress_old;

  /*
  SVECTOR     ***fydelta_cache=NULL;
  double      **loss_cache=NULL;
  int         cache_size=0;
  */
  CCACHE      *ccache=NULL;
  int         cached_constraint;

  rt1=get_runtime();

  init_struct_model(sample,sm,sparm,lparm,kparm); 
  sizePsi=sm->sizePsi+1;          /* sm must contain size of psi on return */

  if(sparm->slack_norm == 1) {
    lparm->svm_c=sparm->C;          /* set upper bound C */
    lparm->sharedslack=1;
  }
  else if(sparm->slack_norm == 2) {
    printf("ERROR: The joint algorithm does not apply to L2 slack norm!"); 
    fflush(stdout);
    exit(0); 
  }
  else {
    printf("ERROR: Slack norm must be L1 or L2!"); fflush(stdout);
    exit(0);
  }


  lparm->biased_hyperplane=0;     /* set threshold to zero */
  epsilon=100.0;                  /* start with low precision and
				     increase later */
  epsilon_cached=epsilon;         /* epsilon to use for iterations
				     using constraints constructed
				     from the constraint cache */

  cset=init_struct_constraints(sample, sm, sparm);
  if(cset.m > 0) {
    alpha=(double *)realloc(alpha,sizeof(double)*cset.m);
    alphahist=(long *)realloc(alphahist,sizeof(long)*cset.m);
    for(i=0; i<cset.m; i++) {
      alpha[i]=0;
      alphahist[i]=-1; /* -1 makes sure these constraints are never removed */
    }
  }
  kparm->gram_matrix=NULL;
  if((alg_type == DUAL_ALG) || (alg_type == DUAL_CACHE_ALG))
    kparm->gram_matrix=init_kernel_matrix(&cset,kparm);

  /* set initial model and slack variables */
  svmModel=(MODEL *)my_malloc(sizeof(MODEL));
  lparm->epsilon_crit=epsilon;
  svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n,
			 lparm,kparm,NULL,svmModel,alpha);
  add_weight_vector_to_linear_model(svmModel);
  sm->svm_model=svmModel;
  sm->w=svmModel->lin_weights; /* short cut to weight vector */

  /* create a cache of the feature vectors for the correct labels */
  fycache=(SVECTOR **)malloc(n*sizeof(SVECTOR *));
  for(i=0;i<n;i++) {
    fy=psi(ex[i].x,ex[i].y,sm,sparm);
    if(kparm->kernel_type == LINEAR) {
      diff=add_list_ss(fy); /* store difference vector directly */
      free_svector(fy);
      fy=diff;
    }
    fycache[i]=fy;
  }

  /* initialize the constraint cache */
  if(alg_type == DUAL_CACHE_ALG) {
    ccache=create_constraint_cache(sample,sparm);
  }

  rt_init+=MAX(get_runtime()-rt1,0);
  rt_total+=MAX(get_runtime()-rt1,0);

    /*****************/
   /*** main loop ***/
  /*****************/
  do { /* iteratively find and add constraints to working set */

      if(struct_verbosity>=1) { 
	printf("Iter %i: ",++numIt); 
	fflush(stdout);
      }
      
      rt1=get_runtime();

      /**** compute current slack ****/
      slack=0;
      for(j=0;j<cset.m;j++) 
	slack=MAX(slack,cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
      
      /**** find a violated joint constraint ****/
      lhs=NULL;
      dist=0;
      if(alg_type == DUAL_CACHE_ALG) {
	/* see if it is possible to construct violated constraint from cache */
	update_constraint_cache_for_model(ccache, svmModel);
	dist=find_most_violated_joint_constraint_in_cache(ccache,&lhs,&margin);
      }

      rt_total+=MAX(get_runtime()-rt1,0);

      /* Is there a sufficiently violated constraint in cache? */
      if(dist-slack > MAX(epsilon/10,sparm->epsilon)) { 
	/* use constraint from cache */
	rt1=get_runtime();
	cached_constraint=1;
	if(kparm->kernel_type == LINEAR) {
	  diff=add_list_ns(lhs); /* Linear case: compute weighted sum */
	  free_svector_shallow(lhs);
	}
	else { /* Non-linear case: make sure we have deep copy for cset */
	  diff=copy_svector(lhs); 
	  free_svector_shallow(lhs);
	}
	rt_total+=MAX(get_runtime()-rt1,0);
      }
      else { 
	/* do not use constraint from cache */
	rt1=get_runtime();
	cached_constraint=0;
	if(lhs)
	  free_svector_shallow(lhs);
	lhs=NULL;
	if(kparm->kernel_type == LINEAR) {
	  diff_n=create_nvector(sm->sizePsi);
	  clear_nvector(diff_n,sm->sizePsi);
	}
	margin=0;
	progress=0;
	progress_old=progress;
	rt_total+=MAX(get_runtime()-rt1,0);

	/**** find most violated joint constraint ***/
	for(i=0; i<n; i++) {
	  
	  rt1=get_runtime();
      
	  progress+=10.0/n;
	  if((struct_verbosity==1) && (((int)progress_old) != ((int)progress)))
	    {printf(".");fflush(stdout); progress_old=progress;}
	  if(struct_verbosity>=2)
	    {printf("."); fflush(stdout);}

	  rt2=get_runtime();
	  argmax_count++;
	  if(sparm->loss_type == SLACK_RESCALING) 
	    ybar=find_most_violated_constraint_slackrescaling(ex[i].x,
							      ex[i].y,sm,
							      sparm);
	  else
	    ybar=find_most_violated_constraint_marginrescaling(ex[i].x,
							       ex[i].y,sm,
							       sparm);
	  rt_viol+=MAX(get_runtime()-rt2,0);
	  
	  if(empty_label(ybar)) {
	    printf("ERROR: empty label was returned for example (%i)\n",i);
	    /* exit(1); */
	    continue;
	  }
	  
	  /**** get psi(x,y) and psi(x,ybar) ****/
	  rt2=get_runtime();
	  fy=copy_svector(fycache[i]); /*<= fy=psi(ex[i].x,ex[i].y,sm,sparm);*/
	  fybar=psi(ex[i].x,ybar,sm,sparm);
	  rt_psi+=MAX(get_runtime()-rt2,0);
	  lossval=loss(ex[i].y,ybar,sparm);
	  free_label(ybar);
	  
	  /**** scale feature vector and margin by loss ****/
	  if(sparm->loss_type == SLACK_RESCALING)
	    factor=lossval/n;
	  else                 /* do not rescale vector for */
	    factor=1.0/n;      /* margin rescaling loss type */
	  for(f=fy;f;f=f->next)
	    f->factor*=factor;
	  for(f=fybar;f;f=f->next)
	    f->factor*=-factor;
	  append_svector_list(fybar,fy);   /* compute fy-fybar */
	  
	  /**** add current fy-fybar and loss to cache ****/
	  if(alg_type == DUAL_CACHE_ALG) {
	    if(kparm->kernel_type == LINEAR) 
	      add_constraint_to_constraint_cache(ccache,svmModel,i,
						 add_list_ss(fybar),
						 lossval/n,sparm->ccache_size);
	    else
	      add_constraint_to_constraint_cache(ccache,svmModel,i,
						 copy_svector(fybar),
						 lossval/n,sparm->ccache_size);
	  }

	  /**** add current fy-fybar to constraint and margin ****/
	  if(kparm->kernel_type == LINEAR) {
	    add_list_n_ns(diff_n,fybar,1.0); /* add fy-fybar to sum */
	    free_svector(fybar);
	  }
	  else {
	    append_svector_list(fybar,lhs);  /* add fy-fybar to vector list */
	    lhs=fybar;
	  }
	  margin+=lossval/n;                 /* add loss to rhs */
	  
	  rt_total+=MAX(get_runtime()-rt1,0);

	} /* end of example loop */

	rt1=get_runtime();

	/* create sparse vector from dense sum */
	if(kparm->kernel_type == LINEAR) {
	  diff=create_svector_n(diff_n,sm->sizePsi,"",1.0);
	  free_nvector(diff_n);
	}
	else {
	  diff=lhs;
	}

	rt_total+=MAX(get_runtime()-rt1,0);

      } /* end of finding most violated joint constraint */

      rt1=get_runtime();

      /**** if `error', then add constraint and recompute QP ****/
      doc=create_example(cset.m,0,1,1,diff);
      dist=classify_example(svmModel,doc);
      ceps=MAX(0,margin-dist-slack);
      if(slack > (margin-dist+0.000001)) {
	printf("\nWARNING: Slack of most violated constraint is smaller than slack of working\n");
	printf("         set! There is probably a bug in 'find_most_violated_constraint_*'.\n");
	printf("slack=%f, newslack=%f\n",slack,margin-dist);
	/* exit(1); */
      }
      if(ceps > sparm->epsilon) { 
	/**** resize constraint matrix and add new constraint ****/
	cset.lhs=(DOC **)realloc(cset.lhs,sizeof(DOC *)*(cset.m+1));
	if(sparm->slack_norm == 1) 
	  cset.lhs[cset.m]=create_example(cset.m,0,1,1,diff);
	else if(sparm->slack_norm == 2)
	  exit(1);
	cset.rhs=(double *)realloc(cset.rhs,sizeof(double)*(cset.m+1));
	cset.rhs[cset.m]=margin;
	alpha=(double *)realloc(alpha,sizeof(double)*(cset.m+1));
	alpha[cset.m]=0;
	alphahist=(long *)realloc(alphahist,sizeof(long)*(cset.m+1));
	alphahist[cset.m]=optcount;
	cset.m++;
	totconstraints++;
	if((alg_type == DUAL_ALG) || (alg_type == DUAL_CACHE_ALG)) {
	  if(struct_verbosity>=1) {
	    printf(":");fflush(stdout);
	  }
	  rt2=get_runtime();
	  kparm->gram_matrix=update_kernel_matrix(kparm->gram_matrix,cset.m-1,
						  &cset,kparm);
	  rt_kernel+=MAX(get_runtime()-rt2,0);
	}
	
	/**** get new QP solution ****/
	if(struct_verbosity>=1) {
	  printf("*");fflush(stdout);
	}
	rt2=get_runtime();
	/* set svm precision so that higher than eps of most violated constr */
	if(cached_constraint) {
	  epsilon_cached=MIN(epsilon_cached,MAX(ceps,sparm->epsilon)); 
	  lparm->epsilon_crit=epsilon_cached/2; 
	}
	else {
	  epsilon=MIN(epsilon,MAX(ceps,sparm->epsilon)); /* best eps so far */
	  lparm->epsilon_crit=epsilon/2; 
	  epsilon_cached=epsilon;
	}
	free_model(svmModel,0);
	svmModel=(MODEL *)my_malloc(sizeof(MODEL));
	/* Run the QP solver on cset. */
	kernel_type_org=kparm->kernel_type;
	if((alg_type == DUAL_ALG) || (alg_type == DUAL_CACHE_ALG))
	  kparm->kernel_type=GRAM; /* use kernel stored in kparm */
	svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n,
			       lparm,kparm,NULL,svmModel,alpha);
	kparm->kernel_type=kernel_type_org; 
	svmModel->kernel_parm.kernel_type=kernel_type_org;
	/* Always add weight vector, in case part of the kernel is
	   linear. If not, ignore the weight vector since its
	   content is bogus. */
	add_weight_vector_to_linear_model(svmModel);
	sm->svm_model=svmModel;
	sm->w=svmModel->lin_weights; /* short cut to weight vector */
	optcount++;
	/* keep track of when each constraint was last
	   active. constraints marked with -1 are not updated */
	for(j=0;j<cset.m;j++) 
	  if((alphahist[j]>-1) && (alpha[j] != 0))  
	    alphahist[j]=optcount;
	rt_opt+=MAX(get_runtime()-rt2,0);
	
	/* Check if some of the linear constraints have not been
	   active in a while. Those constraints are then removed to
	   avoid bloating the working set beyond necessity. */
	if(struct_verbosity>=2)
	  printf("Reducing working set...");fflush(stdout);
	remove_inactive_constraints(&cset,alpha,optcount,alphahist,50);
	if(struct_verbosity>=2)
	  printf("done. (NumConst=%d) ",cset.m);
      }
      else {
	free_svector(diff);
      }

      if(struct_verbosity>=1)
	printf("(NumConst=%d, SV=%ld, CEps=%.4f, QPEps=%.4f)\n",cset.m,
	       svmModel->sv_num-1,ceps,svmModel->maxdiff);

      free_example(doc,0);
	
      rt_total+=MAX(get_runtime()-rt1,0);

  } while((ceps > sparm->epsilon) || 
	  finalize_iteration(ceps,cached_constraint,sample,sm,cset,alpha,sparm)
	 );
  

  if(struct_verbosity>=1) {
    /**** compute sum of slacks ****/
    /**** WARNING: If positivity constraints are used, then the
	  maximum slack id is larger than what is allocated
	  below ****/
    slacksum=0;
    if(sparm->slack_norm == 1) {
      for(j=0;j<cset.m;j++) 
	slacksum=MAX(slacksum,
		     cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
      }
    else if(sparm->slack_norm == 2) {
      exit(1);
    }
    alphasum=0;
    for(i=0; i<cset.m; i++)  
      alphasum+=alpha[i]*cset.rhs[i];
    modellength=model_length_s(svmModel,kparm);
    dualitygap=(0.5*modellength*modellength+sparm->C*(slacksum+ceps))
               -(alphasum-0.5*modellength*modellength);
    
    printf("Final epsilon on KKT-Conditions: %.5f\n",
	   MAX(svmModel->maxdiff,ceps));
    printf("Upper bound on duality gap: %.5f\n", dualitygap);
    printf("Dual objective value: dval=%.5f\n",
	    alphasum-0.5*modellength*modellength);
    printf("Total number of constraints in final working set: %i (of %i)\n",(int)cset.m,(int)totconstraints);
    printf("Number of iterations: %d\n",numIt);
    printf("Number of calls to 'find_most_violated_constraint': %ld\n",argmax_count);
    if(sparm->slack_norm == 1) {
      printf("Number of SV: %ld \n",svmModel->sv_num-1);
      printf("Norm of weight vector: |w|=%.5f\n",
	     model_length_s(svmModel,kparm));
    }
    else if(sparm->slack_norm == 2){ 
      printf("Number of SV: %ld (including %ld at upper bound)\n",
	     svmModel->sv_num-1,svmModel->at_upper_bound);
      printf("Norm of weight vector (including L2-loss): |w|=%.5f\n",
	     model_length_s(svmModel,kparm));
    }
    printf("Value of slack variable (on working set): xi=%.5f\n",slacksum);
    printf("Norm of longest difference vector: ||Psi(x,y)-Psi(x,ybar)||=%.5f\n",
	   length_of_longest_document_vector(cset.lhs,cset.m,kparm));
    printf("Runtime in cpu-seconds: %.2f (%.2f%% for QP, %.2f%% for kernel, %.2f%% for Argmax, %.2f%% for Psi, %.2f%% for init)\n",
	   rt_total/100.0, (100.0*rt_opt)/rt_total, (100.0*rt_kernel)/rt_total,
	   (100.0*rt_viol)/rt_total, (100.0*rt_psi)/rt_total, 
	   (100.0*rt_init)/rt_total);
  }
  if(ccache) {
    long cnum=0;
    CCACHEELEM *celem;
    for(i=0;i<n;i++) 
      for(celem=ccache->constlist[i];celem;celem=celem->next) 
	cnum++;
    printf("Final number of constraints in cache: %ld\n",cnum);
  }
  if(struct_verbosity>=4)
    printW(sm->w,sizePsi,n,lparm->svm_c);

  if(svmModel) {
    sm->svm_model=copy_model(svmModel);
    sm->w=sm->svm_model->lin_weights; /* short cut to weight vector */
  }

  print_struct_learning_stats(sample,sm,cset,alpha,sparm);

  if(ccache)    
    free_constraint_cache(ccache);
  for(i=0;i<n;i++)
    free_svector(fycache[i]);
  free(fycache);
  if(svmModel)
    free_model(svmModel,0);
  free(alpha); 
  free(alphahist); 
  free(cset.rhs); 
  for(i=0;i<cset.m;i++) 
    free_example(cset.lhs[i],1);
  free(cset.lhs);
  if(kparm->gram_matrix)
    free_matrix(kparm->gram_matrix);
}