CFLOAT kernel(KERNEL_PARM *kernel_parm, DOC *a, DOC *b) 
     /* calculate the kernel function */
{
  double sum=0;
  SVECTOR *fa,*fb;

  if(kernel_parm->kernel_type == GRAM) {  /* use value from explicitly stored gram matrix */
    if((a->kernelid>=0) && (b->kernelid>=0))
      return(kernel_parm->gram_matrix->element[MAX(a->kernelid,b->kernelid)]
	                                      [MIN(a->kernelid,b->kernelid)]);
    else 
      return(0); /* in case it is called for an unknown vector */
  }

  /* In case the constraints are sums of feature vectors, represented
     as a list of SVECTOR's with their coefficient factors in the sum,
     take the kernel between all pairs. */
  for(fa=a->fvec;fa;fa=fa->next) { 
    for(fb=b->fvec;fb;fb=fb->next) {
      if(fa->kernel_id == fb->kernel_id)
        sum+=fa->factor*fb->factor*single_kernel(kernel_parm,fa,fb);
    }
  }
  
  return(sum);
}
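
A minimal call-site sketch for kernel() (hypothetical setup: words_a and words_b are assumed zero-terminated WORD arrays and kernel_parm an initialized KERNEL_PARM; create_example and create_svector as used elsewhere in these examples):

/* Hypothetical usage: kernel() sums factor-weighted single_kernel()
   values over all SVECTOR pairs in a->fvec and b->fvec that share a
   kernel_id. */
DOC *a=create_example(0,0,0,1.0,create_svector(words_a,"",1.0));
DOC *b=create_example(1,0,0,1.0,create_svector(words_b,"",1.0));
CFLOAT k=kernel(&kernel_parm,a,b);
free_example(a,1);   /* deep free also releases the fvec lists */
free_example(b,1);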
Example #2
LABEL       find_most_violated_constraint_slackrescaling(PATTERNX x, LABEL y, 
						     STRUCTMODEL *sm, 
						     STRUCT_LEARN_PARM *sparm)
{
  /* Finds the label ybar for pattern x that is responsible for
     the most violated constraint for the slack rescaling
     formulation. It has to take into account the scoring function in
     sm, especially the weights sm.w, as well as the loss
     function. The weights in sm.w correspond to the features defined
     by psi() and range from index 1 to index sm->sizePsi. Most simple
     is the case of the zero/one loss function. For the zero/one loss,
     this function should return the highest scoring label ybar, if
     ybar is unequal to y; if it is equal to the correct label y, then
     the function shall return the second highest scoring label. If
     the function cannot find a label, it shall return an empty label
     as recognized by the function empty_label(y). */
  LABEL ybar;
  DOC doc;
  long classlabel, bestclass=-1, first=1;
  double score, score_y, score_ybar, bestscore=-1;

  /* NOTE: This function could be made much more efficient by not
     always computing a new PSI vector. */
  doc = *(x.doc);
  doc.fvec = psi(x,y,sm,sparm);
  score_y = classify_example(sm->svm_model,&doc);
  free_svector(doc.fvec);

  ybar.scores = NULL;
  ybar.num_classes = sparm->num_classes;
  for(classlabel=1; classlabel<=sparm->num_classes; classlabel++) {
    ybar.classlabel = classlabel;
    doc.fvec=psi(x,ybar,sm,sparm);
    score_ybar=classify_example(sm->svm_model,&doc);
    free_svector(doc.fvec);
    score=loss(y,ybar,sparm,x.doc->fvec)*(1.0-score_y+score_ybar);
    if((bestscore<score)  || (first)) {
      bestscore=score;
      bestclass = classlabel;
      first=0;
    }
  }
  if(bestclass == -1) 
    printf("ERROR: Only one class\n");
  ybar.classlabel = bestclass;
  if(struct_verbosity>=3)
    printf("[%ld:%.2f] ",bestclass,bestscore);
  return(ybar);
}
Example #3
int rd_coords( FILE *istream, double ***ptr_pcoords, int *ncoords )
{
 int n,i,status=0;
 double **p_coords;
 char line[MAXLENGTH],**p_parse;

 p_coords = dmatrix(2,MAX_N_COORDS);
 n=0;
 while (fgets( line, MAXLENGTH, istream ) != NULL) {
   if (n>=MAX_N_COORDS) {
     fprintf(stderr,"ERROR: Too many sources. Max=%d\n",MAX_N_COORDS);
     free_dmatrix(p_coords);
     return(ERRNO_INPUT_ERROR);
   }
   p_parse = svector(2,MAXLENGTH);
   splitstr(line,p_parse,SPACES);
   *(*(p_coords+0)+n) = atof(p_parse[0]);
   *(*(p_coords+1)+n) = atof(p_parse[1]);
   n++;
   free_svector(p_parse);
 }
 *ncoords = n;

 *ptr_pcoords = dmatrix(2,*ncoords);
 for (i=0;i<*ncoords;i++) {
   *(*(*ptr_pcoords + 0) + i) = *(*(p_coords+0)+i);
   *(*(*ptr_pcoords + 1) + i) = *(*(p_coords+1)+i);
 }
 free_dmatrix(p_coords);

 return(status);
}
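
A hypothetical call site for rd_coords() (the file name and error handling are assumed, not part of the example above). Note that svector()/free_svector() inside rd_coords are string-array utilities, unlike the SVECTOR routines in the other examples:

/* Hypothetical driver: read (x,y) pairs into a 2-row matrix. */
double **coords;
int ncoords;
FILE *fp=fopen("sources.txt","r");          /* assumed input file */
if(fp && rd_coords(fp,&coords,&ncoords)==0) {
  /* coords[0][i] and coords[1][i] hold the i-th coordinate pair */
  free_dmatrix(coords);
}
if(fp) fclose(fp);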
Example #4
void free_svector(SVECTOR *vec)
{
  if(vec) {
    my_free(vec->words);
    if(vec->userdefined)
      my_free(vec->userdefined);
    free_svector(vec->next);
    my_free(vec);
  }
}
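
Because free_svector() recurses on vec->next, freeing the head releases the whole chained list; a minimal sketch (words and words2 are assumed zero-terminated WORD arrays):

/* Sketch: one free_svector() call releases both chained vectors. */
SVECTOR *v=create_svector(words,"",1.0);
v->next=create_svector(words2,"",1.0);
free_svector(v);   /* frees v->next first, then v itself */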
Example #5
void free_example(DOC *example, long deep)
{
  if(example) {
    if(deep) {
      if(example->fvec)
	free_svector(example->fvec);
    }
    my_free(example);
  }
}
void free_pattern(PATTERN x) {
  int i,j;
  for (i=0;i<x.num_nps;i++) {
    for (j=0;j<i;j++) {
      free_svector(x.pair_features[i][j]);
    }
    if (i>0)
      free(x.pair_features[i]);
  }
  free(x.pair_features);
}
Example #7
double current_obj_val(EXAMPLE *ex, SVECTOR **fycache, long m, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, double C, int *valid_examples) {

  long i, j;
  SVECTOR *f, *fy, *fybar, *lhs;
  LABEL       ybar;
  double lossval, margin;
  double *new_constraint;
  double obj = 0.0;

  /* find cutting plane */
  lhs = NULL;
  margin = 0;
  for (i=0;i<m;i++) {
    if(!valid_examples[i])
      continue;
    find_most_violated_constraint_marginrescaling(ex[i].x, ex[i].y, &ybar, sm, sparm);
    /* get difference vector */
    fy = copy_svector(fycache[i]);
    fybar = psi(ex[i].x,ybar,sm,sparm);
    lossval = loss(ex[i].y,ybar,sparm);

    /* scale difference vector */
    for (f=fy;f;f=f->next) {
      //f->factor*=1.0/m;
      f->factor*=ex[i].x.example_cost/m;
    }
    for (f=fybar;f;f=f->next) {
      //f->factor*=-1.0/m;
      f->factor*=-ex[i].x.example_cost/m;
    }
    /* add ybar to constraint */
    append_svector_list(fy,lhs);
    append_svector_list(fybar,fy);
    lhs = fybar;
    //margin+=lossval/m;
    margin += lossval*ex[i].x.example_cost/m;
  }

  /* compact the linear representation */
  new_constraint = add_list_nn(lhs, sm->sizePsi);
  free_svector(lhs);

  obj = margin;
  for(i = 1; i < sm->sizePsi+1; i++)
    obj -= new_constraint[i]*sm->w[i];
  if(obj < 0.0)
    obj = 0.0;
  obj *= C;
  for(i = 1; i < sm->sizePsi+1; i++)
    obj += 0.5*sm->w[i]*sm->w[i];
  free(new_constraint);

  return obj;
}
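
For reference, the value returned above is the clipped one-slack objective restricted to the valid examples, sketched here as a comment (a reading of the code, not text from the original source):

/* obj = 0.5*||w||^2 + C*max(0, sum_i cost_i/m * (loss_i - w.(fy_i - fybar_i)))
   where the sum runs only over examples with valid_examples[i] set
   and cost_i = ex[i].x.example_cost. */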
Example #8
void add_constraint_to_constraint_cache(CCACHE *ccache, MODEL *svmModel, int exnum, SVECTOR *fydelta, double rhs, int maxconst)
     /* add new constraint fydelta*w>rhs for example exnum to cache,
	if it is more violated than the currently most violated
	constraint in cache. if this grows the number of constraints
	for this example beyond maxconst, then the most unused
	constraint is deleted. the function assumes that
	update_constraint_cache_for_model has been run. */
{
  double  viol;
  double  dist_ydelta;
  DOC     *doc_fydelta;
  CCACHEELEM *celem;
  int     cnum;

  doc_fydelta=create_example(1,0,1,1,fydelta);
  dist_ydelta=classify_example(svmModel,doc_fydelta);
  free_example(doc_fydelta,0);  
  viol=rhs-dist_ydelta;

  if((viol-0.000000000001) > ccache->constlist[exnum]->viol) {
    celem=ccache->constlist[exnum];
    ccache->constlist[exnum]=(CCACHEELEM *)malloc(sizeof(CCACHEELEM));
    ccache->constlist[exnum]->next=celem;
    ccache->constlist[exnum]->fydelta=fydelta;
    ccache->constlist[exnum]->rhs=rhs;
    ccache->constlist[exnum]->viol=viol;

    /* remove last constraint in list, if list is longer than maxconst */
    cnum=2;
    for(celem=ccache->constlist[exnum];celem && celem->next && celem->next->next;celem=celem->next)
      cnum++;
    if(cnum>maxconst) {
      free_svector(celem->next->fydelta);
      free(celem->next);
      celem->next=NULL;
    }
  }
  else {
    free_svector(fydelta);
  }
}
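
The CCACHEELEM layout assumed by the list manipulation above, inferred from this snippet alone (the real header may differ):

typedef struct ccacheelem {
  SVECTOR            *fydelta; /* lhs of the cached constraint */
  double             rhs;      /* rhs (loss term) of the constraint */
  double             viol;     /* violation under the current model */
  struct ccacheelem  *next;    /* newest constraint sits at the head */
} CCACHEELEM;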
Example #9
SVECTOR* add_list_ss(SVECTOR *a) 
     /* computes the linear combination of the SVECTOR list weighted
	by the factor of each SVECTOR */
{
  SVECTOR *scaled,*oldsum,*sum,*f;
  WORD    empty[2];
    
  if(a){
    sum=smult_s(a,a->factor);
    for(f=a->next;f;f=f->next) {
      scaled=smult_s(f,f->factor);
      oldsum=sum;
      sum=add_ss(sum,scaled);
      free_svector(oldsum);
      free_svector(scaled);
    }
    sum->factor=1.0;
  }
  else {
    empty[0].wnum=0;
    sum=create_svector(empty,"",1.0);
  }
  return(sum);
}
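
A minimal sketch of combining two weighted vectors with add_list_ss() (w1 and w2 are assumed zero-terminated WORD arrays):

/* sum = 2.0*w1 - 1.0*w2, returned as a single SVECTOR with factor 1.0 */
SVECTOR *a=create_svector(w1,"",2.0);
a->next=create_svector(w2,"",-1.0);
SVECTOR *sum=add_list_ss(a);
free_svector(a);     /* also frees the chained second vector */
free_svector(sum);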
Example #10
void free_constraint_cache(CCACHE *ccache)
     /* frees all memory allocated for constraint cache */
{
  CCACHEELEM *celem,*next;
  int i;
  for(i=0; i<ccache->n; i++) {
    celem=ccache->constlist[i];
    while(celem) {
      free_svector(celem->fydelta);
      next=celem->next;
      free(celem);
      celem=next;
    }
  }
  free(ccache->constlist);
  free(ccache);
}
SVECTOR *psi(PATTERN x, LABEL y, LATENT_VAR h, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm) {
  LL_NODE *edge;
  SVECTOR *oldsum, *sum;  
  WORD empty[2];

  empty[0].wnum=0;
  sum=create_svector(empty,"",1.0);

  edge = h.head;
  while (edge!=NULL) {
    oldsum = sum;
    sum = add_ss(x.pair_features[edge->u][edge->v], oldsum);
    free_svector(oldsum);
    edge = edge->next;
  }
  
  return(sum);
}
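
The edge-list node assumed by this psi(), inferred from the traversal above (the real typedef may differ):

typedef struct ll_node {
  int             u, v;  /* indices into x.pair_features[u][v] */
  struct ll_node  *next;
} LL_NODE;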
Example #12
double model_length_n(MODEL *model) 
     /* compute length of weight vector */
{
  long     i,totwords=model->totwords+1;
  double   sum,*weight_n;
  SVECTOR  *weight;

  if(model->kernel_parm.kernel_type != LINEAR) {
    printf("ERROR: model_length_n applies only to linear kernel!\n");
    exit(1);
  }
  weight_n=create_nvector(totwords);
  clear_nvector(weight_n,totwords);
  for(i=1;i<model->sv_num;i++) 
    add_list_n_ns(weight_n,model->supvec[i]->fvec,model->alpha[i]);
  weight=create_svector_n(weight_n,totwords,NULL,1.0);
  sum=sprod_ss(weight,weight);
  free(weight_n);
  free_svector(weight);
  return(sqrt(sum));
}
Example #13
LABEL       classify_struct_example(PATTERNX x, STRUCTMODEL *sm, 
				    STRUCT_LEARN_PARM *sparm)
{
  /* Finds the label yhat for pattern x that scores the highest
     according to the linear evaluation function in sm, especially the
     weights sm.w. The returned label is taken as the prediction of sm
     for the pattern x. The weights correspond to the features defined
     by psi() and range from index 1 to index sm->sizePsi. If the
     function cannot find a label, it shall return an empty label as
     recognized by the function empty_label(y). */
  LABEL y;
  DOC doc;
  long classlabel, bestclass=-1, first=1, j;
  double score, bestscore=-1;
  WORD *words;

  doc = *(x.doc);
  y.scores = (double *)my_malloc(sizeof(double)*(sparm->num_classes+1));
  y.num_classes = sparm->num_classes;
  words = doc.fvec->words;
  for(j=0; (words[j]).wnum != 0; j++) {       /* Check if feature numbers   */
    if((words[j]).wnum>sparm->num_features) /* are not larger than in     */
      (words[j]).wnum=0;                    /* model. Remove feature if   */
  }                                         /* necessary.                 */
  for(classlabel=1; classlabel<=sparm->num_classes; classlabel++) {
    y.classlabel = classlabel;
    doc.fvec = psi(x,y,sm,sparm);
    score = classify_example(sm->svm_model,&doc);
    free_svector(doc.fvec);
    y.scores[classlabel] = score;
    if((bestscore<score) || (first)) {
      bestscore = score;
      bestclass = classlabel;
      first = 0;
    }
  }
  y.classlabel = bestclass;
  return(y);
}
Example #14
SVECTOR* add_list_ss_r(SVECTOR *a, double min_non_zero) 
     /* computes the linear combination of the SVECTOR list weighted
	by the factor of each SVECTOR */
{
  SVECTOR *oldsum,*sum,*f;
  WORD    empty[2];
    
  if(!a) {
    empty[0].wnum=0;
    sum=create_svector(empty,NULL,1.0);
  }
  else if(a && (!a->next)) {
    sum=smult_s(a,a->factor);
  }
  else {
    sum=multadd_ss_r(a,a->next,a->factor,a->next->factor,min_non_zero);
    for(f=a->next->next;f;f=f->next) {
      oldsum=sum;
      sum=multadd_ss_r(oldsum,f,1.0,f->factor,min_non_zero);
      free_svector(oldsum);
    }
  }
  return(sum);
}
Example #15
int update_valid_examples(double *w, long m, double C, SVECTOR **fycache, EXAMPLE *ex, 
													STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, int *valid_examples, double spl_weight) {

	long i, j;

	/* if self-paced learning weight is non-positive, all examples are valid */
	if(spl_weight <= 0.0) {
		for (i=0;i<m;i++)
			valid_examples[i] = 1;
		return (m);
	}

	sortStruct *slack = (sortStruct *) malloc(m*sizeof(sortStruct));
	LABEL ybar;
	SVECTOR *f, *fy, *fybar;
	double lossval;
	double penalty = 1.0/spl_weight;
	if(penalty < 0.0)
		penalty = DBL_MAX;

	for (i=0;i<m;i++) {
		find_most_violated_constraint_marginrescaling(ex[i].x, ex[i].y, &ybar, sm, sparm);
		fy = copy_svector(fycache[i]);
		fybar = psi(ex[i].x,ybar,sm,sparm);
		slack[i].index = i;
		slack[i].val = loss(ex[i].y,ybar,sparm);
		for (f=fy;f;f=f->next) {
			j = 0;
			while (1) {
				if(!f->words[j].wnum)
					break;
				slack[i].val -= sm->w[f->words[j].wnum]*f->words[j].weight;
				j++;
			}
		}
		for (f=fybar;f;f=f->next) {
			j = 0;
			while (1) {
				if(!f->words[j].wnum)
					break;
				slack[i].val += sm->w[f->words[j].wnum]*f->words[j].weight;
				j++;
			}
		}
		free_svector(fy);
		free_svector(fybar);
	}
	qsort(slack,m,sizeof(sortStruct),&compar);

	int nValid = 0;
	for (i=0;i<m;i++)
		valid_examples[i] = 0;
	for (i=0;i<m;i++) {
		if(slack[i].val*C/m > penalty)
			break;
		valid_examples[slack[i].index] = 1;
		nValid++;
	}

	free(slack);

	return nValid;
}
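
The sortStruct/compar pair used by the qsort() call above is not shown in this snippet; a minimal sketch consistent with the fields accessed here:

typedef struct {
  long   index;  /* example index */
  double val;    /* per-example slack estimate */
} sortStruct;    /* assumed layout */

/* ascending order by slack value, so the easiest examples come first */
int compar(const void *p1, const void *p2)
{
  const sortStruct *a=(const sortStruct *)p1;
  const sortStruct *b=(const sortStruct *)p2;
  if(a->val < b->val) return(-1);
  if(a->val > b->val) return(1);
  return(0);
}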
double optimizeMultiVariatePerfMeasure(SAMPLE sample, int datasetStartIdx, int chunkSz, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm,
		double C, double Cdash, double epsilon, int MAX_ITER, LEARN_PARM *learn_parm, char *trainfile,
		double ***w_iters, int eid, int chunkid, int numChunks, double *zeroes){

	int i;
	time_t time_start, time_end;
	double decrement;
	double primal_obj, last_primal_obj;
	double cooling_eps;
	double stop_crit;
	LATENT_VAR *imputed_h = NULL;

	int dataset_sz = sample.n;
	SVECTOR **fycache, *diff, *fy;
	EXAMPLE *ex = sample.examples;

	/* some training information */
	printf("C: %.8g\n", C);
	printf("Cdash: %.8g\n", Cdash);
	printf("epsilon: %.8g\n", epsilon);
	printf("sample.n: %ld\n", dataset_sz);
	printf("sm->sizePsi: %ld\n", sm->sizePsi); fflush(stdout);

	/* prepare feature vector cache for correct labels with imputed latent variables */
	fycache = (SVECTOR**)malloc(dataset_sz*sizeof(SVECTOR*));
	for (i=0;i<dataset_sz;i++) {
		fy = psi(ex[i].x, ex[i].y, ex[i].h, sm, sparm);
		diff = add_list_ss(fy);
		free_svector(fy);
		fy = diff;
		fycache[i] = fy;
	}

	/* time taken stats */
	time(&time_start);

	/* outer loop: latent variable imputation */
	int outer_iter = 0;
	last_primal_obj = 0;
	decrement = 0;
	cooling_eps = 0.5*MAX(C,Cdash)*epsilon;
	while ((outer_iter<2)||((!stop_crit)&&(outer_iter<MAX_OUTER_ITER))) {
		printf("OUTER ITER %d\n", outer_iter); fflush(stdout);
		/* cutting plane algorithm */
		time_t cp_start, cp_end;
		time(&cp_start);

		/// NOTE : Change of variables (Create 'u' by subtracting w_prev from w)
		create_u_variables(w_iters, eid, chunkid, numChunks, sm, zeroes);

		if(chunkid == 0 && eid == 0){ // First Chunk of First Epoch
			primal_obj = cutting_plane_algorithm(w_iters[eid][chunkid], dataset_sz, MAX_ITER, C, cooling_eps,
							fycache, ex, sm, sparm,	learn_parm->tmpdir, trainfile, learn_parm->frac_sim,
							learn_parm->Fweight, learn_parm->dataset_stats_file, learn_parm->rho_admm,
							learn_parm->isExhaustive, learn_parm->isLPrelaxation, Cdash, datasetStartIdx, chunkSz,
							eid, chunkid, zeroes, numChunks); // pass the zeroes vector
		}
		else if(chunkid == 0){ // First chunk of the new Epoch
			primal_obj = cutting_plane_algorithm(w_iters[eid][chunkid], dataset_sz, MAX_ITER, C, cooling_eps,
							fycache, ex, sm, sparm,	learn_parm->tmpdir, trainfile, learn_parm->frac_sim,
							learn_parm->Fweight, learn_parm->dataset_stats_file, learn_parm->rho_admm,
							learn_parm->isExhaustive, learn_parm->isLPrelaxation, Cdash, datasetStartIdx, chunkSz,
							eid, chunkid, w_iters[eid-1][numChunks-1], numChunks); // Last chunk of previous epoch
		}
		else {
			primal_obj = cutting_plane_algorithm(w_iters[eid][chunkid], dataset_sz, MAX_ITER, C, cooling_eps,
							fycache, ex, sm, sparm,	learn_parm->tmpdir, trainfile, learn_parm->frac_sim,
							learn_parm->Fweight, learn_parm->dataset_stats_file, learn_parm->rho_admm,
							learn_parm->isExhaustive, learn_parm->isLPrelaxation, Cdash, datasetStartIdx, chunkSz,
							eid, chunkid, w_iters[eid][chunkid-1], numChunks); // previous chunk id of current epoch
		}

		time(&cp_end);

#if(DEBUG_LEVEL==1)
		char msg[20];
		sprintf(msg,"OUTER ITER %d", outer_iter);
		print_time(cp_start, cp_end, msg);
#endif

		/* compute decrement in objective in this outer iteration */
		decrement = last_primal_obj - primal_obj;
		last_primal_obj = primal_obj;
		printf("primal objective: %.4f\n", primal_obj);
		printf("decrement: %.4f\n", decrement); fflush(stdout);

		stop_crit = (decrement<MAX(C, Cdash)*epsilon)&&(cooling_eps<0.5*MAX(C, Cdash)*epsilon+1E-8);

		cooling_eps = -decrement*0.01;
		cooling_eps = MAX(cooling_eps, 0.5*MAX(C,Cdash)*epsilon);
		printf("cooling_eps: %.8g\n", cooling_eps);


		/* impute latent variable using updated weight vector */
		for(i = 0; i < dataset_sz; i ++)
			free_latent_var(ex[i].h);
		if(imputed_h != NULL)
			free(imputed_h);

		imputed_h = (LATENT_VAR*)malloc(sizeof(LATENT_VAR) * dataset_sz);
		infer_latent_variables_all(imputed_h, sm, sparm, dataset_sz, learn_parm->tmpdir, trainfile, datasetStartIdx, chunkSz, eid, chunkid);

		for (i=0;i<dataset_sz;i++) {
			//      free_latent_var(ex[i].h);
			//      ex[i].h = infer_latent_variables(ex[i].x, ex[i].y, &sm, &sparm); // ILP for  Pr (Z | Y_i, X_i) in our case
			ex[i].h = imputed_h[i];
		}
		/* re-compute feature vector cache */
		for (i=0;i<dataset_sz;i++) {
			free_svector(fycache[i]);
			fy = psi(ex[i].x, ex[i].y, ex[i].h, sm, sparm);
			diff = add_list_ss(fy);
			free_svector(fy);
			fy = diff;
			fycache[i] = fy;
		}
		printf("(OnlineSVM) .. finished outer_iter %d\n",outer_iter);
		outer_iter++;

		/// NOTE: Restore the 'w' by adding the current 'u' to w_prev
		restore_w_variables(w_iters, eid, chunkid, numChunks, sm, zeroes);

	} // end outer loop

	time(&time_end);

	#if (DEBUG_LEVEL==1)
	  print_time(time_start, time_end, "Total time");
	#endif

	for(i=0;i<dataset_sz;i++) {
		free_svector(fycache[i]);
	}
	free(fycache);

	return primal_obj;
}
SVECTOR* find_cutting_plane(EXAMPLE *ex, SVECTOR **fycache, double *margin, long m, STRUCTMODEL *sm,
		STRUCT_LEARN_PARM *sparm, char* tmpdir, char *trainfile, double frac_sim, double Fweight,
		char *dataset_stats_file, double rho_admm, long isExhaustive, long isLPrelaxation,
		double *margin2, int datasetStartIdx, int chunkSz, int eid, int chunkid) {

  long i;
  SVECTOR *f, *fy, *fybar, *lhs;
  LABEL       ybar;
  LATENT_VAR hbar;
  double lossval;
  double *new_constraint;

  long l,k;
  SVECTOR *fvec;
  WORD *words;  

  LABEL       *ybar_all = (LABEL*) malloc(sizeof(LABEL) * m);
  LATENT_VAR *hbar_all = (LATENT_VAR*) malloc (sizeof(LATENT_VAR) * m);
  time_t mv_start, mv_end;

  time(&mv_start);
  find_most_violated_constraint_marginrescaling_all_online(ybar_all, hbar_all, sm, sparm, m,
		  tmpdir, trainfile, frac_sim, dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation,
		  Fweight, datasetStartIdx, chunkSz, eid, chunkid);
  time(&mv_end);

#if (DEBUG_LEVEL==1)
  print_time(mv_start, mv_end, "Max violators");
#endif


  /* find cutting plane */
  lhs = NULL;
  lossval = lossF1(ex, m, ybar_all, sparm, Fweight);
  *margin = lossval;

  *margin2 = 0;
  for (i=0;i<m;i++) {
    //find_most_violated_constraint_marginrescaling(ex[i].x, ex[i].y, &ybar, &hbar, sm, sparm);
    ybar = ybar_all[i];
    hbar = hbar_all[i];
    /* get difference vector */
    fy = copy_svector(fycache[i]);
    fybar = psi(ex[i].x,ybar,hbar,sm,sparm);
    lossval = loss(ex[i].y,ybar,hbar,sparm);
    free_label(ybar);
    free_latent_var(hbar);

    /* scale difference vector */
    for (f=fy;f;f=f->next) {
      f->factor*=1.0/m;
      //f->factor*=ex[i].x.example_cost/m;
    }

    for (f=fybar;f;f=f->next) {
      f->factor*=-1.0/m;
      //f->factor*=-ex[i].x.example_cost/m;
    }
    /* add ybar to constraint */
    append_svector_list(fy,lhs);
    append_svector_list(fybar,fy);
    lhs = fybar;
    *margin2+=lossval/m;
    //*margin+=lossval*ex[i].x.example_cost/m;
  }

  free(ybar_all);
  free(hbar_all);

  /* compact the linear representation */
  new_constraint = add_list_nn(lhs, sm->sizePsi);

//  printf("After this segfault ? \n");fflush(stdout);
//  printf("%x\n",new_constraint);

  free_svector(lhs);

  l=0;
  for (i=1;i<sm->sizePsi+1;i++) {
    if (fabs(new_constraint[i])>1E-10) l++; // non-zero
  }
  words = (WORD*)my_malloc(sizeof(WORD)*(l+1)); 
  assert(words!=NULL);
  k=0;
  for (i=1;i<sm->sizePsi+1;i++) {
    if (fabs(new_constraint[i])>1E-10) {
      words[k].wnum = i;
      words[k].weight = new_constraint[i]; 
      k++;
    }
  }
  words[k].wnum = 0;
  words[k].weight = 0.0;
  fvec = create_svector(words,"",1);

  free(words);
  free(new_constraint);

  return(fvec); 

}
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex,
		STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, char *tmpdir, char * trainfile, double frac_sim, double Fweight,
		char *dataset_stats_file, double rho_admm, long isExhaustive, long isLPrelaxation, double Cdash, int datasetStartIdx, int chunkSz,
		int eid, int chunkid, double *w_prev, int numChunks) {
//	  printf("Addr. of w (inside cp_algo) %x\t%x\n",w,sm->w);
  long i,j;
  double xi;
  double *alpha;
  double **G; /* Gram matrix */
  DOC **dXc; /* constraint matrix */
  double *delta; /* rhs of constraints */
  SVECTOR *new_constraint;
  double dual_obj, alphasum;
  int iter, size_active; 
  double value;
  int r;
  int *idle; /* for cleaning up */
  double margin;
  double primal_obj;
  double *proximal_rhs;
  double *gammaG0=NULL;
  double min_rho = 0.001;
  double max_rho;
  double serious_counter=0;
  double rho = 1.0; /* temporarily set it to 1 first */

  double expected_descent, primal_obj_b=-1, reg_master_obj;
  int null_step=1;
  double *w_b;
  double kappa=0.1;
  double temp_var;
  double proximal_term, primal_lower_bound;

  double v_k; 
  double obj_difference; 
  double *cut_error; // cut_error[i] = alpha_{k,i} at current center x_k
  double sigma_k; 
  double m2 = 0.2;
  double m3 = 0.9;
  double gTd; 
  double last_sigma_k=0; 

  double initial_primal_obj;
  int suff_decrease_cond=0;
  double decrease_proportion = 0.2; // start from 0.2 first 

  double z_k_norm;
  double last_z_k_norm=0;

  w_b = create_nvector(sm->sizePsi);
  clear_nvector(w_b,sm->sizePsi);
  /* warm start */
  for (i=1;i<sm->sizePsi+1;i++) {
    w_b[i] = w[i];
  }

  iter = 0;
  size_active = 0;
  xi = 0.0;
  alpha = NULL;
  G = NULL;
  dXc = NULL;
  delta = NULL;
  idle = NULL;

  proximal_rhs = NULL;
  cut_error = NULL; 

  printf("ITER 0 \n(before cutting plane) \n");
  double margin2;
  new_constraint = find_cutting_plane (ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile, frac_sim,
		  Fweight, dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation, &margin2,
		  datasetStartIdx, chunkSz, eid, chunkid);
  value = margin2 - sprod_ns(w, new_constraint);

  margin -= sprod_ns(w_prev, new_constraint); // (Ajay: ONLINE LEARNING) IMPT NOTE: constant addition to the loss;
                                              // model score uses the w_prev values ('-' because the terms are reversed in the code)
	
  primal_obj_b = 0.5*sprod_nn(w_b,w_b,sm->sizePsi)+C*value + Cdash*margin/numChunks; // Ajay: change in obj involving both hamming and F1 loss
  primal_obj = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value + Cdash*margin/numChunks; // Ajay: change in obj involving both hamming and F1 loss
  primal_lower_bound = 0;
  expected_descent = -primal_obj_b;
  initial_primal_obj = primal_obj_b; 

  max_rho = C; 

  printf("Running CCCP inner loop solver: \n"); fflush(stdout);

  time_t iter_start, iter_end;

  while ((!suff_decrease_cond)&&(expected_descent<-epsilon)&&(iter<MAX_ITER)) { 
    iter+=1;
    size_active+=1;

    time(&iter_start);

#if (DEBUG_LEVEL>0)
    printf("ITER %d\n", iter); 
#endif
    printf("."); fflush(stdout); 

    /* add  constraint */
    dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active);
    assert(dXc!=NULL);
    dXc[size_active-1] = (DOC*)malloc(sizeof(DOC));
    dXc[size_active-1]->fvec = new_constraint; 
    dXc[size_active-1]->slackid = 1; // only one common slackid (one-slack)
    dXc[size_active-1]->costfactor = 1.0;

    delta = (double*)realloc(delta, sizeof(double)*size_active);
    assert(delta!=NULL);
    delta[size_active-1] = margin2; // Ajay: changing for the formulation combining hamming and F1 loss
    alpha = (double*)realloc(alpha, sizeof(double)*size_active);
    assert(alpha!=NULL);
    alpha[size_active-1] = 0.0;
    idle = (int*)realloc(idle, sizeof(int)*size_active);
    assert(idle!=NULL); 
    idle[size_active-1] = 0;
    /* proximal point */
    proximal_rhs = (double*)realloc(proximal_rhs, sizeof(double)*size_active);
    assert(proximal_rhs!=NULL); 
    cut_error = (double*)realloc(cut_error, sizeof(double)*size_active); 
    assert(cut_error!=NULL); 
    // note g_i = - new_constraint
    cut_error[size_active-1] = C*(sprod_ns(w_b, new_constraint) - sprod_ns(w, new_constraint)); 
    cut_error[size_active-1] += (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi)); 
    cut_error[size_active-1] -= (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi)); 

    gammaG0 = (double*)realloc(gammaG0, sizeof(double)*size_active);
    assert(gammaG0!=NULL);
      
    /* update Gram matrix */
    G = (double**)realloc(G, sizeof(double*)*size_active);
    assert(G!=NULL);
    G[size_active-1] = NULL;
    for (j=0;j<size_active;j++) {
      G[j] = (double*)realloc(G[j], sizeof(double)*size_active);
      assert(G[j]!=NULL);
    }
    for (j=0;j<size_active-1;j++) {
      G[size_active-1][j] = sprod_ss(dXc[size_active-1]->fvec, dXc[j]->fvec);
      G[j][size_active-1] = G[size_active-1][j];
    }
    G[size_active-1][size_active-1] = sprod_ss(dXc[size_active-1]->fvec,dXc[size_active-1]->fvec);

	
    /* update gammaG0 */
    if (null_step==1) {
      gammaG0[size_active-1] = sprod_ns(w_b, dXc[size_active-1]->fvec);
    } else {
      for (i=0;i<size_active;i++) {
	gammaG0[i] = sprod_ns(w_b, dXc[i]->fvec); 
      }
    }

     /* update proximal_rhs */
    for (i=0;i<size_active;i++) {
      proximal_rhs[i] = delta[i] - rho/(1+rho)*gammaG0[i];
    }


    /* solve QP to update alpha */
    dual_obj = 0; 
    time_t mosek_start, mosek_end;
    time(&mosek_start);
    r = mosek_qp_optimize(G, proximal_rhs, alpha, (long) size_active, C, &dual_obj,rho);
    time(&mosek_end);
#if(DEBUG_LEVEL == 1)
    print_time(mosek_start, mosek_end, "Mosek solver");
#endif
    /* DEBUG */
    //printf("r: %d\n", r); fflush(stdout);
    /* END DEBUG */

    clear_nvector(w,sm->sizePsi);
    for (j=0;j<size_active;j++) {
      if (alpha[j]>C*ALPHA_THRESHOLD) {
	add_vector_ns(w,dXc[j]->fvec,alpha[j]/(1+rho));
      }
    }

    z_k_norm = sqrt(sprod_nn(w,w,sm->sizePsi)); 

    add_vector_nn(w, w_b, sm->sizePsi, rho/(1+rho));

    
    /* detect if step size too small */
    sigma_k = 0; 
    alphasum = 0; 
    for (j=0;j<size_active;j++) {
      sigma_k += alpha[j]*cut_error[j]; 
      alphasum+=alpha[j]; 
    }
    sigma_k/=C; 
    gTd = -C*(sprod_ns(w,new_constraint) - sprod_ns(w_b,new_constraint));

#if (DEBUG_LEVEL>0)
    for (j=0;j<size_active;j++) {
      printf("alpha[%d]: %.8g, cut_error[%d]: %.8g\n", j, alpha[j], j, cut_error[j]);
    }
    printf("sigma_k: %.8g\n", sigma_k); 
    printf("alphasum: %.8g\n", alphasum);
    printf("g^T d: %.8g\n", gTd); 
    fflush(stdout); 
#endif


    /* update cleanup information */
    for (j=0;j<size_active;j++) {
      if (alpha[j]<ALPHA_THRESHOLD*C) {
	idle[j]++;
      } else {
        idle[j]=0;
      }
    }

    new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile,
		  frac_sim, Fweight, dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation,
		  &margin2, datasetStartIdx, chunkSz, eid, chunkid);
 //   new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, tmpdir, trainfile, frac_sim, Fweight, dataset_stats_file, rho);
    value = margin2 - sprod_ns(w, new_constraint);

    margin -= sprod_ns(w_prev, new_constraint); // (Ajay: ONLINE LEARNING) IMPT NOTE: constant addition to the loss;
                                                // model score uses the w_prev values ('-' because the terms are reversed in the code)

    /* print primal objective */
    primal_obj = 0.5*sprod_nn(w,w,sm->sizePsi)+C*value + Cdash*margin/numChunks; // Ajay: change in obj involving both hamming and F1 loss
     
#if (DEBUG_LEVEL>0)
    printf("ITER PRIMAL_OBJ %.4f\n", primal_obj); fflush(stdout);
#endif
    
 
    temp_var = sprod_nn(w_b,w_b,sm->sizePsi); 
    proximal_term = 0.0;
    for (i=1;i<sm->sizePsi+1;i++) {
      proximal_term += (w[i]-w_b[i])*(w[i]-w_b[i]);
    }
    
    reg_master_obj = -dual_obj+0.5*rho*temp_var/(1+rho);
    expected_descent = reg_master_obj - primal_obj_b;

    v_k = (reg_master_obj - proximal_term*rho/2) - primal_obj_b; 

    primal_lower_bound = MAX(primal_lower_bound, reg_master_obj - 0.5*rho*(1+rho)*proximal_term);

#if (DEBUG_LEVEL>0)
    printf("ITER REG_MASTER_OBJ: %.4f\n", reg_master_obj);
    printf("ITER EXPECTED_DESCENT: %.4f\n", expected_descent);
    printf("ITER PRIMLA_OBJ_B: %.4f\n", primal_obj_b);
    printf("ITER RHO: %.4f\n", rho);
    printf("ITER ||w-w_b||^2: %.4f\n", proximal_term);
    printf("ITER PRIMAL_LOWER_BOUND: %.4f\n", primal_lower_bound);
    printf("ITER V_K: %.4f\n", v_k); 
#endif
    obj_difference = primal_obj - primal_obj_b; 


    if (primal_obj<primal_obj_b+kappa*expected_descent) {
      /* extra condition to be met */
      if ((gTd>m2*v_k)||(rho<min_rho+1E-8)) {
#if (DEBUG_LEVEL>0)
	printf("SERIOUS STEP\n");
#endif
	/* update cut_error */
	for (i=0;i<size_active;i++) {
	  cut_error[i] -= (primal_obj_b - 0.5*sprod_nn(w_b,w_b,sm->sizePsi)); 
	  cut_error[i] -= C*sprod_ns(w_b, dXc[i]->fvec); 
	  cut_error[i] += (primal_obj - 0.5*sprod_nn(w,w,sm->sizePsi));
	  cut_error[i] += C*sprod_ns(w, dXc[i]->fvec); 
	}
	primal_obj_b = primal_obj;
	for (i=1;i<sm->sizePsi+1;i++) {
	  w_b[i] = w[i];
	}
	null_step = 0;
	serious_counter++;	
      } else {
	/* increase step size */
#if (DEBUG_LEVEL>0)
	printf("NULL STEP: SS(ii) FAILS.\n");
#endif
	serious_counter--; 
	rho = MAX(rho/10,min_rho);
      }
    } else { /* no sufficient decrease */
      serious_counter--; 
      if ((cut_error[size_active-1]>m3*last_sigma_k)&&(fabs(obj_difference)>last_z_k_norm+last_sigma_k)) {
#if (DEBUG_LEVEL>0)
	printf("NULL STEP: NS(ii) FAILS.\n");
#endif
	rho = MIN(10*rho,max_rho);
      } 
#if (DEBUG_LEVEL>0)
      else printf("NULL STEP\n");
#endif
    }
    /* update last_sigma_k */
    last_sigma_k = sigma_k; 
    last_z_k_norm = z_k_norm; 


    /* break out of the while loop after a sufficiently large proportional decrease in the primal objective */
    if (primal_obj_b/initial_primal_obj<1-decrease_proportion) {
      suff_decrease_cond = 1; 
    }

    /* clean up */
    if (iter % CLEANUP_CHECK == 0) {
      size_active = resize_cleanup(size_active, idle, alpha, delta, gammaG0, proximal_rhs, G, dXc, cut_error);
    }

    time(&iter_end);

#if (DEBUG_LEVEL==1)
    char msg[20];
    sprintf(msg,"ITER %d",iter);
    print_time(iter_start, iter_end, msg);
#endif
  } // end cutting plane while loop 

  printf(" Inner loop optimization finished.\n"); fflush(stdout); 
      
  /* free memory */
  for (j=0;j<size_active;j++) {
    free(G[j]);
    free_example(dXc[j],0);	
  }
  free(G);
  free(dXc);
  free(alpha);
  free(delta);
  free_svector(new_constraint);
  free(idle);
  free(gammaG0);
  free(proximal_rhs);
  free(cut_error); 

  /* copy and free */
  for (i=1;i<sm->sizePsi+1;i++) {
    w[i] = w_b[i];
  }
  free(w_b);

  return(primal_obj_b);

}
Example #19
void svm_learn_struct_joint(SAMPLE sample, STRUCT_LEARN_PARM *sparm,
			    LEARN_PARM *lparm, KERNEL_PARM *kparm, 
			    STRUCTMODEL *sm, int alg_type)
{
  int         i,j;
  int         numIt=0;
  long        argmax_count=0;
  long        totconstraints=0;
  long        kernel_type_org;
  double      epsilon,epsilon_cached;
  double      lhsXw,rhs_i;
  double      rhs=0;
  double      slack,ceps;
  double      dualitygap,modellength,alphasum;
  long        sizePsi;
  double      *alpha=NULL;
  long        *alphahist=NULL,optcount=0;
  CONSTSET    cset;
  SVECTOR     *diff=NULL;
  double      *lhs_n=NULL;
  SVECTOR     *fy, *fydelta, **fycache, *lhs;
  MODEL       *svmModel=NULL;
  DOC         *doc;

  long        n=sample.n;
  EXAMPLE     *ex=sample.examples;
  double      rt_total=0,rt_opt=0,rt_init=0,rt_psi=0,rt_viol=0,rt_kernel=0;
  double      rt_cacheupdate=0,rt_cacheconst=0,rt_cacheadd=0,rt_cachesum=0;
  double      rt1=0,rt2=0;
  long        progress;

  /*
  SVECTOR     ***fydelta_cache=NULL;
  double      **loss_cache=NULL;
  int         cache_size=0;
  */
  CCACHE      *ccache=NULL;
  int         cached_constraint;
  double      viol,viol_est,epsilon_est=0;
  long        uptr=0;
  long        *randmapping=NULL;
  long        batch_size=n;

  rt1=get_runtime();

  if(sparm->batch_size<100)
    batch_size=sparm->batch_size*n/100.0;

  init_struct_model(sample,sm,sparm,lparm,kparm); 
  sizePsi=sm->sizePsi+1;          /* sm must contain size of psi on return */

  if(sparm->slack_norm == 1) {
    lparm->svm_c=sparm->C;          /* set upper bound C */
    lparm->sharedslack=1;
  }
  else if(sparm->slack_norm == 2) {
    printf("ERROR: The joint algorithm does not apply to L2 slack norm!"); 
    fflush(stdout);
    exit(0); 
  }
  else {
    printf("ERROR: Slack norm must be L1 or L2!"); fflush(stdout);
    exit(0);
  }


  lparm->biased_hyperplane=0;     /* set threshold to zero */
  epsilon=100.0;                  /* start with low precision and
				     increase later */
  epsilon_cached=epsilon;         /* epsilon to use for iterations
				     using constraints constructed
				     from the constraint cache */

  cset=init_struct_constraints(sample, sm, sparm);
  if(cset.m > 0) {
    alpha=(double *)realloc(alpha,sizeof(double)*cset.m);
    alphahist=(long *)realloc(alphahist,sizeof(long)*cset.m);
    for(i=0; i<cset.m; i++) {
      alpha[i]=0;
      alphahist[i]=-1; /* -1 makes sure these constraints are never removed */
    }
  }
  kparm->gram_matrix=NULL;
  if((alg_type == ONESLACK_DUAL_ALG) || (alg_type == ONESLACK_DUAL_CACHE_ALG))
    kparm->gram_matrix=init_kernel_matrix(&cset,kparm);

  /* set initial model and slack variables */
  svmModel=(MODEL *)my_malloc(sizeof(MODEL));
  lparm->epsilon_crit=epsilon;
  svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi,
			 lparm,kparm,NULL,svmModel,alpha);
  add_weight_vector_to_linear_model(svmModel);
  sm->svm_model=svmModel;
  sm->w=svmModel->lin_weights; /* short cut to weight vector */

  /* create a cache of the feature vectors for the correct labels */
  fycache=(SVECTOR **)my_malloc(n*sizeof(SVECTOR *));
  for(i=0;i<n;i++) {
    if(USE_FYCACHE) {
      fy=psi(ex[i].x,ex[i].y,sm,sparm);
      if(kparm->kernel_type == LINEAR_KERNEL) { /* store difference vector directly */
	diff=add_list_sort_ss_r(fy,COMPACT_ROUNDING_THRESH); 
	free_svector(fy);
	fy=diff;
      }
    }
    else
      fy=NULL;
    fycache[i]=fy;
  }

  /* initialize the constraint cache */
  if(alg_type == ONESLACK_DUAL_CACHE_ALG) {
    ccache=create_constraint_cache(sample,sparm,sm);
    /* NOTE:  */
    for(i=0;i<n;i++) 
      if(loss(ex[i].y,ex[i].y,sparm) != 0) {
	printf("ERROR: Loss function returns non-zero value loss(y_%d,y_%d)\n",i,i);
	printf("       W4 algorithm assumes that loss(y_i,y_i)=0 for all i.\n");
	exit(1);
      }
  }
  
  if(kparm->kernel_type == LINEAR_KERNEL)
    lhs_n=create_nvector(sm->sizePsi);

  /* randomize order of training examples */
  if(batch_size<n)
    randmapping=random_order(n);

  rt_init+=MAX(get_runtime()-rt1,0);
  rt_total+=rt_init;

    /*****************/
   /*** main loop ***/
  /*****************/
  do { /* iteratively find and add constraints to working set */

      if(struct_verbosity>=1) { 
	printf("Iter %i: ",++numIt); 
	fflush(stdout);
      }
      
      rt1=get_runtime();

      /**** compute current slack ****/
      alphasum=0;
      for(j=0;(j<cset.m);j++) 
	  alphasum+=alpha[j];
      for(j=0,slack=-1;(j<cset.m) && (slack==-1);j++)  
	if(alpha[j] > alphasum/cset.m)
	  slack=MAX(0,cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
      slack=MAX(0,slack);

      rt_total+=MAX(get_runtime()-rt1,0);

      /**** find a violated joint constraint ****/
      lhs=NULL;
      rhs=0;
      if(alg_type == ONESLACK_DUAL_CACHE_ALG) {
	rt1=get_runtime();
	/* Compute violation of constraints in cache for current w */
	if(struct_verbosity>=2) rt2=get_runtime();
	update_constraint_cache_for_model(ccache, svmModel);
	if(struct_verbosity>=2) rt_cacheupdate+=MAX(get_runtime()-rt2,0);
	/* Is there a sufficiently violated constraint in cache? */
	viol=compute_violation_of_constraint_in_cache(ccache,epsilon_est/2);
	if(viol-slack > MAX(epsilon_est/10,sparm->epsilon)) { 
	  /* There is a sufficiently violated constraint in cache, so
	     use this constraint in this iteration. */
	  if(struct_verbosity>=2) rt2=get_runtime();
	  viol=find_most_violated_joint_constraint_in_cache(ccache,
					       epsilon_est/2,lhs_n,&lhs,&rhs);
	  if(struct_verbosity>=2) rt_cacheconst+=MAX(get_runtime()-rt2,0);
	  cached_constraint=1;
	}
	else {
	  /* There is no sufficiently violated constraint in cache, so
	     update cache by computing most violated constraint
	     explicitly for batch_size examples. */
	  viol_est=0;
	  progress=0;
	  viol=compute_violation_of_constraint_in_cache(ccache,0);
	  for(j=0;(j<batch_size) || ((j<n)&&(viol-slack<sparm->epsilon));j++) {
	    if(struct_verbosity>=1) 
	      print_percent_progress(&progress,n,10,".");
	    uptr=uptr % n;
	    if(randmapping) 
	      i=randmapping[uptr];
	    else
	      i=uptr;
	    /* find most violating fydelta=fy-fybar and rhs for example i */
	    find_most_violated_constraint(&fydelta,&rhs_i,&ex[i],
					  fycache[i],n,sm,sparm,
					  &rt_viol,&rt_psi,&argmax_count);
	    /* add current fy-fybar and loss to cache */
	    if(struct_verbosity>=2) rt2=get_runtime();
	    viol+=add_constraint_to_constraint_cache(ccache,sm->svm_model,
			     i,fydelta,rhs_i,0.0001*sparm->epsilon/n,
			     sparm->ccache_size,&rt_cachesum);
	    if(struct_verbosity>=2) rt_cacheadd+=MAX(get_runtime()-rt2,0);
	    viol_est+=ccache->constlist[i]->viol;
	    uptr++;
	  }
	  cached_constraint=(j<n);
	  if(struct_verbosity>=2) rt2=get_runtime();
	  if(cached_constraint)
	    viol=find_most_violated_joint_constraint_in_cache(ccache,
					       epsilon_est/2,lhs_n,&lhs,&rhs);
	  else
	    viol=find_most_violated_joint_constraint_in_cache(ccache,0,lhs_n,
							 &lhs,&rhs);
	  if(struct_verbosity>=2) rt_cacheconst+=MAX(get_runtime()-rt2,0);
	  viol_est*=((double)n/j);
	  epsilon_est=(1-(double)j/n)*epsilon_est+(double)j/n*(viol_est-slack);
	  if((struct_verbosity >= 1) && (j!=n))
	    printf("(upd=%5.1f%%,eps^=%.4f,eps*=%.4f)",
		   100.0*j/n,viol_est-slack,epsilon_est);
	}
	lhsXw=rhs-viol;

	rt_total+=MAX(get_runtime()-rt1,0);
      }
      else { 
	/* do not use constraint from cache */
	rt1=get_runtime();
	cached_constraint=0;
	if(kparm->kernel_type == LINEAR_KERNEL)
	  clear_nvector(lhs_n,sm->sizePsi);
	progress=0;
	rt_total+=MAX(get_runtime()-rt1,0);

	for(i=0; i<n; i++) {
	  rt1=get_runtime();

	  if(struct_verbosity>=1) 
	    print_percent_progress(&progress,n,10,".");

	  /* compute most violating fydelta=fy-fybar and rhs for example i */
	  find_most_violated_constraint(&fydelta,&rhs_i,&ex[i],fycache[i],n,
				      sm,sparm,&rt_viol,&rt_psi,&argmax_count);
	  /* add current fy-fybar to lhs of constraint */
	  if(kparm->kernel_type == LINEAR_KERNEL) {
	    add_list_n_ns(lhs_n,fydelta,1.0); /* add fy-fybar to sum */
	    free_svector(fydelta);
	  }
	  else {
	    append_svector_list(fydelta,lhs); /* add fy-fybar to vector list */
	    lhs=fydelta;
	  }
	  rhs+=rhs_i;                         /* add loss to rhs */
	  
	  rt_total+=MAX(get_runtime()-rt1,0);

	} /* end of example loop */

	rt1=get_runtime();

	/* create sparse vector from dense sum */
	if(kparm->kernel_type == LINEAR_KERNEL)
	  lhs=create_svector_n_r(lhs_n,sm->sizePsi,NULL,1.0,
				 COMPACT_ROUNDING_THRESH);
	doc=create_example(cset.m,0,1,1,lhs);
	lhsXw=classify_example(svmModel,doc);
	free_example(doc,0);
	viol=rhs-lhsXw;

	rt_total+=MAX(get_runtime()-rt1,0);

      } /* end of finding most violated joint constraint */

      rt1=get_runtime();

      /**** if `error', then add constraint and recompute QP ****/
      if(slack > (rhs-lhsXw+0.000001)) {
	printf("\nWARNING: Slack of most violated constraint is smaller than slack of working\n");
	printf("         set! There is probably a bug in 'find_most_violated_constraint_*'.\n");
	printf("slack=%f, newslack=%f\n",slack,rhs-lhsXw);
	/* exit(1); */
      }
      ceps=MAX(0,rhs-lhsXw-slack);
      if((ceps > sparm->epsilon) || cached_constraint) { 
	/**** resize constraint matrix and add new constraint ****/
	cset.lhs=(DOC **)realloc(cset.lhs,sizeof(DOC *)*(cset.m+1));
	cset.lhs[cset.m]=create_example(cset.m,0,1,1,lhs);
	cset.rhs=(double *)realloc(cset.rhs,sizeof(double)*(cset.m+1));
	cset.rhs[cset.m]=rhs;
	alpha=(double *)realloc(alpha,sizeof(double)*(cset.m+1));
	alpha[cset.m]=0;
	alphahist=(long *)realloc(alphahist,sizeof(long)*(cset.m+1));
	alphahist[cset.m]=optcount;
	cset.m++;
	totconstraints++;
	if((alg_type == ONESLACK_DUAL_ALG) 
	   || (alg_type == ONESLACK_DUAL_CACHE_ALG)) {
	  if(struct_verbosity>=2) rt2=get_runtime();
	  kparm->gram_matrix=update_kernel_matrix(kparm->gram_matrix,cset.m-1,
						  &cset,kparm);
	  if(struct_verbosity>=2) rt_kernel+=MAX(get_runtime()-rt2,0);
	}
	
	/**** get new QP solution ****/
	if(struct_verbosity>=1) {
	  printf("*");fflush(stdout);
	}
	if(struct_verbosity>=2) rt2=get_runtime();
	/* set svm precision so that higher than eps of most violated constr */
	if(cached_constraint) {
	  epsilon_cached=MIN(epsilon_cached,ceps); 
	  lparm->epsilon_crit=epsilon_cached/2; 
	}
	else {
	  epsilon=MIN(epsilon,ceps); /* best eps so far */
	  lparm->epsilon_crit=epsilon/2; 
	  epsilon_cached=epsilon;
	}
	free_model(svmModel,0);
	svmModel=(MODEL *)my_malloc(sizeof(MODEL));
	/* Run the QP solver on cset. */
	kernel_type_org=kparm->kernel_type;
	if((alg_type == ONESLACK_DUAL_ALG) 
	   || (alg_type == ONESLACK_DUAL_CACHE_ALG))
	  kparm->kernel_type=GRAM; /* use kernel stored in kparm */
	svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi,
			       lparm,kparm,NULL,svmModel,alpha);
	kparm->kernel_type=kernel_type_org; 
	svmModel->kernel_parm.kernel_type=kernel_type_org;
	/* Always add weight vector, in case part of the kernel is
	   linear. If not, ignore the weight vector since its
	   content is bogus. */
	add_weight_vector_to_linear_model(svmModel);
	sm->svm_model=svmModel;
	sm->w=svmModel->lin_weights; /* short cut to weight vector */
	optcount++;
	/* keep track of when each constraint was last
	   active. constraints marked with -1 are not updated */
	for(j=0;j<cset.m;j++) 
	  if((alphahist[j]>-1) && (alpha[j] != 0))  
	    alphahist[j]=optcount;
	if(struct_verbosity>=2) rt_opt+=MAX(get_runtime()-rt2,0);
	
	/* Check if some of the linear constraints have not been
	   active in a while. Those constraints are then removed to
	   avoid bloating the working set beyond necessity. */
	if(struct_verbosity>=3)
	  printf("Reducing working set...");fflush(stdout);
	remove_inactive_constraints(&cset,alpha,optcount,alphahist,50);
	if(struct_verbosity>=3)
	  printf("done. ");
      }
      else {
	free_svector(lhs);
      }

      if(struct_verbosity>=1)
	printf("(NumConst=%d, SV=%ld, CEps=%.4f, QPEps=%.4f)\n",cset.m,
	       svmModel->sv_num-1,ceps,svmModel->maxdiff);

      rt_total+=MAX(get_runtime()-rt1,0);

  } while(finalize_iteration(ceps,cached_constraint,sample,sm,cset,alpha,sparm)|| cached_constraint || (ceps > sparm->epsilon) );

  // originally written as below; finalize_iteration was never called because of short-circuit evaluation
//  } while(cached_constraint || (ceps > sparm->epsilon) || 
//	  finalize_iteration(ceps,cached_constraint,sample,sm,cset,alpha,sparm)
//	 );
  
  if(struct_verbosity>=1) {
    printf("Final epsilon on KKT-Conditions: %.5f\n",
	   MAX(svmModel->maxdiff,ceps));

    slack=0;
    for(j=0;j<cset.m;j++) 
      slack=MAX(slack,
		cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
    alphasum=0;
    for(i=0; i<cset.m; i++)  
      alphasum+=alpha[i]*cset.rhs[i];
    if(kparm->kernel_type == LINEAR_KERNEL)
      modellength=model_length_n(svmModel);
    else
      modellength=model_length_s(svmModel);
    dualitygap=(0.5*modellength*modellength+sparm->C*viol)
               -(alphasum-0.5*modellength*modellength);
    
    printf("Upper bound on duality gap: %.5f\n", dualitygap);
    printf("Dual objective value: dval=%.5f\n",
	    alphasum-0.5*modellength*modellength);
    printf("Primal objective value: pval=%.5f\n",
	    0.5*modellength*modellength+sparm->C*viol);
    printf("Total number of constraints in final working set: %i (of %i)\n",(int)cset.m,(int)totconstraints);
    printf("Number of iterations: %d\n",numIt);
    printf("Number of calls to 'find_most_violated_constraint': %ld\n",argmax_count);
    printf("Number of SV: %ld \n",svmModel->sv_num-1);
    printf("Norm of weight vector: |w|=%.5f\n",modellength);
    printf("Value of slack variable (on working set): xi=%.5f\n",slack);
    printf("Value of slack variable (global): xi=%.5f\n",viol);
    printf("Norm of longest difference vector: ||Psi(x,y)-Psi(x,ybar)||=%.5f\n",
	   length_of_longest_document_vector(cset.lhs,cset.m,kparm));
    if(struct_verbosity>=2) 
      printf("Runtime in cpu-seconds: %.2f (%.2f%% for QP, %.2f%% for kernel, %.2f%% for Argmax, %.2f%% for Psi, %.2f%% for init, %.2f%% for cache update, %.2f%% for cache const, %.2f%% for cache add (incl. %.2f%% for sum))\n",
	   rt_total/100.0, (100.0*rt_opt)/rt_total, (100.0*rt_kernel)/rt_total,
	   (100.0*rt_viol)/rt_total, (100.0*rt_psi)/rt_total, 
	   (100.0*rt_init)/rt_total,(100.0*rt_cacheupdate)/rt_total,
	   (100.0*rt_cacheconst)/rt_total,(100.0*rt_cacheadd)/rt_total,
	   (100.0*rt_cachesum)/rt_total);
    else if(struct_verbosity==1) 
      printf("Runtime in cpu-seconds: %.2f\n",rt_total/100.0);
  }
  if(ccache) {
    long cnum=0;
    CCACHEELEM *celem;
    for(i=0;i<n;i++) 
      for(celem=ccache->constlist[i];celem;celem=celem->next) 
	cnum++;
    printf("Final number of constraints in cache: %ld\n",cnum);
  }
  if(struct_verbosity>=4)
    printW(sm->w,sizePsi,n,lparm->svm_c);

  if(svmModel) {
    sm->svm_model=copy_model(svmModel);
    sm->w=sm->svm_model->lin_weights; /* short cut to weight vector */
    free_model(svmModel,0);
  }

  print_struct_learning_stats(sample,sm,cset,alpha,sparm);

  if(lhs_n)
    free_nvector(lhs_n);
  if(ccache)    
    free_constraint_cache(ccache);
  for(i=0;i<n;i++)
    if(fycache[i])
      free_svector(fycache[i]);
  free(fycache);
  free(alpha); 
  free(alphahist); 
  free(cset.rhs); 
  for(i=0;i<cset.m;i++) 
    free_example(cset.lhs[i],1);
  free(cset.lhs);
  if(kparm->gram_matrix)
    free_matrix(kparm->gram_matrix);
}
Example #20
void svm_learn_struct(SAMPLE sample, STRUCT_LEARN_PARM *sparm,
		      LEARN_PARM *lparm, KERNEL_PARM *kparm, 
		      STRUCTMODEL *sm, int alg_type)
{
  int         i,j;
  int         numIt=0;
  long        argmax_count=0;
  long        newconstraints=0, totconstraints=0, activenum=0; 
  int         opti_round, *opti, fullround, use_shrinking;
  long        old_totconstraints=0;
  double      epsilon,svmCnorm;
  long        tolerance,new_precision=1,dont_stop=0;
  double      lossval,factor,dist;
  double      margin=0;
  double      slack, *slacks, slacksum, ceps;
  double      dualitygap,modellength,alphasum;
  long        sizePsi;
  double      *alpha=NULL;
  long        *alphahist=NULL,optcount=0,lastoptcount=0;
  CONSTSET    cset;
  SVECTOR     *diff=NULL;
  SVECTOR     *fy, *fybar, *f, **fycache=NULL;
  SVECTOR     *slackvec;
  WORD        slackv[2];
  MODEL       *svmModel=NULL;
  KERNEL_CACHE *kcache=NULL;
  LABEL       ybar;
  DOC         *doc;

  long        n=sample.n;
  EXAMPLE     *ex=sample.examples;
  double      rt_total=0, rt_opt=0, rt_init=0, rt_psi=0, rt_viol=0;
  double      rt1,rt2;

  rt1=get_runtime();

  init_struct_model(sample,sm,sparm,lparm,kparm); 
  sizePsi=sm->sizePsi+1;          /* sm must contain size of psi on return */

  /* initialize shrinking-style example selection heuristic */ 
  if(alg_type == NSLACK_SHRINK_ALG)
    use_shrinking=1;
  else
    use_shrinking=0;
  opti=(int*)my_malloc(n*sizeof(int));
  for(i=0;i<n;i++) {
    opti[i]=0;
  }
  opti_round=0;

  /* normalize regularization parameter C by the number of training examples */
  svmCnorm=sparm->C/n;

  if(sparm->slack_norm == 1) {
    lparm->svm_c=svmCnorm;          /* set upper bound C */
    lparm->sharedslack=1;
  }
  else if(sparm->slack_norm == 2) {
    lparm->svm_c=999999999999999.0; /* upper bound C must never be reached */
    lparm->sharedslack=0;
    if(kparm->kernel_type != LINEAR_KERNEL) {
      printf("ERROR: Kernels are not implemented for L2 slack norm!"); 
      fflush(stdout);
      exit(0); 
    }
  }
  else {
    printf("ERROR: Slack norm must be L1 or L2!"); fflush(stdout);
    exit(0);
  }


  epsilon=100.0;                  /* start with low precision and
				     increase later */
  tolerance=MIN(n/3,MAX(n/100,5));/* increase precision whenever fewer
                                     than this many new constraints were
                                     added in the last round */
  lparm->biased_hyperplane=0;     /* set threshold to zero */

  cset=init_struct_constraints(sample, sm, sparm);
  if(cset.m > 0) {
    alpha=(double *)realloc(alpha,sizeof(double)*cset.m);
    alphahist=(long *)realloc(alphahist,sizeof(long)*cset.m);
    for(i=0; i<cset.m; i++) {
      alpha[i]=0;
      alphahist[i]=-1; /* -1 makes sure these constraints are never removed */
    }
  }

  /* set initial model and slack variables */
  svmModel=(MODEL *)my_malloc(sizeof(MODEL));
  lparm->epsilon_crit=epsilon;
  if(kparm->kernel_type != LINEAR_KERNEL)
    kcache=kernel_cache_init(MAX(cset.m,1),lparm->kernel_cache_size);
  svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n,
			 lparm,kparm,kcache,svmModel,alpha);
  if(kcache)
    kernel_cache_cleanup(kcache);
  add_weight_vector_to_linear_model(svmModel);
  sm->svm_model=svmModel;
  sm->w=svmModel->lin_weights; /* short cut to weight vector */

  /* create a cache of the feature vectors for the correct labels */
  if(USE_FYCACHE) {
    fycache=(SVECTOR **)my_malloc(n*sizeof(SVECTOR *));
    for(i=0;i<n;i++) {
      fy=psi(ex[i].x,ex[i].y,sm,sparm);
      if(kparm->kernel_type == LINEAR_KERNEL) {
	diff=add_list_ss(fy); /* store difference vector directly */
	free_svector(fy);
	fy=diff;
      }
      fycache[i]=fy;
    }
  }

  rt_init+=MAX(get_runtime()-rt1,0);
  rt_total+=MAX(get_runtime()-rt1,0);

    /*****************/
   /*** main loop ***/
  /*****************/
  do { /* iteratively increase precision */

    epsilon=MAX(epsilon*0.49999999999,sparm->epsilon);
    new_precision=1;
    if(epsilon == sparm->epsilon)   /* for final precision, find all SV */
      tolerance=0; 
    lparm->epsilon_crit=epsilon/2;  /* svm precision must be higher than eps */
    if(struct_verbosity>=1)
      printf("Setting current working precision to %g.\n",epsilon);

    do { /* iteration until (approx) all SV are found for current
            precision and tolerance */
      
      opti_round++;
      activenum=n;
      dont_stop=0;
      old_totconstraints=totconstraints;

      do { /* with shrinking turned on, go through examples that keep
	      producing new constraints */

	if(struct_verbosity>=1) { 
	  printf("Iter %i (%ld active): ",++numIt,activenum); 
	  fflush(stdout);
	}
	
	ceps=0;
	fullround=(activenum == n);

	for(i=0; i<n; i++) { /*** example loop ***/
	  
	  rt1=get_runtime();
	    
	  if((!use_shrinking) || (opti[i] != opti_round)) {
	                                /* if the example is not shrunk
	                                away, then see if it is necessary to 
					add a new constraint */
	    rt2=get_runtime();
	    argmax_count++;
	    if(sparm->loss_type == SLACK_RESCALING) 
	      ybar=find_most_violated_constraint_slackrescaling(ex[i].x,
								ex[i].y,sm,
								sparm);
	    else
	      ybar=find_most_violated_constraint_marginrescaling(ex[i].x,
								 ex[i].y,sm,
								 sparm);
	    rt_viol+=MAX(get_runtime()-rt2,0);
	    
	    if(empty_label(ybar)) {
	      if(opti[i] != opti_round) {
		activenum--;
		opti[i]=opti_round; 
	      }
	      if(struct_verbosity>=2)
		printf("no-incorrect-found(%i) ",i);
	      continue;
	    }
	  
	    /**** get psi(y)-psi(ybar) ****/
	    rt2=get_runtime();
	    if(fycache) 
	      fy=copy_svector(fycache[i]);
	    else
	      fy=psi(ex[i].x,ex[i].y,sm,sparm);
	    fybar=psi(ex[i].x,ybar,sm,sparm);
	    rt_psi+=MAX(get_runtime()-rt2,0);
	    
	    /**** scale feature vector and margin by loss ****/
	    lossval=loss(ex[i].y,ybar,sparm);
	    if(sparm->slack_norm == 2)
	      lossval=sqrt(lossval);
	    if(sparm->loss_type == SLACK_RESCALING)
	      factor=lossval;
	    else               /* do not rescale vector for */
	      factor=1.0;      /* margin rescaling loss type */
	    for(f=fy;f;f=f->next)
	      f->factor*=factor;
	    for(f=fybar;f;f=f->next)
	      f->factor*=-factor;
	    margin=lossval;

	    /**** create constraint for current ybar ****/
	    append_svector_list(fy,fybar);/* append the two vector lists */
	    doc=create_example(cset.m,0,i+1,1,fy);

	    /**** compute slack for this example ****/
	    slack=0;
	    for(j=0;j<cset.m;j++) 
	      if(cset.lhs[j]->slackid == i+1) {
		if(sparm->slack_norm == 2) /* works only for linear kernel */
		  slack=MAX(slack,cset.rhs[j]
			          -(classify_example(svmModel,cset.lhs[j])
				    -sm->w[sizePsi+i]/(sqrt(2*svmCnorm))));
		else
		  slack=MAX(slack,
			   cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
	      }
	    
	    /**** if `error' add constraint and recompute ****/
	    dist=classify_example(svmModel,doc);
	    ceps=MAX(ceps,margin-dist-slack);
	    if(slack > (margin-dist+0.0001)) {
	      printf("\nWARNING: Slack of most violated constraint is smaller than slack of working\n");
	      printf("         set! There is probably a bug in 'find_most_violated_constraint_*'.\n");
	      printf("Ex %d: slack=%f, newslack=%f\n",i,slack,margin-dist);
	      /* exit(1); */
	    }
	    if((dist+slack)<(margin-epsilon)) { 
	      if(struct_verbosity>=2)
		{printf("(%i,eps=%.2f) ",i,margin-dist-slack); fflush(stdout);}
	      if(struct_verbosity==1)
		{printf("."); fflush(stdout);}
	      
	      /**** resize constraint matrix and add new constraint ****/
	      cset.m++;
	      cset.lhs=(DOC **)realloc(cset.lhs,sizeof(DOC *)*cset.m);
	      if(kparm->kernel_type == LINEAR_KERNEL) {
		diff=add_list_ss(fy); /* store difference vector directly */
		if(sparm->slack_norm == 1) 
		  cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1,
						    copy_svector(diff));
		else if(sparm->slack_norm == 2) {
		  /**** add squared slack variable to feature vector ****/
		  slackv[0].wnum=sizePsi+i;
		  slackv[0].weight=1/(sqrt(2*svmCnorm));
		  slackv[1].wnum=0; /*terminator*/
		  slackvec=create_svector(slackv,NULL,1.0);
		  cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1,
						    add_ss(diff,slackvec));
		  free_svector(slackvec);
		}
		free_svector(diff);
	      }
	      else { /* kernel is used */
		if(sparm->slack_norm == 1) 
		  cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1,
						    copy_svector(fy));
		else if(sparm->slack_norm == 2)
		  exit(1);
	      }
	      cset.rhs=(double *)realloc(cset.rhs,sizeof(double)*cset.m);
	      cset.rhs[cset.m-1]=margin;
	      alpha=(double *)realloc(alpha,sizeof(double)*cset.m);
	      alpha[cset.m-1]=0;
	      alphahist=(long *)realloc(alphahist,sizeof(long)*cset.m);
	      alphahist[cset.m-1]=optcount;
	      newconstraints++;
	      totconstraints++;
	    }
	    else {
	      printf("+"); fflush(stdout); 
	      if(opti[i] != opti_round) {
		activenum--;
		opti[i]=opti_round; 
	      }
	    }

	    free_example(doc,0);
	    free_svector(fy); /* this also frees fybar */
	    free_label(ybar);
	  }

	  /**** get new QP solution ****/
	  if((newconstraints >= sparm->newconstretrain) 
	     || ((newconstraints > 0) && (i == n-1))
	     || (new_precision && (i == n-1))) {
	    if(struct_verbosity>=1) {
	      printf("*");fflush(stdout);
	    }
	    rt2=get_runtime();
	    free_model(svmModel,0);
	    svmModel=(MODEL *)my_malloc(sizeof(MODEL));
	    /* Always get a new kernel cache. It is not possible to use the
	       same cache for two different training runs */
	    if(kparm->kernel_type != LINEAR_KERNEL)
	      kcache=kernel_cache_init(MAX(cset.m,1),lparm->kernel_cache_size);
	    /* Run the QP solver on cset. */
	    svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n,
				   lparm,kparm,kcache,svmModel,alpha);
	    if(kcache)
	      kernel_cache_cleanup(kcache);
	    /* Always add weight vector, in case part of the kernel is
	       linear. If not, ignore the weight vector since its
	       content is bogus. */
	    add_weight_vector_to_linear_model(svmModel);
	    sm->svm_model=svmModel;
	    sm->w=svmModel->lin_weights; /* short cut to weight vector */
	    optcount++;
	    /* keep track of when each constraint was last
	       active. constraints marked with -1 are not updated */
	    for(j=0;j<cset.m;j++) 
	      if((alphahist[j]>-1) && (alpha[j] != 0))  
		alphahist[j]=optcount;
	    rt_opt+=MAX(get_runtime()-rt2,0);
	    
	    if(new_precision && (epsilon <= sparm->epsilon))  
	      dont_stop=1; /* make sure we take one final pass */
	    new_precision=0;
	    newconstraints=0;
	  }	

	  rt_total+=MAX(get_runtime()-rt1,0);

	} /* end of example loop */

	rt1=get_runtime();
	
	if(struct_verbosity>=1)
	  printf("(NumConst=%d, SV=%ld, CEps=%.4f, QPEps=%.4f)\n",cset.m,
		 svmModel->sv_num-1,ceps,svmModel->maxdiff);
	
	/* Check if some of the linear constraints have not been
	   active in a while. Those constraints are then removed to
	   avoid bloating the working set beyond necessity. */
	if(struct_verbosity>=2) {
	  printf("Reducing working set..."); fflush(stdout);
	}
	remove_inactive_constraints(&cset,alpha,optcount,alphahist,
				    MAX(50,optcount-lastoptcount));
	lastoptcount=optcount;
	if(struct_verbosity>=2)
	  printf("done. (NumConst=%d)\n",cset.m);
	
	rt_total+=MAX(get_runtime()-rt1,0);
	
      } while(use_shrinking && (activenum > 0)); /* when using shrinking, 
						    repeat until all examples 
						    produced no constraint at
						    least once */

    } while(((totconstraints - old_totconstraints) > tolerance) || dont_stop);

  } while((epsilon > sparm->epsilon) 
	  || finalize_iteration(ceps,0,sample,sm,cset,alpha,sparm));  

  if(struct_verbosity>=1) {
    /**** compute sum of slacks ****/
    /**** WARNING: If positivity constraints are used, then the
	  maximum slack id is larger than what is allocated
	  below ****/
    slacks=(double *)my_malloc(sizeof(double)*(n+1));
    for(i=0; i<=n; i++) { 
      slacks[i]=0;
    }
    if(sparm->slack_norm == 1) {
      for(j=0;j<cset.m;j++) 
	slacks[cset.lhs[j]->slackid]=MAX(slacks[cset.lhs[j]->slackid],
			   cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
    }
    else if(sparm->slack_norm == 2) {
      for(j=0;j<cset.m;j++) 
	slacks[cset.lhs[j]->slackid]=MAX(slacks[cset.lhs[j]->slackid],
		cset.rhs[j]
	         -(classify_example(svmModel,cset.lhs[j])
		   -sm->w[sizePsi+cset.lhs[j]->slackid-1]/(sqrt(2*svmCnorm))));
    }
    slacksum=0;
    for(i=1; i<=n; i++)  
      slacksum+=slacks[i];
    free(slacks);
    alphasum=0;
    for(i=0; i<cset.m; i++)  
      alphasum+=alpha[i]*cset.rhs[i];
    modellength=model_length_s(svmModel);
    dualitygap=(0.5*modellength*modellength+svmCnorm*(slacksum+n*ceps))
               -(alphasum-0.5*modellength*modellength);
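    /* the first parenthesis upper-bounds the primal objective
       0.5*|w|^2 + svmCnorm*(sum_i xi_i + n*ceps), where the n*ceps term
       accounts for violated constraints not yet in the working set; the
       second parenthesis is the dual objective
       sum_j alpha_j*rhs_j - 0.5*|w|^2, so by weak duality the
       difference bounds the true duality gap from above */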
    
    printf("Final epsilon on KKT-Conditions: %.5f\n",
	   MAX(svmModel->maxdiff,epsilon));
    printf("Upper bound on duality gap: %.5f\n", dualitygap);
    printf("Dual objective value: dval=%.5f\n",
	    alphasum-0.5*modellength*modellength);
    printf("Total number of constraints in final working set: %i (of %i)\n",(int)cset.m,(int)totconstraints);
    printf("Number of iterations: %d\n",numIt);
    printf("Number of calls to 'find_most_violated_constraint': %ld\n",argmax_count);
    if(sparm->slack_norm == 1) {
      printf("Number of SV: %ld \n",svmModel->sv_num-1);
      printf("Number of non-zero slack variables: %ld (out of %ld)\n",
	     svmModel->at_upper_bound,n);
      printf("Norm of weight vector: |w|=%.5f\n",modellength);
    }
    else if(sparm->slack_norm == 2){ 
      printf("Number of SV: %ld (including %ld at upper bound)\n",
	     svmModel->sv_num-1,svmModel->at_upper_bound);
      printf("Norm of weight vector (including L2-loss): |w|=%.5f\n",
	     modellength);
    }
    printf("Norm. sum of slack variables (on working set): sum(xi_i)/n=%.5f\n",slacksum/n);
    printf("Norm of longest difference vector: ||Psi(x,y)-Psi(x,ybar)||=%.5f\n",
	   length_of_longest_document_vector(cset.lhs,cset.m,kparm));
    printf("Runtime in cpu-seconds: %.2f (%.2f%% for QP, %.2f%% for Argmax, %.2f%% for Psi, %.2f%% for init)\n",
	   rt_total/100.0, (100.0*rt_opt)/rt_total, (100.0*rt_viol)/rt_total, 
	   (100.0*rt_psi)/rt_total, (100.0*rt_init)/rt_total);
  }
  if(struct_verbosity>=4)
    printW(sm->w,sizePsi,n,lparm->svm_c);

  if(svmModel) {
    sm->svm_model=copy_model(svmModel);
    sm->w=sm->svm_model->lin_weights; /* short cut to weight vector */
  }

  print_struct_learning_stats(sample,sm,cset,alpha,sparm);

  if(fycache) {
    for(i=0;i<n;i++)
      free_svector(fycache[i]);
    free(fycache);
  }
  if(svmModel)
    free_model(svmModel,0);
  free(alpha); 
  free(alphahist); 
  free(opti); 
  free(cset.rhs); 
  for(i=0;i<cset.m;i++) 
    free_example(cset.lhs[i],1);
  free(cset.lhs);
}
Example #21
double add_constraint_to_constraint_cache(CCACHE *ccache, MODEL *svmModel, int exnum, SVECTOR *fydelta, double rhs, double gainthresh, int maxconst, double *rt_cachesum)
     /* add new constraint fydelta*w>rhs for example exnum to cache,
	if it is more violated (by gainthresh) than the currently most
	violated constraint in cache. if this grows the number of
	cached constraints for this example beyond maxconst, then the
	least recently used constraint is deleted. the function
	assumes that update_constraint_cache_for_model has been
	run. */
{
  double  viol,viol_gain,viol_gain_trunc;
  double  dist_ydelta;
  DOC     *doc_fydelta;
  SVECTOR *fydelta_new;
  CCACHEELEM *celem;
  int     cnum;
  double  rt2=0;

  /* compute violation of new constraint */
  doc_fydelta=create_example(1,0,1,1,fydelta);
  dist_ydelta=classify_example(svmModel,doc_fydelta);
  free_example(doc_fydelta,0);  
  viol=rhs-dist_ydelta;
  viol_gain=viol-ccache->constlist[exnum]->viol;
  viol_gain_trunc=viol-MAX(ccache->constlist[exnum]->viol,0);
  ccache->avg_viol_gain[exnum]=viol_gain;

  /* check if violation of new constraint is larger than that of the
     best cache element */
  if(viol_gain > gainthresh) {
    fydelta_new=fydelta;
    if(struct_verbosity>=2) rt2=get_runtime();
    if(svmModel->kernel_parm.kernel_type == LINEAR_KERNEL) {
      if(COMPACT_CACHED_VECTORS == 1) { /* eval sum for linear */
	fydelta_new=add_list_sort_ss_r(fydelta,COMPACT_ROUNDING_THRESH);  
	free_svector(fydelta);
      }
      else if(COMPACT_CACHED_VECTORS == 2) {
	fydelta_new=add_list_ss_r(fydelta,COMPACT_ROUNDING_THRESH); 
	free_svector(fydelta);
      }
      else if(COMPACT_CACHED_VECTORS == 3) {
	fydelta_new=add_list_ns_r(fydelta,COMPACT_ROUNDING_THRESH); 
	free_svector(fydelta);
      }
    }
    if(struct_verbosity>=2) (*rt_cachesum)+=MAX(get_runtime()-rt2,0);
    celem=ccache->constlist[exnum];
    ccache->constlist[exnum]=(CCACHEELEM *)my_malloc(sizeof(CCACHEELEM));
    ccache->constlist[exnum]->next=celem;
    ccache->constlist[exnum]->fydelta=fydelta_new;
    ccache->constlist[exnum]->rhs=rhs;
    ccache->constlist[exnum]->viol=viol;
    ccache->changed[exnum]+=2;

    /* remove last constraint in list, if list is longer than maxconst */
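    /* cnum starts at 2 because the new head and its first successor are
       already accounted for; the walk below stops at the second-to-last
       element, so cnum ends up equal to the list length and celem->next
       is the oldest (least recently used) cached constraint */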
    cnum=2;
    for(celem=ccache->constlist[exnum];celem && celem->next && celem->next->next;celem=celem->next)
      cnum++;
    if(cnum>maxconst) {
      free_svector(celem->next->fydelta);
      free(celem->next);
      celem->next=NULL;
    }
  }
  else {
    free_svector(fydelta);
  }
  return(viol_gain_trunc);
}
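
For reference, a minimal sketch of the per-example cache list this function maintains, reconstructed from the field accesses above (the actual header may carry additional bookkeeping fields):

typedef struct ccacheelem {
  SVECTOR *fydelta;        /* lhs of the cached constraint fydelta*w >= rhs */
  double  rhs;             /* rhs (margin) of the cached constraint */
  double  viol;            /* violation under the model of the last update */
  struct ccacheelem *next; /* older entries follow; the head is most recent */
} CCACHEELEM;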
Example #22
int main(int argc, char* argv[]) {

  double *w; /* weight vector */
  long m, i;
  double C, epsilon;
  LEARN_PARM learn_parm;
  KERNEL_PARM kernel_parm;
  char trainfile[1024];
  char modelfile[1024];
  int MAX_ITER;
  /* new struct variables */
  SVECTOR **fycache, *diff, *fy;
  EXAMPLE *ex;
  SAMPLE alldata;
  SAMPLE sample;
  SAMPLE val;
  STRUCT_LEARN_PARM sparm;
  STRUCTMODEL sm;
  
  double primal_obj;
  double stop_crit;
  char itermodelfile[2000];

  /* self-paced learning variables */
  double init_spl_weight;
  double spl_weight;
  double spl_factor;
  int *valid_examples;
 

  /* read input parameters */
  my_read_input_parameters(argc, argv, trainfile, modelfile, &learn_parm,
                           &kernel_parm, &sparm, &init_spl_weight, &spl_factor);

  epsilon = learn_parm.eps;
  C = learn_parm.svm_c;
  MAX_ITER = learn_parm.maxiter;

  /* read in examples */
  alldata = read_struct_examples(trainfile,&sparm);
  int ntrain = (int) round(1.0*alldata.n); /* no validation set */
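  /* with the factor fixed at 1.0 every example goes into the training
     set; a factor below 1.0 would split off the remainder as a
     validation set in the branch below */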
  if(ntrain < alldata.n)
  {
    long *perm = randperm(alldata.n);
    sample = generate_train_set(alldata, perm, ntrain);
    val = generate_validation_set(alldata, perm, ntrain);
    free(perm);
  }
  else
  {
    sample = alldata;
  }
  ex = sample.examples;
  m = sample.n;
  
  /* initialization */
  init_struct_model(alldata,&sm,&sparm,&learn_parm,&kernel_parm); 

  w = create_nvector(sm.sizePsi);
  clear_nvector(w, sm.sizePsi);
  sm.w = w; /* establish link to w, as long as w does not change pointer */

  /* some training information */
  printf("C: %.8g\n", C);
	printf("spl weight: %.8g\n",init_spl_weight);
  printf("epsilon: %.8g\n", epsilon);
  printf("sample.n: %d\n", sample.n); 
  printf("sm.sizePsi: %ld\n", sm.sizePsi); fflush(stdout);


  /* prepare feature vector cache for correct labels with imputed latent variables */
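  /* psi() may return a chain of SVECTORs; add_list_ss() collapses the
     chain into a single sparse vector so each cache entry is compact */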
  fycache = (SVECTOR**)malloc(m*sizeof(SVECTOR*));
  for (i=0;i<m;i++) {
    fy = psi(ex[i].x, ex[i].y, &sm, &sparm);
    diff = add_list_ss(fy);
    free_svector(fy);
    fy = diff;
    fycache[i] = fy;
  }

  /* learn initial weight vector using all training examples */
  valid_examples = (int *) malloc(m*sizeof(int));

  /* errors for validation set */
  double cur_loss, best_loss = DBL_MAX;
  int loss_iter;

  /* initializations */
  spl_weight = init_spl_weight;

  /* solve biconvex self-paced learning problem */
  primal_obj = alternate_convex_search(w, m, MAX_ITER, C, epsilon, fycache, ex, &sm, &sparm, valid_examples, spl_weight);
  printf("primal objective: %.4f\n", primal_obj);
  fflush(stdout);

  int nValid = 0;
  for (i=0;i<m;i++) {
    if(valid_examples[i]) {
      nValid++;
    }
  }

  if(ntrain < alldata.n) {
    cur_loss = compute_current_loss(val,&sm,&sparm);
    printf("CURRENT LOSS: %f\n",cur_loss);
  }
  

  /* write structural model */
  write_struct_model(modelfile, &sm, &sparm);
  // skip testing for the moment  

  /* free memory */
  free_struct_sample(alldata);
  if(ntrain < alldata.n)
  {
    free(sample.examples);
    free(val.examples);
  }
  free_struct_model(sm, &sparm);
  for(i=0;i<m;i++) {
    free_svector(fycache[i]);
  }
  free(fycache);

  free(valid_examples);
   
  return(0); 
  
}
Example #23
SAMPLE read_struct_examples(char *file, STRUCT_LEARN_PARM *sparm) {
/*
  Reads input examples {(x_1,y_1),...,(x_n,y_n)} from file.
  The types of pattern x and label y have to follow the definitions in
  svm_struct_latent_api_types.h.
*/
  SAMPLE sample;

  int i, j;
  SVECTOR *temp_sub=NULL;
  double vecDistance;
  long n_neighbors=0;

  // open the file containing candidate bounding box dimensions/labels/featurePath and image label
  FILE *fp = fopen(file, "r");
  if(fp==NULL){
      printf("Error: Cannot open input file %s\n",file);
      exit(1);
  }

  sample.n = 1;  
  sample.examples = (EXAMPLE *) malloc(sample.n*sizeof(EXAMPLE));
  if(!sample.examples) die("Memory error.");
  sample.examples[0].x.n_pos = 0;
  sample.examples[0].x.n_neg = 0;

  fscanf(fp,"%d", &sample.examples[0].n_imgs);
    
  // Initialise pattern 
  sample.examples[0].x.example_cost = 1;

  sample.examples[0].x.x_is = (SUB_PATTERN *) malloc(sample.examples[0].n_imgs*sizeof(SUB_PATTERN));
  if(!sample.examples[0].x.x_is) die("Memory error.");
  sample.examples[0].y.labels = (int *) malloc(sample.examples[0].n_imgs*sizeof(int));
  if(!sample.examples[0].y.labels) die("Memory error.");

  SVECTOR *temp=NULL;

  for(i = 0; i < sample.examples[0].n_imgs; i++){  
      fscanf(fp,"%s",sample.examples[0].x.x_is[i].phi1_file_name);
      fscanf(fp,"%s",sample.examples[0].x.x_is[i].phi2_file_name);
      fscanf(fp, "%d", &sample.examples[0].x.x_is[i].id);
      fscanf(fp, "%d", &sample.examples[0].y.labels[i]);

      sample.examples[0].x.x_is[i].phi1 = read_sparse_vector(sample.examples[0].x.x_is[i].phi1_file_name, sample.examples[0].x.x_is[i].id, sparm);
      sample.examples[0].x.x_is[i].phi2 = read_sparse_phi2(sample.examples[0].x.x_is[i].phi2_file_name, sparm);
      temp = create_svector_with_index(sample.examples[0].x.x_is[i].phi2->words, "", 1, sparm->phi1_size);
      sample.examples[0].x.x_is[i].phi1phi2_pos = add_ss(sample.examples[0].x.x_is[i].phi1, temp);
      free_svector(temp);
      sample.examples[0].x.x_is[i].phi1phi2_neg = create_svector_with_index(sample.examples[0].x.x_is[i].phi1phi2_pos->words, "", 1, (sparm->phi1_size+sparm->phi2_size));
      sample.examples[0].x.x_is[i].phi1phi2_shift = create_svector_with_index(sample.examples[0].x.x_is[i].phi1phi2_pos->words, "", 1, (sparm->phi1_size+sparm->phi2_size)*2);

      if(sample.examples[0].y.labels[i] == 1) {
          sample.examples[0].x.n_pos++;
      } 
      else{
          sample.examples[0].x.n_neg++;
      }
  }
  sample.examples[0].y.n_pos = sample.examples[0].x.n_pos;
  sample.examples[0].y.n_neg = sample.examples[0].x.n_neg;

  sample.examples[0].x.neighbors = (int **) malloc(sample.examples[0].n_imgs*sizeof(int*));
  sample.examples[0].x.n_neighbors=0;
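  /* mark pairs (i,j), j > i, whose phi2 descriptors lie closer than
     pairwise_threshold in squared Euclidean distance; only the upper
     triangle of the neighbor matrix is filled */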
  for (i = 0; i < sample.examples[0].n_imgs; i++){
      sample.examples[0].x.neighbors[i] = (int *) malloc(sample.examples[0].n_imgs*sizeof(int));
      for (j=(i+1); j < sample.examples[0].n_imgs; j++){
          temp_sub = sub_ss(sample.examples[0].x.x_is[i].phi2, sample.examples[0].x.x_is[j].phi2);
          vecDistance = sprod_ss(temp_sub, temp_sub);
          free_svector(temp_sub);
          if(vecDistance < sparm->pairwise_threshold){
            sample.examples[0].x.neighbors[i][j]=1;
            sample.examples[0].x.n_neighbors++;
          }
          else{
            sample.examples[0].x.neighbors[i][j]=0;
          }
      }
  }
  printf("No of neighbors = %d\n",sample.examples[0].x.n_neighbors);
  fflush(stdout);

  return(sample);
}
Example #24
void cutting_plane_algorithm_dual(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex, 
															STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, int *valid_examples) {
  long i,j;
  double *alpha;
  DOC **dXc; // constraint matrix 
  double *delta; // rhs of constraints 
  SVECTOR *new_constraint;
  int iter, size_active; 
  double value;
	double threshold = 0.0;
  double margin;
  double primal_obj, cur_obj;
	double *cur_slack = NULL;
	int mv_iter;
	int *idle = NULL;
	double **G = NULL;
	double **G2 = NULL;
	double **qmatrix = NULL;
	SVECTOR *f;
	int r;

  // set parameters for hideo solver 
  LEARN_PARM lparm;
  KERNEL_PARM kparm;
  MODEL *svm_model=NULL;
  lparm.biased_hyperplane = 0;
  lparm.epsilon_crit = MIN(epsilon,0.001);
  lparm.svm_c = C;
  lparm.sharedslack = 1;
  kparm.kernel_type = LINEAR;

  lparm.remove_inconsistent=0;
  lparm.skip_final_opt_check=0;
  lparm.svm_maxqpsize=10;
  lparm.svm_newvarsinqp=0;
  lparm.svm_iter_to_shrink=-9999;
  lparm.maxiter=100000;
  lparm.kernel_cache_size=40;
  lparm.eps = epsilon; 
  lparm.transduction_posratio=-1.0;
  lparm.svm_costratio=1.0;
  lparm.svm_costratio_unlab=1.0;
  lparm.svm_unlabbound=1E-5;
  lparm.epsilon_a=1E-10;  // changed from 1e-15 
  lparm.compute_loo=0;
  lparm.rho=1.0;
  lparm.xa_depth=0;
  strcpy(lparm.alphafile,"");
  kparm.poly_degree=3;
  kparm.rbf_gamma=1.0;
  kparm.coef_lin=1;
  kparm.coef_const=1;
  strcpy(kparm.custom,"empty");
 
  iter = 0;
  size_active = 0;
  alpha = NULL;
  dXc = NULL;
  delta = NULL;

  //qmatrix = (double **) malloc(sizeof(double *)*10);
  //assert(qmatrix!=NULL);

  printf("Running structural SVM solver: "); fflush(stdout); 
	new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
 	value = margin - sprod_ns(w, new_constraint);
	while((value>threshold+epsilon)&&(iter<MAX_ITER)) {
		iter+=1;
		size_active+=1;

		printf("."); fflush(stdout); 


	    // add  constraint 
	  	dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active);
	   	assert(dXc!=NULL);
	   	dXc[size_active-1] = (DOC*)malloc(sizeof(DOC));
	   	dXc[size_active-1]->fvec = new_constraint; 
	   	dXc[size_active-1]->slackid = 1; // only one common slackid (one-slack)
	   	dXc[size_active-1]->costfactor = 1.0;


	   	delta = (double*)realloc(delta, sizeof(double)*size_active);
	   	assert(delta!=NULL);
	   	delta[size_active-1] = margin;

	   	//alpha = (double*)malloc(sizeof(double)*(size_active+(sparm->phi1_size+sparm->phi2_size)));
	   	//assert(alpha!=NULL);
   		//for(j=0; j<(sparm->phi1_size+sparm->phi2_size)+size_active; j++){
   		//	alpha[j] = 0.0;
   		//}
   		alpha = (double*)realloc(alpha, sizeof(double)*(size_active+(sparm->phi1_size+sparm->phi2_size)));
	   	assert(alpha!=NULL);
	   	alpha[size_active-1] = 0.0;

		idle = (int *) realloc(idle, sizeof(int)*size_active);
		assert(idle!=NULL);
		idle[size_active-1] = 0;

		
		qmatrix = (double **) realloc(qmatrix, sizeof(double *)*size_active);
  		assert(qmatrix!=NULL);

		qmatrix[size_active-1] = malloc(sizeof(double)*(sparm->phi1_size+sparm->phi2_size));
		for(j = 0; j < (sparm->phi1_size+sparm->phi2_size); j++){
			qmatrix[size_active-1][j] = (-1)*returnWeightAtIndex(dXc[size_active-1]->fvec->words, ((sparm->phi1_size+sparm->phi2_size)*2+j+1));
		}
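		/* each qmatrix row is the negated pairwise (third) block of the
		   new constraint; mosek_qp_optimize_dual presumably uses these
		   rows to handle the sign constraints on that block of w in
		   the dual */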

		// update Gram matrix 
		G = (double **) realloc(G, sizeof(double *)*size_active);
		assert(G!=NULL);
		G[size_active-1] = NULL;
		for(j = 0; j < size_active; j++) {
			G[j] = (double *) realloc(G[j], sizeof(double)*size_active);
			assert(G[j]!=NULL);
		}

		for(j = 0; j < size_active-1; j++) {
			G[size_active-1][j] = sprod_ss(dXc[size_active-1]->fvec, dXc[j]->fvec);
			G[size_active-1][j] = G[size_active-1][j]/2;
			G[j][size_active-1]  = G[size_active-1][j];
		}
		G[size_active-1][size_active-1] = sprod_ss(dXc[size_active-1]->fvec,dXc[size_active-1]->fvec);

		// hack: add a constant to the diagonal to make sure G is PSD 
		G[size_active-1][size_active-1] += 1e-6;

	   	// solve QP to update alpha 
		//r = mosek_qp_optimize(G, delta, alpha, (long) size_active, C, &cur_obj, dXc, (sparm->phi1_size+sparm->phi2_size)*2, (sparm->phi1_size+sparm->phi2_size));
		r = mosek_qp_optimize_dual(G, qmatrix, delta, alpha, (long) size_active, (long) (sparm->phi1_size+sparm->phi2_size), C, &cur_obj, 0, 0);
	    
		if(r >= 1293 && r <= 1296)
		{
			printf("r:%d. G might not be psd due to numerical errors.\n",r);
			fflush(stdout);
			//exit(1);
			while(r==1295) {
				printf("r:%d. G might not be psd due to numerical errors. Gram Reg=%0.7f\n",r, sparm->gram_regularization);
				fflush(stdout);
				for(i=0;i<size_active;i++) {
					G[i][i] += 10*sparm->gram_regularization-sparm->gram_regularization;
				}
				sparm->gram_regularization *= 10;
				r = mosek_qp_optimize_dual(G, qmatrix, delta, alpha, (long) size_active, (long) (sparm->phi1_size+sparm->phi2_size), C, &cur_obj, sparm->gram_regularization, sparm->gram_regularization*0.1);
			}
		}
		else if(r)
		{
			printf("Error %d in mosek_qp_optimize: Check ${MOSEKHOME}/${VERSION}/tools/platform/${PLATFORM}/h/mosek.h\n",r);
			exit(1);
		}

	   	clear_nvector(w,sm->sizePsi);
	   	for (j=0;j<size_active;j++) {
	     	if (alpha[j]>C*ALPHA_THRESHOLD) {
					add_vector_ns(w,dXc[j]->fvec,alpha[j]);
					idle[j] = 0;
	     	}
			else
				idle[j]++;
	   	}
	   	for(j=0; j<(sparm->phi1_size+sparm->phi2_size);j++){
	   		if (alpha[size_active+j] > EQUALITY_EPSILON){
	   			w[j+1+(sparm->phi1_size+sparm->phi2_size)*2] = w[j+1+(sparm->phi1_size+sparm->phi2_size)*2] - alpha[size_active+j];
	   		}	   		
	   	}

	   	for(j=1; j<=(sparm->phi1_size+sparm->phi2_size)*3;j++){
	   		if((w[j]<EQUALITY_EPSILON) && (w[j]>(-1*EQUALITY_EPSILON))){
	   			w[j] = 0;
	   		}
	   	}	   

	   	for(j=(sparm->phi1_size+sparm->phi2_size)*2+1; j<=(sparm->phi1_size+sparm->phi2_size)*3;j++){
	   		//assert(w[j] <= 0);
	   		if(w[j]>0){
	   			printf("j = %ld, w[j] = %0.6f\n", j, w[j]);
	   			fflush(stdout);
	   		}
	   		
	   	}	

		cur_slack = (double *) realloc(cur_slack,sizeof(double)*size_active);

		for(i = 0; i < size_active; i++) {
			cur_slack[i] = 0.0;
			for(f = dXc[i]->fvec; f; f = f->next) {
				j = 0;
				while(f->words[j].wnum) {
					cur_slack[i] += w[f->words[j].wnum]*f->words[j].weight;
					j++;
				}
			}
			if(cur_slack[i] >= delta[i])
				cur_slack[i] = 0.0;
			else
				cur_slack[i] = delta[i]-cur_slack[i];
		}

		mv_iter = 0;
		if(size_active > 1) {
			for(j = 0; j < size_active; j++) {
				if(cur_slack[j] >= cur_slack[mv_iter])
					mv_iter = j;
			}
		}

		if(size_active > 1)
			threshold = cur_slack[mv_iter];
		else
			threshold = 0.0;

 		new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
   		value = margin - sprod_ns(w, new_constraint);

		if((iter % CLEANUP_CHECK) == 0)
		{
			printf("+"); fflush(stdout);
			size_active = resize_cleanup(size_active, &idle, &alpha, &delta, &dXc, &G, &mv_iter);
		}

		free(alpha);
		alpha=NULL;
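		/* alpha is rebuilt from scratch each iteration (the realloc above
		   then starts from a NULL pointer), so the QP solver gets no warm
		   start across cutting-plane iterations */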

 	} // end cutting plane while loop 

	//primal_obj = current_obj_val(ex, fycache, m, sm, sparm, C, valid_examples);

  printf(" Inner loop optimization finished.\n"); fflush(stdout); 
      
  // free memory
  for (j=0;j<size_active;j++) {
		free(G[j]);
    free_example(dXc[j],1);	
  }
	free(G);
  free(dXc);
  free(alpha);
  free(delta);
  free_svector(new_constraint);
	free(cur_slack);
	free(idle);
  if (svm_model!=NULL) free_model(svm_model,0);

  //return(primal_obj);
  return;
}
Example #25
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex, 
															STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, int *valid_examples) {
  long i,j,t;
  double *alpha;
  DOC **dXc; /* constraint matrix */
  double *delta; /* rhs of constraints */
  SVECTOR *new_constraint;
  int iter, size_active; 
  double value;
	double threshold = 0.0;
  double margin;
  double primal_obj, cur_obj;
	double *cur_slack = NULL;
	int mv_iter;
	int *idle = NULL;
	double **psiDiffs = NULL;
	SVECTOR *f;
	int r;
	long fnum, last_wnum;

  /* set parameters for hideo solver */
  LEARN_PARM lparm;
  KERNEL_PARM kparm;
  MODEL *svm_model=NULL;
  lparm.biased_hyperplane = 0;
  lparm.epsilon_crit = MIN(epsilon,0.001);
  lparm.svm_c = C;
  lparm.sharedslack = 1;
  kparm.kernel_type = LINEAR;

  lparm.remove_inconsistent=0;
  lparm.skip_final_opt_check=0;
  lparm.svm_maxqpsize=10;
  lparm.svm_newvarsinqp=0;
  lparm.svm_iter_to_shrink=-9999;
  lparm.maxiter=100000;
  lparm.kernel_cache_size=40;
  lparm.eps = epsilon; 
  lparm.transduction_posratio=-1.0;
  lparm.svm_costratio=1.0;
  lparm.svm_costratio_unlab=1.0;
  lparm.svm_unlabbound=1E-5;
  lparm.epsilon_a=1E-10;  /* changed from 1e-15 */
  lparm.compute_loo=0;
  lparm.rho=1.0;
  lparm.xa_depth=0;
  strcpy(lparm.alphafile,"");
  kparm.poly_degree=3;
  kparm.rbf_gamma=1.0;
  kparm.coef_lin=1;
  kparm.coef_const=1;
  strcpy(kparm.custom,"empty");
 
  iter = 0;
  size_active = 0;
  alpha = NULL;
  dXc = NULL;
  delta = NULL;

  printf("Running structural SVM solver: "); fflush(stdout); 

	new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
 	value = margin - sprod_ns(w, new_constraint);
	while((value>threshold+epsilon)&&(iter<MAX_ITER)) {
		iter+=1;
		size_active+=1;

		printf("."); fflush(stdout); 


	    /* add  constraint */
	  	dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active);
	   	assert(dXc!=NULL);
	   	dXc[size_active-1] = (DOC*)malloc(sizeof(DOC));
	   	dXc[size_active-1]->fvec = new_constraint; 
	   	dXc[size_active-1]->slackid = 1; // only one common slackid (one-slack)
	   	dXc[size_active-1]->costfactor = 1.0;

	   	delta = (double*)realloc(delta, sizeof(double)*size_active);
	   	assert(delta!=NULL);
	   	delta[size_active-1] = margin;

	   	/*alpha = (double*)realloc(alpha, sizeof(double)*size_active);
	   	assert(alpha!=NULL);
	   	alpha[size_active-1] = 0.0;*/

		/*idle = (int *) realloc(idle, sizeof(int)*size_active);
		assert(idle!=NULL);
		idle[size_active-1] = 0;*/

		/* store the new constraint as a dense row */
		psiDiffs = (double **) realloc(psiDiffs, sizeof(double *)*size_active);
		assert(psiDiffs!=NULL);
		psiDiffs[size_active-1] = NULL;
		psiDiffs[size_active-1] = (double *) realloc(psiDiffs[size_active-1], sizeof(double)*((sparm->phi1_size+sparm->phi2_size)*3));
		assert(psiDiffs[size_active-1]!=NULL);
		
		fnum = 0;
		last_wnum = 0;
		while(dXc[size_active-1]->fvec->words[fnum].wnum) {
			for (t = last_wnum+1; t < dXc[size_active-1]->fvec->words[fnum].wnum; t++)	{
				psiDiffs[size_active-1][t-1] = 0;
			}
			psiDiffs[size_active-1][dXc[size_active-1]->fvec->words[fnum].wnum-1] = dXc[size_active-1]->fvec->words[fnum].weight;
			/*if((psiDiffs[size_active-1][dXc[size_active-1]->fvec->words[fnum].wnum-1]<EQUALITY_EPSILON) && (psiDiffs[size_active-1][dXc[size_active-1]->fvec->words[fnum].wnum-1]>(-1*EQUALITY_EPSILON))){
				psiDiffs[size_active-1][dXc[size_active-1]->fvec->words[fnum].wnum-1] = 0;
			}*/
			last_wnum = dXc[size_active-1]->fvec->words[fnum].wnum;
			fnum++;
		}
		for (t = (last_wnum+1); t <= (sparm->phi1_size+sparm->phi2_size)*3; t++)	{
			psiDiffs[size_active-1][t-1] = 0;
		}			
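		/* together, the two loops above densify the sparse constraint
		   into a row of length 3*(phi1_size+phi2_size), zero-filling the
		   gaps between the stored non-zero entries */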

   		/* solve QP to update w */
   		clear_nvector(w,sm->sizePsi);
   		//cur_slack = (double *) realloc(cur_slack,sizeof(double)*size_active);
   		cur_slack = (double *) realloc(cur_slack,sizeof(double));

		r = mosek_qp_optimize(psiDiffs, delta, w, cur_slack, (long) size_active, C, &cur_obj, (sparm->phi1_size+sparm->phi2_size)*3, (sparm->phi1_size+sparm->phi2_size)*2);

		if(r >= 1293 && r <= 1296)
		{
			printf("r:%d. G might not be psd due to numerical errors.\n",r);
			exit(1);
		}
		else if(r)
		{
			printf("Error %d in mosek_qp_optimize: Check ${MOSEKHOME}/${VERSION}/tools/platform/${PLATFORM}/h/mosek.h\n",r);
			exit(1);
		}

		for(j = 1; j <= (sparm->phi1_size+sparm->phi2_size)*3; j++) {
			if((w[j]<EQUALITY_EPSILON) && (w[j]>(-1*EQUALITY_EPSILON))){
	   			w[j] = 0;
   			}
		}

		/*for (j=0;j<size_active;j++) {
	     	if (cur_slack[j]>ALPHA_THRESHOLD) {
					idle[j] = 0;
	     	}
				else
					idle[j]++;
   		}*/

		/*mv_iter = 0;
		if(size_active > 1) {
			for(j = 0; j < size_active; j++) {
				if(cur_slack[j] >= cur_slack[mv_iter])
					mv_iter = j;
			}
		}*/

		if(size_active > 1)
			//threshold = cur_slack[mv_iter];
			threshold = cur_slack[0];
		else
			threshold = 0.0;

 		new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
   		value = margin - sprod_ns(w, new_constraint);

		/*if((iter % CLEANUP_CHECK) == 0)
		{
			printf("+"); fflush(stdout);
			size_active = resize_cleanup(size_active, &idle, &cur_slack, &delta, &dXc, &psiDiffs, &mv_iter);
		}*/

 	} // end cutting plane while loop 

	primal_obj = current_obj_val(ex, fycache, m, sm, sparm, C, valid_examples);

  printf(" Inner loop optimization finished.\n"); fflush(stdout); 
      
  /* free memory */
  for (j=0;j<size_active;j++) {
		free(psiDiffs[j]);
    free_example(dXc[j],1);	
  }
	free(psiDiffs);
  free(dXc);
  //free(alpha);
  free(delta);
  free_svector(new_constraint);
	free(cur_slack);
	//free(idle);
  if (svm_model!=NULL) free_model(svm_model,0);

  return(primal_obj);
}
Example #26
SVECTOR *psi(PATTERN x, LABEL y, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm) {
/*
  Creates the feature vector \Psi(x,y) and returns a pointer to a
  sparse vector SVECTOR in SVM^light format. The dimension of the
  feature vector returned has to agree with the dimension in sm->sizePsi.
*/
  SVECTOR *fvec=NULL; 
  SVECTOR *psi1=NULL; 
  SVECTOR *psi2=NULL;
  SVECTOR *temp_psi=NULL; 
  SVECTOR *temp_sub=NULL;


  WORD *words = NULL;
  words = (WORD *) malloc(sizeof(WORD));
  if(!words) die("Memory error."); 
  words[0].wnum = 0;
  words[0].weight = 0;
  fvec = create_svector(words,"",1);
  psi1 = create_svector(words,"",1);
  psi2 = create_svector(words,"",1);
  free(words);

  int i,j = 0;
  
  for (i = 0; i < (x.n_pos+x.n_neg); i++){
      if(y.labels[i] == 1){
          temp_psi = add_ss(psi1, x.x_is[i].phi1phi2_pos);
      }  
      else{
          temp_psi = add_ss(psi1, x.x_is[i].phi1phi2_neg);
      }
      free_svector(psi1);
      psi1 = temp_psi;

      for (j=(i+1); j < (x.n_pos+x.n_neg); j++){
          if(x.neighbors[i][j]){
              if(y.labels[i] != y.labels[j]){
                  temp_sub = sub_ss_sq(x.x_is[i].phi1phi2_pos, x.x_is[j].phi1phi2_pos);
                  temp_psi = add_ss(psi2, temp_sub);
                  free_svector(temp_sub);
                  free_svector(psi2);
                  psi2 = temp_psi;
              }     
          }
      }
  }
  
  // scale unary part psi1 by 1/n
  temp_psi = smult_s(psi1, (float)1/(float)(x.n_pos+x.n_neg));
  free_svector(psi1);
  psi1 = temp_psi;
  
  // scale pairwise part psi2 by 1/n_neighbors
  if (x.n_neighbors){
    temp_psi = smult_s(psi2, (float)1/(float)x.n_neighbors);
    free_svector(psi2);
    psi2 = temp_psi; 
  }  
  
  // concatenate psi1 and psi2 (psi2 shifted into the third block)
  temp_psi = create_svector_with_index(psi2->words, "", 1, (sparm->phi1_size+sparm->phi2_size)*2);
  free_svector(psi2);
  fvec = add_ss(psi1, temp_psi);
  free_svector(temp_psi);
  free_svector(psi1);
  
  return(fvec);
}
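
Reading off the code above, the joint feature map stacks three blocks of size phi1_size+phi2_size into one sparse vector: unary features of the images labeled +1, unary features of the images labeled -1, and a pairwise block (assuming sub_ss_sq computes the elementwise squared difference):

    Psi(x,y) = [ (1/n) * sum_{i: y_i=+1} phi12(x_i) ;
                 (1/n) * sum_{i: y_i=-1} phi12(x_i) ;
                 (1/|N|) * sum_{(i,j) in N, y_i != y_j} (phi12(x_i) - phi12(x_j))^2 ]

with n = x.n_pos + x.n_neg and N the neighbor set; create_svector_with_index shifts the later blocks so all three occupy disjoint coordinate ranges, matching the weight vector of length 3*(phi1_size+phi2_size) used by the solvers above.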
Example #27
void find_most_violated_constraint_marginrescaling(PATTERN x, LABEL y, LABEL *ybar, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm) {
/*
  Finds the most violated constraint (loss-augmented inference), i.e.,
  computes argmax_{ybar} [<w,psi(x,ybar)> + loss(y,ybar)].
  The output is stored at the location pointed to by *ybar.
*/
  int i, j;

  SVECTOR *temp_sub=NULL;

  double *unary_pos = (double*)malloc((x.n_pos+x.n_neg)*sizeof(double));
  double *unary_neg =  (double*)malloc((x.n_pos+x.n_neg)*sizeof(double));
  double **binary =  (double**)malloc((x.n_pos+x.n_neg)*sizeof(double *));


  for (i = 0; i < (x.n_pos+x.n_neg); i++){
      binary[i] =  (double*)malloc((x.n_pos+x.n_neg)*sizeof(double));
      // compute unary potential for ybar.labels[i] == 1 
      unary_pos[i] = sprod_ns(sm->w, x.x_is[i].phi1phi2_pos);
      if(unary_pos[i] != 0){
        unary_pos[i] = (float)(-1*unary_pos[i])/(float)(x.n_pos+x.n_neg);
      }
      // compute unary potential for ybar.labels[i] == -1
      unary_neg[i] = sprod_ns(sm->w, x.x_is[i].phi1phi2_neg);
      if(unary_neg[i] != 0){
        unary_neg[i] = (float)(-1*unary_neg[i])/(float)(x.n_pos+x.n_neg);
      }

      if(y.labels[i] == 1){
          // add 1/n to 'ybar == -1' unary term
          unary_neg[i] -= (float)1/(float)(x.n_pos+x.n_neg);
      }
      else{
          // add 1/n to 'ybar == 1' unary term
          unary_pos[i] -= (float)1/(float)(x.n_pos+x.n_neg);
      }

      for (j = (i+1); j < (x.n_pos+x.n_neg); j++){
          if(x.neighbors[i][j]){
              temp_sub = sub_ss_sq(x.x_is[i].phi1phi2_shift, x.x_is[j].phi1phi2_shift);
              binary[i][j] = sprod_ns(sm->w, temp_sub);
              assert(binary[i][j] <= 0);
              free_svector(temp_sub);
          }
          else{
              binary[i][j] = 0;
          }
      }
  }

  if (x.n_neighbors){
    for (i = 0; i < (x.n_pos+x.n_neg); i++){
        for (j = (i+1); j < (x.n_pos+x.n_neg); j++){
          if(binary[i][j] != 0){
            binary[i][j] = (double)(-1*binary[i][j])/(double)x.n_neighbors;
          }            
        }
    }
  }

  ybar->labels = maxflowwrapper(unary_pos, unary_neg, binary, x.n_pos, x.n_neg);

  free(unary_pos);
  free(unary_neg);
  for (i = 0; i < (x.n_pos+x.n_neg); i++){
    free(binary[i]);
  }
  free(binary);

	return;

}
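
The reduction at work here: maximizing <w,Psi(x,ybar)> + loss(y,ybar) over ybar is recast as minimizing an energy whose unary terms are the negated, 1/n-scaled per-image scores and whose pairwise terms are the negated, 1/|N|-scaled neighbor penalties; the Hamming-style loss is folded in by subtracting 1/n from the unary term of the label that disagrees with the ground truth. The assert checks that each pairwise score is non-positive before negation, which keeps the energy submodular, so maxflowwrapper (an external min-cut routine, assumed here to minimize this energy exactly) returns the most violating labeling.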
Example #28
void classify_struct_example(PATTERN x, LABEL *y, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm) {
/*
  Makes a prediction for input pattern x with the weight vector in sm->w,
  i.e., computes argmax_y <w,psi(x,y)>.
  The output label is stored at the location pointed to by *y.
*/
  int i,j;

  SVECTOR *temp_sub=NULL;

  double *unary_pos = (double*)malloc((x.n_pos+x.n_neg)*sizeof(double));
  double *unary_neg =  (double*)malloc((x.n_pos+x.n_neg)*sizeof(double));
  double **binary =  (double**)malloc((x.n_pos+x.n_neg)*sizeof(double *));


  for (i = 0; i < (x.n_pos+x.n_neg); i++){
      binary[i] =  (double*)malloc((x.n_pos+x.n_neg)*sizeof(double));
      // compute unary potential for ybar.labels[i] == 1 
      unary_pos[i] = sprod_ns(sm->w, x.x_is[i].phi1phi2_pos);
      if(unary_pos[i] != 0){
        unary_pos[i] = (float)(-1*unary_pos[i])/(float)(x.n_pos+x.n_neg);
      }
      // compute unary potential for ybar.labels[i] == -1
      unary_neg[i] = sprod_ns(sm->w, x.x_is[i].phi1phi2_neg);
      if(unary_neg[i] != 0){
        unary_neg[i] = (float)(-1*unary_neg[i])/(float)(x.n_pos+x.n_neg);
      }
      for (j = (i+1); j < (x.n_pos+x.n_neg); j++){
          if(x.neighbors[i][j]){
              temp_sub = sub_ss_sq(x.x_is[i].phi1phi2_shift, x.x_is[j].phi1phi2_shift);
              binary[i][j] = sprod_ns(sm->w, temp_sub);
              assert(binary[i][j] <= 0);
              free_svector(temp_sub);
          }
          else{
              binary[i][j] = 0;
          }
      }
  }

  if (x.n_neighbors){
    for (i = 0; i < (x.n_pos+x.n_neg); i++){
      for (j = (i+1); j < (x.n_pos+x.n_neg); j++){
          if(binary[i][j] != 0){
            binary[i][j] = (double)(-1*binary[i][j])/(double)x.n_neighbors;
          }
      }
    }
  }

  y->labels = maxflowwrapper(unary_pos, unary_neg, binary, x.n_pos, x.n_neg);

  free(unary_pos);
  free(unary_neg);
  for (i = 0; i < (x.n_pos+x.n_neg); i++){
    free(binary[i]);
  }
  free(binary);
  
	return;

}
Example #29
void svm_learn_struct(SAMPLE sample, STRUCT_LEARN_PARM *sparm,
		      LEARN_PARM *lparm, KERNEL_PARM *kparm, 
		      STRUCTMODEL *sm)
{
  int         i,j;
  int         numIt=0;
  long        newconstraints=0, activenum=0; 
  int         opti_round, *opti;
  long        old_numConst=0;
  double      epsilon;
  long        tolerance;
  double      lossval,factor;
  double      margin=0;
  double      slack, *slacks, slacksum;
  long        sizePsi;
  double      *alpha=NULL;
  CONSTSET    cset;
  SVECTOR     *diff=NULL;
  SVECTOR     *fy, *fybar, *f;
  SVECTOR     *slackvec;
  WORD        slackv[2];
  MODEL       *svmModel=NULL;
  KERNEL_CACHE *kcache=NULL;
  LABEL       ybar;
  DOC         *doc;

  long        n=sample.n;
  EXAMPLE     *ex=sample.examples;
  double      rt_total=0.0, rt_opt=0.0;
  long        rt1,rt2;

  init_struct_model(sample,sm,sparm); 
  sizePsi=sm->sizePsi+1;          /* sm must contain size of psi on return */

  /* initialize example selection heuristic */ 
  opti=(int*)my_malloc(n*sizeof(int));
  for(i=0;i<n;i++) {
    opti[i]=0;
  }
  opti_round=0;

  if(sparm->slack_norm == 1) {
    lparm->svm_c=sparm->C;          /* set upper bound C */
    lparm->sharedslack=1;
  }
  else if(sparm->slack_norm == 2) {
    lparm->svm_c=999999999999999.0; /* upper bound C must never be reached */
    lparm->sharedslack=0;
    if(kparm->kernel_type != LINEAR) {
      printf("ERROR: Kernels are not implemented for L2 slack norm!"); 
      fflush(stdout);
      exit(0);
    }
  }
  else {
    printf("ERROR: Slack norm must be L1 or L2!"); fflush(stdout);
    exit(0);
  }


  epsilon=1.0;                    /* start with low precision and
				     increase later */
  tolerance=n/100;                /* increase precision, whenever less
                                     than that number of constraints
                                     is not fulfilled */
  lparm->biased_hyperplane=0;     /* set threshold to zero */

  cset=init_struct_constraints(sample, sm, sparm);
  if(cset.m > 0) {
    alpha=realloc(alpha,sizeof(double)*cset.m);
    for(i=0; i<cset.m; i++) 
      alpha[i]=0;
  }

  /* set initial model and slack variables*/
  svmModel=(MODEL *)my_malloc(sizeof(MODEL));
  svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n,
			 lparm,kparm,NULL,svmModel,alpha);
  add_weight_vector_to_linear_model(svmModel);
  sm->svm_model=svmModel;
  sm->w=svmModel->lin_weights; /* short cut to weight vector */

  printf("Starting Iterations\n");

    /*****************/
   /*** main loop ***/
  /*****************/
  do { /* iteratively increase precision */

    epsilon=MAX(epsilon*0.09999999999,sparm->epsilon);
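    /* shrink the working precision by (almost exactly) a factor of 10
       per outer iteration, but never below the final target epsilon */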
    if(epsilon == sparm->epsilon)   /* for final precision, find all SV */
      tolerance=0;
    lparm->epsilon_crit=epsilon/2;  /* svm precision must be higher than eps */
    if(struct_verbosity>=1)
      printf("Setting current working precision to %g.\n",epsilon);

    do { /* iteration until (approx) all SV are found for current
            precision and tolerance */
      
      old_numConst=cset.m;
      opti_round++;
      activenum=n;

      do { /* go through examples that keep producing new constraints */

	if(struct_verbosity>=1) { 
	  printf("--Iteration %i (%ld active): ",++numIt,activenum); 
	  fflush(stdout);
	}
	
	for(i=0; i<n; i++) { /*** example loop ***/
	  
	  rt1=get_runtime();
	    
	  if(opti[i] != opti_round) {/* if the example is not shrunk
	                                away, then see if it is necessary to 
					add a new constraint */
	    if(sparm->loss_type == SLACK_RESCALING) 
	      ybar=find_most_violated_constraint_slackrescaling(ex[i].x,
								ex[i].y,sm,
								sparm);
	    else
	      ybar=find_most_violated_constraint_marginrescaling(ex[i].x,
								 ex[i].y,sm,
								 sparm);
	    
	    if(empty_label(ybar)) {
	      if(opti[i] != opti_round) {
		activenum--;
		opti[i]=opti_round; 
	      }
	      if(struct_verbosity>=2)
		printf("no-incorrect-found(%i) ",i);
	      continue;
	    }
	  
	    /**** get psi(y)-psi(ybar) ****/
	    fy=psi(ex[i].x,ex[i].y,sm,sparm);
	    fybar=psi(ex[i].x,ybar,sm,sparm);
	    
	    /**** scale feature vector and margin by loss ****/
	    lossval=loss(ex[i].y,ybar,sparm);
	    if(sparm->slack_norm == 2)
	      lossval=sqrt(lossval);
	    if(sparm->loss_type == SLACK_RESCALING)
	      factor=lossval;
	    else               /* do not rescale vector for */
	      factor=1.0;      /* margin rescaling loss type */
	    for(f=fy;f;f=f->next)
	      f->factor*=factor;
	    for(f=fybar;f;f=f->next)
	      f->factor*=-factor;
	    margin=lossval;

	    /**** create constraint for current ybar ****/
	    append_svector_list(fy,fybar);/* append the two vector lists */
	    doc=create_example(cset.m,0,i+1,1,fy);

	    /**** compute slack for this example ****/
	    slack=0;
	    for(j=0;j<cset.m;j++) 
	      if(cset.lhs[j]->slackid == i+1) {
		if(sparm->slack_norm == 2) /* works only for linear kernel */
		  slack=MAX(slack,cset.rhs[j]
			          -(classify_example(svmModel,cset.lhs[j])
				    -sm->w[sizePsi+i]/(sqrt(2*sparm->C))));
		else
		  slack=MAX(slack,
			   cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
	      }
	    
	    /**** if `error' add constraint and recompute ****/
	    if((classify_example(svmModel,doc)+slack)<(margin-epsilon)) { 
	      if(struct_verbosity>=2)
		{printf("(%i) ",i); fflush(stdout);}
	      if(struct_verbosity==1)
		{printf("."); fflush(stdout);}
	      
	      /**** resize constraint matrix and add new constraint ****/
	      cset.m++;
	      cset.lhs=realloc(cset.lhs,sizeof(DOC *)*cset.m);
	      if(kparm->kernel_type == LINEAR) {
		diff=add_list_ss(fy); /* store difference vector directly */
		if(sparm->slack_norm == 1) 
		  cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1,
						    copy_svector(diff));
		else if(sparm->slack_norm == 2) {
		  /**** add squared slack variable to feature vector ****/
		  slackv[0].wnum=sizePsi+i;
		  slackv[0].weight=1/(sqrt(2*sparm->C));
		  slackv[1].wnum=0; /*terminator*/
		  slackvec=create_svector(slackv,"",1.0);
		  cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1,
						    add_ss(diff,slackvec));
		  free_svector(slackvec);
		}
		free_svector(diff);
	      }
	      else { /* kernel is used */
		if(sparm->slack_norm == 1) 
		  cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1,
						    copy_svector(fy));
		else if(sparm->slack_norm == 2)
		  exit(1);
	      }
	      cset.rhs=realloc(cset.rhs,sizeof(double)*cset.m);
	      cset.rhs[cset.m-1]=margin;
	      alpha=realloc(alpha,sizeof(double)*cset.m);
	      alpha[cset.m-1]=0;
	      newconstraints++;
	    }
	    else {
	      printf("+"); fflush(stdout); 
	      if(opti[i] != opti_round) {
		activenum--;
		opti[i]=opti_round; 
	      }
	    }

	    free_example(doc,0);
	    free_svector(fy); /* this also frees fybar */
	    free_label(ybar);
	  }

	  /**** get new QP solution ****/
	  if((newconstraints >= sparm->newconstretrain) 
	     || ((newconstraints > 0) && (i == n-1))) {
	    if(struct_verbosity>=1) {
	      printf("*");fflush(stdout);
	    }
	    rt2=get_runtime();
	    free_model(svmModel,0);
	    svmModel=(MODEL *)my_malloc(sizeof(MODEL));
	    /* Always get a new kernel cache. It is not possible to use the
	       same cache for two different training runs */
	    if(kparm->kernel_type != LINEAR)
	      kcache=kernel_cache_init(cset.m,lparm->kernel_cache_size);
	    /* Run the QP solver on cset. */
	    svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n,
				   lparm,kparm,kcache,svmModel,alpha);
	    if(kcache)
	      kernel_cache_cleanup(kcache);
	    /* Always add weight vector, in case part of the kernel is
	       linear. If not, ignore the weight vector since its
	       content is bogus. */
	    add_weight_vector_to_linear_model(svmModel);
	    sm->svm_model=svmModel;
	    sm->w=svmModel->lin_weights; /* short cut to weight vector */
	    rt_opt+=MAX(get_runtime()-rt2,0);
	    
	    newconstraints=0;
	  }	

	  rt_total+=MAX(get_runtime()-rt1,0);
	} /* end of example loop */

	if(struct_verbosity>=1)
	  printf("(NumConst=%d, SV=%ld, Eps=%.4f)\n",cset.m,svmModel->sv_num-1,
		 svmModel->maxdiff);

      } while(activenum > 0);   /* repeat until all examples produced no
				   constraint at least once */

    } while((cset.m - old_numConst) > tolerance);

  } while(epsilon > sparm->epsilon);  

  if(struct_verbosity>=1) {
    /**** compute sum of slacks ****/
    slacks=(double *)my_malloc(sizeof(double)*(n+1));
    for(i=0; i<=n; i++) { 
      slacks[i]=0;
    }
    if(sparm->slack_norm == 1) {
      for(j=0;j<cset.m;j++) 
	slacks[cset.lhs[j]->slackid]=MAX(slacks[cset.lhs[j]->slackid],
			   cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
    }
    else if(sparm->slack_norm == 2) {
      for(j=0;j<cset.m;j++) 
	slacks[cset.lhs[j]->slackid]=MAX(slacks[cset.lhs[j]->slackid],
		cset.rhs[j]
	         -(classify_example(svmModel,cset.lhs[j])
		   -sm->w[sizePsi+cset.lhs[j]->slackid-1]/(sqrt(2*sparm->C))));
    }
    slacksum=0;
    for(i=0; i<=n; i++)  
      slacksum+=slacks[i];
    free(slacks);

    printf("Final epsilon on KKT-Conditions: %.5f\n",
	   MAX(svmModel->maxdiff,epsilon));
    printf("Total number of constraints added: %i\n",(int)cset.m);
    if(sparm->slack_norm == 1) {
      printf("Number of SV: %ld \n",svmModel->sv_num-1);
      printf("Number of non-zero slack variables: %ld (out of %ld)\n",
	     svmModel->at_upper_bound,n);
      printf("Norm of weight vector: |w|=%.5f\n",
	     model_length_s(svmModel,kparm));
    }
    else if(sparm->slack_norm == 2){ 
      printf("Number of SV: %ld (including %ld at upper bound)\n",
	     svmModel->sv_num-1,svmModel->at_upper_bound);
      printf("Norm of weight vector (including L2-loss): |w|=%.5f\n",
	     model_length_s(svmModel,kparm));
    }
    printf("Sum of slack variables: sum(xi_i)=%.5f\n",slacksum);
    printf("Norm of longest difference vector: ||Psi(x,y)-Psi(x,ybar)||=%.5f\n",
	   length_of_longest_document_vector(cset.lhs,cset.m,kparm));
    printf("Runtime in cpu-seconds: %.2f (%.2f%% for SVM optimization)\n",
	   rt_total/100.0, 100.0*rt_opt/rt_total);
  }
  if(struct_verbosity>=4)
    printW(sm->w,sizePsi,n,lparm->svm_c);

  if(svmModel) {
    sm->svm_model=copy_model(svmModel);
    sm->w=sm->svm_model->lin_weights; /* short cut to weight vector */
  }

  print_struct_learning_stats(sample,sm,cset,alpha,sparm);

  if(svmModel)
    free_model(svmModel,0);
  free(alpha); 
  free(opti); 
  free(cset.rhs); 
  for(i=0;i<cset.m;i++) 
    free_example(cset.lhs[i],1);
  free(cset.lhs);
}
Example #30
SVECTOR* find_cutting_plane(EXAMPLE *ex, SVECTOR **fycache, double *margin, long m, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm,
														int *valid_examples) {

  long i, j;
  SVECTOR *f, *fy, *fybar, *lhs;
  LABEL       ybar;
  double lossval;
  double *new_constraint;
	long valid_count = 0;

  long l,k;
  SVECTOR *fvec;
  WORD *words;  

  /* find cutting plane */
  lhs = NULL;
  *margin = 0;

	for (i=0;i<m;i++) {
		if (valid_examples[i]) {
			valid_count++;
		}
	}

  for (i=0;i<m;i++) {

		if (!valid_examples[i]) {
			continue;
		}

    find_most_violated_constraint_marginrescaling(ex[i].x, ex[i].y, &ybar, sm, sparm);
    /* get difference vector */
    fy = copy_svector(fycache[i]);
    fybar = psi(ex[i].x,ybar,sm,sparm);
    lossval = loss(ex[i].y,ybar,sparm);
    free_label(ybar);
		
    /* scale difference vector */
    for (f=fy;f;f=f->next) {
      //f->factor*=1.0/m;
      //f->factor*=ex[i].x.example_cost/m;
      f->factor*=ex[i].x.example_cost/valid_count;
    }
    for (f=fybar;f;f=f->next) {
      //f->factor*=-1.0/m;
      //f->factor*=-ex[i].x.example_cost/m;
      f->factor*=-ex[i].x.example_cost/valid_count;
    }
    /* add ybar to constraint */
    append_svector_list(fy,lhs);
    append_svector_list(fybar,fy);
    lhs = fybar;
    //*margin+=lossval/m;
    //*margin+=lossval*ex[i].x.example_cost/m;
    *margin+=lossval*ex[i].x.example_cost/valid_count;
  }

  /* compact the linear representation */
  new_constraint = add_list_nn(lhs, sm->sizePsi);
  free_svector(lhs);

  l=0;
  for (i=1;i<sm->sizePsi+1;i++) {
    if (fabs(new_constraint[i])>1E-10) l++; // non-zero
  }
  words = (WORD*)my_malloc(sizeof(WORD)*(l+1)); 
  assert(words!=NULL);
  k=0;
  for (i=1;i<sm->sizePsi+1;i++) {
    if (fabs(new_constraint[i])>1E-10) {
      words[k].wnum = i;
      words[k].weight = new_constraint[i]; 
      k++;
    }
  }
  words[k].wnum = 0;
  words[k].weight = 0.0;
  fvec = create_svector(words,"",1);

  free(words);
  free(new_constraint);

  return(fvec); 
}
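
In one-slack form, the vector returned here encodes the aggregated cutting-plane constraint

    <w, (1/k) * sum_i c_i * [Psi(x_i,y_i) - Psi(x_i,ybar_i)]>  >=  (1/k) * sum_i c_i * loss(y_i,ybar_i) - xi

where the sum runs over the k currently valid (self-paced-selected) examples and c_i = ex[i].x.example_cost. add_list_nn collapses the appended SVECTOR chains into one dense vector, which is then re-sparsified with entries below 1e-10 dropped.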