Пример #1
0
SVECTOR* add_list_ns(SVECTOR *a) 
     /* computes the linear combination of the SVECTOR list weighted
	by the factor of each SVECTOR. assumes that the number of
	features is small compared to the number of elements in the
	list */
{
    SVECTOR *vec,*f;
    register WORD *ai;
    long totwords;
    double *sum;

    /* find max feature number */
    totwords=0;
    for(f=a;f;f=f->next) {
      ai=f->words;
      while (ai->wnum) {
	if(totwords<ai->wnum) 
	  totwords=ai->wnum;
	ai++;
      }
    }
    sum=create_nvector(totwords);
    /* printf("totwords=%ld, %p\n",totwords, (void *)sum); */

    clear_nvector(sum,totwords);
    for(f=a;f;f=f->next)  
      add_vector_ns(sum,f,f->factor);

    vec=create_svector_n(sum,totwords,"",1.0);
    free(sum);

    return(vec);
}
Пример #2
0
CCACHE *create_constraint_cache(SAMPLE sample, STRUCT_LEARN_PARM *sparm, 
				STRUCTMODEL *sm)
     /* create new constraint cache for training set */
{
  long        n=sample.n;
  EXAMPLE     *ex=sample.examples;
  CCACHE      *ccache;
  int         i;

  ccache=(CCACHE *)my_malloc(sizeof(CCACHE));
  ccache->n=n;
  ccache->sm=sm;
  ccache->constlist=(CCACHEELEM **)my_malloc(sizeof(CCACHEELEM *)*n);
  ccache->avg_viol_gain=(double *)my_malloc(sizeof(double)*n);
  ccache->changed=(int *)my_malloc(sizeof(int)*n);
  for(i=0;i<n;i++) { 
    /* add constraint for ybar=y to cache */
    ccache->constlist[i]=(CCACHEELEM *)my_malloc(sizeof(CCACHEELEM));
    ccache->constlist[i]->fydelta=create_svector_n(NULL,0,NULL,1);
    ccache->constlist[i]->rhs=loss(ex[i].y,ex[i].y,sparm)/n;
    ccache->constlist[i]->viol=0;
    ccache->constlist[i]->next=NULL;
    ccache->avg_viol_gain[i]=0;
    ccache->changed[i]=0;
  }
  return(ccache);
}
Пример #3
0
MODEL *compact_linear_model(MODEL *model)
     /* Makes a copy of model where the support vectors are replaced
	with a single linear weight vector. */
     /* NOTE: It adds the linear weight vector also to newmodel->lin_weights */
     /* WARNING: This is correct only for linear models! */
{
  MODEL *newmodel;

  newmodel=(MODEL *)my_malloc(sizeof(MODEL));
  (*newmodel)=(*model);
  add_weight_vector_to_linear_model(newmodel);
  newmodel->supvec = (DOC **)my_malloc(sizeof(DOC *)*2);
  newmodel->alpha = (double *)my_malloc(sizeof(double)*2);
  newmodel->index = NULL; /* index is not copied */
  newmodel->supvec[0] = NULL;
  newmodel->alpha[0] = 0.0;
  newmodel->supvec[1] = create_example(-1,0,0,0,
				       create_svector_n(newmodel->lin_weights,
							newmodel->totwords,
							NULL,1.0));
  newmodel->alpha[1] = 1.0;
  newmodel->sv_num=2;

  return(newmodel);
}
void        write_struct_model(char *file, STRUCTMODEL *sm, 
			       STRUCT_LEARN_PARM *sparm)
{
  /* Writes structural model sm to file file. */

  PyObject *pFunc, *pValue;
  // Reduce the support vectors if appropriate.
  if (sm->w && sm->lin_reduce) {
    MODEL *m = sm->svm_model;
    // Get rid of the old support vectors.
    int i;
    for (i=1; i<m->sv_num; ++i) {
      free_example(m->supvec[i], 1);
    }
    free(m->supvec);
    free(m->alpha);
    // Create the new model sv structurs.
    m->supvec = (DOC**)my_malloc(sizeof(DOC*)*2);
    m->supvec[0] = NULL;
    m->alpha = (double*)my_malloc(sizeof(double)*2);
    m->alpha[0]=0.0; m->alpha[1]=1.0;
    m->sv_num = 2;
    // Create the new support vector.
    SVECTOR *sv = create_svector_n(sm->w, sm->sizePsi,"",1.0);
    m->supvec[1] = create_example(0,0,1,1.0,sv);
  }
  // Call the relevant Python function.
  pFunc = getFunction(PYTHON_WRITE_MODEL);
  pValue = PyObject_CallFunction
    (pFunc,"sNN",file,StructModel_FromStructModel(sm),Sparm_FromSparm(sparm));
  PY_RUNCHECK;
  Py_DECREF(pValue);
}
Пример #5
0
double model_length_n(MODEL *model) 
     /* compute length of weight vector */
{
  long     i,totwords=model->totwords+1;
  double   sum,*weight_n;
  SVECTOR  *weight;

  if(model->kernel_parm.kernel_type != LINEAR) {
    printf("ERROR: model_length_n applies only to linear kernel!\n");
    exit(1);
  }
  weight_n=create_nvector(totwords);
  clear_nvector(weight_n,totwords);
  for(i=1;i<model->sv_num;i++) 
    add_list_n_ns(weight_n,model->supvec[i]->fvec,model->alpha[i]);
  weight=create_svector_n(weight_n,totwords,NULL,1.0);
  sum=sprod_ss(weight,weight);
  free(weight_n);
  free_svector(weight);
  return(sqrt(sum));
}
Пример #6
0
CCACHE *create_constraint_cache(SAMPLE sample, STRUCT_LEARN_PARM *sparm)
     /* create new constraint cache for training set */
{
  long        n=sample.n;
  EXAMPLE     *ex=sample.examples;
  CCACHE      *ccache;
  int         i;

  ccache=(CCACHE *)malloc(sizeof(CCACHE));
  ccache->n=n;
  ccache->constlist=(CCACHEELEM **)malloc(sizeof(CCACHEELEM *)*n);
  for(i=0;i<n;i++) { 
    /* add constraint for ybar=y to cache */
    ccache->constlist[i]=(CCACHEELEM *)malloc(sizeof(CCACHEELEM));
    ccache->constlist[i]->fydelta=create_svector_n(NULL,0,"",1);
    ccache->constlist[i]->rhs=loss(ex[i].y,ex[i].y,sparm)/n;
    ccache->constlist[i]->viol=0;
    ccache->constlist[i]->next=NULL;
  }
  return(ccache);
}
Пример #7
0
void svm_learn_struct_joint(SAMPLE sample, STRUCT_LEARN_PARM *sparm,
			    LEARN_PARM *lparm, KERNEL_PARM *kparm, 
			    STRUCTMODEL *sm, int alg_type)
{
  int         i,j;
  int         numIt=0;
  long        argmax_count=0;
  long        totconstraints=0;
  long        kernel_type_org;
  double      epsilon,epsilon_cached;
  double      lossval,factor,dist;
  double      margin=0;
  double      slack, slacksum, ceps;
  double      dualitygap,modellength,alphasum;
  long        sizePsi;
  double      *alpha=NULL;
  long        *alphahist=NULL,optcount=0;
  CONSTSET    cset;
  SVECTOR     *diff=NULL;
  double      *diff_n=NULL;
  SVECTOR     *fy, *fybar, *f, **fycache, *lhs;
  MODEL       *svmModel=NULL;
  LABEL       ybar;
  DOC         *doc;

  long        n=sample.n;
  EXAMPLE     *ex=sample.examples;
  double      rt_total=0,rt_opt=0,rt_init=0,rt_psi=0,rt_viol=0,rt_kernel=0;
  double      rt1,rt2;
  double      progress,progress_old;

  /*
  SVECTOR     ***fydelta_cache=NULL;
  double      **loss_cache=NULL;
  int         cache_size=0;
  */
  CCACHE      *ccache=NULL;
  int         cached_constraint;

  rt1=get_runtime();

  init_struct_model(sample,sm,sparm,lparm,kparm); 
  sizePsi=sm->sizePsi+1;          /* sm must contain size of psi on return */

  if(sparm->slack_norm == 1) {
    lparm->svm_c=sparm->C;          /* set upper bound C */
    lparm->sharedslack=1;
  }
  else if(sparm->slack_norm == 2) {
    printf("ERROR: The joint algorithm does not apply to L2 slack norm!"); 
    fflush(stdout);
    exit(0); 
  }
  else {
    printf("ERROR: Slack norm must be L1 or L2!"); fflush(stdout);
    exit(0);
  }


  lparm->biased_hyperplane=0;     /* set threshold to zero */
  epsilon=100.0;                  /* start with low precision and
				     increase later */
  epsilon_cached=epsilon;         /* epsilon to use for iterations
				     using constraints constructed
				     from the constraint cache */

  cset=init_struct_constraints(sample, sm, sparm);
  if(cset.m > 0) {
    alpha=(double *)realloc(alpha,sizeof(double)*cset.m);
    alphahist=(long *)realloc(alphahist,sizeof(long)*cset.m);
    for(i=0; i<cset.m; i++) {
      alpha[i]=0;
      alphahist[i]=-1; /* -1 makes sure these constraints are never removed */
    }
  }
  kparm->gram_matrix=NULL;
  if((alg_type == DUAL_ALG) || (alg_type == DUAL_CACHE_ALG))
    kparm->gram_matrix=init_kernel_matrix(&cset,kparm);

  /* set initial model and slack variables */
  svmModel=(MODEL *)my_malloc(sizeof(MODEL));
  lparm->epsilon_crit=epsilon;
  svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n,
			 lparm,kparm,NULL,svmModel,alpha);
  add_weight_vector_to_linear_model(svmModel);
  sm->svm_model=svmModel;
  sm->w=svmModel->lin_weights; /* short cut to weight vector */

  /* create a cache of the feature vectors for the correct labels */
  fycache=(SVECTOR **)malloc(n*sizeof(SVECTOR *));
  for(i=0;i<n;i++) {
    fy=psi(ex[i].x,ex[i].y,sm,sparm);
    if(kparm->kernel_type == LINEAR) {
      diff=add_list_ss(fy); /* store difference vector directly */
      free_svector(fy);
      fy=diff;
    }
    fycache[i]=fy;
  }

  /* initialize the constraint cache */
  if(alg_type == DUAL_CACHE_ALG) {
    ccache=create_constraint_cache(sample,sparm);
  }

  rt_init+=MAX(get_runtime()-rt1,0);
  rt_total+=MAX(get_runtime()-rt1,0);

    /*****************/
   /*** main loop ***/
  /*****************/
  do { /* iteratively find and add constraints to working set */

      if(struct_verbosity>=1) { 
	printf("Iter %i: ",++numIt); 
	fflush(stdout);
      }
      
      rt1=get_runtime();

      /**** compute current slack ****/
      slack=0;
      for(j=0;j<cset.m;j++) 
	slack=MAX(slack,cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
      
      /**** find a violated joint constraint ****/
      lhs=NULL;
      dist=0;
      if(alg_type == DUAL_CACHE_ALG) {
	/* see if it is possible to construct violated constraint from cache */
	update_constraint_cache_for_model(ccache, svmModel);
	dist=find_most_violated_joint_constraint_in_cache(ccache,&lhs,&margin);
      }

      rt_total+=MAX(get_runtime()-rt1,0);

      /* Is there a sufficiently violated constraint in cache? */
      if(dist-slack > MAX(epsilon/10,sparm->epsilon)) { 
	/* use constraint from cache */
	rt1=get_runtime();
	cached_constraint=1;
	if(kparm->kernel_type == LINEAR) {
	  diff=add_list_ns(lhs); /* Linear case: compute weighted sum */
	  free_svector_shallow(lhs);
	}
	else { /* Non-linear case: make sure we have deep copy for cset */
	  diff=copy_svector(lhs); 
	  free_svector_shallow(lhs);
	}
	rt_total+=MAX(get_runtime()-rt1,0);
      }
      else { 
	/* do not use constraint from cache */
	rt1=get_runtime();
	cached_constraint=0;
	if(lhs)
	  free_svector_shallow(lhs);
	lhs=NULL;
	if(kparm->kernel_type == LINEAR) {
	  diff_n=create_nvector(sm->sizePsi);
	  clear_nvector(diff_n,sm->sizePsi);
	}
	margin=0;
	progress=0;
	progress_old=progress;
	rt_total+=MAX(get_runtime()-rt1,0);

	/**** find most violated joint constraint ***/
	for(i=0; i<n; i++) {
	  
	  rt1=get_runtime();
      
	  progress+=10.0/n;
	  if((struct_verbosity==1) && (((int)progress_old) != ((int)progress)))
	    {printf(".");fflush(stdout); progress_old=progress;}
	  if(struct_verbosity>=2)
	    {printf("."); fflush(stdout);}

	  rt2=get_runtime();
	  argmax_count++;
	  if(sparm->loss_type == SLACK_RESCALING) 
	    ybar=find_most_violated_constraint_slackrescaling(ex[i].x,
							      ex[i].y,sm,
							      sparm);
	  else
	    ybar=find_most_violated_constraint_marginrescaling(ex[i].x,
							       ex[i].y,sm,
							       sparm);
	  rt_viol+=MAX(get_runtime()-rt2,0);
	  
	  if(empty_label(ybar)) {
	    printf("ERROR: empty label was returned for example (%i)\n",i);
	    /* exit(1); */
	    continue;
	  }
	  
	  /**** get psi(x,y) and psi(x,ybar) ****/
	  rt2=get_runtime();
	  fy=copy_svector(fycache[i]); /*<= fy=psi(ex[i].x,ex[i].y,sm,sparm);*/
	  fybar=psi(ex[i].x,ybar,sm,sparm);
	  rt_psi+=MAX(get_runtime()-rt2,0);
	  lossval=loss(ex[i].y,ybar,sparm);
	  free_label(ybar);
	  
	  /**** scale feature vector and margin by loss ****/
	  if(sparm->loss_type == SLACK_RESCALING)
	    factor=lossval/n;
	  else                 /* do not rescale vector for */
	    factor=1.0/n;      /* margin rescaling loss type */
	  for(f=fy;f;f=f->next)
	    f->factor*=factor;
	  for(f=fybar;f;f=f->next)
	    f->factor*=-factor;
	  append_svector_list(fybar,fy);   /* compute fy-fybar */
	  
	  /**** add current fy-fybar and loss to cache ****/
	  if(alg_type == DUAL_CACHE_ALG) {
	    if(kparm->kernel_type == LINEAR) 
	      add_constraint_to_constraint_cache(ccache,svmModel,i,
						 add_list_ss(fybar),
						 lossval/n,sparm->ccache_size);
	    else
	      add_constraint_to_constraint_cache(ccache,svmModel,i,
						 copy_svector(fybar),
						 lossval/n,sparm->ccache_size);
	  }

	  /**** add current fy-fybar to constraint and margin ****/
	  if(kparm->kernel_type == LINEAR) {
	    add_list_n_ns(diff_n,fybar,1.0); /* add fy-fybar to sum */
	    free_svector(fybar);
	  }
	  else {
	    append_svector_list(fybar,lhs);  /* add fy-fybar to vector list */
	    lhs=fybar;
	  }
	  margin+=lossval/n;                 /* add loss to rhs */
	  
	  rt_total+=MAX(get_runtime()-rt1,0);

	} /* end of example loop */

	rt1=get_runtime();

	/* create sparse vector from dense sum */
	if(kparm->kernel_type == LINEAR) {
	  diff=create_svector_n(diff_n,sm->sizePsi,"",1.0);
	  free_nvector(diff_n);
	}
	else {
	  diff=lhs;
	}

	rt_total+=MAX(get_runtime()-rt1,0);

      } /* end of finding most violated joint constraint */

      rt1=get_runtime();

      /**** if `error', then add constraint and recompute QP ****/
      doc=create_example(cset.m,0,1,1,diff);
      dist=classify_example(svmModel,doc);
      ceps=MAX(0,margin-dist-slack);
      if(slack > (margin-dist+0.000001)) {
	printf("\nWARNING: Slack of most violated constraint is smaller than slack of working\n");
	printf("         set! There is probably a bug in 'find_most_violated_constraint_*'.\n");
	printf("slack=%f, newslack=%f\n",slack,margin-dist);
	/* exit(1); */
      }
      if(ceps > sparm->epsilon) { 
	/**** resize constraint matrix and add new constraint ****/
	cset.lhs=(DOC **)realloc(cset.lhs,sizeof(DOC *)*(cset.m+1));
	if(sparm->slack_norm == 1) 
	  cset.lhs[cset.m]=create_example(cset.m,0,1,1,diff);
	else if(sparm->slack_norm == 2)
	  exit(1);
	cset.rhs=(double *)realloc(cset.rhs,sizeof(double)*(cset.m+1));
	cset.rhs[cset.m]=margin;
	alpha=(double *)realloc(alpha,sizeof(double)*(cset.m+1));
	alpha[cset.m]=0;
	alphahist=(long *)realloc(alphahist,sizeof(long)*(cset.m+1));
	alphahist[cset.m]=optcount;
	cset.m++;
	totconstraints++;
	if((alg_type == DUAL_ALG) || (alg_type == DUAL_CACHE_ALG)) {
	  if(struct_verbosity>=1) {
	    printf(":");fflush(stdout);
	  }
	  rt2=get_runtime();
	  kparm->gram_matrix=update_kernel_matrix(kparm->gram_matrix,cset.m-1,
						  &cset,kparm);
	  rt_kernel+=MAX(get_runtime()-rt2,0);
	}
	
	/**** get new QP solution ****/
	if(struct_verbosity>=1) {
	  printf("*");fflush(stdout);
	}
	rt2=get_runtime();
	/* set svm precision so that higher than eps of most violated constr */
	if(cached_constraint) {
	  epsilon_cached=MIN(epsilon_cached,MAX(ceps,sparm->epsilon)); 
	  lparm->epsilon_crit=epsilon_cached/2; 
	}
	else {
	  epsilon=MIN(epsilon,MAX(ceps,sparm->epsilon)); /* best eps so far */
	  lparm->epsilon_crit=epsilon/2; 
	  epsilon_cached=epsilon;
	}
	free_model(svmModel,0);
	svmModel=(MODEL *)my_malloc(sizeof(MODEL));
	/* Run the QP solver on cset. */
	kernel_type_org=kparm->kernel_type;
	if((alg_type == DUAL_ALG) || (alg_type == DUAL_CACHE_ALG))
	  kparm->kernel_type=GRAM; /* use kernel stored in kparm */
	svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n,
			       lparm,kparm,NULL,svmModel,alpha);
	kparm->kernel_type=kernel_type_org; 
	svmModel->kernel_parm.kernel_type=kernel_type_org;
	/* Always add weight vector, in case part of the kernel is
	   linear. If not, ignore the weight vector since its
	   content is bogus. */
	add_weight_vector_to_linear_model(svmModel);
	sm->svm_model=svmModel;
	sm->w=svmModel->lin_weights; /* short cut to weight vector */
	optcount++;
	/* keep track of when each constraint was last
	   active. constraints marked with -1 are not updated */
	for(j=0;j<cset.m;j++) 
	  if((alphahist[j]>-1) && (alpha[j] != 0))  
	    alphahist[j]=optcount;
	rt_opt+=MAX(get_runtime()-rt2,0);
	
	/* Check if some of the linear constraints have not been
	   active in a while. Those constraints are then removed to
	   avoid bloating the working set beyond necessity. */
	if(struct_verbosity>=2)
	  printf("Reducing working set...");fflush(stdout);
	remove_inactive_constraints(&cset,alpha,optcount,alphahist,50);
	if(struct_verbosity>=2)
	  printf("done. (NumConst=%d) ",cset.m);
      }
      else {
	free_svector(diff);
      }

      if(struct_verbosity>=1)
	printf("(NumConst=%d, SV=%ld, CEps=%.4f, QPEps=%.4f)\n",cset.m,
	       svmModel->sv_num-1,ceps,svmModel->maxdiff);

      free_example(doc,0);
	
      rt_total+=MAX(get_runtime()-rt1,0);

  } while((ceps > sparm->epsilon) || 
	  finalize_iteration(ceps,cached_constraint,sample,sm,cset,alpha,sparm)
	 );
  

  if(struct_verbosity>=1) {
    /**** compute sum of slacks ****/
    /**** WARNING: If positivity constraints are used, then the
	  maximum slack id is larger than what is allocated
	  below ****/
    slacksum=0;
    if(sparm->slack_norm == 1) {
      for(j=0;j<cset.m;j++) 
	slacksum=MAX(slacksum,
		     cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
      }
    else if(sparm->slack_norm == 2) {
      exit(1);
    }
    alphasum=0;
    for(i=0; i<cset.m; i++)  
      alphasum+=alpha[i]*cset.rhs[i];
    modellength=model_length_s(svmModel,kparm);
    dualitygap=(0.5*modellength*modellength+sparm->C*(slacksum+ceps))
               -(alphasum-0.5*modellength*modellength);
    
    printf("Final epsilon on KKT-Conditions: %.5f\n",
	   MAX(svmModel->maxdiff,ceps));
    printf("Upper bound on duality gap: %.5f\n", dualitygap);
    printf("Dual objective value: dval=%.5f\n",
	    alphasum-0.5*modellength*modellength);
    printf("Total number of constraints in final working set: %i (of %i)\n",(int)cset.m,(int)totconstraints);
    printf("Number of iterations: %d\n",numIt);
    printf("Number of calls to 'find_most_violated_constraint': %ld\n",argmax_count);
    if(sparm->slack_norm == 1) {
      printf("Number of SV: %ld \n",svmModel->sv_num-1);
      printf("Norm of weight vector: |w|=%.5f\n",
	     model_length_s(svmModel,kparm));
    }
    else if(sparm->slack_norm == 2){ 
      printf("Number of SV: %ld (including %ld at upper bound)\n",
	     svmModel->sv_num-1,svmModel->at_upper_bound);
      printf("Norm of weight vector (including L2-loss): |w|=%.5f\n",
	     model_length_s(svmModel,kparm));
    }
    printf("Value of slack variable (on working set): xi=%.5f\n",slacksum);
    printf("Norm of longest difference vector: ||Psi(x,y)-Psi(x,ybar)||=%.5f\n",
	   length_of_longest_document_vector(cset.lhs,cset.m,kparm));
    printf("Runtime in cpu-seconds: %.2f (%.2f%% for QP, %.2f%% for kernel, %.2f%% for Argmax, %.2f%% for Psi, %.2f%% for init)\n",
	   rt_total/100.0, (100.0*rt_opt)/rt_total, (100.0*rt_kernel)/rt_total,
	   (100.0*rt_viol)/rt_total, (100.0*rt_psi)/rt_total, 
	   (100.0*rt_init)/rt_total);
  }
  if(ccache) {
    long cnum=0;
    CCACHEELEM *celem;
    for(i=0;i<n;i++) 
      for(celem=ccache->constlist[i];celem;celem=celem->next) 
	cnum++;
    printf("Final number of constraints in cache: %ld\n",cnum);
  }
  if(struct_verbosity>=4)
    printW(sm->w,sizePsi,n,lparm->svm_c);

  if(svmModel) {
    sm->svm_model=copy_model(svmModel);
    sm->w=sm->svm_model->lin_weights; /* short cut to weight vector */
  }

  print_struct_learning_stats(sample,sm,cset,alpha,sparm);

  if(ccache)    
    free_constraint_cache(ccache);
  for(i=0;i<n;i++)
    free_svector(fycache[i]);
  free(fycache);
  if(svmModel)
    free_model(svmModel,0);
  free(alpha); 
  free(alphahist); 
  free(cset.rhs); 
  for(i=0;i<cset.m;i++) 
    free_example(cset.lhs[i],1);
  free(cset.lhs);
  if(kparm->gram_matrix)
    free_matrix(kparm->gram_matrix);
}