Example #1
static void bm_free_extra_models()
{
	while (N_polygon_models > N_D2_POLYGON_MODELS)
		free_model(&Polygon_models[--N_polygon_models]);
	while (N_polygon_models > exit_modelnum)
		free_model(&Polygon_models[--N_polygon_models]);
}
Example #2
Classifier::~Classifier() {
    for (int i = 0; i < nFeatures; i++)
        delete featureExtractors[i];

    for (unsigned int i = 0; i < Globals::numZones; i++)
        free_model(models[i], 1);
}
Example #3
void        free_struct_model(STRUCTMODEL sm) 
{
  /* Frees the memory of model. */
  /* if(sm.w) free(sm.w); */ /* this is freed in free_model */
  if(sm.svm_model) free_model(sm.svm_model,1);
  /* add free calls for user defined data here */
}
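The integer passed to free_model in the SVM examples is a deep-free flag. A minimal sketch of the calling convention, assuming SVM-light's API (read_model and free_model(MODEL *, int)):

/* Sketch, assuming SVM-light's free_model(MODEL *m, int deep):
 * deep=1 also frees the support vectors owned by the model;
 * deep=0 frees only the MODEL struct itself, for when the DOCs are
 * shared with training data that is freed separately (cf. Example #17). */
MODEL *m = read_model("example.model");
/* ... classify with m ... */
free_model(m, 1);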
Example #4
int main (int argc, char* argv[])
{
  MODEL *model; 

  read_input_parameters(argc,argv,modelfile,outfile, &verbosity, &format);

  if (format) {
    model=read_binary_model(modelfile);
  } else {
    model=read_model(modelfile);
    if(model->kernel_parm.kernel_type == 0) { /* linear kernel */
        /* compute weight vector */
        add_weight_vector_to_linear_model(model);
    }
  }
    if(model->kernel_parm.kernel_type == 0) { /* linear kernel */
        FILE* modelfl = fopen (outfile, "wb");
        if (modelfl==NULL)
        { perror (outfile); exit (1); }

        if (verbosity > 1)
            fprintf(modelfl,"B=%.32g\n",model->b);
        long i=0;
        for (i= 0; i< model->totwords; ++i) 
            fprintf(modelfl,"%.32g\n",model->lin_weights[i]);
        fclose(modelfl);
    } else {
        fprintf(stderr,"No output besides linear models\n");
    }
  free_model(model,1);
  return(0);
}
Example #5
int main (void)
{
    hid_t file_id;
    int status;
    detector_data *data;
    model *mod = NULL;
    double *data_stream = NULL;
    unsigned int *qual = NULL;

    /* open file -- ignore failure on this step */
    status = RD_open_file(TESTDATAFILE, &file_id);
    if (status) return TEST_EXIT_SKIP;

    /* initialize data struct */
    status = RD_init_data(file_id, 0, -1, &data);
    if (status) return TEST_EXIT_SKIP;

    /* get data */
    status = RD_get_data(file_id, data);
    if (status) return TEST_EXIT_SKIP;

    /* close file */
    status = RD_close_file(file_id);
    if (status) return TEST_EXIT_SKIP;

    /* setup flt model */
    status = setup_model(MODEL_TYPE_FLT, &mod, data, FILT_NPAD, FILT_SCALE);
    if (status) return TEST_EXIT_SKIP;

    /* initialize flt model */
    status = init_model(mod, data);
    if (status) return TEST_EXIT_SKIP;

    /* run flt model */
    data_stream = malloc(mod->ndet*mod->nsamp*sizeof(*data_stream));
    qual = malloc(mod->ndet*mod->nsamp*sizeof(*qual));
    if (data_stream == NULL || qual == NULL) return EXIT_FAILURE;

    memcpy(data_stream, data->signal,
           mod->ndet*mod->nsamp*sizeof(*data_stream));
    memcpy(qual, data->qual, mod->ndet*mod->nsamp*sizeof(*qual));

    status = model_flt_calc(0, 1, mod, data_stream, qual);
    if (status) return EXIT_FAILURE;

    free(data_stream);
    free(qual);

    /* free flt model */
    status = free_model(mod);
    if (status) return TEST_EXIT_SKIP;

    /* free data */
    status = RD_free_data(data);
    if (status) return TEST_EXIT_SKIP;

    /* success! */
    return EXIT_SUCCESS;
}
Example #6
void free_polygon_models()
{
	int i;

	for (i=0;i<N_polygon_models;i++) {
		free_model(&Polygon_models[i]);
	}

}
Example #7
int main(int argc, char **argv) {
  // Load the model from the file given as first non-option argument.
  // Normalization constant Z will be calculated (takes a while) if not provided.
  double Z = 0;
  int opt;
  while((opt = getopt(argc, argv, "z:")) != -1) {
    switch(opt) {
    case 'z': Z = atof(optarg); break;
    default: die("%s", usage);
    }
  }
  if (optind == argc) die("%s", usage);
  msg("Loading model from %s.", argv[optind]);
  if (Z > 0) msg("Will use fixed Z of %g.", Z);
  else msg("Will calculate Z, this may take some time...");
  model_t m = load_model(argv[optind], Z);

  // Read data from stdin and calculate logL
  char **toks = _d_calloc(m->ntok, sizeof(char *));
  svec_t *x = _d_calloc(m->ntok, sizeof(svec_t));
  double *logZ = _d_calloc(m->ntok, sizeof(double));
  for (size_t i = 1; i < m->ntok; i++) {
    logZ[i] = log(m->z[i]);
  }
  double logL = 0;
  size_t nline = 0;

  msg("Reading data from stdin (each dot = 1M lines)...");
  forline (line, NULL) {
    if ((++nline & ((1<<20)-1)) == 0) fputc('.', stderr);
    line[strlen(line)-1] = 0;	// chop newline
    size_t ntok = split(line, "\t", toks, m->ntok);
    if (ntok != m->ntok) die("Wrong number of columns.");
    for (size_t i = 0; i < m->ntok; i++) {
      if (*toks[i] == '\0') {
	x[i] = NULL;
      } else {
	x[i] = svec(m->v[i], toks[i]);
	assert(x[i]->vec != NULL && x[i]->cnt > 0);
      }
    }
    assert(x[0] != NULL);
    double logx = log(((double) x[0]->cnt) / m->n[0]);
    for (size_t i = 1; i < m->ntok; i++) {
      if (x[i] == NULL) continue;
      double logy = log(((double) x[i]->cnt) / m->n[i]);
      logL += logx + logy - logZ[i] - d2(x[0]->vec, x[i]->vec, m->ndim);
    }
  }
  fputc('\n', stderr);
  logL /= nline;
  _d_free(toks); _d_free(x); _d_free(logZ);
  free_model(m);
  msg("nlines=%zu avg-logL=%g", nline, logL);
}
Example #8
int main(int argc, char **argv) {
  w2v_t w2v;
  if (load_model(argv[1], &w2v) < 0) {
    return -1;
  }

  char st1[kMaxSize], st2[kMaxSize];
  while (fscanf(stdin, "%s\t%s", st1, st2) == 2) {
    fprintf(stdout, "%s\t%s\t%f\n", st1, st2, cosine(&w2v, st1, st2));
  }

  free_model(&w2v);
  return 0;
}
Example #9
int parse_model(const char* filename, track_model_t* model)
{
  FILE* file;
  file = fopen(filename, "r");
  if (file == 0) {
    fprintf(stderr, "Could not open %s.\n", filename);
    return -1;
  }

  model->nodes = 0;
  int rval = parse_model_file(file, model);
  if (rval < 0) {
    free_model(model);
  }
  
  fclose(file);
  return rval;
}
Example #10
int main (void)
{
  hid_t file_id;
  int status;
  detector_data *data;
  model *mod;
  UNUSED unsigned long memused;

  /* open file -- ignore failure on this step */
  status = RD_open_file(TESTDATAFILE, &file_id);
  if (status) return TEST_EXIT_SKIP;

  /* initialize data struct */
  status = RD_init_data(file_id, 0, -1, &data);
  if (status) return TEST_EXIT_SKIP;

  /* get data */
  status = RD_get_data(file_id, data);
  if (status) return TEST_EXIT_SKIP;

  /* close file */
  status = RD_close_file(file_id);
  if (status) return TEST_EXIT_SKIP;

  /* setup model */
  status = setup_model(MODEL_TYPE_AST, &mod, data);
  if (status) return TEST_EXIT_SKIP;

  /* get memory */
  memused = getmem_model(mod);

  /* free ast model */
  status = free_model(mod);
  if (status) return EXIT_FAILURE;

  /* free data */
  status = RD_free_data(data);
  if (status) return TEST_EXIT_SKIP;

  /* success! */
  return EXIT_SUCCESS;
}
Example #11
void        print_struct_learning_stats(SAMPLE sample, STRUCTMODEL *sm,
					CONSTSET cset, double *alpha, 
					STRUCT_LEARN_PARM *sparm)
{
  /* This function is called after training and allows final touches to
     the model sm. But primarly it allows computing and printing any
     kind of statistic (e.g. training error) you might want. */

  /* Replace SV with single weight vector */
  MODEL *model=sm->svm_model;
  if(model->kernel_parm.kernel_type == LINEAR) {
    if(struct_verbosity>=1) {
      printf("Compacting linear model..."); fflush(stdout);
    }
    sm->svm_model=compact_linear_model(model);
    sm->w=sm->svm_model->lin_weights; /* short cut to weight vector */
    free_model(model,1);
    if(struct_verbosity>=1) {
      printf("done\n"); fflush(stdout);
    }
  }  
}
Example #12
int main(
	int argc,
	char *argv[]
) {
	int ret = EXIT_FAILURE;
	serial_port_t port;
	struct model *model = malloc(sizeof(struct model));
	struct activity *activity = malloc(sizeof(struct activity));
	model->num_theta_region = CONF_NUM_THETA_REGION;
	char chosen_username[NAME_LEN];
	char chosen_activity[NAME_LEN];
	if (prompt_activity_name(chosen_activity))
		goto ERROR;
	if (prompt_user_name(chosen_username))
		goto ERROR;
	strncpy(activity->name, chosen_activity, NAME_LEN - 1);
	strncpy(activity->user, chosen_username, NAME_LEN - 1);
	activity->name[NAME_LEN - 1] = '\0';	/* ensure NUL termination */
	activity->user[NAME_LEN - 1] = '\0';
	if (init_model(model))
		goto ERROR;
	if (init_activity(activity, model))
		goto ERROR;
	if (check_file_exists(FILE_CONF_CALIBRATION))
		goto ERROR;
	if (serial_port_open(CONF_SERIAL_PORT, &port))
		goto ERROR;
	if (serial_port_configure(port))
		goto ERROR;
	if (real_time_listen(port, activity, model))
		goto ERROR;
	file_write_activity_code(activity->files[FILE_ACTIVITY_CODE], activity, model);
	ret = EXIT_SUCCESS;
	ERROR:
		serial_port_close(port);
		free_activity(activity);
		free_model(model);
		return ret;
}
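Note that the shared ERROR label above also closes port on paths where serial_port_open never ran. A staged-cleanup layout avoids that; the following is a generic sketch, not this project's code:

/* Staged goto cleanup: each label releases only what was actually
   acquired, so no handle is ever closed uninitialized. */
#include <stdio.h>
#include <stdlib.h>

int process(const char *path)
{
	int ret = EXIT_FAILURE;
	char *buf = malloc(64);
	if (!buf)
		goto out;
	FILE *fp = fopen(path, "r");
	if (!fp)
		goto out_free;
	/* ... real work with fp and buf ... */
	ret = EXIT_SUCCESS;
	fclose(fp);
out_free:
	free(buf);
out:
	return ret;
}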
Example #13
void svm_learn_struct(SAMPLE sample, STRUCT_LEARN_PARM *sparm,
		      LEARN_PARM *lparm, KERNEL_PARM *kparm, 
		      STRUCTMODEL *sm, int alg_type)
{
  int         i,j;
  int         numIt=0;
  long        argmax_count=0;
  long        newconstraints=0, totconstraints=0, activenum=0; 
  int         opti_round, *opti, fullround, use_shrinking;
  long        old_totconstraints=0;
  double      epsilon,svmCnorm;
  long        tolerance,new_precision=1,dont_stop=0;
  double      lossval,factor,dist;
  double      margin=0;
  double      slack, *slacks, slacksum, ceps;
  double      dualitygap,modellength,alphasum;
  long        sizePsi;
  double      *alpha=NULL;
  long        *alphahist=NULL,optcount=0,lastoptcount=0;
  CONSTSET    cset;
  SVECTOR     *diff=NULL;
  SVECTOR     *fy, *fybar, *f, **fycache=NULL;
  SVECTOR     *slackvec;
  WORD        slackv[2];
  MODEL       *svmModel=NULL;
  KERNEL_CACHE *kcache=NULL;
  LABEL       ybar;
  DOC         *doc;

  long        n=sample.n;
  EXAMPLE     *ex=sample.examples;
  double      rt_total=0, rt_opt=0, rt_init=0, rt_psi=0, rt_viol=0;
  double      rt1,rt2;

  rt1=get_runtime();

  init_struct_model(sample,sm,sparm,lparm,kparm); 
  sizePsi=sm->sizePsi+1;          /* sm must contain size of psi on return */

  /* initialize shrinking-style example selection heuristic */ 
  if(alg_type == NSLACK_SHRINK_ALG)
    use_shrinking=1;
  else
    use_shrinking=0;
  opti=(int*)my_malloc(n*sizeof(int));
  for(i=0;i<n;i++) {
    opti[i]=0;
  }
  opti_round=0;

  /* normalize regularization parameter C by the number of training examples */
  svmCnorm=sparm->C/n;

  if(sparm->slack_norm == 1) {
    lparm->svm_c=svmCnorm;          /* set upper bound C */
    lparm->sharedslack=1;
  }
  else if(sparm->slack_norm == 2) {
    lparm->svm_c=999999999999999.0; /* upper bound C must never be reached */
    lparm->sharedslack=0;
    if(kparm->kernel_type != LINEAR_KERNEL) {
      printf("ERROR: Kernels are not implemented for L2 slack norm!"); 
      fflush(stdout);
      exit(0); 
    }
  }
  else {
    printf("ERROR: Slack norm must be L1 or L2!"); fflush(stdout);
    exit(0);
  }


  epsilon=100.0;                  /* start with low precision and
				     increase later */
  tolerance=MIN(n/3,MAX(n/100,5));/* increase precision, whenever less
                                     than that number of constraints
                                     is not fulfilled */
  lparm->biased_hyperplane=0;     /* set threshold to zero */

  cset=init_struct_constraints(sample, sm, sparm);
  if(cset.m > 0) {
    alpha=(double *)realloc(alpha,sizeof(double)*cset.m);
    alphahist=(long *)realloc(alphahist,sizeof(long)*cset.m);
    for(i=0; i<cset.m; i++) {
      alpha[i]=0;
      alphahist[i]=-1; /* -1 makes sure these constraints are never removed */
    }
  }

  /* set initial model and slack variables*/
  svmModel=(MODEL *)my_malloc(sizeof(MODEL));
  lparm->epsilon_crit=epsilon;
  if(kparm->kernel_type != LINEAR_KERNEL)
    kcache=kernel_cache_init(MAX(cset.m,1),lparm->kernel_cache_size);
  svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n,
			 lparm,kparm,kcache,svmModel,alpha);
  if(kcache)
    kernel_cache_cleanup(kcache);
  add_weight_vector_to_linear_model(svmModel);
  sm->svm_model=svmModel;
  sm->w=svmModel->lin_weights; /* short cut to weight vector */

  /* create a cache of the feature vectors for the correct labels */
  if(USE_FYCACHE) {
    fycache=(SVECTOR **)my_malloc(n*sizeof(SVECTOR *));
    for(i=0;i<n;i++) {
      fy=psi(ex[i].x,ex[i].y,sm,sparm);
      if(kparm->kernel_type == LINEAR_KERNEL) {
	diff=add_list_ss(fy); /* store difference vector directly */
	free_svector(fy);
	fy=diff;
      }
      fycache[i]=fy;
    }
  }

  rt_init+=MAX(get_runtime()-rt1,0);
  rt_total+=MAX(get_runtime()-rt1,0);

    /*****************/
   /*** main loop ***/
  /*****************/
  do { /* iteratively increase precision */

    epsilon=MAX(epsilon*0.49999999999,sparm->epsilon);
    new_precision=1;
    if(epsilon == sparm->epsilon)   /* for final precision, find all SV */
      tolerance=0; 
    lparm->epsilon_crit=epsilon/2;  /* svm precision must be higher than eps */
    if(struct_verbosity>=1)
      printf("Setting current working precision to %g.\n",epsilon);

    do { /* iteration until (approx) all SV are found for current
            precision and tolerance */
      
      opti_round++;
      activenum=n;
      dont_stop=0;
      old_totconstraints=totconstraints;

      do { /* with shrinking turned on, go through examples that keep
	      producing new constraints */

	if(struct_verbosity>=1) { 
	  printf("Iter %i (%ld active): ",++numIt,activenum); 
	  fflush(stdout);
	}
	
	ceps=0;
	fullround=(activenum == n);

	for(i=0; i<n; i++) { /*** example loop ***/
	  
	  rt1=get_runtime();
	    
	  if((!use_shrinking) || (opti[i] != opti_round)) {
	                                /* if the example is not shrunk
	                                away, then see if it is necessary to 
					add a new constraint */
	    rt2=get_runtime();
	    argmax_count++;
	    if(sparm->loss_type == SLACK_RESCALING) 
	      ybar=find_most_violated_constraint_slackrescaling(ex[i].x,
								ex[i].y,sm,
								sparm);
	    else
	      ybar=find_most_violated_constraint_marginrescaling(ex[i].x,
								 ex[i].y,sm,
								 sparm);
	    rt_viol+=MAX(get_runtime()-rt2,0);
	    
	    if(empty_label(ybar)) {
	      if(opti[i] != opti_round) {
		activenum--;
		opti[i]=opti_round; 
	      }
	      if(struct_verbosity>=2)
		printf("no-incorrect-found(%i) ",i);
	      continue;
	    }
	  
	    /**** get psi(y)-psi(ybar) ****/
	    rt2=get_runtime();
	    if(fycache) 
	      fy=copy_svector(fycache[i]);
	    else
	      fy=psi(ex[i].x,ex[i].y,sm,sparm);
	    fybar=psi(ex[i].x,ybar,sm,sparm);
	    rt_psi+=MAX(get_runtime()-rt2,0);
	    
	    /**** scale feature vector and margin by loss ****/
	    lossval=loss(ex[i].y,ybar,sparm);
	    if(sparm->slack_norm == 2)
	      lossval=sqrt(lossval);
	    if(sparm->loss_type == SLACK_RESCALING)
	      factor=lossval;
	    else               /* do not rescale vector for */
	      factor=1.0;      /* margin rescaling loss type */
	    for(f=fy;f;f=f->next)
	      f->factor*=factor;
	    for(f=fybar;f;f=f->next)
	      f->factor*=-factor;
	    margin=lossval;

	    /**** create constraint for current ybar ****/
	    append_svector_list(fy,fybar);/* append the two vector lists */
	    doc=create_example(cset.m,0,i+1,1,fy);

	    /**** compute slack for this example ****/
	    slack=0;
	    for(j=0;j<cset.m;j++) 
	      if(cset.lhs[j]->slackid == i+1) {
		if(sparm->slack_norm == 2) /* works only for linear kernel */
		  slack=MAX(slack,cset.rhs[j]
			          -(classify_example(svmModel,cset.lhs[j])
				    -sm->w[sizePsi+i]/(sqrt(2*svmCnorm))));
		else
		  slack=MAX(slack,
			   cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
	      }
	    
	    /**** if `error' add constraint and recompute ****/
	    dist=classify_example(svmModel,doc);
	    ceps=MAX(ceps,margin-dist-slack);
	    if(slack > (margin-dist+0.0001)) {
	      printf("\nWARNING: Slack of most violated constraint is smaller than slack of working\n");
	      printf("         set! There is probably a bug in 'find_most_violated_constraint_*'.\n");
	      printf("Ex %d: slack=%f, newslack=%f\n",i,slack,margin-dist);
	      /* exit(1); */
	    }
	    if((dist+slack)<(margin-epsilon)) { 
	      if(struct_verbosity>=2)
		{printf("(%i,eps=%.2f) ",i,margin-dist-slack); fflush(stdout);}
	      if(struct_verbosity==1)
		{printf("."); fflush(stdout);}
	      
	      /**** resize constraint matrix and add new constraint ****/
	      cset.m++;
	      cset.lhs=(DOC **)realloc(cset.lhs,sizeof(DOC *)*cset.m);
	      if(kparm->kernel_type == LINEAR_KERNEL) {
		diff=add_list_ss(fy); /* store difference vector directly */
		if(sparm->slack_norm == 1) 
		  cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1,
						    copy_svector(diff));
		else if(sparm->slack_norm == 2) {
		  /**** add squared slack variable to feature vector ****/
		  slackv[0].wnum=sizePsi+i;
		  slackv[0].weight=1/(sqrt(2*svmCnorm));
		  slackv[1].wnum=0; /*terminator*/
		  slackvec=create_svector(slackv,NULL,1.0);
		  cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1,
						    add_ss(diff,slackvec));
		  free_svector(slackvec);
		}
		free_svector(diff);
	      }
	      else { /* kernel is used */
		if(sparm->slack_norm == 1) 
		  cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1,
						    copy_svector(fy));
		else if(sparm->slack_norm == 2)
		  exit(1);
	      }
	      cset.rhs=(double *)realloc(cset.rhs,sizeof(double)*cset.m);
	      cset.rhs[cset.m-1]=margin;
	      alpha=(double *)realloc(alpha,sizeof(double)*cset.m);
	      alpha[cset.m-1]=0;
	      alphahist=(long *)realloc(alphahist,sizeof(long)*cset.m);
	      alphahist[cset.m-1]=optcount;
	      newconstraints++;
	      totconstraints++;
	    }
	    else {
	      printf("+"); fflush(stdout); 
	      if(opti[i] != opti_round) {
		activenum--;
		opti[i]=opti_round; 
	      }
	    }

	    free_example(doc,0);
	    free_svector(fy); /* this also frees fybar */
	    free_label(ybar);
	  }

	  /**** get new QP solution ****/
	  if((newconstraints >= sparm->newconstretrain) 
	     || ((newconstraints > 0) && (i == n-1))
	     || (new_precision && (i == n-1))) {
	    if(struct_verbosity>=1) {
	      printf("*");fflush(stdout);
	    }
	    rt2=get_runtime();
	    free_model(svmModel,0);
	    svmModel=(MODEL *)my_malloc(sizeof(MODEL));
	    /* Always get a new kernel cache. It is not possible to use the
	       same cache for two different training runs */
	    if(kparm->kernel_type != LINEAR_KERNEL)
	      kcache=kernel_cache_init(MAX(cset.m,1),lparm->kernel_cache_size);
	    /* Run the QP solver on cset. */
	    svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n,
				   lparm,kparm,kcache,svmModel,alpha);
	    if(kcache)
	      kernel_cache_cleanup(kcache);
	    /* Always add weight vector, in case part of the kernel is
	       linear. If not, ignore the weight vector since its
	       content is bogus. */
	    add_weight_vector_to_linear_model(svmModel);
	    sm->svm_model=svmModel;
	    sm->w=svmModel->lin_weights; /* short cut to weight vector */
	    optcount++;
	    /* keep track of when each constraint was last
	       active. constraints marked with -1 are not updated */
	    for(j=0;j<cset.m;j++) 
	      if((alphahist[j]>-1) && (alpha[j] != 0))  
		alphahist[j]=optcount;
	    rt_opt+=MAX(get_runtime()-rt2,0);
	    
	    if(new_precision && (epsilon <= sparm->epsilon))  
	      dont_stop=1; /* make sure we take one final pass */
	    new_precision=0;
	    newconstraints=0;
	  }	

	  rt_total+=MAX(get_runtime()-rt1,0);

	} /* end of example loop */

	rt1=get_runtime();
	
	if(struct_verbosity>=1)
	  printf("(NumConst=%d, SV=%ld, CEps=%.4f, QPEps=%.4f)\n",cset.m,
		 svmModel->sv_num-1,ceps,svmModel->maxdiff);
	
	/* Check if some of the linear constraints have not been
	   active in a while. Those constraints are then removed to
	   avoid bloating the working set beyond necessity. */
	if(struct_verbosity>=2)
	  {printf("Reducing working set..."); fflush(stdout);}
	remove_inactive_constraints(&cset,alpha,optcount,alphahist,
				    MAX(50,optcount-lastoptcount));
	lastoptcount=optcount;
	if(struct_verbosity>=2)
	  printf("done. (NumConst=%d)\n",cset.m);
	
	rt_total+=MAX(get_runtime()-rt1,0);
	
      } while(use_shrinking && (activenum > 0)); /* when using shrinking, 
						    repeat until all examples 
						    produced no constraint at
						    least once */

    } while(((totconstraints - old_totconstraints) > tolerance) || dont_stop);

  } while((epsilon > sparm->epsilon) 
	  || finalize_iteration(ceps,0,sample,sm,cset,alpha,sparm));  

  if(struct_verbosity>=1) {
    /**** compute sum of slacks ****/
    /**** WARNING: If positivity constraints are used, then the
	  maximum slack id is larger than what is allocated
	  below ****/
    slacks=(double *)my_malloc(sizeof(double)*(n+1));
    for(i=0; i<=n; i++) { 
      slacks[i]=0;
    }
    if(sparm->slack_norm == 1) {
      for(j=0;j<cset.m;j++) 
	slacks[cset.lhs[j]->slackid]=MAX(slacks[cset.lhs[j]->slackid],
			   cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
      }
    else if(sparm->slack_norm == 2) {
      for(j=0;j<cset.m;j++) 
	slacks[cset.lhs[j]->slackid]=MAX(slacks[cset.lhs[j]->slackid],
		cset.rhs[j]
	         -(classify_example(svmModel,cset.lhs[j])
		   -sm->w[sizePsi+cset.lhs[j]->slackid-1]/(sqrt(2*svmCnorm))));
    }
    slacksum=0;
    for(i=1; i<=n; i++)  
      slacksum+=slacks[i];
    free(slacks);
    alphasum=0;
    for(i=0; i<cset.m; i++)  
      alphasum+=alpha[i]*cset.rhs[i];
    modellength=model_length_s(svmModel);
    dualitygap=(0.5*modellength*modellength+svmCnorm*(slacksum+n*ceps))
               -(alphasum-0.5*modellength*modellength);
    
    printf("Final epsilon on KKT-Conditions: %.5f\n",
	   MAX(svmModel->maxdiff,epsilon));
    printf("Upper bound on duality gap: %.5f\n", dualitygap);
    printf("Dual objective value: dval=%.5f\n",
	    alphasum-0.5*modellength*modellength);
    printf("Total number of constraints in final working set: %i (of %i)\n",(int)cset.m,(int)totconstraints);
    printf("Number of iterations: %d\n",numIt);
    printf("Number of calls to 'find_most_violated_constraint': %ld\n",argmax_count);
    if(sparm->slack_norm == 1) {
      printf("Number of SV: %ld \n",svmModel->sv_num-1);
      printf("Number of non-zero slack variables: %ld (out of %ld)\n",
	     svmModel->at_upper_bound,n);
      printf("Norm of weight vector: |w|=%.5f\n",modellength);
    }
    else if(sparm->slack_norm == 2){ 
      printf("Number of SV: %ld (including %ld at upper bound)\n",
	     svmModel->sv_num-1,svmModel->at_upper_bound);
      printf("Norm of weight vector (including L2-loss): |w|=%.5f\n",
	     modellength);
    }
    printf("Norm. sum of slack variables (on working set): sum(xi_i)/n=%.5f\n",slacksum/n);
    printf("Norm of longest difference vector: ||Psi(x,y)-Psi(x,ybar)||=%.5f\n",
	   length_of_longest_document_vector(cset.lhs,cset.m,kparm));
    printf("Runtime in cpu-seconds: %.2f (%.2f%% for QP, %.2f%% for Argmax, %.2f%% for Psi, %.2f%% for init)\n",
	   rt_total/100.0, (100.0*rt_opt)/rt_total, (100.0*rt_viol)/rt_total, 
	   (100.0*rt_psi)/rt_total, (100.0*rt_init)/rt_total);
  }
  if(struct_verbosity>=4)
    printW(sm->w,sizePsi,n,lparm->svm_c);

  if(svmModel) {
    sm->svm_model=copy_model(svmModel);
    sm->w=sm->svm_model->lin_weights; /* short cut to weight vector */
  }

  print_struct_learning_stats(sample,sm,cset,alpha,sparm);

  if(fycache) {
    for(i=0;i<n;i++)
      free_svector(fycache[i]);
    free(fycache);
  }
  if(svmModel)
    free_model(svmModel,0);
  free(alpha); 
  free(alphahist); 
  free(opti); 
  free(cset.rhs); 
  for(i=0;i<cset.m;i++) 
    free_example(cset.lhs[i],1);
  free(cset.lhs);
}
Example #14
static void bm_free_extra_models()
{
	const auto base = std::min(N_D2_POLYGON_MODELS.value, exit_modelnum);
	range_for (auto &p, partial_range(Polygon_models, base, exchange(N_polygon_models, base)))
		free_model(p);
}
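This is the modernized rewrite of Example #1: std::exchange resets N_polygon_models to base while returning its previous value, so partial_range visits exactly the models past base that are being freed.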
Example #15
void load_robot_replacements(const d_fname &level_name)
{
	int t,i,j;
	char ifile_name[FILENAME_LEN];

	change_filename_extension(ifile_name, level_name, ".HXM" );

	auto fp = PHYSFSX_openReadBuffered(ifile_name);
	if (!fp)		//no robot replacement file
		return;

	t = PHYSFSX_readInt(fp);			//read id "HXM!"
	if (t!= 0x21584d48)
		Error("ID of HXM! file incorrect");

	t = PHYSFSX_readInt(fp);			//read version
	if (t<1)
		Error("HXM! version too old (%d)",t);

	t = PHYSFSX_readInt(fp);			//read number of robots
	for (j=0;j<t;j++) {
		i = PHYSFSX_readInt(fp);		//read robot number
		if (i<0 || i>=N_robot_types)
			Error("Robots number (%d) out of range in (%s).  Range = [0..%d].",i,static_cast<const char *>(level_name),N_robot_types-1);
		robot_info_read(fp, Robot_info[i]);
	}

	t = PHYSFSX_readInt(fp);			//read number of joints
	for (j=0;j<t;j++) {
		i = PHYSFSX_readInt(fp);		//read joint number
		if (i<0 || i>=N_robot_joints)
			Error("Robots joint (%d) out of range in (%s).  Range = [0..%d].",i,static_cast<const char *>(level_name),N_robot_joints-1);
		jointpos_read(fp, Robot_joints[i]);
	}

	t = PHYSFSX_readInt(fp);			//read number of polygon models
	for (j=0;j<t;j++)
	{
		i = PHYSFSX_readInt(fp);		//read model number
		if (i<0 || i>=N_polygon_models)
			Error("Polygon model (%d) out of range in (%s).  Range = [0..%d].",i,static_cast<const char *>(level_name),N_polygon_models-1);

		free_model(Polygon_models[i]);
		polymodel_read(&Polygon_models[i], fp);
		polygon_model_data_read(&Polygon_models[i], fp);

		Dying_modelnums[i] = PHYSFSX_readInt(fp);
		Dead_modelnums[i] = PHYSFSX_readInt(fp);
	}

	t = PHYSFSX_readInt(fp);			//read number of objbitmaps
	for (j=0;j<t;j++) {
		i = PHYSFSX_readInt(fp);		//read objbitmap number
		if (i < 0 || i >= ObjBitmaps.size())
			Error("Object bitmap number (%d) out of range in (%s).  Range = [0..%" DXX_PRI_size_type "].", i, static_cast<const char *>(level_name), ObjBitmaps.size() - 1);
		bitmap_index_read(fp, ObjBitmaps[i]);
	}

	t = PHYSFSX_readInt(fp);			//read number of objbitmapptrs
	for (j=0;j<t;j++) {
		i = PHYSFSX_readInt(fp);		//read objbitmapptr number
		if (i < 0 || i >= ObjBitmapPtrs.size())
			Error("Object bitmap pointer (%d) out of range in (%s).  Range = [0..%" DXX_PRI_size_type "].", i, static_cast<const char *>(level_name), ObjBitmapPtrs.size() - 1);
		ObjBitmapPtrs[i] = PHYSFSX_readShort(fp);
	}
	Robot_replacements_loaded = 1;
}
Example #16
void cutting_plane_algorithm_dual(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex, 
															STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, int *valid_examples) {
  long i,j;
  double *alpha;
  DOC **dXc; // constraint matrix 
  double *delta; // rhs of constraints 
  SVECTOR *new_constraint;
  int iter, size_active; 
  double value;
	double threshold = 0.0;
  double margin;
  double primal_obj, cur_obj;
	double *cur_slack = NULL;
	int mv_iter;
	int *idle = NULL;
	double **G = NULL;
	double **G2 = NULL;
	double **qmatrix = NULL;
	SVECTOR *f;
	int r;

  // set parameters for hideo solver 
  LEARN_PARM lparm;
  KERNEL_PARM kparm;
  MODEL *svm_model=NULL;
  lparm.biased_hyperplane = 0;
  lparm.epsilon_crit = MIN(epsilon,0.001);
  lparm.svm_c = C;
  lparm.sharedslack = 1;
  kparm.kernel_type = LINEAR;

  lparm.remove_inconsistent=0;
  lparm.skip_final_opt_check=0;
  lparm.svm_maxqpsize=10;
  lparm.svm_newvarsinqp=0;
  lparm.svm_iter_to_shrink=-9999;
  lparm.maxiter=100000;
  lparm.kernel_cache_size=40;
  lparm.eps = epsilon; 
  lparm.transduction_posratio=-1.0;
  lparm.svm_costratio=1.0;
  lparm.svm_costratio_unlab=1.0;
  lparm.svm_unlabbound=1E-5;
  lparm.epsilon_a=1E-10;  // changed from 1e-15 
  lparm.compute_loo=0;
  lparm.rho=1.0;
  lparm.xa_depth=0;
  strcpy(lparm.alphafile,"");
  kparm.poly_degree=3;
  kparm.rbf_gamma=1.0;
  kparm.coef_lin=1;
  kparm.coef_const=1;
  strcpy(kparm.custom,"empty");
 
  iter = 0;
  size_active = 0;
  alpha = NULL;
  dXc = NULL;
  delta = NULL;

  //qmatrix = (double **) malloc(sizeof(double *)*10);
  //assert(qmatrix!=NULL);

  printf("Running structural SVM solver: "); fflush(stdout); 
	new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
 	value = margin - sprod_ns(w, new_constraint);
	while((value>threshold+epsilon)&&(iter<MAX_ITER)) {
		iter+=1;
		size_active+=1;

		printf("."); fflush(stdout); 


	    // add  constraint 
	  	dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active);
	   	assert(dXc!=NULL);
	   	dXc[size_active-1] = (DOC*)malloc(sizeof(DOC));
	   	dXc[size_active-1]->fvec = new_constraint; 
	   	dXc[size_active-1]->slackid = 1; // only one common slackid (one-slack)
	   	dXc[size_active-1]->costfactor = 1.0;


	   	delta = (double*)realloc(delta, sizeof(double)*size_active);
	   	assert(delta!=NULL);
	   	delta[size_active-1] = margin;

	   	//alpha = (double*)malloc(sizeof(double)*(size_active+(sparm->phi1_size+sparm->phi2_size)));
	   	//assert(alpha!=NULL);
   		//for(j=0; j<(sparm->phi1_size+sparm->phi2_size)+size_active; j++){
   		//	alpha[j] = 0.0;
   		//}
   		alpha = (double*)realloc(alpha, sizeof(double)*(size_active+(sparm->phi1_size+sparm->phi2_size)));
	   	assert(alpha!=NULL);
	   	alpha[size_active-1] = 0.0;

		idle = (int *) realloc(idle, sizeof(int)*size_active);
		assert(idle!=NULL);
		idle[size_active-1] = 0;

		
		qmatrix = (double **) realloc(qmatrix, sizeof(double *)*size_active);
  		assert(qmatrix!=NULL);

		qmatrix[size_active-1] = malloc(sizeof(double)*(sparm->phi1_size+sparm->phi2_size));
		for(j = 0; j < (sparm->phi1_size+sparm->phi2_size); j++){
			qmatrix[size_active-1][j] = (-1)*returnWeightAtIndex(dXc[size_active-1]->fvec->words, ((sparm->phi1_size+sparm->phi2_size)*2+j+1));
		}

		// update Gram matrix 
		G = (double **) realloc(G, sizeof(double *)*size_active);
		assert(G!=NULL);
		G[size_active-1] = NULL;
		for(j = 0; j < size_active; j++) {
			G[j] = (double *) realloc(G[j], sizeof(double)*size_active);
			assert(G[j]!=NULL);
		}

		for(j = 0; j < size_active-1; j++) {
			G[size_active-1][j] = sprod_ss(dXc[size_active-1]->fvec, dXc[j]->fvec);
			G[size_active-1][j] = G[size_active-1][j]/2;
			G[j][size_active-1]  = G[size_active-1][j];
		}
		G[size_active-1][size_active-1] = sprod_ss(dXc[size_active-1]->fvec,dXc[size_active-1]->fvec);

		// hack: add a constant to the diagonal to make sure G is PSD 
		G[size_active-1][size_active-1] += 1e-6;

	   	// solve QP to update alpha 
		//r = mosek_qp_optimize(G, delta, alpha, (long) size_active, C, &cur_obj, dXc, (sparm->phi1_size+sparm->phi2_size)*2, (sparm->phi1_size+sparm->phi2_size));
		r = mosek_qp_optimize_dual(G, qmatrix, delta, alpha, (long) size_active, (long) (sparm->phi1_size+sparm->phi2_size), C, &cur_obj, 0, 0);
	    
		if(r >= 1293 && r <= 1296)
		{
			printf("r:%d. G might not be psd due to numerical errors.\n",r);
			fflush(stdout);
			//exit(1);
			while(r==1295) {
				printf("r:%d. G might not be psd due to numerical errors. Gram Reg=%0.7f\n",r, sparm->gram_regularization);
				fflush(stdout);
				for(i=0;i<size_active;i++) {
					G[i][i] += 10*sparm->gram_regularization-sparm->gram_regularization;
				}
				sparm->gram_regularization *= 10;
				r = mosek_qp_optimize_dual(G, qmatrix, delta, alpha, (long) size_active, (long) (sparm->phi1_size+sparm->phi2_size), C, &cur_obj, sparm->gram_regularization, sparm->gram_regularization*0.1);
			}
		}
		else if(r)
		{
			printf("Error %d in mosek_qp_optimize: Check ${MOSEKHOME}/${VERSION}/tools/platform/${PLATFORM}/h/mosek.h\n",r);
			exit(1);
		}

	   	clear_nvector(w,sm->sizePsi);
	   	for (j=0;j<size_active;j++) {
	     	if (alpha[j]>C*ALPHA_THRESHOLD) {
					add_vector_ns(w,dXc[j]->fvec,alpha[j]);
					idle[j] = 0;
	     	}
			else
				idle[j]++;
	   	}
	   	for(j=0; j<(sparm->phi1_size+sparm->phi2_size);j++){
	   		if (alpha[size_active+j] > EQUALITY_EPSILON){
	   			w[j+1+(sparm->phi1_size+sparm->phi2_size)*2] = w[j+1+(sparm->phi1_size+sparm->phi2_size)*2] - alpha[size_active+j];
	   		}	   		
	   	}

	   	for(j=1; j<=(sparm->phi1_size+sparm->phi2_size)*3;j++){
	   		if((w[j]<EQUALITY_EPSILON) && (w[j]>(-1*EQUALITY_EPSILON))){
	   			w[j] = 0;
	   		}
	   	}	   

	   	for(j=(sparm->phi1_size+sparm->phi2_size)*2+1; j<=(sparm->phi1_size+sparm->phi2_size)*3;j++){
	   		//assert(w[j] <= 0);
	   		if(w[j]>0){
	   			printf("j = %ld, w[j] = %0.6f\n", j, w[j]);
	   			fflush(stdout);
	   		}
	   		
	   	}	

		cur_slack = (double *) realloc(cur_slack,sizeof(double)*size_active);

		for(i = 0; i < size_active; i++) {
			cur_slack[i] = 0.0;
			for(f = dXc[i]->fvec; f; f = f->next) {
				j = 0;
				while(f->words[j].wnum) {
					cur_slack[i] += w[f->words[j].wnum]*f->words[j].weight;
					j++;
				}
			}
			if(cur_slack[i] >= delta[i])
				cur_slack[i] = 0.0;
			else
				cur_slack[i] = delta[i]-cur_slack[i];
		}

		mv_iter = 0;
		if(size_active > 1) {
			for(j = 0; j < size_active; j++) {
				if(cur_slack[j] >= cur_slack[mv_iter])
					mv_iter = j;
			}
		}

		if(size_active > 1)
			threshold = cur_slack[mv_iter];
		else
			threshold = 0.0;

 		new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
   		value = margin - sprod_ns(w, new_constraint);

		if((iter % CLEANUP_CHECK) == 0)
		{
			printf("+"); fflush(stdout);
			size_active = resize_cleanup(size_active, &idle, &alpha, &delta, &dXc, &G, &mv_iter);
		}

		free(alpha);
		alpha=NULL;

 	} // end cutting plane while loop 

	//primal_obj = current_obj_val(ex, fycache, m, sm, sparm, C, valid_examples);

  printf(" Inner loop optimization finished.\n"); fflush(stdout); 
      
  // free memory
  for (j=0;j<size_active;j++) {
		free(G[j]);
    free_example(dXc[j],1);	
  }
	free(G);
  free(dXc);
  free(alpha);
  free(delta);
  free_svector(new_constraint);
	free(cur_slack);
	free(idle);
  if (svm_model!=NULL) free_model(svm_model,0);

  //return(primal_obj);
  return;
}
Example #17
int _svm_learn (int argc, char* argv[])
{  
  char docfile[200];           /* file with training examples */
  char modelfile[200];         /* file for resulting classifier */
  char restartfile[200];       /* file with initial alphas */
  DOC **docs;  /* training examples */
  long totwords,totdoc,i;
  double *target;
  double *alpha_in=NULL;
  KERNEL_CACHE *kernel_cache;
  LEARN_PARM learn_parm;
  KERNEL_PARM kernel_parm;
  MODEL *model=(MODEL *)my_malloc(sizeof(MODEL));

  HIDEO_ENV *hideo_env=create_env();

  model->td_pred=NULL;
  model->n_td_pred=0;

  _read_input_parameters(argc,argv,docfile,modelfile,restartfile,&verbosity,
			&learn_parm,&kernel_parm);
  read_documents(docfile,&docs,&target,&totwords,&totdoc);
  if(restartfile[0]) alpha_in=read_alphas(restartfile,totdoc);

  if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */
    kernel_cache=NULL;
  }
  else {
    /* Always get a new kernel cache. It is not possible to use the
       same cache for two different training runs */
    kernel_cache=kernel_cache_init(totdoc,learn_parm.kernel_cache_size);
  }

  if(learn_parm.type == CLASSIFICATION) {
    svm_learn_classification(docs,target,totdoc,totwords,&learn_parm,
			     &kernel_parm,kernel_cache,model,alpha_in,hideo_env);
  }
  else if(learn_parm.type == REGRESSION) {
    svm_learn_regression(docs,target,totdoc,totwords,&learn_parm,
			 &kernel_parm,&kernel_cache,model,hideo_env);
  }
  else if(learn_parm.type == RANKING) {
    svm_learn_ranking(docs,target,totdoc,totwords,&learn_parm,
		      &kernel_parm,&kernel_cache,model,hideo_env);
  }
  else if(learn_parm.type == OPTIMIZATION) {
    svm_learn_optimization(docs,target,totdoc,totwords,&learn_parm,
			   &kernel_parm,kernel_cache,model,alpha_in,hideo_env);
  }

  if(kernel_cache) {
    /* Free the memory used for the cache. */
    kernel_cache_cleanup(kernel_cache);
  }

  /* Warning: The model contains references to the original data 'docs'.
     If you want to free the original data, and only keep the model, you 
     have to make a deep copy of 'model'. */
  /* deep_copy_of_model=copy_model(model); */
  write_model(modelfile,model);

  free(alpha_in);
  free_model(model,0);
  for(i=0;i<totdoc;i++) 
    free_example(docs[i],1);
  free(docs);
  free(target);
  free_env(hideo_env);

  return(0);
}
Example #18
int main_classify (int argc, char* argv[])
{
  DOC *doc;   /* test example */
  WORDSVM *words;
  long max_docs,max_words_doc,lld;
  long totdoc=0,queryid,slackid;
  long correct=0,incorrect=0,no_accuracy=0;
  long res_a=0,res_b=0,res_c=0,res_d=0,wnum,pred_format;
  long j;
  double t1,runtime=0;
  double dist,doc_label,costfactor;
  char *line,*comment; 
  FILE *predfl,*docfl;
  MODEL *model; 

  read_input_parameters(argc,argv,docfile,modelfile,predictionsfile,
			&verbosity,&pred_format);

  nol_ll(docfile,&max_docs,&max_words_doc,&lld); /* scan size of input file */
  max_words_doc+=2;
  lld+=2;

  line = (char *)my_malloc(sizeof(char)*lld);
  words = (WORDSVM *)my_malloc(sizeof(WORDSVM)*(max_words_doc+10));

  model=read_model(modelfile);

  if(model->kernel_parm.kernel_type == 0) { /* linear kernel */
    /* compute weight vector */
    add_weight_vector_to_linear_model(model);
  }
  
  if(verbosity>=2) {
    printf("Classifying test examples.."); fflush(stdout);
  }

  if ((docfl = fopen (docfile, "r")) == NULL)
  { perror (docfile); exit (1); }
  if ((predfl = fopen (predictionsfile, "w")) == NULL)
  { perror (predictionsfile); exit (1); }

  while((!feof(docfl)) && fgets(line,(int)lld,docfl)) {
    if(line[0] == '#') continue;  /* line contains comments */
    parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,&wnum,
		   max_words_doc,&comment);
    totdoc++;
    if(model->kernel_parm.kernel_type == 0) {   /* linear kernel */
      for(j=0;(words[j]).wnum != 0;j++) {  /* Check if feature numbers   */
	if((words[j]).wnum>model->totwords) /* are not larger than in     */
	  (words[j]).wnum=0;               /* model. Remove feature if   */
      }                                        /* necessary.                 */
      doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
      t1=get_runtime();
      dist=classify_example_linear(model,doc);
      runtime+=(get_runtime()-t1);
      free_example(doc,1);
    }
    else {                             /* non-linear kernel */
      doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
      t1=get_runtime();
      dist=classify_example(model,doc);
      runtime+=(get_runtime()-t1);
      free_example(doc,1);
    }
    if(dist>0) {
      if(pred_format==0) { /* old weird output format */
	fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
      }
      if(doc_label>0) correct++; else incorrect++;
      if(doc_label>0) res_a++; else res_b++;
    }
    else {
      if(pred_format==0) { /* old weird output format */
	fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
      }
      if(doc_label<0) correct++; else incorrect++;
      if(doc_label>0) res_c++; else res_d++;
    }
    if(pred_format==1) { /* output the value of decision function */
      fprintf(predfl,"%.8g\n",dist);
    }
    if((int)(0.01+(doc_label*doc_label)) != 1) 
      { no_accuracy=1; } /* test data is not binary labeled */
    if(verbosity>=2) {
      if(totdoc % 100 == 0) {
	printf("%ld..",totdoc); fflush(stdout);
      }
    }
  }  
  free(line);
  free(words);
  free_model(model,1);

  if(verbosity>=2) {
    printf("done\n");

/*   Note by Gary Boone                     Date: 29 April 2000        */
/*      o Timing is inaccurate. The timer has 0.01 second resolution.  */
/*        Because classification of a single vector takes less than    */
/*        0.01 secs, the timer was underflowing.                       */
    printf("Runtime (without IO) in cpu-seconds: %.2f\n",
	   (float)(runtime/100.0));
    
  }
  if((!no_accuracy) && (verbosity>=1)) {
    printf("Accuracy on test set: %.2f%% (%ld correct, %ld incorrect, %ld total)\n",(float)(correct)*100.0/totdoc,correct,incorrect,totdoc);
    printf("Precision/recall on test set: %.2f%%/%.2f%%\n",(float)(res_a)*100.0/(res_a+res_b),(float)(res_a)*100.0/(res_a+res_c));
  }

  return(0);
}
Example #19
double cutting_plane_algorithm(double *w, long m, int MAX_ITER, double C, double epsilon, SVECTOR **fycache, EXAMPLE *ex, 
															STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, int *valid_examples) {
  long i,j,t;
  double *alpha;
  DOC **dXc; /* constraint matrix */
  double *delta; /* rhs of constraints */
  SVECTOR *new_constraint;
  int iter, size_active; 
  double value;
	double threshold = 0.0;
  double margin;
  double primal_obj, cur_obj;
	double *cur_slack = NULL;
	int mv_iter;
	int *idle = NULL;
	double **psiDiffs = NULL;
	SVECTOR *f;
	int r;
	long fnum, last_wnum;

  /* set parameters for hideo solver */
  LEARN_PARM lparm;
  KERNEL_PARM kparm;
  MODEL *svm_model=NULL;
  lparm.biased_hyperplane = 0;
  lparm.epsilon_crit = MIN(epsilon,0.001);
  lparm.svm_c = C;
  lparm.sharedslack = 1;
  kparm.kernel_type = LINEAR;

  lparm.remove_inconsistent=0;
  lparm.skip_final_opt_check=0;
  lparm.svm_maxqpsize=10;
  lparm.svm_newvarsinqp=0;
  lparm.svm_iter_to_shrink=-9999;
  lparm.maxiter=100000;
  lparm.kernel_cache_size=40;
  lparm.eps = epsilon; 
  lparm.transduction_posratio=-1.0;
  lparm.svm_costratio=1.0;
  lparm.svm_costratio_unlab=1.0;
  lparm.svm_unlabbound=1E-5;
  lparm.epsilon_a=1E-10;  /* changed from 1e-15 */
  lparm.compute_loo=0;
  lparm.rho=1.0;
  lparm.xa_depth=0;
  strcpy(lparm.alphafile,"");
  kparm.poly_degree=3;
  kparm.rbf_gamma=1.0;
  kparm.coef_lin=1;
  kparm.coef_const=1;
  strcpy(kparm.custom,"empty");
 
  iter = 0;
  size_active = 0;
  alpha = NULL;
  dXc = NULL;
  delta = NULL;

  printf("Running structural SVM solver: "); fflush(stdout); 

	new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
 	value = margin - sprod_ns(w, new_constraint);
	while((value>threshold+epsilon)&&(iter<MAX_ITER)) {
		iter+=1;
		size_active+=1;

		printf("."); fflush(stdout); 


	    /* add  constraint */
	  	dXc = (DOC**)realloc(dXc, sizeof(DOC*)*size_active);
	   	assert(dXc!=NULL);
	   	dXc[size_active-1] = (DOC*)malloc(sizeof(DOC));
	   	dXc[size_active-1]->fvec = new_constraint; 
	   	dXc[size_active-1]->slackid = 1; // only one common slackid (one-slack)
	   	dXc[size_active-1]->costfactor = 1.0;

	   	delta = (double*)realloc(delta, sizeof(double)*size_active);
	   	assert(delta!=NULL);
	   	delta[size_active-1] = margin;

	   	/*alpha = (double*)realloc(alpha, sizeof(double)*size_active);
	   	assert(alpha!=NULL);
	   	alpha[size_active-1] = 0.0;*/

		/*idle = (int *) realloc(idle, sizeof(int)*size_active);
		assert(idle!=NULL);
		idle[size_active-1] = 0;*/

		/* update Gram matrix */
		psiDiffs = (double **) realloc(psiDiffs, sizeof(double *)*size_active);
		assert(psiDiffs!=NULL);
		psiDiffs[size_active-1] = NULL;
		psiDiffs[size_active-1] = (double *) realloc(psiDiffs[size_active-1], sizeof(double)*((sparm->phi1_size+sparm->phi2_size)*3));
		assert(psiDiffs[size_active-1]!=NULL);
		
		fnum = 0;
		last_wnum = 0;
		while(dXc[size_active-1]->fvec->words[fnum].wnum) {
			for (t = last_wnum+1; t < dXc[size_active-1]->fvec->words[fnum].wnum; t++)	{
				psiDiffs[size_active-1][t-1] = 0;
			}
			psiDiffs[size_active-1][dXc[size_active-1]->fvec->words[fnum].wnum-1] = dXc[size_active-1]->fvec->words[fnum].weight;
			/*if((psiDiffs[size_active-1][dXc[size_active-1]->fvec->words[fnum].wnum-1]<EQUALITY_EPSILON) && (psiDiffs[size_active-1][dXc[size_active-1]->fvec->words[fnum].wnum-1]>(-1*EQUALITY_EPSILON))){
				psiDiffs[size_active-1][dXc[size_active-1]->fvec->words[fnum].wnum-1] = 0;
			}*/
			last_wnum = dXc[size_active-1]->fvec->words[fnum].wnum;
			fnum++;
		}
		for (t = (last_wnum+1); t <= (sparm->phi1_size+sparm->phi2_size)*3; t++)	{
			psiDiffs[size_active-1][t-1] = 0;
		}			

   		/* solve QP to update w */
   		clear_nvector(w,sm->sizePsi);
   		//cur_slack = (double *) realloc(cur_slack,sizeof(double)*size_active);
   		cur_slack = (double *) realloc(cur_slack,sizeof(double));

		r = mosek_qp_optimize(psiDiffs, delta, w, cur_slack, (long) size_active, C, &cur_obj, (sparm->phi1_size+sparm->phi2_size)*3, (sparm->phi1_size+sparm->phi2_size)*2);

		if(r >= 1293 && r <= 1296)
		{
			printf("r:%d. G might not be psd due to numerical errors.\n",r);
			exit(1);
		}
		else if(r)
		{
			printf("Error %d in mosek_qp_optimize: Check ${MOSEKHOME}/${VERSION}/tools/platform/${PLATFORM}/h/mosek.h\n",r);
			exit(1);
		}

		for(j = 1; j <= (sparm->phi1_size+sparm->phi2_size)*3; j++) {
			if((w[j]<EQUALITY_EPSILON) && (w[j]>(-1*EQUALITY_EPSILON))){
	   			w[j] = 0;
   			}
		}

		/*for (j=0;j<size_active;j++) {
	     	if (cur_slack[j]>ALPHA_THRESHOLD) {
					idle[j] = 0;
	     	}
				else
					idle[j]++;
   		}*/

		/*mv_iter = 0;
		if(size_active > 1) {
			for(j = 0; j < size_active; j++) {
				if(cur_slack[j] >= cur_slack[mv_iter])
					mv_iter = j;
			}
		}*/

		if(size_active > 1)
			//threshold = cur_slack[mv_iter];
			threshold = cur_slack[0];
		else
			threshold = 0.0;

 		new_constraint = find_cutting_plane(ex, fycache, &margin, m, sm, sparm, valid_examples);
   		value = margin - sprod_ns(w, new_constraint);

		/*if((iter % CLEANUP_CHECK) == 0)
		{
			printf("+"); fflush(stdout);
			size_active = resize_cleanup(size_active, &idle, &cur_slack, &delta, &dXc, &psiDiffs, &mv_iter);
		}*/

 	} // end cutting plane while loop 

	primal_obj = current_obj_val(ex, fycache, m, sm, sparm, C, valid_examples);

  printf(" Inner loop optimization finished.\n"); fflush(stdout); 
      
  /* free memory */
  for (j=0;j<size_active;j++) {
		free(psiDiffs[j]);
    free_example(dXc[j],1);	
  }
	free(psiDiffs);
  free(dXc);
  //free(alpha);
  free(delta);
  free_svector(new_constraint);
	free(cur_slack);
	//free(idle);
  if (svm_model!=NULL) free_model(svm_model,0);

  return(primal_obj);
}
Example #20
int SVMLightRunner::librarySVMClassifyMain(
    int argc, char **argv, bool use_gmumr, SVMConfiguration &config
) {
    LOG(
        config.log,
        LogLevel::DEBUG_LEVEL,
        __debug_prefix__ + ".librarySVMClassifyMain() Started."
    );
    DOC *doc;   /* test example */
    WORD *words;
    long max_docs,max_words_doc,lld;
    long totdoc=0,queryid,slackid;
    long correct=0,incorrect=0,no_accuracy=0;
    long res_a=0,res_b=0,res_c=0,res_d=0,wnum,pred_format;
    long j;
    double t1,runtime=0;
    double dist,doc_label,costfactor;
    char *line,*comment; 
    FILE *predfl,*docfl;
    MODEL *model; 

    // GMUM.R changes {
    librarySVMClassifyReadInputParameters(
        argc, argv, docfile, modelfile, predictionsfile, &verbosity,
        &pred_format, use_gmumr, config);

    if (!use_gmumr) {
        nol_ll(docfile,&max_docs,&max_words_doc,&lld); /* scan size of input file */
        lld+=2;

        line = (char *)my_malloc(sizeof(char)*lld);
    } else {
        max_docs = config.target.n_rows;
        max_words_doc = config.getDataDim();
        config.result = arma::zeros<arma::vec>(max_docs);
        // Prevent writing to the file
        pred_format = -1;
        // lld used only for file reading
    }
    max_words_doc+=2;
    words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10));
    // GMUM.R changes }

    model=libraryReadModel(modelfile, use_gmumr, config);
    // GMUM.R changes }

    if(model->kernel_parm.kernel_type == 0) { /* linear kernel */
      /* compute weight vector */
      add_weight_vector_to_linear_model(model);
    }
    
    if(verbosity>=2) {
      C_PRINTF("Classifying test examples.."); C_FFLUSH(stdout);
    }

    // GMUM.R changes {
    bool newline;
    if (!use_gmumr) {
        if ((predfl = fopen (predictionsfile, "w")) == NULL)
        { perror (predictionsfile); EXIT (1); }
        if ((docfl = fopen (docfile, "r")) == NULL)
        { perror (docfile); EXIT (1); }

        newline = (!feof(docfl)) && fgets(line,(int)lld,docfl);
    } else {
        newline = false;
        if (totdoc < config.getDataExamplesNumber()) {
            newline = true;
            std::string str = SVMConfigurationToSVMLightLearnInputLine(config, totdoc);
            line = new char[str.size() + 1];
            std::copy(str.begin(), str.end(), line);
            line[str.size()] = '\0';
        }
    }
    while(newline) {
      if (use_gmumr) {
            std::string stringline = "";
      }
      // GMUM.R changes }
      if(line[0] == '#') continue;  /* line contains comments */
      parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,&wnum,
             max_words_doc,&comment);
      totdoc++;
      if(model->kernel_parm.kernel_type == 0) {   /* linear kernel */
        for(j=0;(words[j]).wnum != 0;j++) {  /* Check if feature numbers   */
      if((words[j]).wnum>model->totwords) /* are not larger than in     */
        (words[j]).wnum=0;               /* model. Remove feature if   */
        }                                        /* necessary.                 */
        doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
        t1=get_runtime();
        dist=classify_example_linear(model,doc);
        runtime+=(get_runtime()-t1);
        free_example(doc,1);
      }
      else {                             /* non-linear kernel */
        doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
        t1=get_runtime();
        dist=classify_example(model,doc);
        runtime+=(get_runtime()-t1);
        free_example(doc,1);
      }
      if(dist>0) {
        if(pred_format==0) { /* old weird output format */
      C_FPRINTF(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
        }
        if(doc_label>0) correct++; else incorrect++;
        if(doc_label>0) res_a++; else res_b++;
      }
      else {
        if(pred_format==0) { /* old weird output format */
      C_FPRINTF(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
        }
        if(doc_label<0) correct++; else incorrect++;
        if(doc_label>0) res_c++; else res_d++;
      }
      if(pred_format==1) { /* output the value of decision function */
        C_FPRINTF(predfl,"%.8g\n",dist);
      }
      if((int)(0.01+(doc_label*doc_label)) != 1)
        { no_accuracy=1; } /* test data is not binary labeled */
      if(verbosity>=2) {
        if(totdoc % 100 == 0) {
      C_PRINTF("%ld..",totdoc); C_FFLUSH(stdout);
        }
      }
      // GMUM.R changes {
      if (!use_gmumr) {
          newline = (!feof(docfl)) && fgets(line,(int)lld,docfl);
      } else {
          newline = false;
          // Store prediction result in config
          config.result[totdoc-1] = dist;
          // Read next line
          if (totdoc < config.getDataExamplesNumber()) {
              newline = true;
              std::string str = SVMConfigurationToSVMLightLearnInputLine(config, totdoc);
              line = new char[str.size() + 1];
              std::copy(str.begin(), str.end(), line);
              line[str.size()] = '\0';
          }
      }
    }
    if (!use_gmumr) {
        fclose(predfl);
        fclose(docfl);
        free(line);
    }
    // GMUM.R changes }
    free(words);
    free_model(model,1);

    if(verbosity>=2) {
      C_PRINTF("done\n");

  /*   Note by Gary Boone                     Date: 29 April 2000        */
  /*      o Timing is inaccurate. The timer has 0.01 second resolution.  */
  /*        Because classification of a single vector takes less than    */
  /*        0.01 secs, the timer was underflowing.                       */
      C_PRINTF("Runtime (without IO) in cpu-seconds: %.2f\n",
         (float)(runtime/100.0));

    }
    if((!no_accuracy) && (verbosity>=1)) {
      C_PRINTF("Accuracy on test set: %.2f%% (%ld correct, %ld incorrect, %ld total)\n",(float)(correct)*100.0/totdoc,correct,incorrect,totdoc);
      C_PRINTF("Precision/recall on test set: %.2f%%/%.2f%%\n",(float)(res_a)*100.0/(res_a+res_b),(float)(res_a)*100.0/(res_a+res_c));
    }

    return(0);
}
Example #21
MedSTC::~MedSTC(void)
{
	free_model();
}
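Unlike the free function free_model(MODEL *, int) of the SVM-light examples, free_model() here is a MedSTC member function: the destructor releases the object's own buffers, so callers need no explicit cleanup call.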
Example #22
/*************************************************************************
 * Entry point for pmp_bf
 *************************************************************************/
int main(int argc, char *argv[]) {

  char* bg_filename = NULL;
  char* motif_name = "motif"; // Use this motif name in the output.
  STRING_LIST_T* selected_motifs = NULL;
  double fg_rate = 1.0;
  double bg_rate = 1.0;
  double purine_pyrimidine = 1.0; // r
  double transition_transversion = 0.5; // R
  double pseudocount = 0.1;
  GAP_SUPPORT_T gap_support = SKIP_GAPS;
  MODEL_TYPE_T model_type = F81_MODEL;
  BOOLEAN_T use_halpern_bruno = FALSE;
  char* ustar_label = NULL;	// TLB; create uniform star tree
  int i;

  program_name = "pmp_bf";

  /**********************************************
   * COMMAND LINE PROCESSING
   **********************************************/

  // Define command line options. (FIXME: Repeated code)
  // FIXME: Note that if you add or remove options you
  // must change n_options.
  int n_options = 12;
  cmdoption const pmp_options[] = {
    {"hb", NO_VALUE},
    {"ustar", REQUIRED_VALUE},
    {"model", REQUIRED_VALUE},
    {"pur-pyr", REQUIRED_VALUE},
    {"transition-transversion", REQUIRED_VALUE},
    {"bg", REQUIRED_VALUE},
    {"fg", REQUIRED_VALUE},
    {"motif", REQUIRED_VALUE},
    {"motif-name", REQUIRED_VALUE},
    {"bgfile", REQUIRED_VALUE},
    {"pseudocount", REQUIRED_VALUE},
    {"verbosity", REQUIRED_VALUE}
  };

  int option_index = 0;

  // Define the usage message.
  char      usage[1000] = "";
  strcat(usage, "USAGE: pmp [options] <tree file> <MEME file>\n");
  strcat(usage, "\n");
  strcat(usage, "   Options:\n");

  // Evolutionary model parameters.
  strcat(usage, "     --hb\n");
  strcat(usage, "     --model single|average|jc|k2|f81|f84|hky|tn");
  strcat(usage, " (default=f81)\n");
  strcat(usage, "     --pur-pyr <float> (default=1.0)\n");
  strcat(usage, "     --transition-transversion <float> (default=0.5)\n");
  strcat(usage, "     --bg <float> (default=1.0)\n");
  strcat(usage, "     --fg <float> (default=1.0)\n");

  // Motif parameters.
  strcat(usage, "     --motif <id> (default=all)\n");
  strcat(usage, "     --motif-name <string> (default from motif file)\n");

  // Miscellaneous parameters
  strcat(usage, "     --bgfile <background> (default from motif file)\n");
  strcat(usage, "     --pseudocount <float> (default=0.1)\n");
  strcat(usage, "     --ustar <label>\n");	// TLB; create uniform star tree
  strcat(usage, "     --verbosity [1|2|3|4] (default 2)\n");
  strcat(usage, "\n    Prints the FP and FN rate at each of 10000 score values.\n");
  strcat(usage, "\n    Output format: [<motif_id> score <score> FPR <fpr> TPR <tpr>]+\n");

  // Parse the command line.
  if (simple_setopt(argc, argv, n_options, pmp_options) != NO_ERROR) {
    die("Error processing command line options: option name too long.\n");
  }

  while (TRUE) { 
    int c = 0;
    char* option_name = NULL;
    char* option_value = NULL;
    const char * message = NULL;

    // Read the next option, and break if we're done.
    c = simple_getopt(&option_name, &option_value, &option_index);
    if (c == 0) {
      break;
    } else if (c < 0) {
      (void) simple_getopterror(&message);
      die("Error processing command line options (%s)\n", message);
    }
    
    if (strcmp(option_name, "model") == 0) {
      if (strcmp(option_value, "jc") == 0) {
        model_type = JC_MODEL;
      } else if (strcmp(option_value, "k2") == 0) {
        model_type = K2_MODEL;
      } else if (strcmp(option_value, "f81") == 0) {
        model_type = F81_MODEL;
      } else if (strcmp(option_value, "f84") == 0) {
        model_type = F84_MODEL;
      } else if (strcmp(option_value, "hky") == 0) {
        model_type = HKY_MODEL;
      } else if (strcmp(option_value, "tn") == 0) {
        model_type = TAMURA_NEI_MODEL;
      } else if (strcmp(option_value, "single") == 0) {
        model_type = SINGLE_MODEL;
      } else if (strcmp(option_value, "average") == 0) {
        model_type = AVERAGE_MODEL;
      } else {
        die("Unknown model: %s\n", option_value);
      }
    } else if (strcmp(option_name, "hb") == 0){
        use_halpern_bruno = TRUE;
    } else if (strcmp(option_name, "ustar") == 0){	// TLB; create uniform star tree
        ustar_label = option_value;
    } else if (strcmp(option_name, "pur-pyr") == 0){
        purine_pyrimidine = atof(option_value);
    } else if (strcmp(option_name, "transition-transversion") == 0){
        transition_transversion = atof(option_value);
    } else if (strcmp(option_name, "bg") == 0){
      bg_rate = atof(option_value);
    } else if (strcmp(option_name, "fg") == 0){
      fg_rate = atof(option_value);
    } else if (strcmp(option_name, "motif") == 0){
        if (selected_motifs == NULL) {
          selected_motifs = new_string_list();
        }
       add_string(option_value, selected_motifs);
    } else if (strcmp(option_name, "motif-name") == 0){
        motif_name = option_value;
    } else if (strcmp(option_name, "bgfile") == 0){
      bg_filename = option_value;
    } else if (strcmp(option_name, "pseudocount") == 0){
        pseudocount = atof(option_value);
    } else if (strcmp(option_name, "verbosity") == 0){
        verbosity = atoi(option_value);
    }
  }

  // Must have tree and motif file names
  if (argc != option_index + 2) {
    fprintf(stderr, "%s", usage);
    exit(EXIT_FAILURE);
  } 

  /**********************************************
   * Read the phylogenetic tree.
   **********************************************/
  char* tree_filename = NULL;
  TREE_T* tree = NULL;
  tree_filename = argv[option_index];
  option_index++;
  tree = read_tree_from_file(tree_filename);

  // get the species names
  STRING_LIST_T* alignment_species = make_leaf_list(tree);
  char *root_label = get_label(tree);	// in case the target is at the center
  if (strlen(root_label)>0) add_string(root_label, alignment_species);
  //write_string_list(" ", alignment_species, stderr);

  // TLB; Convert the tree to a uniform star tree with
  // the target sequence at its center.
  if (ustar_label != NULL) {
    tree = convert_to_uniform_star_tree(tree, ustar_label);
    if (tree == NULL) 
      die("Tree or alignment missing target %s\n", ustar_label);
    if (verbosity >= NORMAL_VERBOSE) {
      fprintf(stderr, 
	"Target %s placed at center of uniform (d=%.3f) star tree:\n", 
          ustar_label, get_total_length(tree) / get_num_children(tree) 
      );
      write_tree(tree, stderr);
    }
  }

  /**********************************************
   * Read the motifs.
   **********************************************/
  char* meme_filename = argv[option_index];
  option_index++;
  int num_motifs = 0; 

  MREAD_T *mread;
  ALPH_T alph;
  ARRAYLST_T *motifs;
  ARRAY_T *bg_freqs;

  mread = mread_create(meme_filename, OPEN_MFILE);
  mread_set_bg_source(mread, bg_filename);
  mread_set_pseudocount(mread, pseudocount);
  // read motifs
  motifs = mread_load(mread, NULL);
  alph = mread_get_alphabet(mread);
  bg_freqs = mread_get_background(mread);
  // check
  if (arraylst_size(motifs) == 0) die("No motifs in %s.", meme_filename);
  // TLB; need to resize bg_freqs array to ALPH_SIZE items
  // or copy array breaks in HB mode.  This throws away
  // the freqs for the ambiguous characters;
  int asize = alph_size(alph, ALPH_SIZE);
  resize_array(bg_freqs, asize);

  /**************************************************************
  * Compute probability distributions for each of the selected motifs.
  **************************************************************/
  int motif_index;
  for (motif_index = 0; motif_index < arraylst_size(motifs); motif_index++) {

    MOTIF_T* motif = (MOTIF_T*)arraylst_get(motif_index, motifs);
    char* motif_id = get_motif_id(motif);
    char* bare_motif_id = motif_id;

    // We may have specified on the command line that
    // only certain motifs were to be used.
    if (selected_motifs != NULL) {
      if (*bare_motif_id == '+' || *bare_motif_id == '-') {
        // The selected motif id won't include a strand indicator.
        bare_motif_id++;
      }
      if (have_string(bare_motif_id, selected_motifs) == FALSE) {
        continue;
      }
    }

    if (verbosity >= NORMAL_VERBOSE) {
      fprintf(
        stderr, 
        "Using motif %s of width %d.\n",
        motif_id, get_motif_length(motif)
      );
    }

    // Build an array of evolutionary models for each position in the motif.
    EVOMODEL_T** models = make_motif_models(
      motif, 
      bg_freqs,
      model_type,
      fg_rate, 
      bg_rate, 
      purine_pyrimidine, 
      transition_transversion, 
      use_halpern_bruno
    );

    // Get the frequencies under the background model (row 0) 
    // and position-dependent scores (rows 1..w)
    // for each possible alignment column.
    MATRIX_T* pssm_matrix = build_alignment_pssm_matrix(
      alph,
      alignment_species,
      get_motif_length(motif) + 1, 
      models, 
      tree, 
      gap_support
    );
    ARRAY_T* alignment_col_freqs = allocate_array(get_num_cols(pssm_matrix)); 
    copy_array(get_matrix_row(0, pssm_matrix), alignment_col_freqs);
    remove_matrix_row(0, pssm_matrix);		// throw away first row
    //print_col_frequencies(alph, alignment_col_freqs);

    //
    // Get the position-dependent null model alignment column frequencies
    //
    int w = get_motif_length(motif);
    int ncols = get_num_cols(pssm_matrix); 
    MATRIX_T* pos_dep_bkg = allocate_matrix(w, ncols);
    for (i=0; i<w; i++) {
      // get the evo model corresponding to this column of the motif
      // and store it as the first evolutionary model.
      free_model(models[0]);	// free_model (not myfree) releases the model's internals
      // Use motif PSFM for equilibrium freqs. for model.
      ARRAY_T* site_specific_freqs = allocate_array(asize);
      int j = 0;
      for(j = 0; j < asize; j++) {
	double value = get_matrix_cell(i, j, get_motif_freqs(motif));
	set_array_item(j, value, site_specific_freqs);
      }
      if (use_halpern_bruno == FALSE) {
	models[0] = make_model(
	  model_type,
	  fg_rate,
	  transition_transversion,
	  purine_pyrimidine,
	  site_specific_freqs,
          NULL
	);
      } else {
        models[0] = make_model(
	  model_type,
	  fg_rate,
	  transition_transversion,
	  purine_pyrimidine,
	  bg_freqs,
	  site_specific_freqs
	);
      }
      // get the alignment column frequencies using this model
      MATRIX_T* tmp_pssm_matrix = build_alignment_pssm_matrix(
        alph,
	alignment_species,
	2,				// only interested in freqs under bkg
	models, 
	tree, 
	gap_support
      );
      // assemble the position-dependent background alignment column freqs.
      set_matrix_row(i, get_matrix_row(0, tmp_pssm_matrix), pos_dep_bkg);
      // chuck the pssm (not his real name)
      free_matrix(tmp_pssm_matrix);
    }

    //
    // Compute and print the score distribution under the background model
    // and under the (position-dependent) motif model.
    //
    int range = 10000;	// 10^4 gives same result as 10^5, but 10^3 differs

    // under background model
    PSSM_T* pssm = build_matrix_pssm(alph, pssm_matrix, alignment_col_freqs, range);

    // under position-dependent background (motif) model
    PSSM_T* pssm_pos_dep = build_matrix_pssm(alph, pssm_matrix, alignment_col_freqs, range);
    get_pv_lookup_pos_dep(
      pssm_pos_dep, 
      pos_dep_bkg, 
      NULL // no priors used
    );

    // print FP and FN distributions
    int num_items = get_pssm_pv_length(pssm_pos_dep);
    for (i=0; i<num_items; i++) {
      double pvf = get_pssm_pv(i, pssm);
      double pvt = get_pssm_pv(i, pssm_pos_dep);
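      // pvf is the p-value of score i under the background model (the FPR);
      // pvt is the p-value under the position-dependent motif model (the TPR),
      // so 1 - pvt is the FNR.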
      double fpr = pvf;
      double fnr = 1 - pvt;
      if (fpr >= 0.99999 || fnr == 0) continue;
      printf("%s score %d FPR %.3g FNR %.3g\n", motif_id, i, fpr, fnr);
    }

    // free stuff
    free_pssm(pssm);
    free_pssm(pssm_pos_dep);
    if (models != NULL) {
      int model_index;
      int num_models = get_motif_length(motif) + 1;
      for (model_index = 0; model_index < num_models; model_index++) {
        free_model(models[model_index]);
      }
      myfree(models);
    }

  } // motif

  arraylst_destroy(destroy_motif, motifs);

  /**********************************************
   * Clean up.
   **********************************************/
  // TLB may have encountered a memory corruption bug here
  // CEG has not been able to reproduce it. valgrind says all is well.
  free_array(bg_freqs);
  free_tree(TRUE, tree);
  free_string_list(selected_motifs);

  return(0);
} // main
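The option loop above converts numeric arguments with atof()/atoi(), which
silently return 0 on malformed input. A minimal sketch of stricter parsing
with strtod() (the helper name is hypothetical, not part of the tool; it
reuses the die() already used above and needs <errno.h> and <stdlib.h>):

static double parse_double_or_die(const char *name, const char *value)
{
  char *end = NULL;
  errno = 0;
  double v = strtod(value, &end);
  if (errno != 0 || end == value || *end != '\0') {
    die("Bad value for --%s: '%s'\n", name, value);
  }
  return v;
}

/* e.g.: pseudocount = parse_double_or_die("pseudocount", option_value); */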
Example #23
int main(int argc, char** argv) {
  std::string source_file;
  std::string output_file;
  try {
    namespace po=boost::program_options;
    po::options_description desc("Options");
    desc.add_options()
    ("help,h", "Print help messages")
    ("source,s", po::value<std::string>(&source_file)->required(), "Specify an source file")
    ("output,o", po::value<std::string>(&output_file)->default_value(boost::filesystem::current_path().string<std::string>()+"/detector.data"), "Specify an output file");

    po::positional_options_description p;
    p.add("source",-1);

    po::variables_map vm;
    po::store(po::command_line_parser(argc,argv).options(desc).positional(p).run(), vm);

    if (vm.count("help")) {
      std::cout << "Usage: " << argv[0] << " [options] source" << std::endl;
      std::cout << desc;
      return 0;
    }

    po::notify(vm);
  }
  catch(std::exception& e) {
    std::cerr << "Error: " << e.what() << std::endl;
    return 1;
  }
  catch(...) {
    std::cerr << "Exception of unknown type!" << std::endl;
    return 1;
  }

  std::vector<float> single_detector_vector;

  // read_model() takes a non-const char*, so cast instead of copying the
  // filename into a non-standard variable-length array.
  MODEL *model = read_model(const_cast<char*>(source_file.c_str()));

  DOC** supveclist = model->supvec;
  single_detector_vector.clear();
  single_detector_vector.resize(model->totwords, 0.);

  for (long ssv = 1; ssv < model->sv_num; ++ssv) {
    DOC* single_support_vector = supveclist[ssv];
    SVECTOR* single_support_vector_values = single_support_vector->fvec;
    WORD single_support_vector_component;
    for (long singleFeature = 0; singleFeature < model->totwords; ++singleFeature) {
      single_support_vector_component = single_support_vector_values->words[singleFeature];
      single_detector_vector.at(single_support_vector_component.wnum-1) += (single_support_vector_component.weight * model->alpha[ssv]);
    }
  }

  free_model(model,1);

  std::ofstream result_data;
  result_data.open(output_file.c_str(), std::ofstream::out|std::ofstream::app);

  for(std::vector<float>::iterator iter=single_detector_vector.begin(); iter!=single_detector_vector.end(); iter++) {
    result_data << *iter << std::endl;
  }
}
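The inner loop above indexes words[] by position and assumes every support
vector is dense, with exactly totwords entries. SVM-light feature vectors are
stored sparse and zero-terminated (wnum == 0 marks the end; wnum is 1-based,
hence the wnum-1 indexing), as the writer in Example #26 below shows. A
sketch of the same accumulation that is also safe for sparse vectors:

/* w = sum_i alpha_i * x_i over zero-terminated sparse word lists */
double *w = (double *)calloc(model->totwords, sizeof(double));
for (long i = 1; i < model->sv_num; i++) {
  WORD *word;
  for (word = model->supvec[i]->fvec->words; word->wnum; word++)
    w[word->wnum - 1] += word->weight * model->alpha[i];
}

For linear kernels, the library's own add_weight_vector_to_linear_model(),
used in the learning examples below, performs this same accumulation into
model->lin_weights.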
Example #24
void svm_learn_struct_joint(SAMPLE sample, STRUCT_LEARN_PARM *sparm,
			    LEARN_PARM *lparm, KERNEL_PARM *kparm, 
			    STRUCTMODEL *sm, int alg_type)
{
  int         i,j;
  int         numIt=0;
  long        argmax_count=0;
  long        totconstraints=0;
  long        kernel_type_org;
  double      epsilon,epsilon_cached;
  double      lhsXw,rhs_i;
  double      rhs=0;
  double      slack,ceps;
  double      dualitygap,modellength,alphasum;
  long        sizePsi;
  double      *alpha=NULL;
  long        *alphahist=NULL,optcount=0;
  CONSTSET    cset;
  SVECTOR     *diff=NULL;
  double      *lhs_n=NULL;
  SVECTOR     *fy, *fydelta, **fycache, *lhs;
  MODEL       *svmModel=NULL;
  DOC         *doc;

  long        n=sample.n;
  EXAMPLE     *ex=sample.examples;
  double      rt_total=0,rt_opt=0,rt_init=0,rt_psi=0,rt_viol=0,rt_kernel=0;
  double      rt_cacheupdate=0,rt_cacheconst=0,rt_cacheadd=0,rt_cachesum=0;
  double      rt1=0,rt2=0;
  long        progress;

  /*
  SVECTOR     ***fydelta_cache=NULL;
  double      **loss_cache=NULL;
  int         cache_size=0;
  */
  CCACHE      *ccache=NULL;
  int         cached_constraint;
  double      viol,viol_est,epsilon_est=0;
  long        uptr=0;
  long        *randmapping=NULL;
  long        batch_size=n;

  rt1=get_runtime();

  if(sparm->batch_size<100)
    batch_size=sparm->batch_size*n/100.0;

  init_struct_model(sample,sm,sparm,lparm,kparm); 
  sizePsi=sm->sizePsi+1;          /* sm must contain size of psi on return */

  if(sparm->slack_norm == 1) {
    lparm->svm_c=sparm->C;          /* set upper bound C */
    lparm->sharedslack=1;
  }
  else if(sparm->slack_norm == 2) {
    printf("ERROR: The joint algorithm does not apply to L2 slack norm!"); 
    fflush(stdout);
    exit(0); 
  }
  else {
    printf("ERROR: Slack norm must be L1 or L2!"); fflush(stdout);
    exit(0);
  }


  lparm->biased_hyperplane=0;     /* set threshold to zero */
  epsilon=100.0;                  /* start with low precision and
				     increase later */
  epsilon_cached=epsilon;         /* epsilon to use for iterations
				     using constraints constructed
				     from the constraint cache */

  cset=init_struct_constraints(sample, sm, sparm);
  if(cset.m > 0) {
    alpha=(double *)realloc(alpha,sizeof(double)*cset.m);
    alphahist=(long *)realloc(alphahist,sizeof(long)*cset.m);
    for(i=0; i<cset.m; i++) {
      alpha[i]=0;
      alphahist[i]=-1; /* -1 makes sure these constraints are never removed */
    }
  }
  kparm->gram_matrix=NULL;
  if((alg_type == ONESLACK_DUAL_ALG) || (alg_type == ONESLACK_DUAL_CACHE_ALG))
    kparm->gram_matrix=init_kernel_matrix(&cset,kparm);

  /* set initial model and slack variables */
  svmModel=(MODEL *)my_malloc(sizeof(MODEL));
  lparm->epsilon_crit=epsilon;
  svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi,
			 lparm,kparm,NULL,svmModel,alpha);
  add_weight_vector_to_linear_model(svmModel);
  sm->svm_model=svmModel;
  sm->w=svmModel->lin_weights; /* short cut to weight vector */

  /* create a cache of the feature vectors for the correct labels */
  fycache=(SVECTOR **)my_malloc(n*sizeof(SVECTOR *));
  for(i=0;i<n;i++) {
    if(USE_FYCACHE) {
      fy=psi(ex[i].x,ex[i].y,sm,sparm);
      if(kparm->kernel_type == LINEAR_KERNEL) { /* store difference vector directly */
	diff=add_list_sort_ss_r(fy,COMPACT_ROUNDING_THRESH); 
	free_svector(fy);
	fy=diff;
      }
    }
    else
      fy=NULL;
    fycache[i]=fy;
  }

  /* initialize the constraint cache */
  if(alg_type == ONESLACK_DUAL_CACHE_ALG) {
    ccache=create_constraint_cache(sample,sparm,sm);
    /* NOTE: the W4 algorithm requires loss(y_i,y_i)=0 for all i */
    for(i=0;i<n;i++) 
      if(loss(ex[i].y,ex[i].y,sparm) != 0) {
	printf("ERROR: Loss function returns non-zero value loss(y_%d,y_%d)\n",i,i);
	printf("       W4 algorithm assumes that loss(y_i,y_i)=0 for all i.\n");
	exit(1);
      }
  }
  
  if(kparm->kernel_type == LINEAR_KERNEL)
    lhs_n=create_nvector(sm->sizePsi);

  /* randomize order of training examples */
  if(batch_size<n)
    randmapping=random_order(n);

  rt_init+=MAX(get_runtime()-rt1,0);
  rt_total+=rt_init;

    /*****************/
   /*** main loop ***/
  /*****************/
  do { /* iteratively find and add constraints to working set */

      if(struct_verbosity>=1) { 
	printf("Iter %i: ",++numIt); 
	fflush(stdout);
      }
      
      rt1=get_runtime();

      /**** compute current slack ****/
      alphasum=0;
      for(j=0;(j<cset.m);j++) 
	  alphasum+=alpha[j];
      for(j=0,slack=-1;(j<cset.m) && (slack==-1);j++)  
	if(alpha[j] > alphasum/cset.m)
	  slack=MAX(0,cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
      slack=MAX(0,slack);

      rt_total+=MAX(get_runtime()-rt1,0);

      /**** find a violated joint constraint ****/
      lhs=NULL;
      rhs=0;
      if(alg_type == ONESLACK_DUAL_CACHE_ALG) {
	rt1=get_runtime();
	/* Compute violation of constraints in cache for current w */
	if(struct_verbosity>=2) rt2=get_runtime();
	update_constraint_cache_for_model(ccache, svmModel);
	if(struct_verbosity>=2) rt_cacheupdate+=MAX(get_runtime()-rt2,0);
	/* Is there is a sufficiently violated constraint in cache? */
	viol=compute_violation_of_constraint_in_cache(ccache,epsilon_est/2);
	if(viol-slack > MAX(epsilon_est/10,sparm->epsilon)) { 
	  /* There is a sufficiently violated constraint in cache, so
	     use this constraint in this iteration. */
	  if(struct_verbosity>=2) rt2=get_runtime();
	  viol=find_most_violated_joint_constraint_in_cache(ccache,
					       epsilon_est/2,lhs_n,&lhs,&rhs);
	  if(struct_verbosity>=2) rt_cacheconst+=MAX(get_runtime()-rt2,0);
	  cached_constraint=1;
	}
	else {
	  /* There is no sufficiently violated constraint in cache, so
	     update cache by computing most violated constraint
	     explicitly for batch_size examples. */
	  viol_est=0;
	  progress=0;
	  viol=compute_violation_of_constraint_in_cache(ccache,0);
	  for(j=0;(j<batch_size) || ((j<n)&&(viol-slack<sparm->epsilon));j++) {
	    if(struct_verbosity>=1) 
	      print_percent_progress(&progress,n,10,".");
	    uptr=uptr % n;
	    if(randmapping) 
	      i=randmapping[uptr];
	    else
	      i=uptr;
	    /* find most violating fydelta=fy-fybar and rhs for example i */
	    find_most_violated_constraint(&fydelta,&rhs_i,&ex[i],
					  fycache[i],n,sm,sparm,
					  &rt_viol,&rt_psi,&argmax_count);
	    /* add current fy-fybar and loss to cache */
	    if(struct_verbosity>=2) rt2=get_runtime();
	    viol+=add_constraint_to_constraint_cache(ccache,sm->svm_model,
			     i,fydelta,rhs_i,0.0001*sparm->epsilon/n,
			     sparm->ccache_size,&rt_cachesum);
	    if(struct_verbosity>=2) rt_cacheadd+=MAX(get_runtime()-rt2,0);
	    viol_est+=ccache->constlist[i]->viol;
	    uptr++;
	  }
	  cached_constraint=(j<n);
	  if(struct_verbosity>=2) rt2=get_runtime();
	  if(cached_constraint)
	    viol=find_most_violated_joint_constraint_in_cache(ccache,
					       epsilon_est/2,lhs_n,&lhs,&rhs);
	  else
	    viol=find_most_violated_joint_constraint_in_cache(ccache,0,lhs_n,
							 &lhs,&rhs);
	  if(struct_verbosity>=2) rt_cacheconst+=MAX(get_runtime()-rt2,0);
	  viol_est*=((double)n/j);
	  epsilon_est=(1-(double)j/n)*epsilon_est+(double)j/n*(viol_est-slack);
	  if((struct_verbosity >= 1) && (j!=n))
	    printf("(upd=%5.1f%%,eps^=%.4f,eps*=%.4f)",
		   100.0*j/n,viol_est-slack,epsilon_est);
	}
	lhsXw=rhs-viol;

	rt_total+=MAX(get_runtime()-rt1,0);
      }
      else { 
	/* do not use constraint from cache */
	rt1=get_runtime();
	cached_constraint=0;
	if(kparm->kernel_type == LINEAR_KERNEL)
	  clear_nvector(lhs_n,sm->sizePsi);
	progress=0;
	rt_total+=MAX(get_runtime()-rt1,0);

	for(i=0; i<n; i++) {
	  rt1=get_runtime();

	  if(struct_verbosity>=1) 
	    print_percent_progress(&progress,n,10,".");

	  /* compute most violating fydelta=fy-fybar and rhs for example i */
	  find_most_violated_constraint(&fydelta,&rhs_i,&ex[i],fycache[i],n,
				      sm,sparm,&rt_viol,&rt_psi,&argmax_count);
	  /* add current fy-fybar to lhs of constraint */
	  if(kparm->kernel_type == LINEAR_KERNEL) {
	    add_list_n_ns(lhs_n,fydelta,1.0); /* add fy-fybar to sum */
	    free_svector(fydelta);
	  }
	  else {
	    append_svector_list(fydelta,lhs); /* add fy-fybar to vector list */
	    lhs=fydelta;
	  }
	  rhs+=rhs_i;                         /* add loss to rhs */
	  
	  rt_total+=MAX(get_runtime()-rt1,0);

	} /* end of example loop */

	rt1=get_runtime();

	/* create sparse vector from dense sum */
	if(kparm->kernel_type == LINEAR_KERNEL)
	  lhs=create_svector_n_r(lhs_n,sm->sizePsi,NULL,1.0,
				 COMPACT_ROUNDING_THRESH);
	doc=create_example(cset.m,0,1,1,lhs);
	lhsXw=classify_example(svmModel,doc);
	free_example(doc,0);
	viol=rhs-lhsXw;

	rt_total+=MAX(get_runtime()-rt1,0);

      } /* end of finding most violated joint constraint */

      rt1=get_runtime();

      /**** if `error', then add constraint and recompute QP ****/
      if(slack > (rhs-lhsXw+0.000001)) {
	printf("\nWARNING: Slack of most violated constraint is smaller than slack of working\n");
	printf("         set! There is probably a bug in 'find_most_violated_constraint_*'.\n");
	printf("slack=%f, newslack=%f\n",slack,rhs-lhsXw);
	/* exit(1); */
      }
      ceps=MAX(0,rhs-lhsXw-slack);
      if((ceps > sparm->epsilon) || cached_constraint) { 
	/**** resize constraint matrix and add new constraint ****/
	cset.lhs=(DOC **)realloc(cset.lhs,sizeof(DOC *)*(cset.m+1));
	cset.lhs[cset.m]=create_example(cset.m,0,1,1,lhs);
	cset.rhs=(double *)realloc(cset.rhs,sizeof(double)*(cset.m+1));
	cset.rhs[cset.m]=rhs;
	alpha=(double *)realloc(alpha,sizeof(double)*(cset.m+1));
	alpha[cset.m]=0;
	alphahist=(long *)realloc(alphahist,sizeof(long)*(cset.m+1));
	alphahist[cset.m]=optcount;
	cset.m++;
	totconstraints++;
	if((alg_type == ONESLACK_DUAL_ALG) 
	   || (alg_type == ONESLACK_DUAL_CACHE_ALG)) {
	  if(struct_verbosity>=2) rt2=get_runtime();
	  kparm->gram_matrix=update_kernel_matrix(kparm->gram_matrix,cset.m-1,
						  &cset,kparm);
	  if(struct_verbosity>=2) rt_kernel+=MAX(get_runtime()-rt2,0);
	}
	
	/**** get new QP solution ****/
	if(struct_verbosity>=1) {
	  printf("*");fflush(stdout);
	}
	if(struct_verbosity>=2) rt2=get_runtime();
	/* set svm precision so that higher than eps of most violated constr */
	if(cached_constraint) {
	  epsilon_cached=MIN(epsilon_cached,ceps); 
	  lparm->epsilon_crit=epsilon_cached/2; 
	}
	else {
	  epsilon=MIN(epsilon,ceps); /* best eps so far */
	  lparm->epsilon_crit=epsilon/2; 
	  epsilon_cached=epsilon;
	}
	free_model(svmModel,0);
	svmModel=(MODEL *)my_malloc(sizeof(MODEL));
	/* Run the QP solver on cset. */
	kernel_type_org=kparm->kernel_type;
	if((alg_type == ONESLACK_DUAL_ALG) 
	   || (alg_type == ONESLACK_DUAL_CACHE_ALG))
	  kparm->kernel_type=GRAM; /* use kernel stored in kparm */
	svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi,
			       lparm,kparm,NULL,svmModel,alpha);
	kparm->kernel_type=kernel_type_org; 
	svmModel->kernel_parm.kernel_type=kernel_type_org;
	/* Always add weight vector, in case part of the kernel is
	   linear. If not, ignore the weight vector since its
	   content is bogus. */
	add_weight_vector_to_linear_model(svmModel);
	sm->svm_model=svmModel;
	sm->w=svmModel->lin_weights; /* short cut to weight vector */
	optcount++;
	/* keep track of when each constraint was last
	   active. constraints marked with -1 are not updated */
	for(j=0;j<cset.m;j++) 
	  if((alphahist[j]>-1) && (alpha[j] != 0))  
	    alphahist[j]=optcount;
	if(struct_verbosity>=2) rt_opt+=MAX(get_runtime()-rt2,0);
	
	/* Check if some of the linear constraints have not been
	   active in a while. Those constraints are then removed to
	   avoid bloating the working set beyond necessity. */
	if(struct_verbosity>=3) {
	  printf("Reducing working set...");fflush(stdout);
	}
	remove_inactive_constraints(&cset,alpha,optcount,alphahist,50);
	if(struct_verbosity>=3)
	  printf("done. ");
      }
      else {
	free_svector(lhs);
      }

      if(struct_verbosity>=1)
	printf("(NumConst=%d, SV=%ld, CEps=%.4f, QPEps=%.4f)\n",cset.m,
	       svmModel->sv_num-1,ceps,svmModel->maxdiff);

      rt_total+=MAX(get_runtime()-rt1,0);

  } while(finalize_iteration(ceps,cached_constraint,sample,sm,cset,alpha,sparm)|| cached_constraint || (ceps > sparm->epsilon) );

  // Originally written as below; finalize_iteration() was never called because of short-circuit evaluation:
//  } while(cached_constraint || (ceps > sparm->epsilon) || 
//	  finalize_iteration(ceps,cached_constraint,sample,sm,cset,alpha,sparm)
//	 );
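  // (In C, `a || b` evaluates b only when a is zero, so moving
  //  finalize_iteration() to the front guarantees it runs every iteration.)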
  
  if(struct_verbosity>=1) {
    printf("Final epsilon on KKT-Conditions: %.5f\n",
	   MAX(svmModel->maxdiff,ceps));

    slack=0;
    for(j=0;j<cset.m;j++) 
      slack=MAX(slack,
		cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
    alphasum=0;
    for(i=0; i<cset.m; i++)  
      alphasum+=alpha[i]*cset.rhs[i];
    if(kparm->kernel_type == LINEAR_KERNEL)
      modellength=model_length_n(svmModel);
    else
      modellength=model_length_s(svmModel);
    dualitygap=(0.5*modellength*modellength+sparm->C*viol)
               -(alphasum-0.5*modellength*modellength);
    
    printf("Upper bound on duality gap: %.5f\n", dualitygap);
    printf("Dual objective value: dval=%.5f\n",
	    alphasum-0.5*modellength*modellength);
    printf("Primal objective value: pval=%.5f\n",
	    0.5*modellength*modellength+sparm->C*viol);
    printf("Total number of constraints in final working set: %i (of %i)\n",(int)cset.m,(int)totconstraints);
    printf("Number of iterations: %d\n",numIt);
    printf("Number of calls to 'find_most_violated_constraint': %ld\n",argmax_count);
    printf("Number of SV: %ld \n",svmModel->sv_num-1);
    printf("Norm of weight vector: |w|=%.5f\n",modellength);
    printf("Value of slack variable (on working set): xi=%.5f\n",slack);
    printf("Value of slack variable (global): xi=%.5f\n",viol);
    printf("Norm of longest difference vector: ||Psi(x,y)-Psi(x,ybar)||=%.5f\n",
	   length_of_longest_document_vector(cset.lhs,cset.m,kparm));
    if(struct_verbosity>=2) 
      printf("Runtime in cpu-seconds: %.2f (%.2f%% for QP, %.2f%% for kernel, %.2f%% for Argmax, %.2f%% for Psi, %.2f%% for init, %.2f%% for cache update, %.2f%% for cache const, %.2f%% for cache add (incl. %.2f%% for sum))\n",
	   rt_total/100.0, (100.0*rt_opt)/rt_total, (100.0*rt_kernel)/rt_total,
	   (100.0*rt_viol)/rt_total, (100.0*rt_psi)/rt_total, 
	   (100.0*rt_init)/rt_total,(100.0*rt_cacheupdate)/rt_total,
	   (100.0*rt_cacheconst)/rt_total,(100.0*rt_cacheadd)/rt_total,
	   (100.0*rt_cachesum)/rt_total);
    else if(struct_verbosity==1) 
      printf("Runtime in cpu-seconds: %.2f\n",rt_total/100.0);
  }
  if(ccache) {
    long cnum=0;
    CCACHEELEM *celem;
    for(i=0;i<n;i++) 
      for(celem=ccache->constlist[i];celem;celem=celem->next) 
	cnum++;
    printf("Final number of constraints in cache: %ld\n",cnum);
  }
  if(struct_verbosity>=4)
    printW(sm->w,sizePsi,n,lparm->svm_c);

  if(svmModel) {
    sm->svm_model=copy_model(svmModel);
    sm->w=sm->svm_model->lin_weights; /* short cut to weight vector */
    free_model(svmModel,0);
  }

  print_struct_learning_stats(sample,sm,cset,alpha,sparm);

  if(lhs_n)
    free_nvector(lhs_n);
  if(ccache)    
    free_constraint_cache(ccache);
  for(i=0;i<n;i++)
    if(fycache[i])
      free_svector(fycache[i]);
  free(fycache);
  free(alpha); 
  free(alphahist); 
  free(cset.rhs); 
  for(i=0;i<cset.m;i++) 
    free_example(cset.lhs[i],1);
  free(cset.lhs);
  if(kparm->gram_matrix)
    free_matrix(kparm->gram_matrix);
}
Example #25
//================================================================
void free_track_section_model(int scene, int trackid, int sectionid)
{
    if(!is_track(scene,trackid)) return;
    free_model(trackscenevec[scene]->trackvec[trackid], sectionid);
}
Example #26
void write_model(char *modelfile, MODEL *model)
{
  FILE *modelfl;
  long j,i,sv_num;
  SVECTOR *v;
  MODEL *compact_model=NULL;
 
  if(verbosity>=1) {
    printf("Writing model file..."); fflush(stdout);
  }

  /* Replace SVs with a single weight vector (currently disabled by the `0 &&`) */
  if(0 && model->kernel_parm.kernel_type == LINEAR) {
    if(verbosity>=1) {
      printf("(compacting..."); fflush(stdout);
    }
    compact_model=compact_linear_model(model);
    model=compact_model;
    if(verbosity>=1) {
      printf("done)"); fflush(stdout);
    }
  }

  if ((modelfl = fopen (modelfile, "w")) == NULL)
  { perror (modelfile); exit (1); }
  fprintf(modelfl,"SVM-light Version %s\n",VERSION);
  fprintf(modelfl,"%ld # kernel type\n",
	  model->kernel_parm.kernel_type);
  fprintf(modelfl,"%ld # kernel parameter -d \n",
	  model->kernel_parm.poly_degree);
  fprintf(modelfl,"%.8g # kernel parameter -g \n",
	  model->kernel_parm.rbf_gamma);
  fprintf(modelfl,"%.8g # kernel parameter -s \n",
	  model->kernel_parm.coef_lin);
  fprintf(modelfl,"%.8g # kernel parameter -r \n",
	  model->kernel_parm.coef_const);
  fprintf(modelfl,"%s# kernel parameter -u \n",model->kernel_parm.custom);
  fprintf(modelfl,"%ld # highest feature index \n",model->totwords);
  fprintf(modelfl,"%ld # number of training documents \n",model->totdoc);
 
  sv_num=1;
  for(i=1;i<model->sv_num;i++) {
    for(v=model->supvec[i]->fvec;v;v=v->next) 
      sv_num++;
  }
  fprintf(modelfl,"%ld # number of support vectors plus 1 \n",sv_num);
  fprintf(modelfl,"%.8g # threshold b, each following line is a SV (starting with alpha*y)\n",model->b);

  for(i=1;i<model->sv_num;i++) {
    for(v=model->supvec[i]->fvec;v;v=v->next) {
      fprintf(modelfl,"%.32g ",model->alpha[i]*v->factor);
      for (j=0; (v->words[j]).wnum; j++) {
	fprintf(modelfl,"%ld:%.8g ",
		(long)(v->words[j]).wnum,
		(double)(v->words[j]).weight);
      }
      if(v->userdefined)
	fprintf(modelfl,"#%s\n",v->userdefined);
      else
	fprintf(modelfl,"#\n");
    /* NOTE: this could be made more efficient by summing the
       alpha's of identical vectors before writing them to the
       file. */
    }
  }
  fclose(modelfl);
  if(compact_model)
    free_model(compact_model,1);
  if(verbosity>=1) {
    printf("done\n");
  }
}
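A minimal sketch of the matching read-back cycle, using only the API seen in
these examples (the file name and feature values are placeholders):

MODEL *m = read_model("svm.model");
WORD words[3] = { {1, 1.0}, {2, -0.5}, {0, 0} };   /* wnum==0 terminates */
DOC *doc = create_example(0, 0, 0, 1.0, create_svector(words, "", 1.0));
printf("score = %g\n", classify_example(m, doc));
free_example(doc, 1);   /* deep: also frees the contained svector */
free_model(m, 1);       /* deep: a model from read_model() owns its SVs */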
Example #27
IMP_VOID ipReleaseTargetClassifieri( IpTargetClassifier *pstTgtClfier )
{
	IpClassifierPara *pstParams = &pstTgtClfier->stPara;
	free_model(pstParams->pstModel,1);
	memset( pstTgtClfier, 0, sizeof(IpTargetClassifier) );
}
Example #28
int main(int argc, char *argv[]) {

  int i, k, n=0;
  int iformat, oformat;
  nip_model model = NULL;
  time_series* ts_set = NULL;

  if(argc < 6){
    printf("You must specify: \n"); 
    printf(" - the NET file for the model, \n");
    printf(" - input format ('univariate'), \n");
    printf(" - input file name, \n");
    printf(" - output format ('unary'), \n");
    printf(" - output file name, please!\n");
    return 0;
  }
  
  /* read the model */
  model = parse_model(argv[1]);
  if(!model){
    printf("Unable to parse the NET file: %s?\n", argv[1]);
    return -1;
  }

  /* read file formats */
  /* Reminder: strcasecmp() is NOT ANSI C. */
  if(strcasecmp(argv[2], S_UNIVARIATE) == 0)
    iformat = UNIVARIATE;
  /* additional formats here */
  else{
    printf("Invalid input file format: %s?\n", argv[2]);
    free_model(model);
    return -1;
  }

  if(strcasecmp(argv[4], S_UNARY) == 0)
    oformat = UNARY;
  /* additional formats here */
  else{
    printf("Invalid output file format: %s?\n", argv[4]);
    free_model(model);
    return -1;
  }

  /* Read the input data file */
  switch (iformat) {
  case UNIVARIATE:
  case MULTIVARIATE:
    n = read_timeseries(model, argv[3], &ts_set);
    break;
  default:
    n = 0; /* should be impossible */
  }
  if(n < 1){
    fprintf(stderr, "There were errors while reading %s\n", argv[3]);
    free_model(model);
    /* no ts_set to free (?) */
    return -1;
  }

  /* Write the results to the file */
  k = NIP_NO_ERROR;
  switch (oformat) {
  case UNARY:
    k = write_unary_timeseries(ts_set, n, argv[5]);
    break;
  default:
    ; /* shouldn't happen */
  }
  if(k != NIP_NO_ERROR){
    fprintf(stderr, "Failed to write the data into %s\n", argv[5]);
    nip_report_error(__FILE__, __LINE__, k, 1);
    for(i = 0; i < n; i++)
      free_timeseries(ts_set[i]);
    free(ts_set);
    free_model(model);
    return -1;
  }

  for(i = 0; i < n; i++)
    free_timeseries(ts_set[i]);
  free(ts_set);
  free_model(model);
  return 0;
}
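Each error path above must repeat the free_model()/free_timeseries() calls,
which is easy to get wrong when a new step is added. A condensed sketch of
the same flow (format checks omitted) with a single cleanup exit; the labels
are illustrative, not part of the original:

int status = -1;
model = parse_model(argv[1]);
if (!model) return -1;
n = read_timeseries(model, argv[3], &ts_set);
if (n < 1) goto free_mod;
if (write_unary_timeseries(ts_set, n, argv[5]) != NIP_NO_ERROR) goto free_ts;
status = 0;
free_ts:
  for (i = 0; i < n; i++) free_timeseries(ts_set[i]);
  free(ts_set);
free_mod:
  free_model(model);
  return status;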
Example #29
void svm_learn_struct(SAMPLE sample, STRUCT_LEARN_PARM *sparm,
		      LEARN_PARM *lparm, KERNEL_PARM *kparm, 
		      STRUCTMODEL *sm)
{
  int         i,j;
  int         numIt=0;
  long        newconstraints=0, activenum=0; 
  int         opti_round, *opti;
  long        old_numConst=0;
  double      epsilon;
  long        tolerance;
  double      lossval,factor;
  double      margin=0;
  double      slack, *slacks, slacksum;
  long        sizePsi;
  double      *alpha=NULL;
  CONSTSET    cset;
  SVECTOR     *diff=NULL;
  SVECTOR     *fy, *fybar, *f;
  SVECTOR     *slackvec;
  WORD        slackv[2];
  MODEL       *svmModel=NULL;
  KERNEL_CACHE *kcache=NULL;
  LABEL       ybar;
  DOC         *doc;

  long        n=sample.n;
  EXAMPLE     *ex=sample.examples;
  double      rt_total=0.0, rt_opt=0.0;
  long        rt1,rt2;

  init_struct_model(sample,sm,sparm); 
  sizePsi=sm->sizePsi+1;          /* sm must contain size of psi on return */

  /* initialize example selection heuristic */ 
  opti=(int*)my_malloc(n*sizeof(int));
  for(i=0;i<n;i++) {
    opti[i]=0;
  }
  opti_round=0;

  if(sparm->slack_norm == 1) {
    lparm->svm_c=sparm->C;          /* set upper bound C */
    lparm->sharedslack=1;
  }
  else if(sparm->slack_norm == 2) {
    lparm->svm_c=999999999999999.0; /* upper bound C must never be reached */
    lparm->sharedslack=0;
    if(kparm->kernel_type != LINEAR) {
      printf("ERROR: Kernels are not implemented for L2 slack norm!"); 
      fflush(stdout);
      exit(0);
    }
  }
  else {
    printf("ERROR: Slack norm must be L1 or L2!"); fflush(stdout);
    exit(0);
  }


  epsilon=1.0;                    /* start with low precision and
				     increase later */
  tolerance=n/100;                /* increase precision, whenever less
                                     than that number of constraints
                                     is not fulfilled */
  lparm->biased_hyperplane=0;     /* set threshold to zero */

  cset=init_struct_constraints(sample, sm, sparm);
  if(cset.m > 0) {
    alpha=realloc(alpha,sizeof(double)*cset.m);
    for(i=0; i<cset.m; i++) 
      alpha[i]=0;
  }

  /* set initial model and slack variables*/
  svmModel=(MODEL *)my_malloc(sizeof(MODEL));
  svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n,
			 lparm,kparm,NULL,svmModel,alpha);
  add_weight_vector_to_linear_model(svmModel);
  sm->svm_model=svmModel;
  sm->w=svmModel->lin_weights; /* short cut to weight vector */

  printf("Starting Iterations\n");

    /*****************/
   /*** main loop ***/
  /*****************/
  do { /* iteratively increase precision */

    epsilon=MAX(epsilon*0.09999999999,sparm->epsilon);
    if(epsilon == sparm->epsilon)   /* for final precision, find all SV */
      tolerance=0;
    lparm->epsilon_crit=epsilon/2;  /* svm precision must be higher than eps */
    if(struct_verbosity>=1)
      printf("Setting current working precision to %g.\n",epsilon);

    do { /* iteration until (approx) all SV are found for current
            precision and tolerance */
      
      old_numConst=cset.m;
      opti_round++;
      activenum=n;

      do { /* go through examples that keep producing new constraints */

	if(struct_verbosity>=1) { 
	  printf("--Iteration %i (%ld active): ",++numIt,activenum); 
	  fflush(stdout);
	}
	
	for(i=0; i<n; i++) { /*** example loop ***/
	  
	  rt1=get_runtime();
	    
	  if(opti[i] != opti_round) {/* if the example is not shrunk
	                                away, then see if it is necessary to 
					add a new constraint */
	    if(sparm->loss_type == SLACK_RESCALING) 
	      ybar=find_most_violated_constraint_slackrescaling(ex[i].x,
								ex[i].y,sm,
								sparm);
	    else
	      ybar=find_most_violated_constraint_marginrescaling(ex[i].x,
								 ex[i].y,sm,
								 sparm);
	    
	    if(empty_label(ybar)) {
	      if(opti[i] != opti_round) {
		activenum--;
		opti[i]=opti_round; 
	      }
	      if(struct_verbosity>=2)
		printf("no-incorrect-found(%i) ",i);
	      continue;
	    }
	  
	    /**** get psi(y)-psi(ybar) ****/
	    fy=psi(ex[i].x,ex[i].y,sm,sparm);
	    fybar=psi(ex[i].x,ybar,sm,sparm);
	    
	    /**** scale feature vector and margin by loss ****/
	    lossval=loss(ex[i].y,ybar,sparm);
	    if(sparm->slack_norm == 2)
	      lossval=sqrt(lossval);
	    if(sparm->loss_type == SLACK_RESCALING)
	      factor=lossval;
	    else               /* do not rescale vector for */
	      factor=1.0;      /* margin rescaling loss type */
	    for(f=fy;f;f=f->next)
	      f->factor*=factor;
	    for(f=fybar;f;f=f->next)
	      f->factor*=-factor;
	    margin=lossval;

	    /**** create constraint for current ybar ****/
	    append_svector_list(fy,fybar);/* append the two vector lists */
	    doc=create_example(cset.m,0,i+1,1,fy);

	    /**** compute slack for this example ****/
	    slack=0;
	    for(j=0;j<cset.m;j++) 
	      if(cset.lhs[j]->slackid == i+1) {
		if(sparm->slack_norm == 2) /* works only for linear kernel */
		  slack=MAX(slack,cset.rhs[j]
			          -(classify_example(svmModel,cset.lhs[j])
				    -sm->w[sizePsi+i]/(sqrt(2*sparm->C))));
		else
		  slack=MAX(slack,
			   cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
	      }
	    
	    /**** if `error' add constraint and recompute ****/
	    if((classify_example(svmModel,doc)+slack)<(margin-epsilon)) { 
	      if(struct_verbosity>=2)
		{printf("(%i) ",i); fflush(stdout);}
	      if(struct_verbosity==1)
		{printf("."); fflush(stdout);}
	      
	      /**** resize constraint matrix and add new constraint ****/
	      cset.m++;
	      cset.lhs=realloc(cset.lhs,sizeof(DOC *)*cset.m);
	      if(kparm->kernel_type == LINEAR) {
		diff=add_list_ss(fy); /* store difference vector directly */
		if(sparm->slack_norm == 1) 
		  cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1,
						    copy_svector(diff));
		else if(sparm->slack_norm == 2) {
		  /**** add squared slack variable to feature vector ****/
		  slackv[0].wnum=sizePsi+i;
		  slackv[0].weight=1/(sqrt(2*sparm->C));
		  slackv[1].wnum=0; /*terminator*/
		  slackvec=create_svector(slackv,"",1.0);
		  cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1,
						    add_ss(diff,slackvec));
		  free_svector(slackvec);
		}
		free_svector(diff);
	      }
	      else { /* kernel is used */
		if(sparm->slack_norm == 1) 
		  cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1,
						    copy_svector(fy));
		else if(sparm->slack_norm == 2)
		  exit(1);
	      }
	      cset.rhs=realloc(cset.rhs,sizeof(double)*cset.m);
	      cset.rhs[cset.m-1]=margin;
	      alpha=realloc(alpha,sizeof(double)*cset.m);
	      alpha[cset.m-1]=0;
	      newconstraints++;
	    }
	    else {
	      printf("+"); fflush(stdout); 
	      if(opti[i] != opti_round) {
		activenum--;
		opti[i]=opti_round; 
	      }
	    }

	    free_example(doc,0);
	    free_svector(fy); /* this also free's fybar */
	    free_label(ybar);
	  }

	  /**** get new QP solution ****/
	  if((newconstraints >= sparm->newconstretrain) 
	     || ((newconstraints > 0) && (i == n-1))) {
	    if(struct_verbosity>=1) {
	      printf("*");fflush(stdout);
	    }
	    rt2=get_runtime();
	    free_model(svmModel,0);
	    svmModel=(MODEL *)my_malloc(sizeof(MODEL));
	    /* Always get a new kernel cache. It is not possible to use the
	       same cache for two different training runs */
	    if(kparm->kernel_type != LINEAR)
	      kcache=kernel_cache_init(cset.m,lparm->kernel_cache_size);
	    /* Run the QP solver on cset. */
	    svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n,
				   lparm,kparm,kcache,svmModel,alpha);
	    if(kcache)
	      kernel_cache_cleanup(kcache);
	    /* Always add weight vector, in case part of the kernel is
	       linear. If not, ignore the weight vector since its
	       content is bogus. */
	    add_weight_vector_to_linear_model(svmModel);
	    sm->svm_model=svmModel;
	    sm->w=svmModel->lin_weights; /* short cut to weight vector */
	    rt_opt+=MAX(get_runtime()-rt2,0);
	    
	    newconstraints=0;
	  }	

	  rt_total+=MAX(get_runtime()-rt1,0);
	} /* end of example loop */

	if(struct_verbosity>=1)
	  printf("(NumConst=%d, SV=%ld, Eps=%.4f)\n",cset.m,svmModel->sv_num-1,
		 svmModel->maxdiff);

      } while(activenum > 0);   /* repeat until all examples produced no
				   constraint at least once */

    } while((cset.m - old_numConst) > tolerance) ;

  } while(epsilon > sparm->epsilon);  

  if(struct_verbosity>=1) {
    /**** compute sum of slacks ****/
    slacks=(double *)my_malloc(sizeof(double)*(n+1));
    for(i=0; i<=n; i++) { 
      slacks[i]=0;
    }
    if(sparm->slack_norm == 1) {
      for(j=0;j<cset.m;j++) 
	slacks[cset.lhs[j]->slackid]=MAX(slacks[cset.lhs[j]->slackid],
			   cset.rhs[j]-classify_example(svmModel,cset.lhs[j]));
    }
    else if(sparm->slack_norm == 2) {
      for(j=0;j<cset.m;j++) 
	slacks[cset.lhs[j]->slackid]=MAX(slacks[cset.lhs[j]->slackid],
		cset.rhs[j]
	         -(classify_example(svmModel,cset.lhs[j])
		   -sm->w[sizePsi+cset.lhs[j]->slackid-1]/(sqrt(2*sparm->C))));
    }
    slacksum=0;
    for(i=0; i<=n; i++)  
      slacksum+=slacks[i];
    free(slacks);

    printf("Final epsilon on KKT-Conditions: %.5f\n",
	   MAX(svmModel->maxdiff,epsilon));
    printf("Total number of constraints added: %i\n",(int)cset.m);
    if(sparm->slack_norm == 1) {
      printf("Number of SV: %ld \n",svmModel->sv_num-1);
      printf("Number of non-zero slack variables: %ld (out of %ld)\n",
	     svmModel->at_upper_bound,n);
      printf("Norm of weight vector: |w|=%.5f\n",
	     model_length_s(svmModel,kparm));
    }
    else if(sparm->slack_norm == 2){ 
      printf("Number of SV: %ld (including %ld at upper bound)\n",
	     svmModel->sv_num-1,svmModel->at_upper_bound);
      printf("Norm of weight vector (including L2-loss): |w|=%.5f\n",
	     model_length_s(svmModel,kparm));
    }
    printf("Sum of slack variables: sum(xi_i)=%.5f\n",slacksum);
    printf("Norm of longest difference vector: ||Psi(x,y)-Psi(x,ybar)||=%.5f\n",
	   length_of_longest_document_vector(cset.lhs,cset.m,kparm));
    printf("Runtime in cpu-seconds: %.2f (%.2f%% for SVM optimization)\n",
	   rt_total/100.0, 100.0*rt_opt/rt_total);
  }
  if(struct_verbosity>=4)
    printW(sm->w,sizePsi,n,lparm->svm_c);

  if(svmModel) {
    sm->svm_model=copy_model(svmModel);
    sm->w=sm->svm_model->lin_weights; /* short cut to weight vector */
  }

  print_struct_learning_stats(sample,sm,cset,alpha,sparm);

  if(svmModel)
    free_model(svmModel,0);
  free(alpha); 
  free(opti); 
  free(cset.rhs); 
  for(i=0;i<cset.m;i++) 
    free_example(cset.lhs[i],1);
  free(cset.lhs);
}
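Note the ownership convention behind the second argument of free_model()
throughout these examples: the temporary svmModel is freed with deep=0
because its supvec entries point into cset.lhs, which is freed separately
just above, whereas a model that owns its support vectors (for example one
returned by read_model(), or the copy_model() copy kept in sm->svm_model)
must be freed with deep=1.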
Example #30
int SVMLightRunner::librarySVMLearnMain(
    int argc, char **argv, bool use_gmumr, SVMConfiguration &config
) {
    LOG(
        config.log,
        LogLevel::DEBUG_LEVEL,
        __debug_prefix__ + ".librarySVMLearnMain() Started."
    );
    DOC **docs;  /* training examples */
    long totwords,totdoc,i;
    double *target;
    double *alpha_in=NULL;
    KERNEL_CACHE *kernel_cache;
    LEARN_PARM learn_parm;
    KERNEL_PARM kernel_parm;
    MODEL *model=(MODEL *)my_malloc(sizeof(MODEL));

    // GMUM.R changes {
    librarySVMLearnReadInputParameters(
        argc, argv, docfile, modelfile, restartfile, &verbosity, &learn_parm,
        &kernel_parm, use_gmumr, config
    );

    kernel_parm.kernel_type = static_cast<long int>(config.kernel_type);

    libraryReadDocuments(
        docfile, &docs, &target, &totwords, &totdoc, use_gmumr, config
    );
    // GMUM.R changes }

    if(restartfile[0]) alpha_in=read_alphas(restartfile,totdoc);

    if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */
      kernel_cache=NULL;
    }
    else {
      /* Always get a new kernel cache. It is not possible to use the
       * same cache for two different training runs */
      kernel_cache=kernel_cache_init(totdoc,learn_parm.kernel_cache_size);
    }

    //gmum.r
    init_global_params_QP();

    if(learn_parm.type == CLASSIFICATION) {
      svm_learn_classification(docs,target,totdoc,totwords,&learn_parm,
			     &kernel_parm,kernel_cache,model,alpha_in);
    }
    else if(learn_parm.type == REGRESSION) {
      svm_learn_regression(docs,target,totdoc,totwords,&learn_parm,
			 &kernel_parm,&kernel_cache,model);
    }
    else if(learn_parm.type == RANKING) {
      svm_learn_ranking(docs,target,totdoc,totwords,&learn_parm,
		      &kernel_parm,&kernel_cache,model);
    }
    else if(learn_parm.type == OPTIMIZATION) {
      svm_learn_optimization(docs,target,totdoc,totwords,&learn_parm,
			   &kernel_parm,kernel_cache,model,alpha_in);
    }
    //gmum.r
    config.iter = learn_parm.iterations;

    if(kernel_cache) {
      /* Free the memory used for the cache. */
      kernel_cache_cleanup(kernel_cache);
    }

    /* Warning: The model contains references to the original data 'docs'.
       If you want to free the original data, and only keep the model, you 
       have to make a deep copy of 'model'. */
    /* deep_copy_of_model=copy_model(model); */
    // GMUM.R changes {
    if (!use_gmumr) {
        write_model(modelfile,model);
    } else {
        SVMLightModelToSVMConfiguration(model, config);
    }
    // GMUM.R changes }

    free(alpha_in);
    free_model(model,0);
    for(i=0;i<totdoc;i++) 
      free_example(docs[i],1);
    free(docs);
    free(target);

    LOG(
        config.log,
        LogLevel::DEBUG_LEVEL,
        __debug_prefix__ + ".librarySVMLearnMain() Done."
    );

    return(0);
}
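The comment inside this function spells out the ownership rule for keeping
only the model after training. A sketch of that pattern, per the comment
(copy_model() makes a deep copy that owns its support vectors):

MODEL *keep = copy_model(model);   /* deep copy: the copy owns its SVs */
free_model(model, 0);              /* shallow: originals live in 'docs' */
for (i = 0; i < totdoc; i++)
  free_example(docs[i], 1);
free(docs);
/* ... use 'keep' ... */
free_model(keep, 1);               /* deep free of the copied SVs */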