/*
  Release a sample: for each of the s.n examples the pattern (x), the
  label (y) and the latent variable (h) are passed to their free_*
  routines, after which the example array itself is freed.
*/
void free_struct_sample(SAMPLE s) {
  int idx = 0;

  while (idx < s.n) {
    free_pattern(s.examples[idx].x);
    free_label(s.examples[idx].y);
    free_latent_var(s.examples[idx].h);
    ++idx;
  }

  free(s.examples);
}
int main(int argc, char* argv[]) {
  double avgloss,l;
  long i, correct;

  char testfile[1024];
  char modelfile[1024];

  STRUCTMODEL model;
  STRUCT_LEARN_PARM sparm;
  LEARN_PARM lparm;
  KERNEL_PARM kparm;

  SAMPLE testsample;
  LABEL y;
  LATENT_VAR h; 

  /* read input parameters */
  read_input_parameters(argc,argv,testfile,modelfile,&sparm);

  /* read model file */
  printf("Reading model..."); fflush(stdout);
//  model = read_struct_model(modelfile, &sparm);
  printf("done.\n"); 

  /* read test examples */
  printf("Reading test examples..."); fflush(stdout);
  testsample = read_struct_examples(testfile,&sparm);
  printf("done.\n");

  init_struct_model(testsample,&model,&sparm,&lparm,&kparm);
  
  avgloss = 0.0;
  correct = 0;
  for (i=0;i<testsample.n;i++) {
    classify_struct_example(testsample.examples[i].x,&y,&h,&model,&sparm);
    l = loss(testsample.examples[i].y,y,h,&sparm);
    avgloss += l;
    if (l==0) correct++;

    free_label(y);
    free_latent_var(h); 
  }

  printf("Average loss on test set: %.4f\n", avgloss/testsample.n);
  printf("Zero/one error on test set: %.4f\n", 1.0 - ((float) correct)/testsample.n);

  free_struct_sample(testsample);
  free_struct_model(model,&sparm);

  return(0);

}
/*
  Free the whole training sample: the pattern, label and latent variable
  of each of the s.n examples, followed by the example array.
*/
void free_struct_sample(SAMPLE s) {
  int pos;

  pos = 0;
  while (pos < s.n) {
    free_pattern(s.examples[pos].x);
    free_label(s.examples[pos].y);
    free_latent_var(s.examples[pos].h);
    pos += 1;
  }

  free(s.examples);
}
/*
  Outer optimisation loop for one chunk of one epoch.

  Each outer iteration
    1. switches to the 'u' variables (create_u_variables),
    2. runs cutting_plane_algorithm on w_iters[eid][chunkid], passing the
       weights of the preceding chunk (or `zeroes` for the very first chunk
       of the very first epoch),
    3. re-imputes the latent variables of all examples with
       infer_latent_variables_all and stores them in sample.examples[i].h,
    4. rebuilds the feature-vector cache for the correct labels, and
    5. restores 'w' (restore_w_variables).

  The loop always runs at least twice and then continues until the stopping
  criterion (small decrement of the primal objective with cooling_eps at its
  floor) holds or MAX_OUTER_ITER is reached.

  sample     examples to train on; sample.examples[i].h is overwritten
  w_iters    weight vectors indexed as w_iters[epoch][chunk]
  zeroes     vector passed as "previous weights" for epoch 0, chunk 0
  returns    the primal objective reported by the last cutting-plane run

  The process is terminated with exit(1) if a work buffer cannot be
  allocated.
*/
double optimizeMultiVariatePerfMeasure(SAMPLE sample, int datasetStartIdx, int chunkSz, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm,
		double C, double Cdash, double epsilon, int MAX_ITER, LEARN_PARM *learn_parm, char *trainfile,
		double ***w_iters, int eid, int chunkid, int numChunks, double *zeroes){

	int i;
	int outer_iter = 0;
	int stop_crit = 0;            /* only consulted from the third iteration on */
	time_t time_start, time_end;
	double decrement = 0;
	double primal_obj = 0;
	double last_primal_obj = 0;
	double cooling_eps;
	LATENT_VAR *imputed_h = NULL;

	int dataset_sz = sample.n;
	SVECTOR **fycache, *diff, *fy;
	EXAMPLE *ex = sample.examples;

	/* some training information */
	printf("C: %.8g\n", C);
	printf("Cdash: %.8g\n", Cdash);
	printf("epsilon: %.8g\n", epsilon);
	printf("sample.n: %ld\n", (long)dataset_sz);   /* dataset_sz is an int; cast to match %ld */
	printf("sm->sizePsi: %ld\n", (long)sm->sizePsi); fflush(stdout);

	/* prepare feature vector cache for correct labels with imputed latent variables */
	fycache = (SVECTOR**)malloc(dataset_sz*sizeof(SVECTOR*));
	if (fycache == NULL && dataset_sz > 0) {
		fprintf(stderr, "optimizeMultiVariatePerfMeasure: out of memory (fycache)\n");
		exit(1);
	}
	for (i=0;i<dataset_sz;i++) {
		fy = psi(ex[i].x, ex[i].y, ex[i].h, sm, sparm);
		diff = add_list_ss(fy);
		free_svector(fy);
		fycache[i] = diff;
	}

	/* time taken stats */
	time(&time_start);

	/* outer loop: latent variable imputation */
	cooling_eps = 0.5*MAX(C,Cdash)*epsilon;
	while ((outer_iter<2)||((!stop_crit)&&(outer_iter<MAX_OUTER_ITER))) {
		double *w_prev;
		time_t cp_start, cp_end;

		printf("OUTER ITER %d\n", outer_iter); fflush(stdout);
		/* cutting plane algorithm */
		time(&cp_start);

		/// NOTE : Change of variables (Create 'u' by subtracting w_prev from w)
		create_u_variables(w_iters, eid, chunkid, numChunks, sm, zeroes);

		/* pick the weight vector that precedes the current (epoch, chunk) */
		if (chunkid == 0 && eid == 0) {
			w_prev = zeroes;                         /* first chunk of first epoch */
		} else if (chunkid == 0) {
			w_prev = w_iters[eid-1][numChunks-1];    /* last chunk of previous epoch */
		} else {
			w_prev = w_iters[eid][chunkid-1];        /* previous chunk of current epoch */
		}

		primal_obj = cutting_plane_algorithm(w_iters[eid][chunkid], dataset_sz, MAX_ITER, C, cooling_eps,
						fycache, ex, sm, sparm,	learn_parm->tmpdir, trainfile, learn_parm->frac_sim,
						learn_parm->Fweight, learn_parm->dataset_stats_file, learn_parm->rho_admm,
						learn_parm->isExhaustive, learn_parm->isLPrelaxation, Cdash, datasetStartIdx, chunkSz,
						eid, chunkid, w_prev, numChunks);

		time(&cp_end);

#if(DEBUG_LEVEL==1)
		char msg[32];
		snprintf(msg, sizeof msg, "OUTER ITER %d", outer_iter);
		print_time(cp_start, cp_end, msg);
#endif

		/* compute decrement in objective in this outer iteration */
		decrement = last_primal_obj - primal_obj;
		last_primal_obj = primal_obj;
		printf("primal objective: %.4f\n", primal_obj);
		printf("decrement: %.4f\n", decrement); fflush(stdout);

		stop_crit = (decrement<MAX(C, Cdash)*epsilon)&&(cooling_eps<0.5*MAX(C, Cdash)*epsilon+1E-8);

		cooling_eps = -decrement*0.01;
		cooling_eps = MAX(cooling_eps, 0.5*MAX(C,Cdash)*epsilon);
		printf("cooling_eps: %.8g\n", cooling_eps);


		/* impute latent variable using updated weight vector */
		for(i = 0; i < dataset_sz; i ++)
			free_latent_var(ex[i].h);
		free(imputed_h);   /* array from the previous iteration; free(NULL) is a no-op */

		imputed_h = (LATENT_VAR*)malloc(sizeof(LATENT_VAR) * dataset_sz);
		if (imputed_h == NULL && dataset_sz > 0) {
			fprintf(stderr, "optimizeMultiVariatePerfMeasure: out of memory (imputed_h)\n");
			exit(1);
		}
		infer_latent_variables_all(imputed_h, sm, sparm, dataset_sz, learn_parm->tmpdir, trainfile, datasetStartIdx, chunkSz, eid, chunkid);

		/* each example takes a by-value copy of its imputed latent variable */
		for (i=0;i<dataset_sz;i++) {
			ex[i].h = imputed_h[i];
		}
		/* re-compute feature vector cache */
		for (i=0;i<dataset_sz;i++) {
			free_svector(fycache[i]);
			/* sm and sparm are already pointers here; pass them as-is */
			fy = psi(ex[i].x, ex[i].y, ex[i].h, sm, sparm);
			diff = add_list_ss(fy);
			free_svector(fy);
			fycache[i] = diff;
		}
		printf("(OnlineSVM) .. finished outer_iter %d\n",outer_iter);
		outer_iter++;

		/// NOTE: Restore the 'w' by adding the current 'u' to w_prev
		restore_w_variables(w_iters, eid, chunkid, numChunks, sm, zeroes);

	} // end outer loop

	time(&time_end);

	#if (DEBUG_LEVEL==1)
	  print_time(time_start, time_end, "Total time");
	#endif

	for(i=0;i<dataset_sz;i++) {
		free_svector(fycache[i]);
	}
	free(fycache);

	/* The examples keep their own copies of the latent variables, so only
	   the temporary array of the last iteration is released here. */
	free(imputed_h);

	return primal_obj;
}
/*
  Build one cutting plane for the current chunk.

  The most violated label/latent pair of every example is obtained in a
  single call to find_most_violated_constraint_marginrescaling_all_online.
  For each example the cached psi of the correct label (scaled by 1/m) and
  the psi of the violator (scaled by -1/m) are chained into one list, which
  is then collapsed into a dense vector and re-packed as a sparse SVECTOR
  holding only entries with magnitude above 1E-10.

  *margin  receives the value of lossF1 over all m examples
  *margin2 receives the mean of the per-example loss() values
  returns  the newly created constraint vector
*/
SVECTOR* find_cutting_plane(EXAMPLE *ex, SVECTOR **fycache, double *margin, long m, STRUCTMODEL *sm,
		STRUCT_LEARN_PARM *sparm, char* tmpdir, char *trainfile, double frac_sim, double Fweight,
		char *dataset_stats_file, double rho_admm, long isExhaustive, long isLPrelaxation,
		double *margin2, int datasetStartIdx, int chunkSz, int eid, int chunkid) {

  LABEL *all_ybar = (LABEL*) malloc(sizeof(LABEL) * m);
  LATENT_VAR *all_hbar = (LATENT_VAR*) malloc (sizeof(LATENT_VAR) * m);
  SVECTOR *chain = NULL;          /* head of the growing constraint list */
  SVECTOR *node, *truth, *violator, *result;
  double *dense;
  WORD *sparse;
  long idx, nonzero, out;
  time_t search_begin, search_end;

  /* most violated constraints for all examples at once */
  time(&search_begin);
  find_most_violated_constraint_marginrescaling_all_online(all_ybar, all_hbar, sm, sparm, m,
		  tmpdir, trainfile, frac_sim, dataset_stats_file, rho_admm, isExhaustive, isLPrelaxation,
		  Fweight, datasetStartIdx, chunkSz, eid, chunkid);
  time(&search_end);

#if (DEBUG_LEVEL==1)
  print_time(search_begin, search_end, "Max violators");
#endif

  *margin = lossF1(ex, m, all_ybar, sparm, Fweight);
  *margin2 = 0;

  idx = 0;
  while (idx < m) {
    LABEL ybar = all_ybar[idx];
    LATENT_VAR hbar = all_hbar[idx];
    double example_loss;

    /* difference vector: psi(correct) and psi(violator) */
    truth = copy_svector(fycache[idx]);
    violator = psi(ex[idx].x,ybar,hbar,sm,sparm);
    example_loss = loss(ex[idx].y,ybar,hbar,sparm);
    free_label(ybar);
    free_latent_var(hbar);

    /* scale both parts by the sample size, with opposite signs */
    node = truth;
    while (node) {
      node->factor *= 1.0/m;
      node = node->next;
    }
    node = violator;
    while (node) {
      node->factor *= -1.0/m;
      node = node->next;
    }

    /* splice: violator -> truth -> previous chain */
    append_svector_list(truth,chain);
    append_svector_list(violator,truth);
    chain = violator;

    *margin2 += example_loss/m;
    idx++;
  }

  free(all_ybar);
  free(all_hbar);

  /* compact the linear representation */
  dense = add_list_nn(chain, sm->sizePsi);
  free_svector(chain);

  /* first pass: count entries that survive the threshold */
  nonzero = 0;
  for (idx = 1; idx <= sm->sizePsi; idx++) {
    if (fabs(dense[idx])>1E-10) {
      nonzero++;
    }
  }

  sparse = (WORD*)my_malloc(sizeof(WORD)*(nonzero+1));
  assert(sparse!=NULL);

  /* second pass: copy the surviving entries */
  out = 0;
  for (idx = 1; idx <= sm->sizePsi; idx++) {
    if (!(fabs(dense[idx])>1E-10)) {
      continue;
    }
    sparse[out].wnum = idx;
    sparse[out].weight = dense[idx];
    out++;
  }
  /* terminating entry */
  sparse[out].wnum = 0;
  sparse[out].weight = 0.0;

  result = create_svector(sparse,"",1);

  free(sparse);
  free(dense);

  return result;
}
/*
  Classification driver.

  Reads the model and the test examples, classifies one example per run of
  consecutive examples sharing an image_id, and writes
    - the predicted label of each classified example to the label file,
    - its latent variable to the latent file,
    - its image path followed by the per-class scores to the score file.
  Finally the plain and the example_cost-weighted zero/one error are printed.

  Returns 0 on success and 1 if an output file or the score buffer cannot
  be set up.
*/
int main(int argc, char* argv[]) {
    LABEL y;
    long i, correct;
    double weighted_correct;

    char testfile[1024];
    char modelfile[1024];
    char labelfile[1024];
    char latentfile[1024];
    char scorefile[1024];
    FILE *flabel;
    FILE *flatent;
    FILE *fscore;

    STRUCTMODEL model;
    STRUCT_LEARN_PARM sparm;

    SAMPLE testsample;


    /* read input parameters */
    read_input_parameters(argc,argv,testfile,modelfile,labelfile,latentfile,scorefile,model.kernel_info_file,model.filestub, &sparm);

    printf("C: %f\n",sparm.C);
    flabel = fopen(labelfile,"w");
    flatent = fopen(latentfile,"w");
    fscore = fopen(scorefile, "w");
    if (flabel == NULL || flatent == NULL || fscore == NULL) {
        /* every later fprintf would dereference a NULL stream */
        fprintf(stderr, "Error: could not open output files for writing (%s, %s, %s)\n",
                labelfile, latentfile, scorefile);
        if (flabel != NULL) fclose(flabel);
        if (flatent != NULL) fclose(flatent);
        if (fscore != NULL) fclose(fscore);
        return(1);
    }

    init_struct_model(model.kernel_info_file, &model, &sparm);

    read_struct_model(modelfile, &model);


    /* read test examples */
    printf("Reading test examples..."); fflush(stdout);
    testsample = read_struct_examples(testfile, &model, &sparm);
    printf("done.\n");

    IMAGE_KERNEL_CACHE ** cached_images = init_cached_images(testsample.examples,&model);

    correct = 0;
    weighted_correct=0.0;

    double total_example_weight = 0;
    int num_distinct_examples = 0;
    int last_image_id = -1;
    LATENT_VAR h = make_latent_var(&model);
    double * scores = (double *)calloc(sparm.n_classes, sizeof(double));
    if (scores == NULL && sparm.n_classes > 0) {
        fprintf(stderr, "Error: out of memory allocating class scores\n");
        fclose(flabel);
        fclose(flatent);
        fclose(fscore);
        return(1);
    }

    for (i=0;i<testsample.n;i++) {
        /* Skip examples that repeat the image just processed. The bound
           check keeps the scan inside the example array when the sample
           ends with such repeats. */
        while (i < testsample.n && testsample.examples[i].x.image_id == last_image_id) i++;
        if (i >= testsample.n) break;

        last_image_id = testsample.examples[i].x.image_id;
        num_distinct_examples++;

        classify_struct_example(testsample.examples[i].x,&y,&h,cached_images,&model,&sparm,1);

        total_example_weight += testsample.examples[i].x.example_cost;

        //A classification is considered "correct" if it guesses one of the objects in the image
        if (y.label == testsample.examples[i].y.label || testsample.examples[i].x.also_correct[y.label]) {
            correct++;
            weighted_correct+=testsample.examples[i].x.example_cost;
        }

        print_label(y, flabel);
        fprintf(flabel,"\n"); fflush(flabel);

        print_latent_var(testsample.examples[i].x, h, flatent);

        get_class_scores(testsample.examples[i].x, cached_images, scores, &model, &sparm);
        fprintf(fscore, "%s ", testsample.examples[i].x.image_path);
        for (int j = 0; j < sparm.n_classes; ++j) {
            fprintf(fscore, "%f ", scores[j]);
        }
        fprintf(fscore, "\n");
    }
    free_latent_var(h);
    fclose(flabel);
    fclose(flatent);
    free(scores);

    printf("\n");
    printf("Zero/one error on test set: %.4f\n", 1.0 - ((float) correct) / (1.0 * num_distinct_examples));
    printf("Weighted zero/one error on the test set %.4f\n", 	1.0 - (weighted_correct/total_example_weight));
    printf("zeroone %.4f weightedzeroone %.4f\n", 1.0 - ((float) correct) / (1.0 * num_distinct_examples), 1.0 - (weighted_correct/total_example_weight));  

    fclose(fscore);
    
    free_cached_images(cached_images, &model);
    /* NOTE(review): the test sample is not released here; the call to
       free_struct_sample was disabled in the original - confirm why. */
    free_struct_model(model,&sparm);

    return(0);

}
/*
  Build one cutting plane over all m examples.

  For every example the most violated label/latent pair is found with
  find_most_violated_constraint_marginrescaling. The cached psi of the
  correct label (scaled by 1/m) and the psi of the violator (scaled by
  -1/m) are chained into one list, which is collapsed into a dense vector
  and re-packed as a sparse SVECTOR holding only entries with magnitude
  above 1E-10.

  *margin  receives the mean of the per-example loss values
  returns  the newly created constraint vector

  The psi of each violator and the dense constraint are printed to stdout
  as debug output.
*/
SVECTOR* find_cutting_plane(EXAMPLE *ex, SVECTOR **fycache, double *margin, long m, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm) {

    long i, j;
    SVECTOR *f, *fy, *fybar, *lhs;
    LABEL       ybar;
    LATENT_VAR hbar;
    double lossval;
    double *new_constraint;

    long l,k;
    SVECTOR *fvec;
    WORD *words;

    /* find cutting plane */
    lhs = NULL;
    *margin = 0;
    for (i=0; i<m; i++) {
        find_most_violated_constraint_marginrescaling(ex[i].x, ex[i].y, &ybar, &hbar, sm, sparm);
        /* get difference vector */
        fy = copy_svector(fycache[i]);
        fybar = psi(ex[i].x,ybar,hbar,sm,sparm);
        lossval = loss(ex[i].x,ex[i].y,ybar,hbar,sparm);
        free_label(ybar);
        free_latent_var(hbar);

        /* DEBUG: dump the violator's psi. Word arrays built in this file
           end with a wnum == 0 entry, so stop there instead of assuming
           the array always holds sizePsi entries. */
        printf("psi=");
        for (j = 0; j < sm->sizePsi && fybar->words[j].wnum != 0; ++j)
            printf("%.4lf ", fybar->words[j].weight);
        printf("\n");

        /* scale difference vector */
        for (f=fy; f; f=f->next) {
            f->factor*=1.0/m;
        }
        for (f=fybar; f; f=f->next) {
            f->factor*=-1.0/m;
        }
        /* add ybar to constraint: fybar -> fy -> previous list */
        append_svector_list(fy,lhs);
        append_svector_list(fybar,fy);
        lhs = fybar;
        *margin+=lossval/m;
    }
    /* compact the linear representation */
    new_constraint = add_list_nn(lhs, sm->sizePsi);
    free_svector(lhs);

    /* DEBUG */
    printf("new_constraint=");
    for (i=1; i<sm->sizePsi+1; i++)
        printf("%.4lf ", new_constraint[i]);
    printf("\n");

    /* count the entries that survive the threshold */
    l=0;
    for (i=1; i<sm->sizePsi+1; i++) {
        if (fabs(new_constraint[i])>1E-10) l++; // non-zero
    }
    words = (WORD*)my_malloc(sizeof(WORD)*(l+1));
    assert(words!=NULL);
    k=0;
    for (i=1; i<sm->sizePsi+1; i++) {
        if (fabs(new_constraint[i])>1E-10) {
            words[k].wnum = i;
            words[k].weight = new_constraint[i];
            k++;
        }
    }
    /* terminating entry */
    words[k].wnum = 0;
    words[k].weight = 0.0;
    fvec = create_svector(words, NULL, 1);

    free(words);
    free(new_constraint);

    return(fvec);

}
int main(int argc, char* argv[]) {

    double *w; /* weight vector */
    int outer_iter;
    long m, i;
    double C, epsilon;
    LEARN_PARM learn_parm;
    KERNEL_PARM kernel_parm;
    char trainfile[1024];
    char modelfile[1024];
    int MAX_ITER;
    /* new struct variables */
    SVECTOR **fycache, *diff, *fy;
    EXAMPLE *ex;
    SAMPLE sample;
    STRUCT_LEARN_PARM sparm;
    STRUCTMODEL sm;

    //double decrement;
    double primal_obj;//, last_primal_obj;
    //double cooling_eps;
    //double stop_crit;

    DebugConfiguration::VerbosityLevel = VerbosityLevel::None;

    /* read input parameters */
    my_read_input_parameters(argc, argv, trainfile, modelfile, &learn_parm, &kernel_parm, &sparm);

    epsilon = learn_parm.eps;
    C = learn_parm.svm_c;
    MAX_ITER = learn_parm.maxiter;

    /* read in examples */
    sample = read_struct_examples(trainfile,&sparm);
    ex = sample.examples;
    m = sample.n;

    /* initialization */
    init_struct_model(sample,&sm,&sparm,&learn_parm,&kernel_parm);
    w = sm.w;

    //w = create_nvector(sm.sizePsi);
    //clear_nvector(w, sm.sizePsi);
    //sm.w = w; /* establish link to w, as long as w does not change pointer */

    /* some training information */
    printf("C: %.8g\n", C);
    printf("epsilon: %.8g\n", epsilon);
    printf("sample.n: %ld\n", sample.n);
    printf("sm.sizePsi: %ld\n", sm.sizePsi);
    fflush(stdout);

    /* impute latent variable for first iteration */
    init_latent_variables(&sample,&learn_parm,&sm,&sparm);

    /* prepare feature vector cache for correct labels with imputed latent variables */
    fycache = (SVECTOR**)malloc(m*sizeof(SVECTOR*));
    for (i=0; i<m; i++) {
        fy = psi(ex[i].x, ex[i].y, ex[i].h, &sm, &sparm);

        /* DEBUG */
        printf("true_psi[%d]=", i);
        for (int j = 0; j < sm.sizePsi; ++j)
            printf("%.4lf ", fy->words[j].weight);
        printf("\n");

        diff = add_list_ss(fy);
        free_svector(fy);
        fy = diff;
        fycache[i] = fy;
    }

    /* outer loop: latent variable imputation */
    outer_iter = 1;
    //last_primal_obj = 0;
    //decrement = 0;
    //cooling_eps = 0.5*C*epsilon;
    //while ((outer_iter<=MIN_OUTER_ITER)||((!stop_crit)&&(outer_iter<MAX_OUTER_ITER))) {
    while (outer_iter<MAX_OUTER_ITER) {
        LearningTracker::NextOuterIteration();
        printf("OUTER ITER %d\n", outer_iter);
        /* cutting plane algorithm */
        primal_obj = cutting_plane_algorithm(w, m, MAX_ITER, C, /*cooling_eps, */fycache, ex, &sm, &sparm);

        /* compute decrement in objective in this outer iteration */
        /*
        decrement = last_primal_obj - primal_obj;
        last_primal_obj = primal_obj;
        printf("primal objective: %.4f\n", primal_obj);
        printf("decrement: %.4f\n", decrement); fflush(stdout);
        stop_crit = (decrement<C*epsilon)&&(cooling_eps<0.5*C*epsilon+1E-8);
        cooling_eps = -decrement*0.01;
        cooling_eps = MAX(cooling_eps, 0.5*C*epsilon);
        printf("cooling_eps: %.8g\n", cooling_eps); */

        /* print new weights */
        printf("W=");
        for (i = 1; i <= sm.sizePsi; ++i)
            printf("%.3f ", sm.w[i]);
        printf("\n");

        /* Save model */
        char modelfile_tmp[1024];
        sprintf(modelfile_tmp, "%s.%d", modelfile, outer_iter);
        write_struct_model(modelfile_tmp, &sm, &sparm);

        /* impute latent variable using updated weight vector */
        for (i=0; i<m; i++) {
            free_latent_var(ex[i].h);
            ex[i].h = infer_latent_variables(ex[i].x, ex[i].y, &sm, &sparm);
        }
        /* re-compute feature vector cache */
        for (i=0; i<m; i++) {
            free_svector(fycache[i]);
            fy = psi(ex[i].x, ex[i].y, ex[i].h, &sm, &sparm);

            /* DEBUG */
            printf("true_psi[%d]=", i);
            for (int j = 0; j < sm.sizePsi; ++j)
                printf("%.4lf ", fy->words[j].weight);
            printf("\n");

            diff = add_list_ss(fy);
            free_svector(fy);
            fy = diff;
            fycache[i] = fy;
        }

        outer_iter++;
    } // end outer loop


    /* write structural model */
    write_struct_model(modelfile, &sm, &sparm);
    // skip testing for the moment

    /* free memory */
    free_struct_sample(sample);
    free_struct_model(sm, &sparm);
    for(i=0; i<m; i++) {
        free_svector(fycache[i]);
    }
    free(fycache);

    return(0);

}