/*
 * Build a histogram from the data file `filename` and print it.
 *
 * Steps, in order: determine the number of columns in the file, find the
 * per-column minima and maxima, allocate a histogram with `nbins` bins
 * spanning those bounds, fill it from the file, output it and release it.
 *
 * nbins    - bin count handed to ndim_histogram_alloc.
 * filename - path of the data file; it is read by get_column_count,
 *            find_min_max and append_to_hist.
 */
void run(unsigned int nbins, char * filename) {
	unsigned int column_count;
	gsl_vector * lower_bounds;
	gsl_vector * upper_bounds;
	ndim_histogram * hist;

	/* One bounds entry per column of the input file. */
	debug("looking for number of columns ...");
	column_count = get_column_count(filename);
	dump_i("number of columns", column_count);
	lower_bounds = gsl_vector_alloc(column_count);
	upper_bounds = gsl_vector_alloc(column_count);

	/* find_min_max fills both vectors from the file contents. */
	debug("finding minima/maxima ...");
	dump_s("in file", filename);
	find_min_max(filename, lower_bounds, upper_bounds);
	dump_v("minima", lower_bounds);
	dump_v("maxima", upper_bounds);

	/*
	 * NOTE(review): the two bounds vectors are not freed in this function;
	 * presumably the histogram keeps them -- confirm against
	 * ndim_histogram_alloc / ndim_histogram_free.
	 */
	debug("creating histogram cube ...");
	hist = ndim_histogram_alloc(nbins, lower_bounds, upper_bounds);

	debug("filling histogram ... ");
	append_to_hist(hist, filename);

	output_hist(hist);
	ndim_histogram_free(hist);
}
示例#2
0
/*
 * Load the data file `datafilename` into `m` via load_data, then run
 * mcmc_check on `m`.
 *
 * m            - object passed to load_data and mcmc_check.
 * datafilename - path handed to load_data (and printed in the debug output).
 */
void mcmc_load_data(mcmc * m, const char * datafilename) {
	/*
	 * NOTE(review): IFDEBUGPARSER presumably makes the statement that
	 * follows it conditional on parser debugging -- confirm against the
	 * macro definition before inserting anything between the two lines.
	 */
	IFDEBUGPARSER
	dump_s("looking for data in file", datafilename);
	load_data(m, datafilename);
	mcmc_check(m);
	IFDEBUGPARSER
	/* No trailing semicolon: presumably the debug macro terminates itself -- verify. */
	debug("loading data successful")
}
示例#3
0
/*
 * Allocate `size` bytes or terminate the program with a diagnostic.
 * A NULL result for a zero-byte request is not treated as an error, because
 * malloc(0) is allowed to return NULL.
 */
static void *lda_xmalloc(size_t size) {
  void *p = malloc(size);
  if (p == NULL && size != 0) {
    fprintf(stderr, "out of memory (requested %lu bytes)\n", (unsigned long) size);
    exit(1);
  }
  return p;
}

/* Allocate `rows` arrays of `cols` doubles each (contents uninitialized). */
static double **lda_alloc_double2(int rows, int cols) {
  double **a = lda_xmalloc(sizeof *a * (size_t) rows);
  int r;
  for (r = 0; r < rows; r++) {
    a[r] = lda_xmalloc(sizeof **a * (size_t) cols);
  }
  return a;
}

/* Release an array obtained from lda_alloc_double2 with the same `rows`. */
static void lda_free_double2(double **a, int rows) {
  int r;
  for (r = 0; r < rows; r++) {
    free(a[r]);
  }
  free(a);
}

/*
 * Allocate a ragged int array with D rows, N[d] sub-rows in row d and
 * M[d][n] + 1 ints in sub-row (d, n). Contents are uninitialized.
 */
static int ***lda_alloc_int3(int D, const int *N, int **M) {
  int ***a = lda_xmalloc(sizeof *a * (size_t) D);
  int d, n;
  for (d = 0; d < D; d++) {
    a[d] = lda_xmalloc(sizeof *a[d] * (size_t) N[d]);
    for (n = 0; n < N[d]; n++) {
      a[d][n] = lda_xmalloc(sizeof *a[d][n] * ((size_t) M[d][n] + 1));
    }
  }
  return a;
}

/* Free every a[d][n], every a[d] and finally a itself (D rows, N[d] sub-rows). */
static void lda_free_int3(int ***a, int D, const int *N) {
  int d, n;
  for (d = 0; d < D; d++) {
    for (n = 0; n < N[d]; n++) {
      free(a[d][n]);
    }
    free(a[d]);
  }
  free(a);
}

/*
 * Entry point of the sampler.
 *
 * Usage: <prog> <entities> <labels> <nIter> <dumpInterval>
 *   entities     - file passed to load_discrete3 (yields w, D, N, M, V)
 *   labels       - file passed to load_labels
 *   nIter        - number of resampling iterations (parsed with atoi)
 *   dumpInterval - z and s are dumped every dumpInterval-th iteration;
 *                  must not be 0
 *
 * Each iteration calls resample_z and resample_s, computes the log posterior,
 * writes progress to stderr and remembers the assignment z with the highest
 * log posterior seen so far in best_z.
 *
 * Returns 0 on success; exits with status 1 on a usage error, a zero
 * dumpInterval, or an allocation failure.
 */
int main(int ARGC, char *ARGV[]) {
  double loglik;
  double bestloglik = 0.0;  /* real value is assigned on the first iteration */
  int iter;
  int D;
  int K;
  int** M;
  int* N;
  int V;
  double alpha;
  double eta;
  double psi;
  double phi;
  double** post_beta;
  double** post_theta;
  double* post_gamma;
  double* post_pi;
  int*** w;
  int*** s;     /* Stop words */
  int*** z;
  int*** best_z;
  int** labels;
  int d;
  int n;
  int m;
  int nIter;
  int dumpInterval;

  if(ARGC != 5) {
    fprintf(stderr, "usage %s <entities> <labels> <nIter> <dumpInterval>\n", ARGV[0]);
    exit(1);
  }

  fprintf(stderr, "-- This program was automatically generated using HBC (v 0.7 beta) from LDA.hier\n--     see http://hal3.name/HBC for more information\n");
  fflush(stderr);
  setall(time(0),time(0));   /* initialize random number generator */


  /* variables defined with --define */
  K = 40;
  alpha = 1.0;
  psi = 1.0;
  phi = 1.0;
  eta = 0.1;

  fprintf(stderr, "Loading data...\n");
  fflush(stderr);
  /* variables defined with --loadD */
  w = load_discrete3(ARGV[1], &D, &N, &M, &V);
  /*
   * NOTE(review): load_labels receives &D and &K, so it may overwrite the
   * D set by load_discrete3 and the K = 40 default above -- confirm.
   */
  labels = load_labels(ARGV[2], &D, &K);
  nIter = atoi(ARGV[3]);
  dumpInterval = atoi(ARGV[4]);

  /*
   * atoi yields 0 for non-numeric input; a zero interval would make the
   * `iter % dumpInterval` below a division by zero.
   */
  if (dumpInterval == 0) {
    fprintf(stderr, "dumpInterval must be a non-zero integer (got \"%s\")\n", ARGV[4]);
    exit(1);
  }

  /* variables defined with --loadM or --loadMI */

  fprintf(stderr, "Allocating memory...\n");
  fflush(stderr);

  post_gamma = lda_xmalloc(sizeof *post_gamma * ((size_t) V + 1));
  post_pi = lda_xmalloc(sizeof *post_pi * (2 + 1));
  post_beta = lda_alloc_double2(K, V + 1);
  post_theta = lda_alloc_double2(D, K + 1);

  s = lda_alloc_int3(D, N, M);
  z = lda_alloc_int3(D, N, M);
  best_z = lda_alloc_int3(D, N, M);


  fprintf(stderr, "Initializing variables...\n");
  fflush(stderr);
  initialize_z(z, D, M, N, K);
  initialize_s(s, D, M, N);
  initialize_post_beta(post_beta, D, K, M, N, V, w, z);
  initialize_post_theta(post_theta, D, K, M, N, z);
  initialize_post_gamma(post_gamma, D, M, N, V, w, s);
  initialize_post_pi(post_pi, D, M, N, s);

  for (iter=1; iter<=nIter; iter++) {
    fprintf(stderr, "iter %d", iter);
    fflush(stderr);
    resample_z(D, M, N, alpha, eta, psi, post_beta, post_theta, w, z, s, labels, K, V);
    resample_s(D, M, N, alpha, eta, psi, phi, post_beta, post_theta, post_gamma, post_pi, w, z, s, labels, K, V);

    loglik = compute_log_posterior(D, K, M, N, V, alpha, post_beta, eta, post_theta, w, z);
    assert(!isnan(loglik));
    fprintf(stderr, "\t%g", loglik);
    if ((iter==1)||(loglik>bestloglik)) {
      bestloglik = loglik;
      fprintf(stderr, " *");
      /*
       * best_z was allocated with the same shape as z, so the snapshot is a
       * plain element copy; no reallocation is needed.
       */
      for (d = 0; d < D; d++) {
        for (n = 0; n < N[d]; n++) {
          for (m = 0; m < M[d][n]; m++) {
            best_z[d][n][m] = z[d][n][m];
          }
        }
      }
    }

    if(iter % dumpInterval == 0) {
      dump_z(D, M, N, z);
      dump_s(D, M, N, s);
    }

    fprintf(stderr, "\n");
    fflush(stderr);
  }

  /*
   * best_z / bestloglik hold the best assignment seen, but are not printed;
   * only the periodic dumps of z and s above are emitted.
   */

  lda_free_int3(best_z, D, N);
  lda_free_int3(z, D, N);
  lda_free_int3(s, D, N);
  lda_free_int3(w, D, N);

  lda_free_double2(post_theta, D);
  lda_free_double2(post_beta, K);
  free(post_pi);
  free(post_gamma);

  /*
   * NOTE(review): labels is not released because its layout (as produced by
   * load_labels) is not visible here.
   */

  free(N);

  for (d = 0; d < D; d++) {
    free(M[d]);
  }
  free(M);


  return 0;
}