Example #1
0
/*
 * Build a BNTree from a tree string and fit its per-branch substitution
 * matrices to a sufficient-statistics file with the EM algorithm.
 *
 * Parameters:
 *   treestr         - tree description handed to parse_tree_string().
 *   modelstr        - model name: "TT0", "TT1", "TT2", "R0", "R1", "R2",
 *                     "G0", "G1" or "G2".  Only R0, R1, G0, G1 and G2 have a
 *                     parameter initializer below; the others are recognized
 *                     but terminate with "not yet supported".
 *   ss_filename     - sufficient-statistics file.  It is first scanned for a
 *                     "NTUPLES = <n>" line and then handed to parse_ss().
 *   model_name      - copied into bntree->name.
 *   output_filename - file rewritten with print_tree() after every EM
 *                     iteration, so it always holds the latest model.
 *
 * Returns the trained BNTree.  Any fatal problem (unknown or unsupported
 * model, unreadable input, unwritable output, no tuples) prints a message to
 * stderr and calls exit(-1).
 *
 * NOTE(review): the strcpy() into bntree->name and the "%s" conversions into
 * tag_str/val_str are unbounded; bounding them needs the declared size of
 * bntree->name and the form of the STRLEN macro, neither visible here.
 * NOTE(review): malloc() results are not checked, and tuple_arr, count_arr
 * and background_probs are never freed here -- confirm whether parse_ss()
 * keeps a reference to any of them before adding free() calls.
 */
BNTree* train_bntree (char *treestr, char *modelstr, char *ss_filename, char *model_name, char *output_filename) {
  FILE *ss_fp, *out_fp;
  int model_type;
  double *background_probs;
  char **tuple_arr;
  double *count_arr;
  int num_tuples = 0;
  double ll;
  double last_ll;
  int iteration;
  int converged;
  char tag_str[STRLEN];
  char val_str[STRLEN];
  int u;
  int i, j, k;
  Node *node;
  int param_num;
  double row_sum;
  double param_val;
  CellListItem *cli;

  BNTree *bntree = NewBNTree(0); /* no cache for training (for now) */

  strcpy (bntree->name, model_name);

  /* map the model name onto its type, context order and state count */
  if (strcmp (modelstr, "TT0") == 0) {
    model_type = TT0;
    bntree->order = 0;
    bntree->num_states = 6;
  }
  else if (strcmp (modelstr, "TT1") == 0) {
    model_type = TT1;
    bntree->order = 1;
    bntree->num_states = 36;
  }
  else if (strcmp (modelstr, "TT2") == 0) {
    model_type = TT2;
    bntree->order = 2;
    bntree->num_states = 216;
  }
  else if (strcmp (modelstr, "R0") == 0) {
    model_type = R0;
    bntree->order = 0;
    bntree->num_states = 6;
    bntree->num_params = 18;
  }
  else if (strcmp (modelstr, "R1") == 0) {
    model_type = R1;
    bntree->order = 1;
    bntree->num_states = 36;
    bntree->num_params = 236;
  }
  else if (strcmp (modelstr, "R2") == 0) {
    model_type = R2;
    bntree->order = 2;
    bntree->num_states = 216;
  }
  else if (strcmp (modelstr, "G0") == 0) {
    model_type = G0;
    bntree->order = 0;
    bntree->num_states = 6;
    bntree->num_params = 36;
  }
  else if (strcmp (modelstr, "G1") == 0) {
    model_type = G1;
    bntree->order = 1;
    bntree->num_states = 36;
    bntree->num_params = 1296;
  }
  else if (strcmp (modelstr, "G2") == 0) {
    model_type = G2;
    bntree->order = 2;
    bntree->num_states = 216;
    bntree->num_params = 46656;
  }
  else {
    fprintf (stderr, "Unknown probability model type: %s\n", modelstr);
    exit(-1);
  }

  /* initialize tree structure */
  parse_tree_string (bntree, treestr);

  /* first pass over the sufficient statistics file: find the tuple count */
  ss_fp = fopen (ss_filename, "r");
  if (ss_fp == NULL) {
    fprintf (stderr, "Could not open sufficient statistics file: %s\n", ss_filename);
    exit (-1);
  }
  while (fscanf (ss_fp, "%s = %s\n", tag_str, val_str) == 2) {
    if (strcmp (tag_str, "NTUPLES") == 0)
      num_tuples = atoi(val_str);
  }
  fclose (ss_fp);

  /* the arrays below are sized by num_tuples and then filled by parse_ss(),
     so a missing or non-positive count cannot be allowed through */
  if (num_tuples <= 0) {
    fprintf (stderr, "No valid NTUPLES entry found in sufficient statistics file: %s\n", ss_filename);
    exit (-1);
  }

  tuple_arr = (char **) malloc (num_tuples * sizeof (char *));
  for (i=0; i<num_tuples; i++)
    tuple_arr[i] = (char *) malloc (STRLEN * sizeof (char));

  count_arr = (double *) malloc (num_tuples * sizeof (double));
  background_probs = (double *) malloc (bntree->num_states * sizeof (double));

  parse_ss (bntree, ss_filename, tuple_arr, count_arr, background_probs);


  /* initialize parameters */
  bntree->param_map = (int **)malloc(bntree->num_states * sizeof(int *));
  for (i=0; i<bntree->num_states; i++)
    bntree->param_map[i] = (int *)malloc(bntree->num_states * sizeof(int));

  bntree->weight_idx = (int **)malloc(bntree->num_states * sizeof(int *));
  for (i=0; i<bntree->num_states; i++)
    bntree->weight_idx[i] = (int *)malloc(bntree->num_states * sizeof(int));

  if (model_type == R0)
    init_params_R0 (bntree);
  else if (model_type == G0)
    init_params_G0 (bntree);

  else if (model_type == R1)
    init_params_R1 (bntree);
  else if (model_type == G1)
    init_params_G1 (bntree);

  else if (model_type == G2)
    init_params_G2 (bntree);
  else {
    /* TT0/TT1/TT2/R2 end up here; they never set num_params above */
    fprintf (stderr, "Model type %s not yet supported\n", modelstr);
    exit (-1);
  }


  /* create inverse parameter map: for each parameter number, the list of
     (row, col) cells of the substitution matrix that share it */
  bntree->inv_param_map = (CellListItem **)malloc(bntree->num_params * sizeof (CellListItem *));
  for (i=0; i<bntree->num_params; i++)
    bntree->inv_param_map[i] = NewCellList();

  for (i=0; i<bntree->num_states; i++) {
    for (j=0; j<bntree->num_states; j++) {
      param_num = bntree->param_map[i][j];
      cell_list_append(bntree->inv_param_map[param_num], i, j);
    }
  }


  /* allocate local CPDs and expectations (every node except the root) */
  for (u=0; u<bntree->num_nodes; u++) {
    node = bntree->preorder[u];
    if (node->parent == NULL) continue;

    node->E = (double **)malloc(bntree->num_states * sizeof(double *));
    for (i=0; i<bntree->num_states; i++)
      node->E[i] = (double *)malloc(bntree->num_states * sizeof(double));

    node->P = (float **)malloc(bntree->num_states * sizeof(float *));
    for (i=0; i<bntree->num_states; i++)
      node->P[i] = (float *)malloc(bntree->num_states * sizeof(float));

    node->W = (double **)malloc(bntree->num_states * sizeof(double *));
    for (i=0; i<bntree->num_states; i++)
      node->W[i] = (double *)malloc(bntree->num_states * sizeof(double));
  }

  /* Seed the generator exactly once.  Reseeding with time(NULL) for every
     node restarts the same sequence whenever the loop finishes within one
     second, which gave all branches identical "random" parameters. */
  srand(time(NULL));

  /* randomly initialize the parameters in a way that guarantees all
     probabilities will be within 0 and 1, and all rows sum to 1 */
  /* also, self substitution probs will be at least 0.5 */
  for (u=0; u<bntree->num_nodes; u++) {
    node = bntree->preorder[u];
    if (node->parent == NULL) continue;

    /* NOTE(review): this range presumes the last num_states parameter
       numbers belong to the diagonal cells -- confirm against the
       init_params_* functions */
    for (k=0; k<bntree->num_params - bntree->num_states; k++) {
      param_val = bounded_random (0.05, 0.50) / (bntree->num_states - 1);
      for (cli = bntree->inv_param_map[k]; cli != NULL; cli = cli->next) {
        i = cli->row;
        j = cli->col;
        node->P[i][j] = param_val;
      }
    }

    /* the diagonal takes whatever probability mass the row has left */
    for (i=0; i<bntree->num_states; i++) {
      row_sum = 0;
      for (j=0; j<bntree->num_states; j++) {
        if (j != i) {
          row_sum += node->P[i][j];
        }
      }
      node->P[i][i] = 1 - row_sum;
    }
  }

  /* run EM algorithm */
  /* We'll output the current tree after every iteration */
  iteration = 0;
  converged = 0;
  ll = calc_log_likelihood (bntree, tuple_arr, count_arr, num_tuples, 0);
  fprintf (stderr, "Initial log likelihood: %f\n", ll);
  while (! converged) {
    iteration++;

    /* improve likelihood */
    Estep(bntree, tuple_arr, count_arr, num_tuples);
    Mstep(bntree);

    /* output new model */
    out_fp = fopen (output_filename, "w");
    if (out_fp == NULL) {
      fprintf (stderr, "Could not open output file for writing: %s\n", output_filename);
      exit (-1);
    }
    print_tree (out_fp, bntree);
    fclose (out_fp);

    /* check for convergence: stop once the relative change in log
       likelihood drops below EM_TOL or the iteration cap is reached */
    last_ll = ll;
    ll = calc_log_likelihood (bntree, tuple_arr, count_arr, num_tuples, 0);
    fprintf (stderr, "EM iteration %d, log likelihood: %f\n", iteration, ll);
    if ( (last_ll - ll) / last_ll < EM_TOL || iteration >= MAX_EM_ITERS)
      converged = 1;
  }

  return bntree;
}
Example #2
0
/*
 * Append the cell (x, y) to the cell list that map_get_level() returns
 * for level t of the given map.
 *
 * NOTE(review): the result of map_get_level() is passed on unchecked, so
 * this relies on it yielding a usable list for t -- confirm.
 */
void map_append(map *map, int t, int x, int y) {
	cell_list_append(map_get_level(map, t), x, y);
}