Example #1
0
/* Train an order-zero Markov model from every file listed in dataset d.

   model_name    passed through to mm_new() and sequence_read()
   d             dataset supplying d->entries filenames
   model_column  0 counts states from each sequence's hidden column,
                 any other value counts from the visible column

   Returns the newly created model; its transition matrices are left
   unset (0) and normalised_flag is set to 1. */
mm_model *mm_orderzero_train (char *model_name, dataset *d, int model_column)
{
  mm_model *model = mm_new (model_name, model_column);

  /* only the per-state vectors are allocated for this model */
  model->p_initial = gsl_vector_calloc (model->inputstates);
  model->logp_initial = gsl_vector_calloc (model->inputstates);
  model->p_transition = 0;
  model->logp_transition = 0;

  /* accumulate state occurrence counts over all files in the dataset */
  for (int file = 0; file < d->entries; file++)
    {
      sequence_data *seq = sequence_read (model_name, DATAFILE_FORMAT, d->filenames[file]);
      int *states = (model_column == 0) ? seq->hidden : seq->visible;

      /* one increment per sequence position */
      for (int pos = 0; pos < seq->sequencelength; pos++)
        {
          double count = gsl_vector_get (model->p_initial, states[pos]);
          gsl_vector_set (model->p_initial, states[pos], count + 1.);
        }

      sequence_free (seq);
    }

  /* smooth (add-k), normalise, precalculate logs */
  vector_smooth_normalise_log (model->p_initial, model->logp_initial);
  model->normalised_flag = 1;

  return model;
}
Example #2
0
/* Allocate and initialise a new World from the given seed.
 *
 * `size` is declared outside this block; it is filled in via mm_new()
 * the first time it is found to be zero and reused afterwards.
 * Returns the result of mm_alloc(), i.e. a false/null pointer when the
 * allocation fails. */
struct World * world_new(uint32_t seed){
    struct World * world;
    struct Random * rng;

    if (!size){
        size = mm_new(sizeof(struct World));
    }

    world = mm_alloc(size);
    if (!world){
        return world;
    }

    world->seed = seed;

    /* a temporary generator supplies one value per sub-map, in this order */
    rng = random_new(seed);
    world->altitude = terrain_new(random_random(rng));
    world->climate = climate_new(random_random(rng));
    world->evil = climate_new(random_random(rng));
    random_del(rng);

    return world;
}
Example #3
0
File: main.c Project: zipo/zipo
/* Handler for HUP: logs the event, sets sm_logrotate, tries to load a
 * fresh copy of config_file into sm->config, and then recreates sm->mm.
 * signum is not used. */
static void _sm_signal_hup(int signum)
{
    config_t fresh;

    log_write(sm->log, LOG_NOTICE, "HUP handled. reloading modules...");

    sm_logrotate = 1;

    /* reload dynamic modules */
    fresh = config_new();
    if (!fresh || config_load(fresh, config_file) != 0) {
        /* keep the current config; discard the partially built one */
        log_write(sm->log, LOG_WARNING, "couldn't reload config (%s)", config_file);
        if (fresh) config_free(fresh);
    } else {
        /* swap in the new config; it is deliberately not expanded here,
         * since only the modules are meant to be reloaded */
        config_free(sm->config);
        sm->config = fresh;
    }

    /* the module manager is rebuilt whether or not the reload succeeded */
    mm_free(sm->mm);
    sm->mm = mm_new(sm);
}
int main(int argc, char* argv[]) {
  FILE* F;
  TreeModel *model;
  int i, j, k, alph_size, nstates, do_eqfreqs = 0, exch_mode = 0, 
    list_mode = 0, latex_mode = 0, suppress_diag = 0, ti_tv = 0, 
    scientific_mode = 0,
    induced_aa = 0, do_stop_codons = 0, do_zeroes = 0, symmetric = 0, 
    context_ti_tv = 0, all_branches = 0;
  int startcol, endcol, ncols, branch_no = 0, matrix_idx = 0;
/*   int aa_inv[256]; */
  double t = -1, total_ti = 0, total_tv = 0, rho_s = 0, cpg_ti = 0, 
    cpg_tv = 0, non_cpg_ti = 0, non_cpg_tv = 0, cpg_eqfreq = 0;
  char *rate_format_string = "%8.6f";
  MarkovMatrix *M;
  char c;
  char tuple[5], tuple2[5]; /* , aa_alph[50]; */
  char *subst_mat_fname = NULL, *subst_score_fname = NULL, 
    *subst_mat_fname_paml = NULL, *order1_mod_fname = NULL;
  Matrix *subst_mat = NULL;
  List *matrix_list = lst_new_ptr(20), *traversal = NULL;

  while ((c = (char)getopt(argc, argv, "t:fedlLiM:N:A:B:aszSECh")) != -1) {
   switch(c) {
    case 't':
      if (optarg[0] == 'A') all_branches = 1;
      else t = get_arg_dbl_bounds(optarg, 0, INFTY);
      break;
    case 'f':
      do_eqfreqs = 1;
      break;
    case 'e':
      exch_mode = 1;
      break;
    case 'd':
      suppress_diag = 1;
      break;
    case 'l':
      list_mode = 1;
      break;
    case 'L':
      latex_mode = 1;
      break;
    case 'i':
      ti_tv = 1;
      break;
    case 'M':
      subst_mat_fname = optarg;
      induced_aa = 1;
      break;
    case 'N':
      subst_mat_fname_paml = optarg;
      induced_aa = 1;
      break;
    case 'A':
      subst_score_fname = optarg;
      break;
    case 'B':
      order1_mod_fname = optarg;
      break;
    case 'a':
      induced_aa = 1;
      do_zeroes = 1;
      break;
    case 's':
      do_stop_codons = 1;
      break;
    case 'z':
      do_zeroes = 1;
      break;
    case 'S':
      symmetric = 1;
      break;
    case 'E':
      scientific_mode = 1;
      rate_format_string = "%13.6e";
      break;
    case 'C':
      context_ti_tv = 1;
      break;
    case 'h':
      print_usage();
      exit(0);
    case '?':
      die("Unrecognized option.  Try \"display_rate_matrix -h\" for help.\n");
    }
  }

  set_seed(-1);

  if ((t >= 0 && exch_mode) || (latex_mode && list_mode) || 
      ((ti_tv || subst_mat_fname != NULL || subst_score_fname != NULL || 
        subst_mat_fname_paml != NULL || scientific_mode) && !list_mode) || 
      (subst_mat_fname != NULL && subst_score_fname != NULL) || 
      (subst_score_fname != NULL && subst_mat_fname_paml != NULL) || 
      (subst_mat_fname != NULL && subst_mat_fname_paml != NULL) || 
      optind != argc - 1) {
    die("ERROR: missing required arguments or illegal combination of arguments.\nTry \"display_rate_matrix -h\" for help.\n");
  }

  F = phast_fopen(argv[optind], "r");
  model = tm_new_from_file(F, 1);

  if (context_ti_tv) {
    /* this option requires completely different handling from the others */
    if (model->order != 2) { 
      die("ERROR: -C requires a model of order 3.\n");
    }
    do_context_dependent_ti_tv(model);
    exit(0);
  }

  if (induced_aa) {
    TreeModel *aa_model = tm_induced_aa(model);
    char *codon_to_aa = get_codon_mapping(model->rate_matrix->states);

    /* before freeing model, grab the expected rate of synonymous
       subst, rho_s */
    for (i = 0; i < model->rate_matrix->size; i++)
      for (j = 0; j < model->rate_matrix->size; j++)
        if (i != j && codon_to_aa[i] == codon_to_aa[j])
          rho_s += mm_get(model->rate_matrix, i, j) * 
            vec_get(model->backgd_freqs, i);

    sfree(codon_to_aa);

    tm_free(model);
    model = aa_model;
  }

  if (all_branches) {
    traversal = tr_inorder(model->tree);
    for (matrix_idx = 0; matrix_idx < lst_size(traversal); matrix_idx++) {
      TreeNode *n = lst_get_ptr(traversal, matrix_idx);
      if (n->parent == NULL) { lst_push_ptr(matrix_list, NULL); continue; }
      M = mm_new(model->rate_matrix->size, model->rate_matrix->states, DISCRETE);
      mm_exp(M, model->rate_matrix, n->dparent);
      lst_push_ptr(matrix_list, M);      
    }
  }
  else if (t >= 0) {
    M = mm_new(model->rate_matrix->size, model->rate_matrix->states, DISCRETE);
    mm_exp(M, model->rate_matrix, t);
    lst_push_ptr(matrix_list, M);
  }
  else 
    lst_push_ptr(matrix_list, model->rate_matrix);

  alph_size = (int)strlen(model->rate_matrix->states);
  nstates = model->rate_matrix->size;

  if (subst_mat_fname != NULL) {
    if ((F = fopen(subst_mat_fname, "r")) == NULL) {
      die("ERROR: Can't open %s.\n", subst_mat_fname);
    }    
    subst_mat = read_subst_mat(F, AA_ALPHABET); 
  }
  else if (subst_mat_fname_paml != NULL) {
    if ((F = fopen(subst_mat_fname_paml, "r")) == NULL) {
      die("ERROR: Can't open %s.\n", subst_mat_fname_paml);
    }    
    subst_mat = read_paml_matrix(F, AA_ALPHABET); 
  }
  else if (subst_score_fname != NULL) {
    if ((F = fopen(subst_score_fname, "r")) == NULL) {
      die("ERROR: Can't open %s.\n", subst_score_fname);
    }    
    subst_mat = read_subst_scores(model, F);
  }
  else if (order1_mod_fname != NULL) {
    if ((F = fopen(order1_mod_fname, "r")) == NULL) {
      die("ERROR: Can't open %s.\n", order1_mod_fname);
    }    
    subst_mat = unproject_rates(model, tm_new_from_file(F, 1));
  }

  /* loop through matrices to print */
  for (matrix_idx = 0; matrix_idx < lst_size(matrix_list); matrix_idx++) {
    M = lst_get_ptr(matrix_list, matrix_idx);

    if (all_branches) {
      if (M == NULL) continue;  /* root */
      printf("BRANCH %d (t = %.6f)\n", ++branch_no,
             ((TreeNode*)lst_get_ptr(traversal, matrix_idx))->dparent);
    }

  /* print no more than 16 columns at a time (except with -a) */
  ncols = (induced_aa ? nstates : 16);
  for (startcol = 0; startcol < nstates; startcol += ncols) {
    endcol = min(nstates, startcol+ncols);

    /* table header */
    if (! list_mode) {
      if (latex_mode) {
        printf("\\begin{tabular}{|c|");
        for (i = startcol; i < endcol; i++) printf("r");
        printf("|}\n\\hline\n");
      }
      printf("%-5s ", "");
      if (latex_mode) printf("& ");
      for (i = startcol; i < endcol; i++) {
        get_state_tuple(model, tuple, i);
        if (latex_mode) {
          printf("{\\bf %s}", tuple);
          if (i < endcol-1) printf("& ");
        }
        else printf("%8s ", tuple);
    }
      if (latex_mode) printf("\\\\\n\\hline\n");
      else printf("\n");
    }

    /* table or list contents */
    for (i = 0; i < nstates; i++) {
      if (induced_aa && AA_ALPHABET[i] == '$' && !do_stop_codons) continue;
      get_state_tuple(model, tuple, i);

      /* get total eq freq of tuples containing CpG dinucs */
      for (k = 0; k < model->order; k++) {
        if (tuple[k] == 'C' && tuple[k+1] == 'G') {
          cpg_eqfreq += vec_get(model->backgd_freqs, i);
/*           printf("***CPG***"); */
          break;
        }
      }

      if (latex_mode) printf("{\\bf %s}& ", tuple);
      else if (!list_mode) printf("%-5s ", tuple);
      for (j = startcol; j < endcol; j++) {
        if (induced_aa && AA_ALPHABET[j] == '$' && !do_stop_codons) continue;
        if (latex_mode) printf("$");
        if (list_mode) {
          if (symmetric && j <= i) continue;
          else if ((t < 0 && ! all_branches) 
		   && (i == j || (!do_zeroes && mm_get(M, i, j) == 0))) 
            continue;
          get_state_tuple(model, tuple2, j);
          printf("%-5s %-5s ", tuple, tuple2);
        }
        if (i == j && suppress_diag && !list_mode) printf("%-7s", "-");
        else { 
	  /* get rate or probability */
	  double val = exch_mode == 0 ? mm_get(M, i, j) : 
	    safediv(mm_get(M, i, j), vec_get(model->backgd_freqs,j));
	  /* print value in format %8.6f or %13.6e */
	  printf(rate_format_string, val); 
	  printf(" ");
	}
        if (latex_mode) {
          printf("$");
          if (j < endcol-1) printf("& ");
        }
        else if (list_mode) {
          int ti, is_cpg;
          if (ti_tv) {
            ti = -1;
            is_cpg = 0;
            for (k = 0; k <= model->order; k++) {
              int dig_i = (i % int_pow(alph_size, k+1)) / int_pow(alph_size, k);
              int dig_j = (j % int_pow(alph_size, k+1)) / int_pow(alph_size, k);
              char next_char = '\0', prev_char = '\0';
              if (dig_i != dig_j) {
                ti = is_transition(M->states[dig_i], M->states[dig_j]);
                if (k != model->order)
                  prev_char = M->states[(i % int_pow(alph_size, k+2)) / 
                                        int_pow(alph_size, k+1)];
                if (k != 0)
                  next_char = M->states[(i % int_pow(alph_size, k)) / 
                                        int_pow(alph_size, k-1)];
                if ((M->states[dig_i] == 'C' && next_char == 'G') || 
                    (M->states[dig_i] == 'G' && prev_char == 'C')) 
                  is_cpg = 1;
              }
            }
	    if (ti == -1)
	      die("ERROR ti=-1\n");
            printf("%5s ", ti ? "ti" : "tv");
/*             printf("%5s ", is_cpg ? "CPG" : "-"); */
            if (ti) {
              total_ti += mm_get(M, i, j) * 
                vec_get(model->backgd_freqs, i);
              if (is_cpg) 
                cpg_ti += mm_get(M, i, j) * 
                  vec_get(model->backgd_freqs, i);
              else non_cpg_ti += mm_get(M, i, j) * 
                     vec_get(model->backgd_freqs, i);
            }
            else {
              total_tv += mm_get(M, i, j) * 
                vec_get(model->backgd_freqs, i);
              if (is_cpg)
                cpg_tv += mm_get(M, i, j) * 
                  vec_get(model->backgd_freqs, i);
              else non_cpg_tv += mm_get(M, i, j) * 
                     vec_get(model->backgd_freqs, i);
            }
          }
          if (subst_mat != NULL) {
            if (mat_get(subst_mat, i, j) == NEGINFTY) 
              printf("%8s", "-"); 
            else printf("%8.4f", mat_get(subst_mat, i, j)); 
          }
          printf("\n");
        }
      }
      if (latex_mode) printf("\\\\\n");
      else if (!list_mode) printf("\n");
    }
    
    /* equilibrium freqs (table case only) */
    if (do_eqfreqs && ! list_mode) {
      if (latex_mode) 
        printf("\\hline\n$\\boldsymbol{\\mathbf{\\pi}}$&");
      else 
        printf("%-5s ", "pi");
      for (i = startcol; i < endcol; i++) {
        if (latex_mode) 
          printf("$%8.4f$ ", vec_get(model->backgd_freqs, i));      
        else 
          printf("%8.4f ", vec_get(model->backgd_freqs, i));      
        if (latex_mode && i < endcol-1) printf("& ");
      }
      if (latex_mode) printf("\\\\\n");
      else printf("\n");
    }

    if (latex_mode) printf("\\hline\n\\end{tabular}\n\n");
  }

  /* equilibrium freqs (list case only) */
  if (do_eqfreqs &&  list_mode) {
    for (i = 0; i < nstates; i++) {
      get_state_tuple(model, tuple, i);
      printf("%-5s %-5s ", "-", tuple); //!!
      printf(rate_format_string, vec_get(model->backgd_freqs, i)); 
      printf("\n");
    }
  }
  
  if (ti_tv && list_mode) {
    printf("\n#Total ti/tv = %.4f\n", total_ti/total_tv);
    printf("#CpG ti ratio = %.4f, CpG tv ratio = %.4f\n", 
           cpg_ti/non_cpg_ti /* * (1 - cpg_eqfreq) */ / cpg_eqfreq, 
           cpg_tv/non_cpg_tv /* * (1 - cpg_eqfreq) */ / cpg_eqfreq);
  }
  else if (induced_aa) 
    printf("\n#Total rho_s/rho_v = %.4f\n", rho_s/(3-rho_s));

  if (all_branches == 1) printf("\n\n");
  }

  tm_free(model);
  lst_free(matrix_list);

  return 0;
}