/*
 * Train an order-zero Markov model from every file listed in a dataset.
 *
 * model_name   - passed to mm_new() and to sequence_read() for each file
 * d            - dataset; d->entries files named by d->filenames[] are read
 * model_column - 0 counts the symbols in each sequence's `hidden` array,
 *                any other value counts those in its `visible` array
 *
 * Returns the newly created model.  Only the initial-state vectors are
 * allocated; both transition members are set to 0.  The model is returned
 * with normalised_flag set to 1.
 */
mm_model *mm_orderzero_train (char *model_name, dataset *d, int model_column)
{
  mm_model *model = mm_new (model_name, model_column);

  /* zero-filled count vectors, one slot per input state */
  model->p_initial = gsl_vector_calloc (model->inputstates);
  model->logp_initial = gsl_vector_calloc (model->inputstates);
  model->p_transition = 0;
  model->logp_transition = 0;

  /* accumulate symbol counts over every file of the dataset */
  for (int file = 0; file < d->entries; file++)
    {
      sequence_data *seq =
        sequence_read (model_name, DATAFILE_FORMAT, d->filenames[file]);
      int *symbols = (model_column == 0) ? seq->hidden : seq->visible;

      for (int pos = 0; pos < seq->sequencelength; pos++)
        {
          int sym = symbols[pos];
          double count = gsl_vector_get (model->p_initial, sym) + 1.;

          gsl_vector_set (model->p_initial, sym, count);
        }

      sequence_free (seq);
    }

  /* turn raw counts into probabilities and log-probabilities
     (smoothing and normalisation are done by the helper) */
  vector_smooth_normalise_log (model->p_initial, model->logp_initial);
  model->normalised_flag = 1;

  return model;
}
/*
 * Initialize new World.
 *
 * On first use the file-level `size` handle is created with
 * mm_new(sizeof(struct World)); the world itself is then obtained from
 * mm_alloc(size).  A temporary Random seeded with `seed` supplies one
 * value each, in this order, to the altitude, climate and evil
 * generators, and is deleted before returning.
 *
 * Returns the new world, or the null result of mm_alloc() if the
 * allocation failed (in which case nothing else is created).
 */
struct World * world_new(uint32_t seed){
	struct World * world;
	struct Random * rng;

	if (!size){
		size = mm_new(sizeof(struct World));
	}

	world = mm_alloc(size);
	if (!world){
		return world;
	}

	world->seed = seed;

	rng = random_new(seed);
	world->altitude = terrain_new(random_random(rng));
	world->climate = climate_new(random_random(rng));
	/* evil is built with climate_new() as well, exactly as before */
	world->evil = climate_new(random_random(rng));
	random_del(rng);

	return world;
}
/*
 * HUP handler: request a log rotation, try to reload the configuration
 * from config_file, and rebuild the module manager.
 *
 * If the new configuration cannot be created or loaded, a warning is
 * logged and the current sm->config is kept.  The module manager
 * (sm->mm) is freed and recreated in either case.
 *
 * NOTE(review): if this is installed directly as a signal handler, the
 * logging and allocation calls made here are unlikely to be
 * async-signal-safe -- confirm how it is invoked.
 */
static void _sm_signal_hup(int signum)
{
    config_t fresh;

    log_write(sm->log, LOG_NOTICE, "HUP handled. reloading modules...");

    sm_logrotate = 1;

    /* reload dynamic modules */
    fresh = config_new();
    if (!fresh || config_load(fresh, config_file) != 0) {
        log_write(sm->log, LOG_WARNING, "couldn't reload config (%s)", config_file);
        if (fresh)
            config_free(fresh);
    } else {
        /* swap in the new config; it is deliberately not re-expanded,
           since only the modules are meant to be reloaded */
        config_free(sm->config);
        sm->config = fresh;
    }

    mm_free(sm->mm);
    sm->mm = mm_new(sm);
}
int main(int argc, char* argv[]) { FILE* F; TreeModel *model; int i, j, k, alph_size, nstates, do_eqfreqs = 0, exch_mode = 0, list_mode = 0, latex_mode = 0, suppress_diag = 0, ti_tv = 0, scientific_mode = 0, induced_aa = 0, do_stop_codons = 0, do_zeroes = 0, symmetric = 0, context_ti_tv = 0, all_branches = 0; int startcol, endcol, ncols, branch_no = 0, matrix_idx = 0; /* int aa_inv[256]; */ double t = -1, total_ti = 0, total_tv = 0, rho_s = 0, cpg_ti = 0, cpg_tv = 0, non_cpg_ti = 0, non_cpg_tv = 0, cpg_eqfreq = 0; char *rate_format_string = "%8.6f"; MarkovMatrix *M; char c; char tuple[5], tuple2[5]; /* , aa_alph[50]; */ char *subst_mat_fname = NULL, *subst_score_fname = NULL, *subst_mat_fname_paml = NULL, *order1_mod_fname = NULL; Matrix *subst_mat = NULL; List *matrix_list = lst_new_ptr(20), *traversal = NULL; while ((c = (char)getopt(argc, argv, "t:fedlLiM:N:A:B:aszSECh")) != -1) { switch(c) { case 't': if (optarg[0] == 'A') all_branches = 1; else t = get_arg_dbl_bounds(optarg, 0, INFTY); break; case 'f': do_eqfreqs = 1; break; case 'e': exch_mode = 1; break; case 'd': suppress_diag = 1; break; case 'l': list_mode = 1; break; case 'L': latex_mode = 1; break; case 'i': ti_tv = 1; break; case 'M': subst_mat_fname = optarg; induced_aa = 1; break; case 'N': subst_mat_fname_paml = optarg; induced_aa = 1; break; case 'A': subst_score_fname = optarg; break; case 'B': order1_mod_fname = optarg; break; case 'a': induced_aa = 1; do_zeroes = 1; break; case 's': do_stop_codons = 1; break; case 'z': do_zeroes = 1; break; case 'S': symmetric = 1; break; case 'E': scientific_mode = 1; rate_format_string = "%13.6e"; break; case 'C': context_ti_tv = 1; break; case 'h': print_usage(); exit(0); case '?': die("Unrecognized option. 
Try \"display_rate_matrix -h\" for help.\n"); } } set_seed(-1); if ((t >= 0 && exch_mode) || (latex_mode && list_mode) || ((ti_tv || subst_mat_fname != NULL || subst_score_fname != NULL || subst_mat_fname_paml != NULL || scientific_mode) && !list_mode) || (subst_mat_fname != NULL && subst_score_fname != NULL) || (subst_score_fname != NULL && subst_mat_fname_paml != NULL) || (subst_mat_fname != NULL && subst_mat_fname_paml != NULL) || optind != argc - 1) { die("ERROR: missing required arguments or illegal combination of arguments.\nTry \"display_rate_matrix -h\" for help.\n"); } F = phast_fopen(argv[optind], "r"); model = tm_new_from_file(F, 1); if (context_ti_tv) { /* this option requires completely different handling from the others */ if (model->order != 2) { die("ERROR: -C requires a model of order 3.\n"); } do_context_dependent_ti_tv(model); exit(0); } if (induced_aa) { TreeModel *aa_model = tm_induced_aa(model); char *codon_to_aa = get_codon_mapping(model->rate_matrix->states); /* before freeing model, grab the expected rate of synonymous subst, rho_s */ for (i = 0; i < model->rate_matrix->size; i++) for (j = 0; j < model->rate_matrix->size; j++) if (i != j && codon_to_aa[i] == codon_to_aa[j]) rho_s += mm_get(model->rate_matrix, i, j) * vec_get(model->backgd_freqs, i); sfree(codon_to_aa); tm_free(model); model = aa_model; } if (all_branches) { traversal = tr_inorder(model->tree); for (matrix_idx = 0; matrix_idx < lst_size(traversal); matrix_idx++) { TreeNode *n = lst_get_ptr(traversal, matrix_idx); if (n->parent == NULL) { lst_push_ptr(matrix_list, NULL); continue; } M = mm_new(model->rate_matrix->size, model->rate_matrix->states, DISCRETE); mm_exp(M, model->rate_matrix, n->dparent); lst_push_ptr(matrix_list, M); } } else if (t >= 0) { M = mm_new(model->rate_matrix->size, model->rate_matrix->states, DISCRETE); mm_exp(M, model->rate_matrix, t); lst_push_ptr(matrix_list, M); } else lst_push_ptr(matrix_list, model->rate_matrix); alph_size = 
(int)strlen(model->rate_matrix->states); nstates = model->rate_matrix->size; if (subst_mat_fname != NULL) { if ((F = fopen(subst_mat_fname, "r")) == NULL) { die("ERROR: Can't open %s.\n", subst_mat_fname); } subst_mat = read_subst_mat(F, AA_ALPHABET); } else if (subst_mat_fname_paml != NULL) { if ((F = fopen(subst_mat_fname_paml, "r")) == NULL) { die("ERROR: Can't open %s.\n", subst_mat_fname_paml); } subst_mat = read_paml_matrix(F, AA_ALPHABET); } else if (subst_score_fname != NULL) { if ((F = fopen(subst_score_fname, "r")) == NULL) { die("ERROR: Can't open %s.\n", subst_score_fname); } subst_mat = read_subst_scores(model, F); } else if (order1_mod_fname != NULL) { if ((F = fopen(order1_mod_fname, "r")) == NULL) { die("ERROR: Can't open %s.\n", order1_mod_fname); } subst_mat = unproject_rates(model, tm_new_from_file(F, 1)); } /* loop through matrices to print */ for (matrix_idx = 0; matrix_idx < lst_size(matrix_list); matrix_idx++) { M = lst_get_ptr(matrix_list, matrix_idx); if (all_branches) { if (M == NULL) continue; /* root */ printf("BRANCH %d (t = %.6f)\n", ++branch_no, ((TreeNode*)lst_get_ptr(traversal, matrix_idx))->dparent); } /* print no more than 16 columns at a time (except with -a) */ ncols = (induced_aa ? nstates : 16); for (startcol = 0; startcol < nstates; startcol += ncols) { endcol = min(nstates, startcol+ncols); /* table header */ if (! 
list_mode) { if (latex_mode) { printf("\\begin{tabular}{|c|"); for (i = startcol; i < endcol; i++) printf("r"); printf("|}\n\\hline\n"); } printf("%-5s ", ""); if (latex_mode) printf("& "); for (i = startcol; i < endcol; i++) { get_state_tuple(model, tuple, i); if (latex_mode) { printf("{\\bf %s}", tuple); if (i < endcol-1) printf("& "); } else printf("%8s ", tuple); } if (latex_mode) printf("\\\\\n\\hline\n"); else printf("\n"); } /* table or list contents */ for (i = 0; i < nstates; i++) { if (induced_aa && AA_ALPHABET[i] == '$' && !do_stop_codons) continue; get_state_tuple(model, tuple, i); /* get total eq freq of tuples containing CpG dinucs */ for (k = 0; k < model->order; k++) { if (tuple[k] == 'C' && tuple[k+1] == 'G') { cpg_eqfreq += vec_get(model->backgd_freqs, i); /* printf("***CPG***"); */ break; } } if (latex_mode) printf("{\\bf %s}& ", tuple); else if (!list_mode) printf("%-5s ", tuple); for (j = startcol; j < endcol; j++) { if (induced_aa && AA_ALPHABET[j] == '$' && !do_stop_codons) continue; if (latex_mode) printf("$"); if (list_mode) { if (symmetric && j <= i) continue; else if ((t < 0 && ! all_branches) && (i == j || (!do_zeroes && mm_get(M, i, j) == 0))) continue; get_state_tuple(model, tuple2, j); printf("%-5s %-5s ", tuple, tuple2); } if (i == j && suppress_diag && !list_mode) printf("%-7s", "-"); else { /* get rate or probability */ double val = exch_mode == 0 ? 
mm_get(M, i, j) : safediv(mm_get(M, i, j), vec_get(model->backgd_freqs,j)); /* print value in format %8.6f or %13.6e */ printf(rate_format_string, val); printf(" "); } if (latex_mode) { printf("$"); if (j < endcol-1) printf("& "); } else if (list_mode) { int ti, is_cpg; if (ti_tv) { ti = -1; is_cpg = 0; for (k = 0; k <= model->order; k++) { int dig_i = (i % int_pow(alph_size, k+1)) / int_pow(alph_size, k); int dig_j = (j % int_pow(alph_size, k+1)) / int_pow(alph_size, k); char next_char = '\0', prev_char = '\0'; if (dig_i != dig_j) { ti = is_transition(M->states[dig_i], M->states[dig_j]); if (k != model->order) prev_char = M->states[(i % int_pow(alph_size, k+2)) / int_pow(alph_size, k+1)]; if (k != 0) next_char = M->states[(i % int_pow(alph_size, k)) / int_pow(alph_size, k-1)]; if ((M->states[dig_i] == 'C' && next_char == 'G') || (M->states[dig_i] == 'G' && prev_char == 'C')) is_cpg = 1; } } if (ti == -1) die("ERROR ti=-1\n"); printf("%5s ", ti ? "ti" : "tv"); /* printf("%5s ", is_cpg ? "CPG" : "-"); */ if (ti) { total_ti += mm_get(M, i, j) * vec_get(model->backgd_freqs, i); if (is_cpg) cpg_ti += mm_get(M, i, j) * vec_get(model->backgd_freqs, i); else non_cpg_ti += mm_get(M, i, j) * vec_get(model->backgd_freqs, i); } else { total_tv += mm_get(M, i, j) * vec_get(model->backgd_freqs, i); if (is_cpg) cpg_tv += mm_get(M, i, j) * vec_get(model->backgd_freqs, i); else non_cpg_tv += mm_get(M, i, j) * vec_get(model->backgd_freqs, i); } } if (subst_mat != NULL) { if (mat_get(subst_mat, i, j) == NEGINFTY) printf("%8s", "-"); else printf("%8.4f", mat_get(subst_mat, i, j)); } printf("\n"); } } if (latex_mode) printf("\\\\\n"); else if (!list_mode) printf("\n"); } /* equilibrium freqs (table case only) */ if (do_eqfreqs && ! 
list_mode) { if (latex_mode) printf("\\hline\n$\\boldsymbol{\\mathbf{\\pi}}$&"); else printf("%-5s ", "pi"); for (i = startcol; i < endcol; i++) { if (latex_mode) printf("$%8.4f$ ", vec_get(model->backgd_freqs, i)); else printf("%8.4f ", vec_get(model->backgd_freqs, i)); if (latex_mode && i < endcol-1) printf("& "); } if (latex_mode) printf("\\\\\n"); else printf("\n"); } if (latex_mode) printf("\\hline\n\\end{tabular}\n\n"); } /* equilibrium freqs (list case only) */ if (do_eqfreqs && list_mode) { for (i = 0; i < nstates; i++) { get_state_tuple(model, tuple, i); printf("%-5s %-5s ", "-", tuple); //!! printf(rate_format_string, vec_get(model->backgd_freqs, i)); printf("\n"); } } if (ti_tv && list_mode) { printf("\n#Total ti/tv = %.4f\n", total_ti/total_tv); printf("#CpG ti ratio = %.4f, CpG tv ratio = %.4f\n", cpg_ti/non_cpg_ti /* * (1 - cpg_eqfreq) */ / cpg_eqfreq, cpg_tv/non_cpg_tv /* * (1 - cpg_eqfreq) */ / cpg_eqfreq); } else if (induced_aa) printf("\n#Total rho_s/rho_v = %.4f\n", rho_s/(3-rho_s)); if (all_branches == 1) printf("\n\n"); } tm_free(model); lst_free(matrix_list); return 0; }