/*
 * Dump the subtree rooted at p to f in pre-order.
 *
 * Every node produces one line: a 'Y'/'N' flag telling whether a left child
 * exists, the same flag for the right child, a space and the node name.
 * The children (when present) follow, left before right.
 *
 * p is dereferenced unconditionally and must not be NULL.
 */
static void write_tree(tree_node *p, FILE *f)
{
    char left_flag = 'Y';
    char right_flag = 'Y';

    if (p->left == NULL)
        left_flag = 'N';
    if (p->right == NULL)
        right_flag = 'N';

    fprintf(f, "%c%c %s\n", left_flag, right_flag, p->name);

    if (p->left != NULL)
        write_tree(p->left, f);
    if (p->right != NULL)
        write_tree(p->right, f);
}
/*
 * Write the Huffman tree recursively.
 *
 * A node without a left child is treated as a leaf: a 1 bit is emitted,
 * followed by the node's symbol in SYMBOL_LENGTH units (as interpreted by
 * bitfile_put_symbol). Any other node is internal: a 0 bit is emitted,
 * followed by the left subtree and then the right subtree.
 */
void write_tree(file *f, huf_node *node)
{
    if (!node->left) {
        /* leaf node */
        bitfile_put_bit(f, 1);
        bitfile_put_symbol(f, node->symbol, SYMBOL_LENGTH);
        return;
    }

    /* internal node */
    bitfile_put_bit(f, 0);
    write_tree(f, node->left);
    write_tree(f, node->right);
}
gboolean gitg_commit_commit(GitgCommit *commit, gchar const *comment, gboolean signoff, GError **error) { g_return_val_if_fail(GITG_IS_COMMIT(commit), FALSE); gchar *tree; if (!write_tree(commit, &tree, error)) return FALSE; gchar *ref; gboolean ret = commit_tree(commit, tree, comment, signoff, &ref, error); g_free(tree); if (!ret) return FALSE; gchar *subject = comment_parse_subject(comment); ret = update_ref(commit, ref, subject, error); g_free(subject); if (!ret) return FALSE; gitg_repository_reload(commit->priv->repository); return TRUE; }
/*
 * Serialise a tree cache into `out`.
 *
 * The entry counts are recomputed first, then the cache is written
 * recursively. Returns -1 if the buffer reports an out-of-memory condition
 * afterwards, 0 otherwise.
 */
int git_tree_cache_write(git_buf *out, git_tree_cache *tree)
{
	recount_entries(tree);
	write_tree(out, tree);

	if (git_buf_oom(out))
		return -1;

	return 0;
}
//vl_bool CHIKMTree::vl_hikm_write_tree(CString strSavePath) vl_bool CHIKMTree::vl_hikm_write_tree(const char* strSavePath) { char strFileName[MAX_PATH]; strcpy( strFileName, strSavePath ); strcat( strFileName, "\\cluster.txt" ); //CString strFileName = strSavePath + "\\cluster.txt"; FILE * file; file = fopen( strFileName,"wb"); if (!file) { return FALSE; } if(fprintf(file,"%u %u %u %d %d\n", m_VlHIKMTree->M, m_VlHIKMTree->K, m_VlHIKMTree->depth, m_VlHIKMTree->max_niters, m_VlHIKMTree->verb) < 0){ return FALSE; } fclose(file); if(!write_tree(m_VlHIKMTree->root, m_VlHIKMTree->depth, strSavePath, "0")){ return FALSE; } return TRUE; }
/*
 * ROOT/TTree I/O benchmark: prints a banner, writes "testio.root" via
 * write_tree() and hands the value it returns to read_tree() for the same
 * file.
 */
void BenchTree()
{
   const char *fname2 = "testio.root";

   printf("\n************ ROOT/TTree I/O ************ \n");

   Double_t wbytes = write_tree(fname2,100000000,0);
   read_tree(fname2,wbytes);
}
/*
 * Menu action: print the tree.
 *
 * When *number_of_nodes is zero a failure message is printed and nothing
 * else happens; otherwise the tree is written and "done." is printed.
 * number_of_nodes is dereferenced unconditionally.
 */
void select_print(unsigned int* number_of_nodes)
{
    if (*number_of_nodes == 0) {
        puts("failed - tree empty!");
        return;
    }

    write_tree();
    puts("done.");
}
/*
 * Public entry point for dumping a tree to f.
 * An empty tree (p == NULL) is written as the single line "XX"; any other
 * tree is delegated to write_tree().
 */
void tree_write(tree_node *p, FILE *f)
{
    if (p != NULL) {
        write_tree(p, f);
    } else {
        fprintf(f, "XX\n");
    }
}
/*
 * Append one tree-cache node, followed by all of its children, to `out`.
 *
 * The node header is: name, a NUL byte, entry_count, a space,
 * children_count and a newline. GIT_OID_RAWSZ raw bytes of the oid follow
 * unless entry_count is -1. Children are written in array order.
 */
static void write_tree(git_buf *out, git_tree_cache *tree)
{
	size_t child = 0;

	git_buf_printf(out, "%s%c%"PRIdZ" %"PRIuZ"\n", tree->name, 0, tree->entry_count, tree->children_count);

	if (tree->entry_count != -1)
		git_buf_put(out, (const char *) &tree->oid, GIT_OID_RAWSZ);

	while (child < tree->children_count) {
		write_tree(out, tree->children[child]);
		child++;
	}
}
void render(struct page *p) { int depth; FILE *out; struct lacy_env env; struct page_stack p_stack; struct ut_str outfile; str_init(&curtok); if (NULL == p) return; str_init(&outfile); str_append_str(&outfile, conf.output_dir.s); str_append(&outfile, '/'); str_append_str(&outfile, p->file_path); /* depth - 1 since we added output dir to path */ depth = build_depth(outfile.s) - 1; if (NULL == (out = fopen(outfile.s, "w"))) fatal("Unable to open: %: ", outfile.s); p_stack.size = 0; p_stack.pos = 0; /* Build Environment */ env.depth = depth; env.p_stack = &p_stack; env.sym_tbl = NULL; env_build(p, &env); /* set stack back to top */ p_stack.pos = 0; /* do it already */ build_tree(&env); write_tree(out, &env); env_free(&env); fclose(out); str_free(&curtok); if (verbosity > 0) { printf("Rendered %s\n", outfile.s); } str_free(&outfile); }
/*
 * Produce the tree object describing `index` and store its id in `oid`.
 *
 * Fails with GIT_EBAREINDEX when the index has no repository. When the
 * index carries a tree cache whose root entry count is non-negative, the
 * cached oid is copied and nothing is written. Otherwise the tree is built
 * from the index entries, starting at the root directory.
 *
 * Returns GIT_SUCCESS, or a (rethrown) negative error code.
 */
int git_tree_create_fromindex(git_oid *oid, git_index *index)
{
	int result;

	if (index->repository == NULL)
		return git__throw(GIT_EBAREINDEX, "Failed to create tree. The index file is not backed up by an existing repository");

	if (index->tree == NULL || index->tree->entries < 0) {
		/* The tree cache didn't help us */
		result = write_tree(oid, index, "", 0);
		if (result < GIT_SUCCESS)
			return git__rethrow(result, "Failed to create tree");

		return GIT_SUCCESS;
	}

	git_oid_cpy(oid, &index->tree->oid);
	return GIT_SUCCESS;
}
/*
 * Open `filename`, read a tree from it and return the tree.
 *
 * Calls die() when the file cannot be opened. At HIGH_VERBOSE verbosity or
 * above the tree that was read is echoed to stderr.
 */
TREE_T* read_tree_from_file
  (char* filename)
{
  FILE* tree_file = NULL;
  TREE_T* tree;

  if (open_file(filename, "r", 1, "tree", "tree", &tree_file) == 0) {
    die("Couldn't open the file %s.\n", filename);
  }

  read_tree(tree_file, &tree);
  (void) fclose(tree_file);

  if (verbosity < HIGH_VERBOSE) {
    return tree;
  }

  fprintf(stderr, "Read tree: ");
  write_tree(tree, stderr);
  return tree;
}
//static vl_bool write_tree(VlHIKMNode* node, vl_uint32 height, CString strSavePath, CString strInd) static vl_bool write_tree(VlHIKMNode* node, vl_uint32 height, const char* strSavePath, const char* strInd) { //CString strFileClusterName = strSavePath + "\\" + strInd + ".txt"; char strFileClusterName[MAX_PATH]; strcpy( strFileClusterName, strSavePath ); strcat( strFileClusterName, "\\" ); strcat( strFileClusterName, strInd ); strcat( strFileClusterName, ".txt" ); if(!node->filter->vl_ikm_write_tree(strFileClusterName)){ return FALSE; } if (height>1) { vl_uint32 k; for (k=0; k<node->filter->vl_ikm_get_k(); k++) { //CString strIndChild; //strIndChild.Format( _T("%3d"), k ); //strIndChild.Replace( _T(" "), _T("0") ); //strIndChild = strInd + strIndChild; char strIndChild[MAX_PATH]; sprintf( strIndChild, "%s%4d", strInd, k ); for ( int c = 0; c < strlen(strIndChild); c++ ) { if ( strIndChild[c] == ' ') { strIndChild[c] = '0'; } } if(!write_tree(node->children[k], height-1, strSavePath, strIndChild)){ return FALSE; } } } return TRUE; }
/************************************************************************* * Entry point for pmp_bf *************************************************************************/ int main(int argc, char *argv[]) { char* bg_filename = NULL; char* motif_name = "motif"; // Use this motif name in the output. STRING_LIST_T* selected_motifs = NULL; double fg_rate = 1.0; double bg_rate = 1.0; double purine_pyrimidine = 1.0; // r double transition_transversion = 0.5; // R double pseudocount = 0.1; GAP_SUPPORT_T gap_support = SKIP_GAPS; MODEL_TYPE_T model_type = F81_MODEL; BOOLEAN_T use_halpern_bruno = FALSE; char* ustar_label = NULL; // TLB; create uniform star tree int i; program_name = "pmp_bf"; /********************************************** * COMMAND LINE PROCESSING **********************************************/ // Define command line options. (FIXME: Repeated code) // FIXME: Note that if you add or remove options you // must change n_options. int n_options = 12; cmdoption const pmp_options[] = { {"hb", NO_VALUE}, {"ustar", REQUIRED_VALUE}, {"model", REQUIRED_VALUE}, {"pur-pyr", REQUIRED_VALUE}, {"transition-transversion", REQUIRED_VALUE}, {"bg", REQUIRED_VALUE}, {"fg", REQUIRED_VALUE}, {"motif", REQUIRED_VALUE}, {"motif-name", REQUIRED_VALUE}, {"bgfile", REQUIRED_VALUE}, {"pseudocount", REQUIRED_VALUE}, {"verbosity", REQUIRED_VALUE} }; int option_index = 0; // Define the usage message. char usage[1000] = ""; strcat(usage, "USAGE: pmp [options] <tree file> <MEME file>\n"); strcat(usage, "\n"); strcat(usage, " Options:\n"); // Evolutionary model parameters. strcat(usage, " --hb\n"); strcat(usage, " --model single|average|jc|k2|f81|f84|hky|tn"); strcat(usage, " (default=f81)\n"); strcat(usage, " --pur-pyr <float> (default=1.0)\n"); strcat(usage, " --transition-transversion <float> (default=0.5)\n"); strcat(usage, " --bg <float> (default=1.0)\n"); strcat(usage, " --fg <float> (default=1.0)\n"); // Motif parameters. 
strcat(usage, " --motif <id> (default=all)\n"); strcat(usage, " --motif-name <string> (default from motif file)\n"); // Miscellaneous parameters strcat(usage, " --bgfile <background> (default from motif file)\n"); strcat(usage, " --pseudocount <float> (default=0.1)\n"); strcat(usage, " --ustar <label>\n"); // TLB; create uniform star tree strcat(usage, " --verbosity [1|2|3|4] (default 2)\n"); strcat(usage, "\n Prints the FP and FN rate at each of 10000 score values.\n"); strcat(usage, "\n Output format: [<motif_id> score <score> FPR <fpr> TPR <tpr>]+\n"); // Parse the command line. if (simple_setopt(argc, argv, n_options, pmp_options) != NO_ERROR) { die("Error processing command line options: option name too long.\n"); } while (TRUE) { int c = 0; char* option_name = NULL; char* option_value = NULL; const char * message = NULL; // Read the next option, and break if we're done. c = simple_getopt(&option_name, &option_value, &option_index); if (c == 0) { break; } else if (c < 0) { (void) simple_getopterror(&message); die("Error processing command line options (%s)\n", message); } if (strcmp(option_name, "model") == 0) { if (strcmp(option_value, "jc") == 0) { model_type = JC_MODEL; } else if (strcmp(option_value, "k2") == 0) { model_type = K2_MODEL; } else if (strcmp(option_value, "f81") == 0) { model_type = F81_MODEL; } else if (strcmp(option_value, "f84") == 0) { model_type = F84_MODEL; } else if (strcmp(option_value, "hky") == 0) { model_type = HKY_MODEL; } else if (strcmp(option_value, "tn") == 0) { model_type = TAMURA_NEI_MODEL; } else if (strcmp(option_value, "single") == 0) { model_type = SINGLE_MODEL; } else if (strcmp(option_value, "average") == 0) { model_type = AVERAGE_MODEL; } else { die("Unknown model: %s\n", option_value); } } else if (strcmp(option_name, "hb") == 0){ use_halpern_bruno = TRUE; } else if (strcmp(option_name, "ustar") == 0){ // TLB; create uniform star tree ustar_label = option_value; } else if (strcmp(option_name, "pur-pyr") == 0){ 
purine_pyrimidine = atof(option_value); } else if (strcmp(option_name, "transition-transversion") == 0){ transition_transversion = atof(option_value); } else if (strcmp(option_name, "bg") == 0){ bg_rate = atof(option_value); } else if (strcmp(option_name, "fg") == 0){ fg_rate = atof(option_value); } else if (strcmp(option_name, "motif") == 0){ if (selected_motifs == NULL) { selected_motifs = new_string_list(); } add_string(option_value, selected_motifs); } else if (strcmp(option_name, "motif-name") == 0){ motif_name = option_value; } else if (strcmp(option_name, "bgfile") == 0){ bg_filename = option_value; } else if (strcmp(option_name, "pseudocount") == 0){ pseudocount = atof(option_value); } else if (strcmp(option_name, "verbosity") == 0){ verbosity = atoi(option_value); } } // Must have tree and motif file names if (argc != option_index + 2) { fprintf(stderr, "%s", usage); exit(EXIT_FAILURE); } /********************************************** * Read the phylogenetic tree. **********************************************/ char* tree_filename = NULL; TREE_T* tree = NULL; tree_filename = argv[option_index]; option_index++; tree = read_tree_from_file(tree_filename); // get the species names STRING_LIST_T* alignment_species = make_leaf_list(tree); char *root_label = get_label(tree); // in case target in center if (strlen(root_label)>0) add_string(root_label, alignment_species); //write_string_list(" ", alignment_species, stderr); // TLB; Convert the tree to a uniform star tree with // the target sequence at its center. if (ustar_label != NULL) { tree = convert_to_uniform_star_tree(tree, ustar_label); if (tree == NULL) die("Tree or alignment missing target %s\n", ustar_label); if (verbosity >= NORMAL_VERBOSE) { fprintf(stderr, "Target %s placed at center of uniform (d=%.3f) star tree:\n", ustar_label, get_total_length(tree) / get_num_children(tree) ); write_tree(tree, stderr); } } /********************************************** * Read the motifs. 
**********************************************/ char* meme_filename = argv[option_index]; option_index++; int num_motifs = 0; MREAD_T *mread; ALPH_T alph; ARRAYLST_T *motifs; ARRAY_T *bg_freqs; mread = mread_create(meme_filename, OPEN_MFILE); mread_set_bg_source(mread, bg_filename); mread_set_pseudocount(mread, pseudocount); // read motifs motifs = mread_load(mread, NULL); alph = mread_get_alphabet(mread); bg_freqs = mread_get_background(mread); // check if (arraylst_size(motifs) == 0) die("No motifs in %s.", meme_filename); // TLB; need to resize bg_freqs array to ALPH_SIZE items // or copy array breaks in HB mode. This throws away // the freqs for the ambiguous characters; int asize = alph_size(alph, ALPH_SIZE); resize_array(bg_freqs, asize); /************************************************************** * Compute probability distributions for each of the selected motifs. **************************************************************/ int motif_index; for (motif_index = 0; motif_index < arraylst_size(motifs); motif_index++) { MOTIF_T* motif = (MOTIF_T*)arraylst_get(motif_index, motifs); char* motif_id = get_motif_id(motif); char* bare_motif_id = motif_id; // We may have specified on the command line that // only certain motifs were to be used. if (selected_motifs != NULL) { if (*bare_motif_id == '+' || *bare_motif_id == '-') { // The selected motif id won't included a strand indicator. bare_motif_id++; } if (have_string(bare_motif_id, selected_motifs) == FALSE) { continue; } } if (verbosity >= NORMAL_VERBOSE) { fprintf( stderr, "Using motif %s of width %d.\n", motif_id, get_motif_length(motif) ); } // Build an array of evolutionary models for each position in the motif. EVOMODEL_T** models = make_motif_models( motif, bg_freqs, model_type, fg_rate, bg_rate, purine_pyrimidine, transition_transversion, use_halpern_bruno ); // Get the frequencies under the background model (row 0) // and position-dependent scores (rows 1..w) // for each possible alignment column. 
MATRIX_T* pssm_matrix = build_alignment_pssm_matrix( alph, alignment_species, get_motif_length(motif) + 1, models, tree, gap_support ); ARRAY_T* alignment_col_freqs = allocate_array(get_num_cols(pssm_matrix)); copy_array(get_matrix_row(0, pssm_matrix), alignment_col_freqs); remove_matrix_row(0, pssm_matrix); // throw away first row //print_col_frequencies(alph, alignment_col_freqs); // // Get the position-dependent null model alignment column frequencies // int w = get_motif_length(motif); int ncols = get_num_cols(pssm_matrix); MATRIX_T* pos_dep_bkg = allocate_matrix(w, ncols); for (i=0; i<w; i++) { // get the evo model corresponding to this column of the motif // and store it as the first evolutionary model. myfree(models[0]); // Use motif PSFM for equilibrium freqs. for model. ARRAY_T* site_specific_freqs = allocate_array(asize); int j = 0; for(j = 0; j < asize; j++) { double value = get_matrix_cell(i, j, get_motif_freqs(motif)); set_array_item(j, value, site_specific_freqs); } if (use_halpern_bruno == FALSE) { models[0] = make_model( model_type, fg_rate, transition_transversion, purine_pyrimidine, site_specific_freqs, NULL ); } else { models[0] = make_model( model_type, fg_rate, transition_transversion, purine_pyrimidine, bg_freqs, site_specific_freqs ); } // get the alignment column frequencies using this model MATRIX_T* tmp_pssm_matrix = build_alignment_pssm_matrix( alph, alignment_species, 2, // only interested in freqs under bkg models, tree, gap_support ); // assemble the position-dependent background alignment column freqs. set_matrix_row(i, get_matrix_row(0, tmp_pssm_matrix), pos_dep_bkg); // chuck the pssm (not his real name) free_matrix(tmp_pssm_matrix); } // // Compute and print the score distribution under the background model // and under the (position-dependent) motif model. 
// int range = 10000; // 10^4 gives same result as 10^5, but 10^3 differs // under background model PSSM_T* pssm = build_matrix_pssm(alph, pssm_matrix, alignment_col_freqs, range); // under position-dependent background (motif) model PSSM_T* pssm_pos_dep = build_matrix_pssm(alph, pssm_matrix, alignment_col_freqs, range); get_pv_lookup_pos_dep( pssm_pos_dep, pos_dep_bkg, NULL // no priors used ); // print FP and FN distributions int num_items = get_pssm_pv_length(pssm_pos_dep); for (i=0; i<num_items; i++) { double pvf = get_pssm_pv(i, pssm); double pvt = get_pssm_pv(i, pssm_pos_dep); double fpr = pvf; double fnr = 1 - pvt; if (fpr >= 0.99999 || fnr == 0) continue; printf("%s score %d FPR %.3g FNR %.3g\n", motif_id, i, fpr, fnr); } // free stuff free_pssm(pssm); free_pssm(pssm_pos_dep); if (models != NULL) { int model_index; int num_models = get_motif_length(motif) + 1; for (model_index = 0; model_index < num_models; model_index++) { free_model(models[model_index]); } myfree(models); } } // motif arraylst_destroy(destroy_motif, motifs); /********************************************** * Clean up. **********************************************/ // TLB may have encountered a memory corruption bug here // CEG has not been able to reproduce it. valgrind says all is well. free_array(bg_freqs); free_tree(TRUE, tree); free_string_list(selected_motifs); return(0); } // main
/*
 * Write a complete Huffman tree to f by serialising it recursively from
 * its root node.
 */
void huf_write_tree(file *f, huf_tree tree)
{
    write_tree(f, tree.root);
}
/*
 * Build and write the tree object for directory `dirname`, using the index
 * entries from position `start` onwards, and store its id in `oid`.
 *
 * When the tree cache has a usable entry for `dirname` (non-negative entry
 * count) the cached oid is copied and nothing is written.
 *
 * Returns the position of the first index entry that lies outside
 * `dirname` (so the caller can continue from there), or a negative error
 * code.
 *
 * Changes from the earlier version:
 *  - a failed subtree write now aborts immediately; before, the code went
 *    on to insert an uninitialised subtree id and the error was overwritten;
 *  - a failed file insertion now aborts instead of being overwritten by the
 *    result of the final tree write.
 */
static int write_tree(git_oid *oid, git_index *index, const char *dirname, unsigned int start)
{
	git_treebuilder *bld = NULL;
	unsigned int i, entries = git_index_entrycount(index);
	int error;
	size_t dirname_len = strlen(dirname);
	const git_tree_cache *cache;

	cache = git_tree_cache_get(index->tree, dirname);
	if (cache != NULL && cache->entries >= 0){
		git_oid_cpy(oid, &cache->oid);
		return find_next_dir(dirname, index, start);
	}

	error = git_treebuilder_create(&bld, NULL);
	if (bld == NULL) {
		return GIT_ENOMEM;
	}

	/*
	 * This loop is unfortunate, but necessary. The index doesn't have
	 * any directories, so we need to handle that manually, and we
	 * need to keep track of the current position.
	 */
	for (i = start; i < entries; ++i) {
		git_index_entry *entry = git_index_get(index, i);
		char *filename, *next_slash;

		/*
		 * If we've left our (sub)tree, exit the loop and return. The
		 * first check is an early out (and security for the
		 * third). The second check is a simple prefix comparison. The
		 * third check catches situations where there is a directory
		 * win32/sys and a file win32mmap.c. Without it, the following
		 * code believes there is a file win32/mmap.c
		 */
		if (strlen(entry->path) < dirname_len ||
		    memcmp(entry->path, dirname, dirname_len) ||
		    (dirname_len > 0 && entry->path[dirname_len] != '/')) {
			break;
		}

		filename = entry->path + dirname_len;
		if (*filename == '/')
			filename++;
		next_slash = strchr(filename, '/');
		if (next_slash) {
			git_oid sub_oid;
			int written;
			char *subdir, *last_comp;

			subdir = git__strndup(entry->path, next_slash - entry->path);
			if (subdir == NULL) {
				error = GIT_ENOMEM;
				goto cleanup;
			}

			/* Write out the subtree */
			written = write_tree(&sub_oid, index, subdir, i);
			if (written < 0) {
				/* sub_oid is not valid here; do not insert it. */
				error = git__rethrow(written, "Failed to write subtree %s", subdir);
				free(subdir);
				goto cleanup;
			}

			i = written - 1; /* -1 because of the loop increment */

			/*
			 * We need to figure out what we want to insert
			 * into this tree. If we're traversing
			 * deps/zlib/, then we only want to write
			 * 'zlib' into the tree.
			 */
			last_comp = strrchr(subdir, '/');
			if (last_comp) {
				last_comp++; /* Get rid of the '/' */
			} else {
				last_comp = subdir;
			}
			error = append_entry(bld, last_comp, &sub_oid, S_IFDIR);
			free(subdir);
			if (error < GIT_SUCCESS) {
				error = git__rethrow(error, "Failed to insert dir");
				goto cleanup;
			}
		} else {
			error = append_entry(bld, filename, &entry->oid, entry->mode);
			if (error < GIT_SUCCESS) {
				error = git__rethrow(error, "Failed to insert file");
				goto cleanup;
			}
		}
	}

	error = git_treebuilder_write(oid, index->repository, bld);
	if (error < GIT_SUCCESS)
		error = git__rethrow(error, "Failed to write tree to db");

 cleanup:
	git_treebuilder_free(bld);

	if (error < GIT_SUCCESS)
		return error;
	else
		return i;
}
int main(int argc, char **argv) { si_t si = make_si(1024); FILE *grammarfp = stdin, *yieldfp; FILE *tracefp = NULL; /* trace output */ FILE *summaryfp = stderr; /* end of parse stats output */ FILE *parsefp = stdout; /* parse trees */ FILE *probfp = NULL; /* max_neglog_prob */ chart_cell root_cell; grammar g; chart c; vindex terms; int maxsentlen = 0; int sentenceno = 0, parsed_sentences = 0, failed_sentences = 0; double sum_neglog_prob = 0; int sentfrom = 0; int sentto = 0; srand(RAND_SEED); /* seed random number generator */ if (argc<2 || argc>6) { fprintf(stderr, "%s yieldfile [maxsentlen [grammarfile [sentfrom sentto]]]\n", argv[0]); exit(EXIT_FAILURE); } if ((yieldfp = fopen(argv[1], "r")) == NULL) { fprintf(stderr, "%s: Couldn't open yieldfile %s\n", argv[0], argv[1]); exit(EXIT_FAILURE); } if (argc >= 3) if (!sscanf(argv[2], "%d", &maxsentlen)) { fprintf(stderr, "%s: Couldn't parse maxsentlen %s\n", argv[0], argv[2]); exit(EXIT_FAILURE); } if (argc >= 4) if ((grammarfp = fopen(argv[3], "r")) == NULL) { fprintf(stderr, "%s: Couldn't open grammarfile %s\n", argv[0], argv[3]); exit(EXIT_FAILURE); } if (argc >= 6) { if (!sscanf(argv[4], "%d", &sentfrom)) { fprintf(stderr, "%s: Couldn't parse sentfrom %s\n", argv[0], argv[4]); exit(EXIT_FAILURE); } if (!sscanf(argv[5], "%d", &sentto)) { fprintf(stderr, "%s: Couldn't parse sentto %s\n", argv[0], argv[5]); exit(EXIT_FAILURE); } } g = read_grammar(grammarfp, si); /* write_grammar(tracefp, g, si); */ while ((terms = read_terms(yieldfp, si))) { sentenceno++; if (sentfrom && sentenceno < sentfrom) { vindex_free(terms); continue; } if (sentto && sentenceno > sentto) { vindex_free(terms); break; } /* skip if sentence is too long */ if (!maxsentlen || (int) terms->n <= maxsentlen) { size_t i; if (tracefp) { fprintf(tracefp, "\nSentence %d:\n", sentenceno); for (i=0; i<terms->n; i++) fprintf(tracefp, " %s", si_index_string(si, terms->e[i])); fprintf(tracefp, "\n"); } c = cky(*terms, g, si); /* fetch best root node */ 
root_cell = sihashcc_ref(CHART_ENTRY(c, 0, terms->n), g.root_label); if (root_cell) { tree parse_tree = bintree_tree(&root_cell->tree, si); double prob = (double) root_cell->prob; parsed_sentences++; assert(prob > 0.0); sum_neglog_prob -= log(prob); if (probfp) fprintf(probfp, "max_neglog_prob(%d, %g).\n", sentenceno, -log(prob)); if (tracefp) fprintf(tracefp, " Prob = %g\n", prob); if (parsefp) { write_tree(parsefp, parse_tree, si); fprintf(parsefp, "\n"); /* write_prolog_tree(parsefp, parse_tree, si); */ } free_tree(parse_tree); } else { failed_sentences++; if (tracefp) fprintf(tracefp, "Failed to parse\n"); if (parsefp) fprintf(parsefp, "parse_failure.\n"); } chart_free(c, terms->n); /* free the chart */ } else { /* sentence too long */ if (parsefp) fprintf(parsefp, "too_long.\n"); } vindex_free(terms); /* free the terms */ assert(trees_allocated == 0); assert(bintrees_allocated == 0); } free_grammar(g); si_free(si); if (summaryfp) { fprintf(summaryfp, "\n%d/%d = %g%% test sentences met the length criteron," " of which %d/%d = %g%% were parsed\n", parsed_sentences+failed_sentences, sentenceno, (double) (100.0 * (parsed_sentences+failed_sentences)) / sentenceno, parsed_sentences, parsed_sentences+failed_sentences, (double) (100.0 * parsed_sentences) / (parsed_sentences + failed_sentences)); fprintf(summaryfp, "Sum(-log prob) = %g\n", sum_neglog_prob); } /* check that everything has been deallocated */ /* printf("mmm_blocks_allocated = %ld\n", (long) mmm_blocks_allocated); */ assert(mmm_blocks_allocated == 0); exit(EXIT_SUCCESS); }
int main(int argc, char **argv) { init_rand(); setup_gsl_dgen(); dgen_parse_cmdline(argc, argv); int i, j, h, p, k, c; int cons_cap, num_cons = -1, root_cap, num_root = -1; char **cons_seqs, **root_seqs; int internal_node_index = 0, leaf_node_index = M; int max_internal_node_index = 2000000; int max_leaf_node_index = 2000000; int sum_seen_or_unseen = 0; int ploidy, codon_sequence_length = -1, n_genes, total_n_HLA; Decimal mu; if(json_parameters_path == NULL) die("No .json file passed."); load_lengths_for_simulation_from_json(json_parameters_path, &kappa, &mu, &codon_sequence_length, &total_n_HLA, &ploidy, &n_genes); if(ploidy < 1) die("Ploidy is less than 1."); if(n_genes < 1) die("Number of genes is less than 1."); if(cons_path != NULL) { printf("Loading sequences to obtain consensus...\n"); num_cons = load_seqs(cons_path, &cons_seqs, &cons_cap); assert(num_cons > 0); printf("Loaded %i sequences to determine consensus.\n", num_cons); codon_sequence_length = strlen(cons_seqs[0]); printf("Codon_sequence_length: %i\n",codon_sequence_length/3); if(codon_sequence_length % 3 != 0) die("Sequences contain partial codons [%i mod 3 != 0].", codon_sequence_length); for(c = 0; c < num_cons; c++) { if((int) strlen(cons_seqs[c]) != codon_sequence_length) { die("Sequences from which to derive the consensus sequence aren't all " "the same length."); } } codon_sequence_length = codon_sequence_length/3; } if(root_path != NULL) { printf("Loading sequences to obtain root...\n"); num_root = load_seqs(root_path, &root_seqs, &root_cap); printf("Loaded %i sequences to determine root.\n", num_root); if(cons_path == NULL) die("Did not pass a file to find the consensus sequence."); if((int) (strlen(root_seqs[0])/3) != codon_sequence_length) die("Sequences used to determine the root are different lengths to those used for the consensus."); for(c = 0; c < num_root; c++) { if((int) strlen(root_seqs[c]) != 3*codon_sequence_length) { die("Sequences from which to derive the root sequence aren't 
all " "the same length."); } } } Decimal *internal_node_times = my_malloc(max_internal_node_index * sizeof(Decimal) , __FILE__, __LINE__); Decimal *leaf_node_times = my_malloc(max_leaf_node_index * sizeof(Decimal), __FILE__, __LINE__); int *seen_or_unseen = my_malloc(max_internal_node_index * sizeof(int), __FILE__, __LINE__); birth_death_simulation_backwards(max_internal_node_index, max_leaf_node_index, internal_node_times, leaf_node_times, &internal_node_index, &leaf_node_index, seen_or_unseen, N, M, lambda, mu_tree, past_sampling); for(i = 0; i < internal_node_index; i++) sum_seen_or_unseen += seen_or_unseen[i]; int total_nodes = (2 * leaf_node_index) - 1 + internal_node_index - sum_seen_or_unseen; Tree *tree = my_malloc((total_nodes+1) * sizeof(Tree), __FILE__, __LINE__); // Now malloc the memory that this points to. int *HLAs_in_tree = my_malloc((total_nodes+1) * ploidy * n_genes * sizeof(int), __FILE__, __LINE__); for(i = 0; i < total_nodes; i++) tree[i].HLAs = &HLAs_in_tree[i * ploidy * n_genes]; construct_birth_death_tree(leaf_node_index, internal_node_index, leaf_node_times, internal_node_times, M, seen_or_unseen, tree); // Reverse the direction that time is measured in the tree. // DEV: Don't need to do this, waste of computation - sort. // DEV: The parent times are wrong when there are unseen nodes. 
for(i = 0; i < total_nodes; i++) tree[i].node_time = tree[total_nodes-1].node_time - tree[i].node_time; int root_node = tree[total_nodes-1].node; if(write_newick_tree_to_file == true) { write_newick_tree(newick_tree_data_file, tree, root_node, 1); fclose(newick_tree_data_file); } Decimal S_portion[NUM_CODONS]; Decimal NS_portion[NUM_CODONS]; for(c = 0; c < NUM_CODONS; c++) { S_portion[c] = kappa * beta_S[c] + beta_V[c]; NS_portion[c] = kappa * alpha_S[c] + alpha_V[c]; } int n_HLA[n_genes]; printf("Total number of HLA types: %i.\n", total_n_HLA); Decimal HLA_prevalences[total_n_HLA]; int wildtype_sequence[codon_sequence_length]; Decimal *R = my_malloc(codon_sequence_length * sizeof(Decimal), __FILE__, __LINE__); Decimal *omega = my_malloc(codon_sequence_length * sizeof(Decimal), __FILE__, __LINE__); Decimal *reversion_selection = my_malloc(codon_sequence_length * sizeof(Decimal), __FILE__, __LINE__); memory_allocation(num_cons, num_root, codon_sequence_length, max_internal_node_index, max_leaf_node_index, total_nodes, ploidy, n_genes, total_n_HLA, leaf_node_index); int (*codon_sequence_matrix)[codon_sequence_length] = my_malloc(total_nodes * sizeof(int[codon_sequence_length]), __FILE__, __LINE__); Decimal (*HLA_selection_profiles)[codon_sequence_length] = my_malloc(total_n_HLA * sizeof(Decimal[codon_sequence_length]), __FILE__, __LINE__); load_parameters_for_simulation_from_json(json_parameters_path, codon_sequence_length, omega, R, reversion_selection, total_n_HLA, n_genes, n_HLA, HLA_prevalences, HLA_selection_profiles); Decimal sum_check; for(i = 0, k = 0; i < n_genes; i++) { sum_check = 0; for(h = 0; h < n_HLA[i]; h++, k++) { sum_check += HLA_prevalences[k]; } if(sum_check > 1.00001 || sum_check < 0.9999) die("HLA prevalences for gene %i do not sum to 1\n", i+1); } if(cons_path != NULL) { printf("Mapping gaps to consensus...\n"); // Set the consensus sequence - the consensus of the optional sequence file // that is passed. 
char wildtype_sequence_dummy[3*codon_sequence_length+1]; generate_consensus(cons_seqs, num_cons, 3*codon_sequence_length, wildtype_sequence_dummy); printf("Wildtype sequence:\n%s\n", wildtype_sequence_dummy); // By default, set the root as the wildtype sequence. for(i = 0; i < codon_sequence_length; i++) wildtype_sequence[i] = (int) amino_to_code(wildtype_sequence_dummy+i*3); if(root_path == NULL) { for(i = 0; i < codon_sequence_length; i++) codon_sequence_matrix[root_node][i] = wildtype_sequence[i]; } else { printf("Mapping gaps to root...\n"); char root_sequence_dummy[3*codon_sequence_length+1]; generate_consensus(root_seqs, num_root, 3*codon_sequence_length, root_sequence_dummy); printf("Root sequence:\n%s\n", root_sequence_dummy); for(i = 0; i < codon_sequence_length; i++) codon_sequence_matrix[root_node][i] = (int) amino_to_code(root_sequence_dummy+i*3); printf("Number of root sequences: %i.\n", num_root); for(c = 0; c < num_root; c++) free(root_seqs[c]); free(root_seqs); } printf("Number of consensus sequences: %i.\n", num_cons); for(c = 0; c < num_cons; c++) free(cons_seqs[c]); free(cons_seqs); } else { for(i = 0; i < codon_sequence_length; i++) { // Sample the root sequence according to the HIV codon usage information. codon_sequence_matrix[root_node][i] = discrete_sampling_dist(NUM_CODONS, prior_C1); // As default, set the root node to the consensus sequence. wildtype_sequence[i] = codon_sequence_matrix[root_node][i]; } } // No matter what is read in, there is no recombination simulated - so make sure it's set to 0. 
for(i = 0; i < codon_sequence_length; i++) R[i] = 0; write_summary_json(json_summary_file, mu, codon_sequence_length, ploidy, n_genes, n_HLA, total_n_HLA, HLA_prevalences, omega, R, reversion_selection, HLA_selection_profiles); free(R); fprintf(simulated_root_file, ">root_sequence\n"); for(i = 0; i < codon_sequence_length; i++) fprintf(simulated_root_file, "%s", code_to_char(codon_sequence_matrix[root_node][i])); fprintf(simulated_root_file, "\n"); int root_HLA[ploidy * n_genes]; int cumulative_n_HLA = 0; for(i = 0, k = 0; i < n_genes; i++) { for(p = 0; p < ploidy; p++, k++) { root_HLA[k] = cumulative_n_HLA + discrete_sampling_dist(n_HLA[i], &HLA_prevalences[cumulative_n_HLA]); tree[root_node].HLAs[k] = root_HLA[k]; } cumulative_n_HLA = cumulative_n_HLA + n_HLA[i]; } printf("Passing HLA information...\n"); pass_HLA(ploidy, n_genes, root_node, tree, leaf_node_index, total_n_HLA, n_HLA, HLA_prevalences); printf("Passed HLA information\n"); // printf("Printing the tree\n"); // for(i = 0; i < total_nodes; i++) { // printf("%i %i %i "DECPRINT" %i", tree[i].node, tree[i].daughter_nodes[0], // tree[i].daughter_nodes[1], tree[i].node_time, // tree[i].seen_or_unseen); // for(j = 0; j < (ploidy * n_genes); j++) { // printf(" %i", tree[i].HLAs[j]); // } // printf("\n"); // } if(write_tree_to_file == true) { write_tree(tree_data_file, tree, root_node, ploidy, n_genes); fclose(tree_data_file); } printf("Passing sequence information...\n"); pass_codon_sequence_change(codon_sequence_length, ploidy, n_genes, total_n_HLA, root_node, mu, codon_sequence_matrix, tree, leaf_node_index, S_portion, NS_portion, HLA_selection_profiles, wildtype_sequence, omega, reversion_selection); printf("Passed sequence information\n" "Now generating .fasta files of reference and query sequences, and\n" "a .csv file of the HLA information associated to the query sequences.\n"); if(num_queries < 0) { // Set the number of query sequences. 
num_queries = (int) (query_fraction * leaf_node_index); printf("Number of queries: %i.\n", num_queries); } else { printf("Number of queries: %i.\n", num_queries); } if(num_queries > leaf_node_index) die("Number of query sequences larger than the number of leaves"); int *all_sequences = my_malloc(leaf_node_index * sizeof(int), __FILE__, __LINE__); int num_refs = leaf_node_index - num_queries; for(i = 0; i < leaf_node_index; i++) all_sequences[i] = i; save_simulated_ref_and_query_fasta(num_queries, num_refs, leaf_node_index, all_sequences, codon_sequence_length, codon_sequence_matrix, tree, ploidy, n_genes); // Now save the hla types to a .csv file. fprintf(hla_query_file, "\"\","); for(h = 0; h < total_n_HLA-1; h++) fprintf(hla_query_file, "\"%i\",", h+1); fprintf(hla_query_file, "\"%i\"\n", total_n_HLA); // Write the HLA types of the leaves to a file. int (*hla_types)[total_n_HLA] = my_malloc(leaf_node_index * sizeof(int[total_n_HLA]), __FILE__, __LINE__); for(i = 0; i < leaf_node_index; i++) { for(h = 0; h < total_n_HLA; h++) hla_types[i][h] = 0; for(j = 0; j < (n_genes * ploidy); j++) hla_types[i][tree[i].HLAs[j]] = 1; } // Write the query HLA types to a .csv file. 
for(i = num_refs; i < leaf_node_index; i++) { fprintf(hla_query_file,"\"simulated_seq_%i_HLA", all_sequences[i]+1); for(h = 0; h < (ploidy * n_genes); h++) fprintf(hla_query_file, "_%i", tree[all_sequences[i]].HLAs[h]); fprintf(hla_query_file, "\""); for(h = 0; h < total_n_HLA; h++) { fprintf(hla_query_file, ",%i", hla_types[all_sequences[i]][h]); } fprintf(hla_query_file, "\n"); } free(hla_types); free(internal_node_times); free(leaf_node_times); free(seen_or_unseen); free(codon_sequence_matrix); free(HLA_selection_profiles); free(all_sequences); free(omega); free(reversion_selection); free(tree[0].HLAs); free(tree); fclose(summary_file); fclose(json_summary_file); fclose(simulated_refs_file); fclose(simulated_root_file); fclose(simulated_queries_file); fclose(hla_query_file); clearup_gsl_dgen(); return EXIT_SUCCESS; }