/************************************************************************* * Remove the nth child of a given tree. * * Does not free dynamic memory. *************************************************************************/ void remove_nth_child (const VERBOSE_T verbosity, const BOOLEAN free_child, // free child node? const BOOLEAN_T free_children, /* Boolean: Free children as well? */ const int n, TREE_T * a_tree) { int i_child; check_null_tree(a_tree); /* Make sure the child exists. */ if (n >= get_num_children(a_tree)) { die("Attempted to remove child %d from a tree with %d children.\n", n, get_num_children(a_tree)); } if (verbosity >= HIGHER_VERBOSE) { fprintf(stderr, "Removing child %s from tree %s.\n", get_key(a_tree->children[n]), get_key(a_tree)); } /* Free the child and its children. */ if (free_child) free_tree(free_children, a_tree->children[n]); /* Move the remaining children left one slot. */ for (i_child = n; i_child < get_num_children(a_tree) - 1; i_child++) { a_tree->children[i_child] = a_tree->children[i_child+1]; } /* Decrement the number of children. */ (a_tree->num_children)--; } // remove_nth_child
/*************************************************************************
 * Append a child node to the end of a tree's child list.
 *
 *   verbosity - a trace line is written to stderr when this is at least
 *               HIGHER_VERBOSE.
 *   a_child   - node to attach.
 *   a_tree    - parent that receives the child.
 *
 * Dies if the parent already holds MAX_DEGREE (or more) children.
 * If the parent tracks descendants (has_descendants), the child's
 * descendant count is computed and added to the parent's total.
 *************************************************************************/
void add_child
  (const VERBOSE_T verbosity,
   TREE_T * const  a_child,
   TREE_T *        a_tree)
{
  int degree;

  check_null_tree(a_tree);
  degree = get_num_children(a_tree);

  /* Refuse to grow past the fixed capacity of the child array. */
  if (degree >= MAX_DEGREE) {
    die("Attempted to add %s to tree (%s) with maximum degree (%d).\n",
        get_key(a_child), get_key(a_tree), degree);
  }

  if (verbosity >= HIGHER_VERBOSE) {
    fprintf(stderr, "Adding child %s to tree %s.\n",
            get_key(a_child), get_key(a_tree));
  }

  /* The new child goes into the first unused slot. */
  a_tree->children[degree] = a_child;
  a_tree->num_children += 1;

  /* Keep the cached descendant count in step, when one is maintained. */
  if (a_tree->has_descendants) {
    compute_descendants(a_child, FALSE);
    a_tree->num_descendants += a_child->num_descendants;
  }
}
/*************************************************************************
 * Retrieve the nth child of a given tree.
 *
 *   n      - zero-based index of the requested child.
 *   a_tree - parent node to read from.
 *
 * Returns the pointer stored in slot n of the parent's child array.
 * Dies if n is negative or not smaller than the current child count.
 *************************************************************************/
TREE_T * get_nth_child
  (const int      n,
   TREE_T * const a_tree)
{
  int num_children;

  check_null_tree(a_tree);
  num_children = get_num_children(a_tree);

  /* Make sure the child exists.  Negative indices are rejected as well,
     since they would read memory in front of the child array. */
  if (n < 0 || n >= num_children) {
    die("Attempted to retrieve child %d from a tree with %d children.\n",
        n, num_children);
  }

  /* Return the requested child. */
  return(a_tree->children[n]);
}
/************************************************************************* This function populates a trans_matix_array with pointers to matrices and the corresponding time values indexed by the edge number of the phylogenetic tree. The edges are numbered in depth-first order. The three parameters are an evolutinary model, a phylogentic tree, and a pointer to substmatrix_array structure. The function returns an integer containing the number of matrices added to the substmatrix_array structure. *************************************************************************/ static int populate_substmatrix_array( EVOMODEL_T* model, // IN TREE_T* tree, // IN int current_position, // IN SUBSTMATRIX_ARRAY_T* array // OUT ) { // Recursively descend the tree, depth first int num_children = get_num_children(tree); if (is_leaf(tree) != TRUE) { int c = 0; for (c = 0; c < num_children; c++) { TREE_T* child = get_nth_child(c, tree); double t = get_length(child); set_substmatrix_time(array, current_position, t); MATRIX_T* prob_matrix = get_model_prob_matrix(t, model); set_substmatrix_matrix(array, current_position, prob_matrix); free_matrix(prob_matrix); current_position = populate_substmatrix_array( model, child, current_position + 1, array ); } } return current_position; }
/*************************************************************************
 * Compute the maximum depth of a tree.
 *
 * A leaf has depth 1; an internal node has depth one greater than the
 * deepest of its children.
 *************************************************************************/
int compute_depth
  (TREE_T * const a_tree)
{
  int max_depth = 0;
  int num_children;
  int i_child;

  /* Base case: leaf. */
  if (is_leaf(a_tree)) {
    return(1);
  }

  /* Recursive case: internal node.  Find the deepest child. */
  num_children = get_num_children(a_tree);
  for (i_child = 0; i_child < num_children; i_child++) {
    int this_depth = compute_depth(get_nth_child(i_child, a_tree));
    if (this_depth > max_depth) {
      max_depth = this_depth;
    }
  }

  /* Account for this node itself. */
  return(max_depth + 1);
}
int rem_dir( uint32_t id ) { printf("Called rem_dir( id: %d )\n", id ); int i, children; struct FS_Directory* dir_list; children = get_num_children( id ); // If there are no more children, delete directory if ( children == 0 ) { printf("No children in id: %d\n", id ); rem_dir_leaf( id ); return id; } // Call function recursively over all the child dirs dir_list = get_children( id ); for ( i = 0; i < children; i++ ) rem_dir( dir_list[i].id ); free( dir_list ); // Upon return, all children are deleted, now delete this printf("Returned for id: %d, and removing this\n", id ); rem_dir_leaf( id ); // TODO handle errors }
/******************************************************** * print_dir * * Takes the a directory's ID and prints the child * directories and files within. ********************************************************/ void print_dir( uint32_t id ) { uint32_t i, num_children, num_files; struct FS_Directory *children; struct FS_File *dir_files; num_children = get_num_children( id ); if ( num_children ) { children = get_children( id ); for ( i = 0; i < num_children; i++ ) printf("%s\n", children[i].name ); free( children ); } num_files = get_num_files( id ); if ( num_files ) { dir_files = get_files( id ); for ( i = 0; i < num_files; i++ ) printf("%s\n", dir_files[i].name ); free( dir_files ); } }
// Free entire tree void free_tree(Node *root) { if(root != NULL) { int i; int num_children = get_num_children(root); Node **children = get_children(root); for(i = 0; i < num_children; i++) { free_tree(children[i]); } free_node(root); } }
/*************************************************************************
 * Count the edges in the tree rooted at the given node.
 *
 * Each child contributes one edge (the link to its parent) plus all of
 * the edges found in its own subtree. The tree must not be NULL.
 *************************************************************************/
int get_num_edges
  (TREE_T * const a_tree)
{
  int degree;
  int edge_total;
  int i_child;

  assert(a_tree != NULL);

  /* One edge per direct child ... */
  degree = get_num_children(a_tree);
  edge_total = degree;

  /* ... plus whatever each child's subtree contains. */
  for (i_child = 0; i_child < degree; i_child++) {
    edge_total += get_num_edges(get_nth_child(i_child, a_tree));
  }

  return edge_total;
}
/*************************************************************************
 * Sum the branch lengths of a tree.
 *
 * The result is the length stored on the given node plus the total
 * length of each child's subtree. The sum is accumulated as a double
 * and converted to float on return.
 *************************************************************************/
float get_total_length
  (TREE_T * const a_tree)
{
  int degree;
  int i_child;
  double total;

  check_null_tree(a_tree);

  degree = get_num_children(a_tree);
  total = get_length(a_tree);

  /* A leaf contributes only its own branch. */
  if (is_leaf(a_tree)) {
    return total;
  }

  for (i_child = 0; i_child < degree; i_child++) {
    total += get_total_length(get_nth_child(i_child, a_tree));
  }

  return total;
} // get_total_length
// --------------------------------------------------------------- void if_statement::do_analyze_context() { const int num_children = get_num_children(); assert(num_children == 2 || num_children == 3); _condition = &get_child(0); _condition->analyze_context(this); if (_condition->get_type() != TARD_TYPE_BOOL) throw tard_exception(tard_exception::IF_CONDITION_MUST_BE_BOOLEAN); _then = &get_child(1); _then->analyze_context(this); if (num_children == 3) { _else = &get_child(2); _else->analyze_context(this); } }
// --------------------------------------------------------------- void fun_def::do_analyze_context() { const char* entry_point = "main"; assert(get_num_children() == 3); _type = &get_child(0).as<type>(); _identifier = &get_child(1).as<identifier>(); _block = &get_child(2).as<block>(); _type->analyze_context(this); _identifier->analyze_context(this); _block->analyze_context(this); _is_entry_point = entry_point == _identifier->name(); const type_tag fun_ret_type = get_type(); const type_tag block_type = _block->get_type(); if (fun_ret_type != TARD_TYPE_VOID && fun_ret_type != block_type) throw tard_exception(tard_exception::TYPE_MISMATCH); }
/************************************************************************* * Entry point for pmp_bf *************************************************************************/ int main(int argc, char *argv[]) { char* bg_filename = NULL; char* motif_name = "motif"; // Use this motif name in the output. STRING_LIST_T* selected_motifs = NULL; double fg_rate = 1.0; double bg_rate = 1.0; double purine_pyrimidine = 1.0; // r double transition_transversion = 0.5; // R double pseudocount = 0.1; GAP_SUPPORT_T gap_support = SKIP_GAPS; MODEL_TYPE_T model_type = F81_MODEL; BOOLEAN_T use_halpern_bruno = FALSE; char* ustar_label = NULL; // TLB; create uniform star tree int i; program_name = "pmp_bf"; /********************************************** * COMMAND LINE PROCESSING **********************************************/ // Define command line options. (FIXME: Repeated code) // FIXME: Note that if you add or remove options you // must change n_options. int n_options = 12; cmdoption const pmp_options[] = { {"hb", NO_VALUE}, {"ustar", REQUIRED_VALUE}, {"model", REQUIRED_VALUE}, {"pur-pyr", REQUIRED_VALUE}, {"transition-transversion", REQUIRED_VALUE}, {"bg", REQUIRED_VALUE}, {"fg", REQUIRED_VALUE}, {"motif", REQUIRED_VALUE}, {"motif-name", REQUIRED_VALUE}, {"bgfile", REQUIRED_VALUE}, {"pseudocount", REQUIRED_VALUE}, {"verbosity", REQUIRED_VALUE} }; int option_index = 0; // Define the usage message. char usage[1000] = ""; strcat(usage, "USAGE: pmp [options] <tree file> <MEME file>\n"); strcat(usage, "\n"); strcat(usage, " Options:\n"); // Evolutionary model parameters. strcat(usage, " --hb\n"); strcat(usage, " --model single|average|jc|k2|f81|f84|hky|tn"); strcat(usage, " (default=f81)\n"); strcat(usage, " --pur-pyr <float> (default=1.0)\n"); strcat(usage, " --transition-transversion <float> (default=0.5)\n"); strcat(usage, " --bg <float> (default=1.0)\n"); strcat(usage, " --fg <float> (default=1.0)\n"); // Motif parameters. 
strcat(usage, " --motif <id> (default=all)\n"); strcat(usage, " --motif-name <string> (default from motif file)\n"); // Miscellaneous parameters strcat(usage, " --bgfile <background> (default from motif file)\n"); strcat(usage, " --pseudocount <float> (default=0.1)\n"); strcat(usage, " --ustar <label>\n"); // TLB; create uniform star tree strcat(usage, " --verbosity [1|2|3|4] (default 2)\n"); strcat(usage, "\n Prints the FP and FN rate at each of 10000 score values.\n"); strcat(usage, "\n Output format: [<motif_id> score <score> FPR <fpr> TPR <tpr>]+\n"); // Parse the command line. if (simple_setopt(argc, argv, n_options, pmp_options) != NO_ERROR) { die("Error processing command line options: option name too long.\n"); } while (TRUE) { int c = 0; char* option_name = NULL; char* option_value = NULL; const char * message = NULL; // Read the next option, and break if we're done. c = simple_getopt(&option_name, &option_value, &option_index); if (c == 0) { break; } else if (c < 0) { (void) simple_getopterror(&message); die("Error processing command line options (%s)\n", message); } if (strcmp(option_name, "model") == 0) { if (strcmp(option_value, "jc") == 0) { model_type = JC_MODEL; } else if (strcmp(option_value, "k2") == 0) { model_type = K2_MODEL; } else if (strcmp(option_value, "f81") == 0) { model_type = F81_MODEL; } else if (strcmp(option_value, "f84") == 0) { model_type = F84_MODEL; } else if (strcmp(option_value, "hky") == 0) { model_type = HKY_MODEL; } else if (strcmp(option_value, "tn") == 0) { model_type = TAMURA_NEI_MODEL; } else if (strcmp(option_value, "single") == 0) { model_type = SINGLE_MODEL; } else if (strcmp(option_value, "average") == 0) { model_type = AVERAGE_MODEL; } else { die("Unknown model: %s\n", option_value); } } else if (strcmp(option_name, "hb") == 0){ use_halpern_bruno = TRUE; } else if (strcmp(option_name, "ustar") == 0){ // TLB; create uniform star tree ustar_label = option_value; } else if (strcmp(option_name, "pur-pyr") == 0){ 
purine_pyrimidine = atof(option_value); } else if (strcmp(option_name, "transition-transversion") == 0){ transition_transversion = atof(option_value); } else if (strcmp(option_name, "bg") == 0){ bg_rate = atof(option_value); } else if (strcmp(option_name, "fg") == 0){ fg_rate = atof(option_value); } else if (strcmp(option_name, "motif") == 0){ if (selected_motifs == NULL) { selected_motifs = new_string_list(); } add_string(option_value, selected_motifs); } else if (strcmp(option_name, "motif-name") == 0){ motif_name = option_value; } else if (strcmp(option_name, "bgfile") == 0){ bg_filename = option_value; } else if (strcmp(option_name, "pseudocount") == 0){ pseudocount = atof(option_value); } else if (strcmp(option_name, "verbosity") == 0){ verbosity = atoi(option_value); } } // Must have tree and motif file names if (argc != option_index + 2) { fprintf(stderr, "%s", usage); exit(EXIT_FAILURE); } /********************************************** * Read the phylogenetic tree. **********************************************/ char* tree_filename = NULL; TREE_T* tree = NULL; tree_filename = argv[option_index]; option_index++; tree = read_tree_from_file(tree_filename); // get the species names STRING_LIST_T* alignment_species = make_leaf_list(tree); char *root_label = get_label(tree); // in case target in center if (strlen(root_label)>0) add_string(root_label, alignment_species); //write_string_list(" ", alignment_species, stderr); // TLB; Convert the tree to a uniform star tree with // the target sequence at its center. if (ustar_label != NULL) { tree = convert_to_uniform_star_tree(tree, ustar_label); if (tree == NULL) die("Tree or alignment missing target %s\n", ustar_label); if (verbosity >= NORMAL_VERBOSE) { fprintf(stderr, "Target %s placed at center of uniform (d=%.3f) star tree:\n", ustar_label, get_total_length(tree) / get_num_children(tree) ); write_tree(tree, stderr); } } /********************************************** * Read the motifs. 
**********************************************/ char* meme_filename = argv[option_index]; option_index++; int num_motifs = 0; MREAD_T *mread; ALPH_T alph; ARRAYLST_T *motifs; ARRAY_T *bg_freqs; mread = mread_create(meme_filename, OPEN_MFILE); mread_set_bg_source(mread, bg_filename); mread_set_pseudocount(mread, pseudocount); // read motifs motifs = mread_load(mread, NULL); alph = mread_get_alphabet(mread); bg_freqs = mread_get_background(mread); // check if (arraylst_size(motifs) == 0) die("No motifs in %s.", meme_filename); // TLB; need to resize bg_freqs array to ALPH_SIZE items // or copy array breaks in HB mode. This throws away // the freqs for the ambiguous characters; int asize = alph_size(alph, ALPH_SIZE); resize_array(bg_freqs, asize); /************************************************************** * Compute probability distributions for each of the selected motifs. **************************************************************/ int motif_index; for (motif_index = 0; motif_index < arraylst_size(motifs); motif_index++) { MOTIF_T* motif = (MOTIF_T*)arraylst_get(motif_index, motifs); char* motif_id = get_motif_id(motif); char* bare_motif_id = motif_id; // We may have specified on the command line that // only certain motifs were to be used. if (selected_motifs != NULL) { if (*bare_motif_id == '+' || *bare_motif_id == '-') { // The selected motif id won't included a strand indicator. bare_motif_id++; } if (have_string(bare_motif_id, selected_motifs) == FALSE) { continue; } } if (verbosity >= NORMAL_VERBOSE) { fprintf( stderr, "Using motif %s of width %d.\n", motif_id, get_motif_length(motif) ); } // Build an array of evolutionary models for each position in the motif. EVOMODEL_T** models = make_motif_models( motif, bg_freqs, model_type, fg_rate, bg_rate, purine_pyrimidine, transition_transversion, use_halpern_bruno ); // Get the frequencies under the background model (row 0) // and position-dependent scores (rows 1..w) // for each possible alignment column. 
MATRIX_T* pssm_matrix = build_alignment_pssm_matrix( alph, alignment_species, get_motif_length(motif) + 1, models, tree, gap_support ); ARRAY_T* alignment_col_freqs = allocate_array(get_num_cols(pssm_matrix)); copy_array(get_matrix_row(0, pssm_matrix), alignment_col_freqs); remove_matrix_row(0, pssm_matrix); // throw away first row //print_col_frequencies(alph, alignment_col_freqs); // // Get the position-dependent null model alignment column frequencies // int w = get_motif_length(motif); int ncols = get_num_cols(pssm_matrix); MATRIX_T* pos_dep_bkg = allocate_matrix(w, ncols); for (i=0; i<w; i++) { // get the evo model corresponding to this column of the motif // and store it as the first evolutionary model. myfree(models[0]); // Use motif PSFM for equilibrium freqs. for model. ARRAY_T* site_specific_freqs = allocate_array(asize); int j = 0; for(j = 0; j < asize; j++) { double value = get_matrix_cell(i, j, get_motif_freqs(motif)); set_array_item(j, value, site_specific_freqs); } if (use_halpern_bruno == FALSE) { models[0] = make_model( model_type, fg_rate, transition_transversion, purine_pyrimidine, site_specific_freqs, NULL ); } else { models[0] = make_model( model_type, fg_rate, transition_transversion, purine_pyrimidine, bg_freqs, site_specific_freqs ); } // get the alignment column frequencies using this model MATRIX_T* tmp_pssm_matrix = build_alignment_pssm_matrix( alph, alignment_species, 2, // only interested in freqs under bkg models, tree, gap_support ); // assemble the position-dependent background alignment column freqs. set_matrix_row(i, get_matrix_row(0, tmp_pssm_matrix), pos_dep_bkg); // chuck the pssm (not his real name) free_matrix(tmp_pssm_matrix); } // // Compute and print the score distribution under the background model // and under the (position-dependent) motif model. 
// int range = 10000; // 10^4 gives same result as 10^5, but 10^3 differs // under background model PSSM_T* pssm = build_matrix_pssm(alph, pssm_matrix, alignment_col_freqs, range); // under position-dependent background (motif) model PSSM_T* pssm_pos_dep = build_matrix_pssm(alph, pssm_matrix, alignment_col_freqs, range); get_pv_lookup_pos_dep( pssm_pos_dep, pos_dep_bkg, NULL // no priors used ); // print FP and FN distributions int num_items = get_pssm_pv_length(pssm_pos_dep); for (i=0; i<num_items; i++) { double pvf = get_pssm_pv(i, pssm); double pvt = get_pssm_pv(i, pssm_pos_dep); double fpr = pvf; double fnr = 1 - pvt; if (fpr >= 0.99999 || fnr == 0) continue; printf("%s score %d FPR %.3g FNR %.3g\n", motif_id, i, fpr, fnr); } // free stuff free_pssm(pssm); free_pssm(pssm_pos_dep); if (models != NULL) { int model_index; int num_models = get_motif_length(motif) + 1; for (model_index = 0; model_index < num_models; model_index++) { free_model(models[model_index]); } myfree(models); } } // motif arraylst_destroy(destroy_motif, motifs); /********************************************** * Clean up. **********************************************/ // TLB may have encountered a memory corruption bug here // CEG has not been able to reproduce it. valgrind says all is well. free_array(bg_freqs); free_tree(TRUE, tree); free_string_list(selected_motifs); return(0); } // main
// Return the child at position idx. The index is checked against
// get_num_children() with SASSERT; the node is then viewed as a
// sexpr_composite to reach its child array.
sexpr * sexpr::get_child(unsigned idx) const {
    SASSERT(idx < get_num_children());
    sexpr_composite const * composite = static_cast<sexpr_composite const *>(this);
    return composite->m_children[idx];
}