Пример #1
0
/*************************************************************************
 * Remove the nth child of a given tree.
 *
 * The children to the right of slot n are shifted left by one and the
 * child count is decremented.  The slot vacated at the end of the child
 * array is reset to NULL so that no stale pointer is left behind.
 *
 * If free_child is true, the removed child is released via free_tree(),
 * with free_children forwarded as free_tree()'s first argument.
 * Otherwise no memory is freed and the caller keeps ownership of the
 * removed child.
 *
 * Dies if n is not a valid child index (n < 0 or n >= number of children).
 *
 * NOTE(review): a_tree->num_descendants is not adjusted here, although
 * add_child() increments it -- confirm whether callers recompute it.
 *************************************************************************/
void remove_nth_child
  (const VERBOSE_T verbosity,
   const BOOLEAN free_child,		// free child node?
   const BOOLEAN_T free_children, 	/* Boolean: Free children as well? */
   const int       n,
   TREE_T *        a_tree)
{
  int i_child;
  int num_children;
  int last_slot;

  check_null_tree(a_tree);
  num_children = get_num_children(a_tree);

  /* Make sure the child exists (a negative index is never valid). */
  if (n < 0 || n >= num_children) {
    die("Attempted to remove child %d from a tree with %d children.\n", 
	n, num_children);
  }

  if (verbosity >= HIGHER_VERBOSE) {
      fprintf(stderr, "Removing child %s from tree %s.\n", 
	      get_key(a_tree->children[n]), get_key(a_tree));
  }

  /* Free the child and (optionally) its children. */
  if (free_child) free_tree(free_children, a_tree->children[n]);

  /* Move the remaining children left one slot. */
  last_slot = num_children - 1;
  for (i_child = n; i_child < last_slot; i_child++) {
    a_tree->children[i_child] = a_tree->children[i_child+1];
  }

  /* Do not leave a duplicate (or freed) pointer in the vacated slot. */
  a_tree->children[last_slot] = NULL;

  /* Decrement the number of children. */
  (a_tree->num_children)--;

} // remove_nth_child
Пример #2
0
/*************************************************************************
 * Append a child to the end of a tree's child list.
 *
 * Dies if the tree already holds MAX_DEGREE children.  When the tree is
 * tracking descendant counts (has_descendants), the child's count is
 * computed and added to the tree's running total.
 *************************************************************************/
void add_child
  (const VERBOSE_T verbosity,
   TREE_T * const  a_child,
   TREE_T *        a_tree)
{
  int next_slot;

  check_null_tree(a_tree);

  /* The current child count is also the index of the first free slot. */
  next_slot = get_num_children(a_tree);

  /* Refuse to exceed the maximum degree. */
  if (next_slot >= MAX_DEGREE) {
    die("Attempted to add %s to tree (%s) with maximum degree (%d).\n",
	get_key(a_child), get_key(a_tree), next_slot);
  }

  if (verbosity >= HIGHER_VERBOSE) {
    fprintf(stderr, "Adding child %s to tree %s.\n", get_key(a_child),
	    get_key(a_tree));
  }

  /* Store the child and bump the count. */
  a_tree->children[next_slot] = a_child;
  a_tree->num_children += 1;

  /* Nothing more to do unless descendant counts are being maintained. */
  if (!a_tree->has_descendants) {
    return;
  }

  compute_descendants(a_child, FALSE);
  a_tree->num_descendants += a_child->num_descendants;
}
Пример #3
0
/*************************************************************************
 * Retrieve the nth child of a given tree.
 *
 * n is a zero-based index into the tree's child array.  Dies if n is
 * negative or not smaller than the number of children.
 *************************************************************************/
TREE_T * get_nth_child
  (const int      n,
   TREE_T * const a_tree)
{
  int num_children;

  check_null_tree(a_tree);
  num_children = get_num_children(a_tree);

  /* Make sure the child exists (a negative index is never valid). */
  if (n < 0 || n >= num_children) {
    die("Attempted to retrieve child %d from a tree with %d children.\n", 
	n, num_children);
  }

  /* Return the requested child. */
  return(a_tree->children[n]);
}
Пример #4
0
/*************************************************************************
  Fill a substmatrix array with one (time, probability matrix) entry per
  edge of a phylogenetic tree.  Edges are visited in depth-first order:
  the edge leading to a child is stored first, followed by every edge in
  that child's subtree.

  The parameters are an evolutionary model, a phylogenetic tree, the
  array index at which to store the first edge of this tree, and a
  pointer to the substmatrix array being filled.

  The function returns the array index that follows the last entry it
  wrote (i.e. current_position plus the number of edges in the tree).
 *************************************************************************/
static int populate_substmatrix_array(
  EVOMODEL_T* model, // IN
  TREE_T* tree, // IN
  int current_position, // IN
  SUBSTMATRIX_ARRAY_T* array // OUT
) {
  const int child_count = get_num_children(tree);
  int next_slot = current_position;
  int i_child;

  // A leaf has no outgoing edges, so nothing is written.
  if (is_leaf(tree) == TRUE) {
    return next_slot;
  }

  for (i_child = 0; i_child < child_count; i_child++) {
    TREE_T* subtree = get_nth_child(i_child, tree);
    const double branch_time = get_length(subtree);
    MATRIX_T* branch_matrix = NULL;

    // Record the branch length and the matching probability matrix.
    set_substmatrix_time(array, next_slot, branch_time);
    branch_matrix = get_model_prob_matrix(branch_time, model);
    set_substmatrix_matrix(array, next_slot, branch_matrix);
    // The local matrix is released right after being stored; presumably
    // set_substmatrix_matrix keeps its own copy -- TODO confirm.
    free_matrix(branch_matrix);

    // Descend into the child; it reports the next unused index.
    next_slot = populate_substmatrix_array(model, subtree, next_slot + 1, array);
  }

  return next_slot;
}
Пример #5
0
/*************************************************************************
 * Compute the maximum depth of a tree, counted in nodes: a leaf has
 * depth 1, and an internal node has depth one more than its deepest
 * child.
 *************************************************************************/
int compute_depth
  (TREE_T * const a_tree)
{
  int deepest = 0;
  int child_count;
  int idx;

  /* A leaf terminates the recursion. */
  if (is_leaf(a_tree)) {
    return(1);
  }

  /* Internal node: find the deepest subtree. */
  child_count = get_num_children(a_tree);
  for (idx = 0; idx < child_count; idx++) {
    const int subtree_depth = compute_depth(get_nth_child(idx, a_tree));
    deepest = (subtree_depth > deepest) ? subtree_depth : deepest;
  }

  /* Account for this node itself. */
  return(deepest + 1);
}
Пример #6
0
/*
 * Recursively remove the directory `id` together with all of its child
 * directories.
 *
 * Children are removed first (depth first); the directory itself is
 * removed last via rem_dir_leaf().  Progress is traced on stdout.
 *
 * Returns `id` on every path.  No error reporting is implemented yet.
 */
int rem_dir( uint32_t id )
{
	/* uint32_t is printed through unsigned long so the conversion
	 * specifier matches the argument type on every platform. */
	printf("Called rem_dir( id: %lu )\n", (unsigned long) id );
	int i, children;
	struct FS_Directory* dir_list;

	children = get_num_children( id );

	// If there are no more children, delete directory
	if ( children == 0 )
	{
		printf("No children in id: %lu\n", (unsigned long) id );
		rem_dir_leaf( id );
		return id;
	}

	// Call function recursively over all the child dirs
	// NOTE(review): the result of get_children() is used without a NULL
	// check -- confirm whether it can fail when children > 0.
	dir_list = get_children( id );
	for ( i = 0; i < children; i++ ) 
		rem_dir( dir_list[i].id );
	free( dir_list );

	// Upon return, all children are deleted, now delete this
	printf("Returned for id: %lu, and removing this\n", (unsigned long) id );
	rem_dir_leaf( id );

// TODO handle errors

	// Mirror the leaf branch so the function never falls off its end.
	return id;
}
Пример #7
0
/********************************************************
 *	print_dir
 *
 * Given a directory ID, writes the name of every child
 * directory and then the name of every file it contains
 * to stdout, one name per line.
 ********************************************************/
void print_dir( uint32_t id )
{
	uint32_t idx;
	uint32_t dir_count;
	uint32_t file_count;

	/* Child directories first. */
	dir_count = get_num_children( id );
	if ( dir_count != 0 )
	{
		struct FS_Directory *subdirs = get_children( id );

		for ( idx = 0; idx < dir_count; ++idx )
		{
			printf("%s\n", subdirs[idx].name );
		}
		/* The listing is released here once printed. */
		free( subdirs );
	}

	/* Then the files held directly in this directory. */
	file_count = get_num_files( id );
	if ( file_count != 0 )
	{
		struct FS_File *entries = get_files( id );

		for ( idx = 0; idx < file_count; ++idx )
		{
			printf("%s\n", entries[idx].name );
		}
		free( entries );
	}
}
Пример #8
0
// Release a whole tree: every subtree is freed first, then the node itself.
// A NULL root is accepted and ignored.
void free_tree(Node *root) {
    int idx;
    int child_count;
    Node **kids;

    if (root == NULL) {
        return;
    }

    // Fetch the child list before the node is released.
    child_count = get_num_children(root);
    kids = get_children(root);

    for (idx = 0; idx < child_count; ++idx) {
        free_tree(kids[idx]);
    }

    free_node(root);
}
Пример #9
0
/*************************************************************************
 * Count the edges in a tree: one edge per direct child plus, recursively,
 * the edges found beneath each of those children.
 *************************************************************************/
int get_num_edges
  (TREE_T * const a_tree)
{
  assert(a_tree != NULL);
  const int child_count = get_num_children(a_tree);
  int total = child_count;	/* one edge per direct child */
  int idx;

  for (idx = 0; idx < child_count; idx++) {
    total += get_num_edges(get_nth_child(idx, a_tree));
  }
  return total;
}
Пример #10
0
/*************************************************************************
 * Sum the branch lengths of a tree: the length stored on the given node
 * plus the total length of each of its subtrees.
 *
 * The sum is accumulated in double precision and narrowed to float on
 * return.
 *************************************************************************/
float get_total_length
  (TREE_T * const a_tree)
{
  check_null_tree(a_tree);
  const int child_count = get_num_children(a_tree);
  double total = get_length(a_tree);
  int idx;

  /* A leaf contributes only its own branch. */
  if (is_leaf(a_tree)) {
    return total;
  }

  for (idx = 0; idx < child_count; idx++) {
    total += get_total_length(get_nth_child(idx, a_tree));
  }

  return total;
} // get_total_length
Пример #11
0
// ---------------------------------------------------------------
void if_statement::do_analyze_context()
{
    const int num_children = get_num_children();
    assert(num_children == 2 || num_children == 3);

    _condition = &get_child(0);
    _condition->analyze_context(this);

    if (_condition->get_type() != TARD_TYPE_BOOL)
        throw tard_exception(tard_exception::IF_CONDITION_MUST_BE_BOOLEAN);

    _then = &get_child(1);
    _then->analyze_context(this);

    if (num_children == 3)
    {
        _else = &get_child(2);
        _else->analyze_context(this);
    }
}
Пример #12
0
// ---------------------------------------------------------------
void fun_def::do_analyze_context()
{
	const char* entry_point = "main";

	assert(get_num_children() == 3);
	
	_type	    = &get_child(0).as<type>();
	_identifier = &get_child(1).as<identifier>();
	_block		= &get_child(2).as<block>();
	
	_type->analyze_context(this);
	_identifier->analyze_context(this);
	_block->analyze_context(this);

	_is_entry_point = entry_point == _identifier->name();

	const type_tag fun_ret_type = get_type();
	const type_tag block_type   = _block->get_type();

	if (fun_ret_type != TARD_TYPE_VOID && fun_ret_type != block_type)
		throw tard_exception(tard_exception::TYPE_MISMATCH);
}
Пример #13
0
/*************************************************************************
 * Entry point for pmp_bf
 *
 * Usage: pmp [options] <tree file> <MEME file>
 *
 * Steps performed:
 *   1. Parse the command line options.
 *   2. Read the phylogenetic tree (optionally converting it to a uniform
 *      star tree centred on the --ustar label).
 *   3. Read the motifs and the background frequencies.
 *   4. For each (selected) motif, build the alignment-column PSSM, the
 *      position-dependent background, and print, for each score index,
 *      the false positive rate (FPR) and false negative rate (FNR).
 *
 * Returns 0 on success; exits via die()/exit() on a usage or input error.
 *************************************************************************/
int main(int argc, char *argv[]) {

  char* bg_filename = NULL;
  char* motif_name = "motif"; // Use this motif name in the output.
  STRING_LIST_T* selected_motifs = NULL;
  double fg_rate = 1.0;
  double bg_rate = 1.0;
  double purine_pyrimidine = 1.0; // r
  double transition_transversion = 0.5; // R
  double pseudocount = 0.1;
  GAP_SUPPORT_T gap_support = SKIP_GAPS;
  MODEL_TYPE_T model_type = F81_MODEL;
  BOOLEAN_T use_halpern_bruno = FALSE;
  char* ustar_label = NULL;	// TLB; create uniform star tree
  int i;

  program_name = "pmp_bf";

  /**********************************************
   * COMMAND LINE PROCESSING
   **********************************************/

  // Define command line options. (FIXME: Repeated code)
  cmdoption const pmp_options[] = {
    {"hb", NO_VALUE},
    {"ustar", REQUIRED_VALUE},
    {"model", REQUIRED_VALUE},
    {"pur-pyr", REQUIRED_VALUE},
    {"transition-transversion", REQUIRED_VALUE},
    {"bg", REQUIRED_VALUE},
    {"fg", REQUIRED_VALUE},
    {"motif", REQUIRED_VALUE},
    {"motif-name", REQUIRED_VALUE},
    {"bgfile", REQUIRED_VALUE},
    {"pseudocount", REQUIRED_VALUE},
    {"verbosity", REQUIRED_VALUE}
  };
  // Derived from the table so it cannot drift when options are
  // added or removed.
  const int n_options = (int) (sizeof(pmp_options) / sizeof(pmp_options[0]));

  int option_index = 0;

  // Define the usage message.
  char      usage[1000] = "";
  strcat(usage, "USAGE: pmp [options] <tree file> <MEME file>\n");
  strcat(usage, "\n");
  strcat(usage, "   Options:\n");

  // Evolutionary model parameters.
  strcat(usage, "     --hb\n");
  strcat(usage, "     --model single|average|jc|k2|f81|f84|hky|tn");
  strcat(usage, " (default=f81)\n");
  strcat(usage, "     --pur-pyr <float> (default=1.0)\n");
  strcat(usage, "     --transition-transversion <float> (default=0.5)\n");
  strcat(usage, "     --bg <float> (default=1.0)\n");
  strcat(usage, "     --fg <float> (default=1.0)\n");

  // Motif parameters.
  strcat(usage, "     --motif <id> (default=all)\n");
  strcat(usage, "     --motif-name <string> (default from motif file)\n");

  // Miscellaneous parameters
  strcat(usage, "     --bgfile <background> (default from motif file)\n");
  strcat(usage, "     --pseudocount <float> (default=0.1)\n");
  strcat(usage, "     --ustar <label>\n");	// TLB; create uniform star tree
  strcat(usage, "     --verbosity [1|2|3|4] (default 2)\n");
  strcat(usage, "\n    Prints the FP and FN rate at each of 10000 score values.\n");
  // The program prints the false negative rate, so the documented
  // output format names FNR.
  strcat(usage, "\n    Output format: [<motif_id> score <score> FPR <fpr> FNR <fnr>]+\n");

  // Parse the command line.
  if (simple_setopt(argc, argv, n_options, pmp_options) != NO_ERROR) {
    die("Error processing command line options: option name too long.\n");
  }

  while (TRUE) { 
    int c = 0;
    char* option_name = NULL;
    char* option_value = NULL;
    const char * message = NULL;

    // Read the next option, and break if we're done.
    c = simple_getopt(&option_name, &option_value, &option_index);
    if (c == 0) {
      break;
    } else if (c < 0) {
      (void) simple_getopterror(&message);
      die("Error processing command line options (%s)\n", message);
    }
    
    if (strcmp(option_name, "model") == 0) {
      if (strcmp(option_value, "jc") == 0) {
        model_type = JC_MODEL;
      } else if (strcmp(option_value, "k2") == 0) {
        model_type = K2_MODEL;
      } else if (strcmp(option_value, "f81") == 0) {
        model_type = F81_MODEL;
      } else if (strcmp(option_value, "f84") == 0) {
        model_type = F84_MODEL;
      } else if (strcmp(option_value, "hky") == 0) {
        model_type = HKY_MODEL;
      } else if (strcmp(option_value, "tn") == 0) {
        model_type = TAMURA_NEI_MODEL;
      } else if (strcmp(option_value, "single") == 0) {
        model_type = SINGLE_MODEL;
      } else if (strcmp(option_value, "average") == 0) {
        model_type = AVERAGE_MODEL;
      } else {
        die("Unknown model: %s\n", option_value);
      }
    } else if (strcmp(option_name, "hb") == 0){
      use_halpern_bruno = TRUE;
    } else if (strcmp(option_name, "ustar") == 0){	// TLB; create uniform star tree
      ustar_label = option_value;
    } else if (strcmp(option_name, "pur-pyr") == 0){
      purine_pyrimidine = atof(option_value);
    } else if (strcmp(option_name, "transition-transversion") == 0){
      transition_transversion = atof(option_value);
    } else if (strcmp(option_name, "bg") == 0){
      bg_rate = atof(option_value);
    } else if (strcmp(option_name, "fg") == 0){
      fg_rate = atof(option_value);
    } else if (strcmp(option_name, "motif") == 0){
      // The list of selected motifs is created on first use.
      if (selected_motifs == NULL) {
        selected_motifs = new_string_list();
      }
      add_string(option_value, selected_motifs);
    } else if (strcmp(option_name, "motif-name") == 0){
      motif_name = option_value;
    } else if (strcmp(option_name, "bgfile") == 0){
      bg_filename = option_value;
    } else if (strcmp(option_name, "pseudocount") == 0){
      pseudocount = atof(option_value);
    } else if (strcmp(option_name, "verbosity") == 0){
      verbosity = atoi(option_value);
    }
  }

  // Must have tree and motif file names
  if (argc != option_index + 2) {
    fprintf(stderr, "%s", usage);
    exit(EXIT_FAILURE);
  } 

  /**********************************************
   * Read the phylogenetic tree.
   **********************************************/
  char* tree_filename = NULL;
  TREE_T* tree = NULL;
  tree_filename = argv[option_index];
  option_index++;
  tree = read_tree_from_file(tree_filename);

  // get the species names
  STRING_LIST_T* alignment_species = make_leaf_list(tree);
  char *root_label = get_label(tree);	// in case target in center
  if (strlen(root_label)>0) add_string(root_label, alignment_species);
  //write_string_list(" ", alignment_species, stderr);

  // TLB; Convert the tree to a uniform star tree with
  // the target sequence at its center.
  if (ustar_label != NULL) {
    tree = convert_to_uniform_star_tree(tree, ustar_label);
    if (tree == NULL) 
      die("Tree or alignment missing target %s\n", ustar_label);
    if (verbosity >= NORMAL_VERBOSE) {
      fprintf(stderr, 
	"Target %s placed at center of uniform (d=%.3f) star tree:\n", 
          ustar_label, get_total_length(tree) / get_num_children(tree) 
      );
      write_tree(tree, stderr);
    }
  }

  /**********************************************
   * Read the motifs.
   **********************************************/
  char* meme_filename = argv[option_index];
  option_index++;

  MREAD_T *mread;
  ALPH_T alph;
  ARRAYLST_T *motifs;
  ARRAY_T *bg_freqs;

  mread = mread_create(meme_filename, OPEN_MFILE);
  mread_set_bg_source(mread, bg_filename);
  mread_set_pseudocount(mread, pseudocount);
  // read motifs
  motifs = mread_load(mread, NULL);
  alph = mread_get_alphabet(mread);
  bg_freqs = mread_get_background(mread);
  // check
  if (arraylst_size(motifs) == 0) die("No motifs in %s.", meme_filename);

  // TLB; need to resize bg_freqs array to ALPH_SIZE items
  // or copy array breaks in HB mode.  This throws away
  // the freqs for the ambiguous characters;
  int asize = alph_size(alph, ALPH_SIZE);
  resize_array(bg_freqs, asize);

  /**************************************************************
  * Compute probability distributions for each of the selected motifs.
  **************************************************************/
  int motif_index;
  for (motif_index = 0; motif_index < arraylst_size(motifs); motif_index++) {

    MOTIF_T* motif = (MOTIF_T*)arraylst_get(motif_index, motifs);
    char* motif_id = get_motif_id(motif);
    char* bare_motif_id = motif_id;

    // We may have specified on the command line that
    // only certain motifs were to be used.
    if (selected_motifs != NULL) {
      if (*bare_motif_id == '+' || *bare_motif_id == '-') {
        // The selected motif id won't include a strand indicator.
        bare_motif_id++;
      }
      if (have_string(bare_motif_id, selected_motifs) == FALSE) {
        continue;
      }
    }

    if (verbosity >= NORMAL_VERBOSE) {
      fprintf(
        stderr, 
        "Using motif %s of width %d.\n",
        motif_id, get_motif_length(motif)
      );
    }

    // Build an array of evolutionary models for each position in the motif.
    EVOMODEL_T** models = make_motif_models(
      motif, 
      bg_freqs,
      model_type,
      fg_rate, 
      bg_rate, 
      purine_pyrimidine, 
      transition_transversion, 
      use_halpern_bruno
    );

    // Get the frequencies under the background model (row 0) 
    // and position-dependent scores (rows 1..w)
    // for each possible alignment column.
    MATRIX_T* pssm_matrix = build_alignment_pssm_matrix(
      alph,
      alignment_species,
      get_motif_length(motif) + 1, 
      models, 
      tree, 
      gap_support
    );
    ARRAY_T* alignment_col_freqs = allocate_array(get_num_cols(pssm_matrix)); 
    copy_array(get_matrix_row(0, pssm_matrix), alignment_col_freqs);
    remove_matrix_row(0, pssm_matrix);		// throw away first row
    //print_col_frequencies(alph, alignment_col_freqs);

    //
    // Get the position-dependent null model alignment column frequencies
    //
    int w = get_motif_length(motif);
    int ncols = get_num_cols(pssm_matrix); 
    MATRIX_T* pos_dep_bkg = allocate_matrix(w, ncols);
    for (i=0; i<w; i++) {
      // get the evo model corresponding to this column of the motif
      // and store it as the first evolutionary model.
      // NOTE(review): released with myfree() here but with free_model()
      // in the cleanup below -- confirm which one is intended.
      myfree(models[0]);
      // Use motif PSFM for equilibrium freqs. for model.
      ARRAY_T* site_specific_freqs = allocate_array(asize);
      int j = 0;
      for(j = 0; j < asize; j++) {
        double value = get_matrix_cell(i, j, get_motif_freqs(motif));
        set_array_item(j, value, site_specific_freqs);
      }
      if (use_halpern_bruno == FALSE) {
        models[0] = make_model(
          model_type,
          fg_rate,
          transition_transversion,
          purine_pyrimidine,
          site_specific_freqs,
          NULL
        );
      } else {
        models[0] = make_model(
          model_type,
          fg_rate,
          transition_transversion,
          purine_pyrimidine,
          bg_freqs,
          site_specific_freqs
        );
      }
      // get the alignment column frequencies using this model
      MATRIX_T* tmp_pssm_matrix = build_alignment_pssm_matrix(
        alph,
        alignment_species,
        2,				// only interested in freqs under bkg
        models, 
        tree, 
        gap_support
      );
      // assemble the position-dependent background alignment column freqs.
      set_matrix_row(i, get_matrix_row(0, tmp_pssm_matrix), pos_dep_bkg);
      // chuck the pssm (not his real name)
      free_matrix(tmp_pssm_matrix);
    }

    //
    // Compute and print the score distribution under the background model
    // and under the (position-dependent) motif model.
    //
    int range = 10000;	// 10^4 gives same result as 10^5, but 10^3 differs

    // under background model
    PSSM_T* pssm = build_matrix_pssm(alph, pssm_matrix, alignment_col_freqs, range);

    // under position-dependent background (motif) model
    PSSM_T* pssm_pos_dep = build_matrix_pssm(alph, pssm_matrix, alignment_col_freqs, range);
    get_pv_lookup_pos_dep(
      pssm_pos_dep, 
      pos_dep_bkg, 
      NULL // no priors used
    );

    // print FP and FN distributions
    int num_items = get_pssm_pv_length(pssm_pos_dep);
    for (i=0; i<num_items; i++) {
      double pvf = get_pssm_pv(i, pssm);
      double pvt = get_pssm_pv(i, pssm_pos_dep);
      double fpr = pvf;
      double fnr = 1 - pvt;
      // Skip scores where the FPR is (nearly) 1 or the FNR is exactly 0.
      if (fpr >= 0.99999 || fnr == 0) continue;
      printf("%s score %d FPR %.3g FNR %.3g\n", motif_id, i, fpr, fnr);
    }

    // free stuff
    // NOTE(review): alignment_col_freqs, pssm_matrix, pos_dep_bkg and the
    // per-column site_specific_freqs arrays are not released in this
    // function; whether the PSSM/model objects take ownership of them is
    // not visible here -- TODO confirm before adding frees.
    free_pssm(pssm);
    free_pssm(pssm_pos_dep);
    if (models != NULL) {
      int model_index;
      int num_models = get_motif_length(motif) + 1;
      for (model_index = 0; model_index < num_models; model_index++) {
        free_model(models[model_index]);
      }
      myfree(models);
    }

  } // motif

  arraylst_destroy(destroy_motif, motifs);

  /**********************************************
   * Clean up.
   **********************************************/
  // TLB may have encountered a memory corruption bug here
  // CEG has not been able to reproduce it. valgrind says all is well.
  free_array(bg_freqs);
  free_tree(TRUE, tree);
  free_string_list(selected_motifs);

  return(0);
} // main
Пример #14
0
// Return the child at position idx.  The index is checked (via SASSERT)
// against get_num_children(); the node is then viewed as a
// sexpr_composite to reach its child array.
sexpr * sexpr::get_child(unsigned idx) const {
    SASSERT(idx < get_num_children());
    sexpr_composite const * composite = static_cast<sexpr_composite const *>(this);
    return composite->m_children[idx];
}