/*****************************************************************************
 * MEME > motifs > motif
 * Construct the skeleton of a motif.
 *
 * Allocates a zeroed MOTIF_T, stores it in the parser context
 * (data->mscope.motif) and fills in the identifiers, width, site count, URL,
 * E-value, alphabet and strand flags. The frequency and score matrices are
 * allocated with placeholder values (-1 and NO_SCORE respectively).
 *
 * ctx           parser context; cast to CTX_T*.
 * id            key under which the motif position is cached when the
 *               SCANNED_SITES option is set.
 * name          primary motif identifier (must not be NULL).
 * alt           secondary motif identifier; NULL is treated as "".
 * width         motif length.
 * sites         number of sites.
 * log10_evalue  log10 of the motif E-value.
 * url           motif URL; duplicated with strdup.
 * llr, ic, re, bayes_threshold, elapsed_time are not used here.
 ****************************************************************************/
void mxml_start_motif(void *ctx, char *id, char *name, char *alt, int width, double sites, 
    double llr, double ic, double re, double bayes_threshold,
    double log10_evalue, double elapsed_time, char *url) {
  CTX_T *data;
  MOTIF_T *motif;
  
  data = (CTX_T*)ctx;
  data->mscope.motif = mm_malloc(sizeof(MOTIF_T));
  motif = data->mscope.motif;
  memset(motif, 0, sizeof(MOTIF_T));
  set_motif_id(name, strlen(name), motif);
  // The second argument is the identifier's length. sizeof(alt) would only
  // give the size of the pointer, so measure the string instead.
  if (alt == NULL) alt = "";
  set_motif_id2(alt, strlen(alt), motif);
  set_motif_strand('+', motif);
  motif->length = width;
  motif->num_sites = sites;
  motif->url = strdup(url);
  motif->log_evalue = log10_evalue;
  motif->evalue = pow(10.0, log10_evalue);
  // take a reference to the alphabet held by the parser context
  motif->alph = alph_hold(data->alph);
  motif->flags = (data->fscope.strands == 2 ? MOTIF_BOTH_STRANDS : 0);
  // allocate matrices, filled with placeholder values
  motif->freqs = allocate_matrix(motif->length, alph_size_core(motif->alph));
  init_matrix(-1, motif->freqs);
  motif->scores = allocate_matrix(motif->length, alph_size_core(motif->alph));
  init_matrix(NO_SCORE, motif->scores);
  // should be set by a post processing method
  motif->complexity = -1;
  motif->trim_left = 0;
  motif->trim_right = 0;
  // cache motif position
  if (data->options & SCANNED_SITES) {
    rbtree_put(data->motif_lookup, id, &(data->current_motif));
  }
}
/*****************************************************************************
 * MEME > training_set > sequence
 * When the SCANNED_SITES option is set, record the sequence's name and
 * length (wrapped by create_seqinfo) in the sequence lookup under its id.
 * The weight is not used.
 ****************************************************************************/
void mxml_sequence(void *ctx, char *id, char *name, int length, double weight) {
  CTX_T *data = (CTX_T*)ctx;

  // nothing to record unless scanned sites were requested
  if (!(data->options & SCANNED_SITES)) return;
  rbtree_put(data->sequence_lookup, id, create_seqinfo(name, length));
}
Ejemplo n.º 3
0
/*
 * Detects infinite loops in the parser. A parsing step must either consume
 * input or move to a state that has not yet been visited at the current
 * position. Because the number of states is finite, a parser obeying this
 * rule either terminates or is caught here.
 *
 * jsonrd       the reader; its current state and set of previously seen
 *              states (prior_states) are consulted and updated.
 * prior_state  the state the parser was in before the step.
 * consumed     amount of input consumed by the step.
 *
 * Returns true when a loop is detected, false otherwise.
 */
static bool loop_check(JSONRD_T *jsonrd, PS_EN prior_state, int consumed) {
  RBTREE_T *seen = jsonrd->prior_states;
  PS_EN current;
  bool inserted;

  // input was consumed, so the states recorded for the old position are stale
  if (consumed != 0) {
    rbtree_clear(seen);
    return false;
  }
  current = jsonrd->state;
  if (rbtree_size(seen) != 0) {
    // look the current state up with the create flag set; `inserted` reports
    // whether it was newly added (i.e. not visited before at this position)
    rbtree_lookup(seen, &current, true, &inserted);
    return !inserted;
  }
  // first step without progress at this position
  if (prior_state == current) return true;
  rbtree_put(seen, &prior_state, NULL);
  rbtree_put(seen, &current, NULL);
  return false;
}
Ejemplo n.º 4
0
/*
 * Small manual exercise of the red-black tree: creates a tree, inserts ten
 * entries keyed by random() % 100 whose values are the insertion counter
 * (0..9) cast to a pointer, then prints the tree with rbtree_show.
 * Always returns 0.
 */
int try_rbtree() {
    RBTree *tree = rbtree_create();
    long int count = 0;

    while (count != 10) {
        rbtree_put(tree, random() % 100, (void *)count);
        count++;
    }
    rbtree_show(tree);

    return 0;
}
Ejemplo n.º 5
0
/*
 * Load background file frequencies into the array.
 */
ARRAY_T* get_file_frequencies(ALPH_T *alph, char *bg_filename, ARRAY_T *freqs) {
  regmatch_t matches[4];
  STR_T *line;
  char chunk[BG_CHUNK_SIZE+1], letter[2], *key;
  int size, terminate, offset, i;
  FILE *fp;
  regex_t bgfreq;
  double freq;
  RBTREE_T *letters;
  RBNODE_T *node;
  
  regcomp_or_die("bg freq", &bgfreq, BGFREQ_RE, REG_EXTENDED);
  letters = rbtree_create(rbtree_strcasecmp, rbtree_strcpy, free, rbtree_dblcpy, free);
  line = str_create(100);
  if (!(fp = fopen(bg_filename, "r"))) {
    die("Unable to open background file \"%s\" for reading.\n", bg_filename);
  }
  
  terminate = feof(fp);
  while (!terminate) {
    size = fread(chunk, sizeof(char), BG_CHUNK_SIZE, fp);
    chunk[size] = '\0';
    terminate = feof(fp);
    offset = 0;
    while (offset < size) {
      // skip mac newline
      if (str_len(line) == 0 && chunk[offset] == '\r') {
        offset++;
        continue;
      }
      // find next new line
      for (i = offset; i < size; ++i) {
        if (chunk[i] == '\n') break;
      }
      // append portion up to the new line or end of chunk
      str_append(line, chunk+offset, i - offset);
      // read more if we didn't find a new line
      if (i == size && !terminate) break;
      // move the offset past the new line
      offset = i + 1;
      // handle windows new line
      if (str_char(line, -1) == '\r') str_truncate(line, -1);
      // remove everything to the right of a comment character
      for (i = 0; i < str_len(line); ++i) {
        if (str_char(line, i) == '#') {
          str_truncate(line, i);
          break;
        }
      }
      // check the line for a single letter followed by a number
      if (regexec_or_die("bg freq", &bgfreq, str_internal(line), 4, matches, 0)) {
        // parse the letter and frequency value
        regex_strncpy(matches+1, str_internal(line), letter, 2);
        freq = regex_dbl(matches+2, str_internal(line));
        // check the frequency is acceptable
        if (freq < 0 || freq > 1) {
          die("The background file lists the illegal probability %g for "
            "the letter %s.\n", freq, letter);
        } else if (freq == 0) {
          die("The background file lists a probability of zero for the "
            "letter %s\n", letter);
        }
        if (freq >= 0 && freq <= 1) rbtree_put(letters, letter, &freq);
      }
      str_clear(line);
    }
  }
  // finished with the file so clean up file parsing stuff
  fclose(fp);
  str_destroy(line, FALSE);
  regfree(&bgfreq);
  // guess the alphabet
  if (*alph == INVALID_ALPH) {
    switch (rbtree_size(letters)) {
      case PROTEIN_ASIZE:
        *alph = PROTEIN_ALPH;
        break;
      case DNA_ASIZE:
        *alph = DNA_ALPH;
        break;
      default:
        die("Number of single character entries in background does not match "
            "an alphabet.\n");
    }
  }
  // make the background
  if (freqs == NULL) freqs = allocate_array(alph_size(*alph, ALL_SIZE));
  assert(get_array_length(freqs) >= alph_size(*alph, ALL_SIZE));
  init_array(-1, freqs);
  for (node = rbtree_first(letters); node != NULL; node = rbtree_next(node)) {
    key = (char*)rbtree_key(node);
    i = alph_index(*alph, key[0]);
    freq = *((double*)rbtree_value(node));
    if (i == -1) {
      die("Background contains letter %s which is not in the %s alphabet.\n", 
          key, alph_name(*alph));
    }
    if (get_array_item(i, freqs) != -1) {
      die("Background contains letter %s which has the same meaning as an "
          "already listed letter.\n", key);
    }
    set_array_item(i, freq, freqs);
  }
  // check that all items were set
  for (i = 0; i < alph_size(*alph, ALPH_SIZE); i++) {
    if (get_array_item(i, freqs) == -1) {
      die("Background is missing letter %c.\n", alph_char(*alph, i));
    }
  }
  // disabled for backwards compatability (AMA test was failing)
  //normalize_subarray(0, ALPH_ASIZE[*alph], 0.0, freqs);
  // calculate the values of the ambiguous letters from the concrete ones
  calc_ambigs(*alph, FALSE, freqs);
  // cleanup
  rbtree_destroy(letters);
  // return result
  return freqs;
}
Ejemplo n.º 6
0
/*
 * Insert (or update) the entry for `key` with `value` by forwarding to
 * rbtree_put on the tree stored in `map`.
 */
void tree_map_put(KEY_TYPE key, VALUE_TYPE value, TreeMap* map)
{
	rbtree_put(key, value, map->tree);
}
Ejemplo n.º 7
0
/***********************************************************************
  Process command line options

  Fills `options` with defaults, then applies every option found on the
  command line. The first non-option argument is taken as the sequence
  source and every remaining argument as a motif source. If option
  parsing fails, or fewer than two non-option arguments are present, the
  usage message is printed to stderr and the program exits with
  EXIT_FAILURE.

  Also sets the global `verbosity`.
 ***********************************************************************/
static void process_command_line(
  int argc,
  char* argv[],
  CENTRIMO_OPTIONS_T *options
) {

  // Define command line options.
  // "motif" must be listed here, otherwise the parser never reports it and
  // the branch below that fills selected_motifs could not be reached.
  cmdoption const centrimo_options[] = {
    {"bgfile", REQUIRED_VALUE},
    {"o", REQUIRED_VALUE},
    {"oc", REQUIRED_VALUE},
    {"score", REQUIRED_VALUE},
    {"motif", REQUIRED_VALUE},
    {"motif-pseudo", REQUIRED_VALUE},
    {"ethresh", REQUIRED_VALUE},
    {"maxbin", REQUIRED_VALUE},
    {"norc", NO_VALUE},
    {"noflip", NO_VALUE},
    {"desc", REQUIRED_VALUE},
    {"dfile", REQUIRED_VALUE},
    {"verbosity", REQUIRED_VALUE}
  };
  // derive the count from the table so the two cannot drift apart
  const int num_options =
    (int)(sizeof(centrimo_options) / sizeof(centrimo_options[0]));

  int option_index = 0;

  /* Make sure various options are set to NULL or defaults. */
  options->alphabet = DNA_ALPH;
  options->allow_clobber = TRUE;
  options->scan_both_strands = TRUE;
  options->no_flip = FALSE;

  options->description = NULL;
  options->desc_file = NULL;
  options->bg_source = NULL;
  options->output_dirname = "centrimo_out";
  options->seq_source = NULL;
  options->motif_sources = arraylst_create();

  options->score_thresh = DEFAULT_SCORE_THRESH;

  options->pseudocount = DEFAULT_PSEUDOCOUNT;

  options->evalue_thresh = DEFAULT_EVALUE_THRESH;

  options->max_window = DEFAULT_MAX_WINDOW;

  // no need to copy, as string is declared in argv array
  options->selected_motifs = rbtree_create(rbtree_strcmp, NULL, NULL, NULL, NULL);

  verbosity = NORMAL_VERBOSE;

  simple_setopt(argc, argv, num_options, centrimo_options);

  // Parse the command line.
  while (TRUE) {
    int c = 0;
    char* option_name = NULL;
    char* option_value = NULL;
    const char * message = NULL;

    // Read the next option, and break if we're done.
    c = simple_getopt(&option_name, &option_value, &option_index);
    if (c == 0) {
      break;
    }
    else if (c < 0) {
      (void) simple_getopterror(&message);
      fprintf(stderr, "Error processing command line options (%s)\n", message);
      fprintf(stderr, CENTRIMO_USAGE, DEFAULT_PSEUDOCOUNT, DEFAULT_SCORE_THRESH,
          DEFAULT_EVALUE_THRESH, NORMAL_VERBOSE);
      exit(EXIT_FAILURE);
    }
    if (strcmp(option_name, "bgfile") == 0){
      options->bg_source = option_value;
    }
    else if (strcmp(option_name, "ethresh") == 0){
      options->evalue_thresh = atof(option_value);
    }
    else if (strcmp(option_name, "maxbin") == 0){
      // max_window is one less than the number of places a motif can align
      // within the central window
      options->max_window = atoi(option_value) - 1;  
    }
    else if (strcmp(option_name, "motif") == 0){
      // may be given several times; each value is added to the set
      rbtree_put(options->selected_motifs, option_value, NULL);
    }
    else if (strcmp(option_name, "motif-pseudo") == 0){
      options->pseudocount = atof(option_value);
    }
    else if (strcmp(option_name, "norc") == 0){
      options->scan_both_strands = FALSE;
    }
    else if (strcmp(option_name, "noflip") == 0){
      options->no_flip = TRUE;
    }
    else if (strcmp(option_name, "o") == 0){
      // Set output directory with no clobber
      options->output_dirname = option_value;
      options->allow_clobber = FALSE;
    }
    else if (strcmp(option_name, "oc") == 0){
      // Set output directory with clobber
      options->output_dirname = option_value;
      options->allow_clobber = TRUE;
    }
    else if (strcmp(option_name, "score") == 0){
      options->score_thresh = atof(option_value);
    }
    else if (strcmp(option_name, "desc") == 0) {
      options->description = option_value;
    } 
    else if (strcmp(option_name, "dfile") == 0) {
      options->desc_file = option_value;
    }
    else if (strcmp(option_name, "verbosity") == 0){
      verbosity = atoi(option_value);
    }
  }
  // Must have sequence and motif file names
  if (argc < option_index + 2) {
    fprintf(stderr, "Sequences and motifs are both required\n");
    fprintf(stderr, CENTRIMO_USAGE, DEFAULT_PSEUDOCOUNT, DEFAULT_SCORE_THRESH,
        DEFAULT_EVALUE_THRESH, NORMAL_VERBOSE);
    exit(EXIT_FAILURE);
  }

  // Record the input file names: first the sequences, then all motif files
  options->seq_source = argv[option_index++];
  for (;option_index < argc; option_index++) 
    arraylst_add(argv[option_index], options->motif_sources);
}