/***************************************************************************** * MEME > motifs > motif * Construct the skeleton of a motif. ****************************************************************************/ void mxml_start_motif(void *ctx, char *id, char *name, char *alt, int width, double sites, double llr, double ic, double re, double bayes_threshold, double log10_evalue, double elapsed_time, char *url) { CTX_T *data; MOTIF_T *motif; data = (CTX_T*)ctx; data->mscope.motif = mm_malloc(sizeof(MOTIF_T)); motif = data->mscope.motif; memset(motif, 0, sizeof(MOTIF_T)); set_motif_id(name, strlen(name), motif); set_motif_id2(alt, sizeof(alt), motif); set_motif_strand('+', motif); motif->length = width; motif->num_sites = sites; motif->url = strdup(url); motif->log_evalue = log10_evalue; motif->evalue = pow(10.0, log10_evalue); // calculate alphabet size motif->alph = alph_hold(data->alph); motif->flags = (data->fscope.strands == 2 ? MOTIF_BOTH_STRANDS : 0); // allocate matricies motif->freqs = allocate_matrix(motif->length, alph_size_core(motif->alph)); init_matrix(-1, motif->freqs); motif->scores = allocate_matrix(motif->length, alph_size_core(motif->alph)); init_matrix(NO_SCORE, motif->scores); // should be set by a post processing method motif->complexity = -1; motif->trim_left = 0; motif->trim_right = 0; // cache motif position if (data->options & SCANNED_SITES) { rbtree_put(data->motif_lookup, id, &(data->current_motif)); } }
/*****************************************************************************
 * MEME > training_set > sequence
 *
 * When scanned sites are requested, records the sequence's name and length
 * in the sequence lookup tree under its id. Otherwise does nothing.
 * The weight argument is not used.
 ****************************************************************************/
void mxml_sequence(void *ctx, char *id, char *name, int length, double weight) {
  CTX_T *data = (CTX_T*)ctx;

  // nothing to remember unless scanned sites are being collected
  if (!(data->options & SCANNED_SITES)) {
    return;
  }
  rbtree_put(data->sequence_lookup, id, create_seqinfo(name, length));
}
/*
 * Detects infinite loops in the parser.
 *
 * A parsing step must either consume input or move to a state that has not
 * yet been visited at the current position. Because the set of states is
 * finite, this guarantees that parsing either finishes or is caught here.
 *
 * Returns true when a loop has been detected, false otherwise.
 */
static bool loop_check(JSONRD_T *jsonrd, PS_EN prior_state, int consumed) {
  RBTREE_T *visited = jsonrd->prior_states;
  PS_EN current;
  bool created;

  // progress was made, so forget the states seen at the old position
  if (consumed != 0) {
    rbtree_clear(visited);
    return false;
  }

  current = jsonrd->state;

  if (rbtree_size(visited) != 0) {
    // NOTE(review): the 'true' argument presumably asks rbtree_lookup to
    // insert a missing key and report that through 'created' - confirm.
    rbtree_lookup(visited, &current, true, &created);
    return !created;
  }

  // first step without progress at this position
  if (prior_state == current) {
    return true;
  }
  rbtree_put(visited, &prior_state, NULL);
  rbtree_put(visited, &current, NULL);
  return false;
}
/*
 * Small smoke test: puts ten entries with keys drawn from random() % 100
 * into a fresh tree (the value is the insertion counter), then prints the
 * tree with rbtree_show. Always returns 0.
 */
int try_rbtree() {
  RBTree *tree = rbtree_create();
  long int count = 0;

  while (count != 10) {
    rbtree_put(tree, random() % 100, (void *)count);
    count++;
  }

  rbtree_show(tree);
  return 0;
}
/*
 * Load background file frequencies into the array.
 *
 * The file is read in chunks of BG_CHUNK_SIZE bytes and split into lines.
 * Anything after a '#' on a line is discarded. Lines matching BGFREQ_RE
 * contribute a letter/frequency pair; other lines are ignored.
 *
 * alph        in/out: if *alph is INVALID_ALPH it is guessed from the number
 *             of distinct letters found in the file.
 * bg_filename path of the background file to read.
 * freqs       array to fill, or NULL to have one allocated here.
 *
 * Returns the filled array (freqs, or the newly allocated one).
 * Calls die() on an unreadable file, a read error, an illegal or zero
 * probability, a letter outside the alphabet, two letters mapping to the
 * same alphabet index, or a missing letter.
 */
ARRAY_T* get_file_frequencies(ALPH_T *alph, char *bg_filename, ARRAY_T *freqs) {
  regmatch_t matches[4];
  STR_T *line;
  char chunk[BG_CHUNK_SIZE+1], letter[2], *key;
  int size, terminate, offset, i;
  FILE *fp;
  regex_t bgfreq;
  double freq;
  RBTREE_T *letters;
  RBNODE_T *node;

  regcomp_or_die("bg freq", &bgfreq, BGFREQ_RE, REG_EXTENDED);
  letters = rbtree_create(rbtree_strcasecmp, rbtree_strcpy, free, rbtree_dblcpy, free);
  line = str_create(100);
  if (!(fp = fopen(bg_filename, "r"))) {
    die("Unable to open background file \"%s\" for reading.\n", bg_filename);
  }
  terminate = feof(fp);
  while (!terminate) {
    size = fread(chunk, sizeof(char), BG_CHUNK_SIZE, fp);
    chunk[size] = '\0';
    // A failed read never sets the end-of-file indicator, so without this
    // check the loop would spin forever reading zero bytes.
    if (ferror(fp)) {
      die("Error reading background file \"%s\".\n", bg_filename);
    }
    terminate = feof(fp) || ferror(fp);
    offset = 0;
    while (offset < size) {
      // skip mac newline
      if (str_len(line) == 0 && chunk[offset] == '\r') {
        offset++;
        continue;
      }
      // find next new line
      for (i = offset; i < size; ++i) {
        if (chunk[i] == '\n') break;
      }
      // append portion up to the new line or end of chunk
      str_append(line, chunk+offset, i - offset);
      // read more if we didn't find a new line
      if (i == size && !terminate) break;
      // move the offset past the new line
      offset = i + 1;
      // handle windows new line
      if (str_char(line, -1) == '\r') str_truncate(line, -1);
      // remove everything to the right of a comment character
      for (i = 0; i < str_len(line); ++i) {
        if (str_char(line, i) == '#') {
          str_truncate(line, i);
          break;
        }
      }
      // check the line for a single letter followed by a number
      if (regexec_or_die("bg freq", &bgfreq, str_internal(line), 4, matches, 0)) {
        // parse the letter and frequency value
        regex_strncpy(matches+1, str_internal(line), letter, 2);
        freq = regex_dbl(matches+2, str_internal(line));
        // check the frequency is acceptable
        if (freq < 0 || freq > 1) {
          die("The background file lists the illegal probability %g for "
              "the letter %s.\n", freq, letter);
        } else if (freq == 0) {
          die("The background file lists a probability of zero for the "
              "letter %s\n", letter);
        }
        // Kept from the original: only store values inside [0, 1]. This
        // also filters a value that fails every comparison above (NaN).
        if (freq >= 0 && freq <= 1) rbtree_put(letters, letter, &freq);
      }
      str_clear(line);
    }
  }
  // finished with the file so clean up file parsing stuff
  fclose(fp);
  str_destroy(line, FALSE);
  regfree(&bgfreq);
  // guess the alphabet
  if (*alph == INVALID_ALPH) {
    switch (rbtree_size(letters)) {
      case PROTEIN_ASIZE:
        *alph = PROTEIN_ALPH;
        break;
      case DNA_ASIZE:
        *alph = DNA_ALPH;
        break;
      default:
        die("Number of single character entries in background does not match "
            "an alphabet.\n");
    }
  }
  // make the background
  if (freqs == NULL) freqs = allocate_array(alph_size(*alph, ALL_SIZE));
  assert(get_array_length(freqs) >= alph_size(*alph, ALL_SIZE));
  // -1 marks an entry that has not been set yet
  init_array(-1, freqs);
  for (node = rbtree_first(letters); node != NULL; node = rbtree_next(node)) {
    key = (char*)rbtree_key(node);
    i = alph_index(*alph, key[0]);
    freq = *((double*)rbtree_value(node));
    if (i == -1) {
      die("Background contains letter %s which is not in the %s alphabet.\n",
          key, alph_name(*alph));
    }
    if (get_array_item(i, freqs) != -1) {
      die("Background contains letter %s which has the same meaning as an "
          "already listed letter.\n", key);
    }
    set_array_item(i, freq, freqs);
  }
  // check that all items were set
  for (i = 0; i < alph_size(*alph, ALPH_SIZE); i++) {
    if (get_array_item(i, freqs) == -1) {
      die("Background is missing letter %c.\n", alph_char(*alph, i));
    }
  }
  // normalisation is deliberately not performed here; it was disabled for
  // backwards compatibility (AMA test was failing)
  // calculate the values of the ambiguous letters from the concrete ones
  calc_ambigs(*alph, FALSE, freqs);
  // cleanup
  rbtree_destroy(letters);
  // return result
  return freqs;
}
/*
 * Stores the given key/value pair in the map by forwarding to rbtree_put
 * on the map's underlying tree.
 */
void tree_map_put(KEY_TYPE key, VALUE_TYPE value, TreeMap* map)
{
    rbtree_put(key, value, map->tree);
}
/***********************************************************************
  Process command line options

  Fills in *options with defaults, then overrides them from the options
  found in argv. After the options, argv must contain the sequence file
  followed by at least one motif file; these are recorded in
  options->seq_source and options->motif_sources.

  On an option parsing error, or when the positional arguments are
  missing, prints the usage message to stderr and exits with EXIT_FAILURE.
 ***********************************************************************/
static void process_command_line(
  int argc,
  char* argv[],
  CENTRIMO_OPTIONS_T *options
) {
  // Define command line options.
  // "motif" is listed because the parsing loop below handles it; without
  // this entry that branch could never be reached.
  cmdoption const centrimo_options[] = {
    {"bgfile", REQUIRED_VALUE},
    {"o", REQUIRED_VALUE},
    {"oc", REQUIRED_VALUE},
    {"score", REQUIRED_VALUE},
    {"motif", REQUIRED_VALUE},
    {"motif-pseudo", REQUIRED_VALUE},
    {"ethresh", REQUIRED_VALUE},
    {"maxbin", REQUIRED_VALUE},
    {"norc", NO_VALUE},
    {"noflip", NO_VALUE},
    {"desc", REQUIRED_VALUE},
    {"dfile", REQUIRED_VALUE},
    {"verbosity", REQUIRED_VALUE}
  };
  // Derive the count from the table so the two cannot drift apart.
  const int num_options =
    (int)(sizeof(centrimo_options) / sizeof(centrimo_options[0]));

  int option_index = 0;

  /* Make sure various options are set to NULL or defaults. */
  options->alphabet = DNA_ALPH;
  options->allow_clobber = TRUE;
  options->scan_both_strands = TRUE;
  options->no_flip = FALSE;

  options->description = NULL;
  options->desc_file = NULL;
  options->bg_source = NULL;
  options->output_dirname = "centrimo_out";
  options->seq_source = NULL;
  options->motif_sources = arraylst_create();

  options->score_thresh = DEFAULT_SCORE_THRESH;
  options->pseudocount = DEFAULT_PSEUDOCOUNT;
  options->evalue_thresh = DEFAULT_EVALUE_THRESH;
  options->max_window = DEFAULT_MAX_WINDOW;
  // no need to copy, as string is declared in argv array
  options->selected_motifs = rbtree_create(rbtree_strcmp, NULL, NULL, NULL, NULL);

  verbosity = NORMAL_VERBOSE;

  simple_setopt(argc, argv, num_options, centrimo_options);

  // Parse the command line.
  while (TRUE) {
    int c = 0;
    char* option_name = NULL;
    char* option_value = NULL;
    const char * message = NULL;

    // Read the next option, and break if we're done.
    c = simple_getopt(&option_name, &option_value, &option_index);
    if (c == 0) {
      break;
    } else if (c < 0) {
      (void) simple_getopterror(&message);
      fprintf(stderr, "Error processing command line options (%s)\n", message);
      fprintf(stderr, CENTRIMO_USAGE, DEFAULT_PSEUDOCOUNT, DEFAULT_SCORE_THRESH,
          DEFAULT_EVALUE_THRESH, NORMAL_VERBOSE);
      exit(EXIT_FAILURE);
    }
    if (strcmp(option_name, "bgfile") == 0){
      options->bg_source = option_value;
    } else if (strcmp(option_name, "ethresh") == 0){
      options->evalue_thresh = atof(option_value);
    } else if (strcmp(option_name, "maxbin") == 0){
      // max_window is one less than the number of places a motif can align
      // within the central window
      options->max_window = atoi(option_value) - 1;
    } else if (strcmp(option_name, "motif") == 0){
      // the tree stores the argv pointer itself (no copy function was given)
      rbtree_put(options->selected_motifs, option_value, NULL);
    } else if (strcmp(option_name, "motif-pseudo") == 0){
      options->pseudocount = atof(option_value);
    } else if (strcmp(option_name, "norc") == 0){
      options->scan_both_strands = FALSE;
    } else if (strcmp(option_name, "noflip") == 0){
      options->no_flip = TRUE;
    } else if (strcmp(option_name, "o") == 0){
      // Set output directory with no clobber
      options->output_dirname = option_value;
      options->allow_clobber = FALSE;
    } else if (strcmp(option_name, "oc") == 0){
      // Set output directory with clobber
      options->output_dirname = option_value;
      options->allow_clobber = TRUE;
    } else if (strcmp(option_name, "score") == 0){
      options->score_thresh = atof(option_value);
    } else if (strcmp(option_name, "desc") == 0) {
      options->description = option_value;
    } else if (strcmp(option_name, "dfile") == 0) {
      options->desc_file = option_value;
    } else if (strcmp(option_name, "verbosity") == 0){
      verbosity = atoi(option_value);
    }
  }

  // Must have sequence and motif file names
  if (argc < option_index + 2) {
    fprintf(stderr, "Sequences and motifs are both required\n");
    fprintf(stderr, CENTRIMO_USAGE, DEFAULT_PSEUDOCOUNT, DEFAULT_SCORE_THRESH,
        DEFAULT_EVALUE_THRESH, NORMAL_VERBOSE);
    exit(EXIT_FAILURE);
  }

  // Record the input file names: first the sequences, then every motif file
  options->seq_source = argv[option_index++];
  for (;option_index < argc; option_index++)
    arraylst_add(argv[option_index], options->motif_sources);
}