/**
 * Try to pop one parallel work unit of the given type from the queue.
 *
 * The tree parallel_work[work_type] is walked with pop_parallel_cb, which
 * reports through a struct pop_parallel_data whether a suitable entry
 * was found.
 *
 * @param wu         OUT: set to the selected work unit on success
 * @param ranks      OUT: set to the ranks selected for the task on success
 * @param work_type  index into parallel_work selecting the tree to search
 * @return true if a work unit was found (and *wu / *ranks were set),
 *         false otherwise (outputs untouched)
 *
 * On success the tree node is unlinked from the tree and freed here, and
 * xlb_workq_parallel_task_count is decremented.
 */
bool
xlb_workq_pop_parallel(xlb_work_unit** wu, int** ranks, int work_type)
{
  //TODO: cache the minimum size of parallel task of each type
  TRACE_START;
  bool result = false;
  struct rbtree* T = &parallel_work[work_type];
  TRACE("type: %i tree_size: %i", work_type, rbtree_size(T));
  // Common case is empty: want to exit asap
  if (rbtree_size(T) != 0)
  {
    struct pop_parallel_data data = { -1, NULL, NULL, NULL };
    data.type = work_type;
    TRACE("iterator...");
    bool found = rbtree_iterator(T, pop_parallel_cb, &data);
    if (found)
    {
      TRACE("found...");
      *wu = data.wu;
      *ranks = data.ranks;
      result = true;
      // Release memory: unlink the node first, then free it
      rbtree_remove_node(T, data.node);
      TRACE("rbtree_removed: wu: %p node: %p...", wu, data.node);
      free(data.node);
      xlb_workq_parallel_task_count--;
    }
  }
  TRACE_END;
  return result;
}
/************************************************************************** * Callback invoked when matching an opening pattern tag for a CISML file * of a secondary motif database. It checks that the motif should be scored, * clears out the list of sequence matches and stores the current motif. **************************************************************************/ void motif_secondary(void *ctx, char *accession, char *name, char *db, char *lsId, double *pvalue, double *score) { SECONDARY_LOADER_T *loader = (SECONDARY_LOADER_T*)ctx; SECONDARY_KEY_T key; RBNODE_T *node; PSSM_T *pssm; int i, seq_count; key.db_id = loader->db_id; key.motif_id = accession; node = rbtree_lookup(loader->secondary_motifs, &key, FALSE, NULL); if (node != NULL) { loader->secondary_motif = (SECONDARY_MOTIF_T*)rbtree_value(node); if (!(loader->secondary_motif->loaded)) { seq_count = rbtree_size(loader->sequences); for (i = 0; i < seq_count; ++i) loader->secondary_matches[i] = 0; if (loader->score_threshold_or_multiplier < 0 && loader->score_threshold_or_multiplier >= -1) { pssm = build_motif_pssm(loader->secondary_motif->motif, loader->background, loader->background, NULL, 0, PSSM_RANGE, 0, FALSE); loader->calculated_score_threshold = pssm_best_match_score(pssm) * (-loader->score_threshold_or_multiplier); free_pssm(pssm); } } else { die("Already seen CISML data for this motif!"); } } else { loader->secondary_motif = NULL; } }
/*****************************************************************************
 * Reads the per-symbol frequency attributes of an element into ps->freqs.
 *
 * ps     parser state; ps->alph_ids maps a symbol name to its index in
 *        ps->freqs, which is allocated here the first time it is needed
 * tag    element name used when reporting problems
 * attrs  NULL terminated list of alternating attribute names and values
 *
 * Attributes whose name is not in ps->alph_ids are ignored. Duplicated, bad
 * and missing frequencies are reported through dreme_attr_parse_error. When
 * every symbol was seen and none was bad, the sum is checked against 1.
 ****************************************************************************/
static void parse_freq_attrs(PS_T *ps, const char* tag, const xmlChar **attrs) {
  int a, core_size, found, *slot;
  char *parse_end;
  double freq, total;
  RBNODE_T *entry;
  bool any_bad;

  core_size = rbtree_size(ps->alph_ids);
  // initialize the freqs array
  if (ps->freqs == NULL) {
    ps->freqs = mm_malloc(sizeof(double) * core_size);
  }
  // reset the freqs array; -1 marks "not seen yet"
  for (a = 0; a < core_size; a++) {
    ps->freqs[a] = -1;
  }
  found = 0;
  any_bad = false;
  total = 0.0;
  // iterate over the attribute name/value pairs
  for (a = 0; attrs[a] != NULL; a += 2) {
    slot = (int*)rbtree_get(ps->alph_ids, attrs[a]);
    if (slot == NULL) {
      continue; // not a symbol of the alphabet
    }
    assert(*slot < core_size);
    if (ps->freqs[*slot] != -1) {
      dreme_attr_parse_error(ps, PARSE_ATTR_DUPLICATE, tag,
          (const char*)attrs[a], NULL);
      continue;
    }
    found++;
    errno = 0; // reset because we're about to check it
    freq = strtod((const char*)attrs[a+1], &parse_end);
    // allow out of range values, mainly because freqs can be very close to zero
    if (parse_end == (const char*)attrs[a+1] || (errno && errno != ERANGE) ||
        freq < 0 || freq > 1) {
      dreme_attr_parse_error(ps, PARSE_ATTR_BAD_VALUE, tag,
          (const char*)attrs[a], (const char*)attrs[a+1]);
      ps->freqs[*slot] = 0; // mark frequency as seen, even though it's bad
      any_bad = true;
      continue;
    }
    ps->freqs[*slot] = freq;
    total += freq;
  }
  // check we got everything
  if (found < core_size) {
    // identify what we're missing
    entry = rbtree_first(ps->alph_ids);
    while (entry != NULL) {
      slot = (int*)rbtree_value(entry);
      if (ps->freqs[*slot] == -1) {
        dreme_attr_parse_error(ps, PARSE_ATTR_MISSING, tag,
            (char*)rbtree_key(entry), NULL);
      }
      entry = rbtree_next(entry);
    }
  } else if (!any_bad) {
    // check the frequencies sum to 1
    double diff = total - 1;
    if (diff < 0) {
      diff = -diff;
    }
    // dreme writes background probabilities to 3 decimal places so assuming
    // the error on each is at maximum 0.001 then the total error for the
    // sum must be at most 0.001 per core symbol (0.004 for 4 symbols)
    if (diff > (0.001 * core_size)) {
      error(ps, "Probabilities of %s do not sum to 1, got %g .\n", tag, total);
    }
  }
}
/*
 * Structural equality of two red-black trees.
 *
 * Two trees are equal when they are the same object, or when both are
 * non-NULL, share the same key and value comparators, have the same size,
 * and an in-step iteration yields pairwise equal keys (key_compare == 0)
 * and values (value_compare == 0).
 *
 * Parameters are void pointers so the function can itself be used as a
 * generic comparison callback.
 */
bool rbtree_equal(const void *_a, const void *_b)
{
    const RBTree *a = _a;
    const RBTree *b = _b;

    if (a == b) {
        return true;
    }
    if (!a || !b) {
        return false;
    }
    if (a->key_compare != b->key_compare) {
        return false;
    }
    if (a->value_compare != b->value_compare) {
        return false;
    }
    if (rbtree_size(a) != rbtree_size(b)) {
        return false;
    }

    RBTreeIterator *left = rbtree_iterator_new(a);
    RBTreeIterator *right = rbtree_iterator_new(b);
    void *left_key, *left_val, *right_key, *right_val;
    bool same = true;

    for (;;) {
        if (!rbtree_iterator_next(left, &left_key, &left_val)) {
            break;
        }
        if (!rbtree_iterator_next(right, &right_key, &right_val)) {
            break;
        }
        if (a->key_compare(left_key, right_key) != 0 ||
            b->value_compare(left_val, right_val) != 0) {
            same = false;
            break;
        }
    }

    // single cleanup path for both the match and the mismatch case
    rbtree_iterator_destroy(left);
    rbtree_iterator_destroy(right);
    return same;
}
/*************************************************************************
 * Read a motif database.
 *
 * id            identifier stored in the returned database
 * motif_source  motif file to read (also copied into the result)
 * bg_source     background source, used only when *bg is NULL
 * bg            IN/OUT background; when NULL on entry and at least one
 *               motif was read it is set from the motif reader
 * pseudocount   pseudocount handed to the motif reader
 * selected      set of motif ids to keep; when empty all motifs are kept
 * alph          alphabet the motifs are expected to use (fatal on mismatch)
 *
 * Returns a newly allocated MOTIF_DB_T holding the kept motifs.
 *************************************************************************/
static MOTIF_DB_T* read_motifs(int id, char* motif_source, char* bg_source,
    ARRAY_T** bg, double pseudocount, RBTREE_T *selected, ALPH_T alph) {
  int num_read;
  MOTIF_DB_T *db;
  MREAD_T *reader;
  MOTIF_T *current;
  ARRAYLST_T *kept;

  // open the motif file for reading
  reader = mread_create(motif_source, OPEN_MFILE);
  mread_set_pseudocount(reader, pseudocount);
  // determine background to use
  if (*bg == NULL) {
    mread_set_bg_source(reader, bg_source);
  } else {
    mread_set_background(reader, *bg);
  }
  // load motifs
  num_read = 0;
  if (rbtree_size(selected) <= 0) {
    // no selection so keep everything
    kept = mread_load(reader, NULL);
    num_read = arraylst_size(kept);
  } else {
    kept = arraylst_create();
    while (mread_has_motif(reader)) {
      current = mread_next_motif(reader);
      num_read++;
      if (!rbtree_find(selected, get_motif_id(current))) {
        DEBUG_FMT(NORMAL_VERBOSE, "Discarding motif %s in %s.\n",
            get_motif_id(current), motif_source);
        destroy_motif(current);
        continue;
      }
      arraylst_add(current, kept);
    }
  }
  arraylst_fit(kept);
  if (num_read <= 0) {
    fprintf(stderr, "Warning: Motif file %s contains no motifs.\n",
        motif_source);
  } else {
    // check the alphabet
    if (mread_get_alphabet(reader) != alph) {
      die("Expected %s alphabet motifs\n", alph_name(alph));
    }
    // get the background
    if (*bg == NULL) {
      *bg = mread_get_background(reader);
    }
  }
  // clean up motif reader
  mread_destroy(reader);
  // create motif db
  db = mm_malloc(sizeof(MOTIF_DB_T));
  memset(db, 0, sizeof(MOTIF_DB_T));
  db->id = id;
  db->source = strdup(motif_source);
  db->motifs = kept;
  return db;
}
/***********************************************************************
 * Flatten a tree of motifs into a newly allocated array of motifs.
 *
 * motif_tree   tree whose values are MOTIF_T pointers
 * motif_array  OUT: array holding a copy (via copy_motif) of every motif,
 *              in tree iteration order
 * num          OUT: number of motifs in the array
 *
 * Exists for backwards compatibility with the older array based code.
 ***********************************************************************/
void motif_tree_to_array(RBTREE_T *motif_tree, MOTIF_T **motif_array,
    int *num) {
  int total;
  MOTIF_T *array, *dest;
  RBNODE_T *cursor;

  total = rbtree_size(motif_tree);
  array = mm_malloc(sizeof(MOTIF_T) * total);
  dest = array;
  cursor = rbtree_first(motif_tree);
  while (cursor != NULL) {
    copy_motif((MOTIF_T*)rbtree_value(cursor), dest);
    dest++;
    cursor = rbtree_next(cursor);
  }
  *motif_array = array;
  *num = total;
}
/************************************************************************** * Reads a CISML file containing the scores for secondary motif database * and tallys the spacings of the best matches of primary motifs to secondary * motifs. **************************************************************************/ void load_spamo_secondary( const char *cisml, int margin, double score_threshold, double motif_evalue_cutoff, double sigthresh, int bin, BOOLEAN_T dump_sig_only, int test_max, int n_secondary_motifs, BOOLEAN_T output_sequences, char *output_directory, RBTREE_T *sequences, MOTIF_T *primary_motif, RBTREE_T *secondary_motifs, ARRAY_T *background, int db_id ) { CISML_CALLBACKS_T callbacks; SECONDARY_LOADER_T data; memset(&callbacks, 0, sizeof(CISML_CALLBACKS_T)); callbacks.start_pattern = motif_secondary; callbacks.start_scanned_sequence = sequence_secondary; callbacks.start_matched_element = match_secondary; callbacks.end_scanned_sequence = sequence_end_secondary; callbacks.end_pattern = motif_end_secondary; data.output_sequences = output_sequences; data.output_directory = output_directory; data.margin = margin; data.bin = bin; data.significant_pvalue = sigthresh; data.motif_evalue_cutoff = motif_evalue_cutoff; data.dump_sig_only = dump_sig_only; data.test_max = test_max; data.n_secondary_motifs = n_secondary_motifs; data.sequences = sequences; data.primary_motif = primary_motif; data.primary_lpos = 0; data.primary_rpos = 0; data.secondary_motifs = secondary_motifs; data.secondary_motif = NULL; data.secondary_matches = mm_malloc(sizeof(int) * rbtree_size(sequences)); data.hits_size = 10; data.hits = mm_malloc(sizeof(int) * data.hits_size); data.hit_count = 0; data.score_threshold_or_multiplier = score_threshold; data.db_id = db_id; data.background = background; data.calculated_score_threshold = score_threshold; // to be calculated per motif parse_cisml(&callbacks, &data, cisml); free(data.secondary_matches); free(data.hits); }
/*
 * Checks for infinite loops.
 *
 * Every parsing step must either consume some data or move to a state that
 * has not yet been used at the current position. As the number of states is
 * finite, parsing must therefore either make progress or be caught here.
 *
 * jsonrd       reader; jsonrd->prior_states records the states already
 *              used at the current position
 * prior_state  the state before the step that was just taken
 * consumed     amount of data consumed by the step
 *
 * Returns true when a loop has been detected.
 */
static bool loop_check(JSONRD_T *jsonrd, PS_EN prior_state, int consumed) {
  RBTREE_T *visited;
  PS_EN current;
  bool created;

  visited = jsonrd->prior_states;
  // progress was made, so the history for the old position is irrelevant
  if (consumed != 0) {
    rbtree_clear(visited);
    return false;
  }
  current = jsonrd->state;
  if (rbtree_size(visited) != 0) {
    // record the current state; a loop exists if it was already recorded
    rbtree_lookup(visited, &current, true, &created);
    return !created;
  }
  // first step without progress at this position
  if (prior_state == current) {
    return true;
  }
  rbtree_put(visited, &prior_state, NULL);
  rbtree_put(visited, &current, NULL);
  return false;
}
/*****************************************************************************
 * dreme > motifs > motif > pos
 *
 *  i                         index of the motif position (optional)
 *  <symbol>                  frequency of <symbol>
 *
 * The position defaults to one past the previous position. Files without a
 * custom alphabet carry an explicit "i" attribute, which is parsed and
 * checked against that expectation. The symbol frequencies are then read
 * into ps->freqs and handed to the handle_pos callback.
 ****************************************************************************/
static void start_ele_pos(PS_T *ps, const xmlChar **attrs) {
  int pos;

  pos = ps->last_pos + 1;
  if (!ps->seen_alphabet) {
    // attribute "i" only exists in the older specification without custom alphabets
    char* attr_names[1] = {"i"};
    int (*attr_parsers[1])(char*, void*) = {ld_int};
    void *attr_targets[1] = {&pos};
    BOOLEAN_T attr_required[1] = {TRUE};
    BOOLEAN_T attr_done[1];

    parse_attributes(dreme_attr_parse_error, ps, "pos", attrs, 1,
        attr_names, attr_parsers, attr_targets, attr_required, attr_done);
    if (pos != (ps->last_pos + 1)) {
      error(ps, "Motif %s did not have pos %d but instead "
          "has pos %d in its place.\n",
          ps->motif_id, ps->last_pos + 1, pos);
    }
  }
  ps->last_pos = pos;

  parse_freq_attrs(ps, "pos", attrs);

  if (ps->callbacks->handle_pos && ps->state != PS_ERROR) {
    ps->callbacks->handle_pos(ps->user_data, pos,
        rbtree_size(ps->alph_ids), ps->freqs);
  }
}
/*
 * Load the motifs named by the options and convert each into scaled
 * log-odds matrices (PSSMs) ready for scanning.
 *
 * opts  the AMA options; motif_filename, bg_filename, pseudocount,
 *       selected_motifs, scan_both_strands and num_gc_bins are read.
 *       scan_both_strands is cleared when the alphabet has no complement
 *       and num_gc_bins is reset to 1 when the alphabet is unsuitable for
 *       GC binning.
 *
 * Returns a list of motif/PSSM bundles (motif_and_pssm_create) for every
 * motif that is selected; when opts->selected_motifs is empty all motifs
 * are used. Motifs that are not selected are destroyed.
 */
ARRAYLST_T* load_motifs(AMA_OPTIONS_T *opts) {
  ARRAYLST_T *motifs;
  ARRAY_T *pos_bg_freqs, *rev_bg_freqs;
  MREAD_T *mread;
  MOTIF_T *motif, *motif_rc;
  double range;
  PSSM_T *pos_pssm, *neg_pssm;
  int total_motifs;
  ALPH_T *alph;

  //
  // Read the motifs and background model.
  //
  //this reads any meme file, xml, txt and html
  mread = mread_create(opts->motif_filename, OPEN_MFILE);
  mread_set_bg_source(mread, opts->bg_filename);
  mread_set_pseudocount(mread, opts->pseudocount);

  // sanity check, since the rest of the code relies on the motifs being complementable
  // NOTE(review): alph_hold is called before the NULL check; confirm it
  // tolerates a NULL alphabet.
  alph = alph_hold(mread_get_alphabet(mread));
  if (alph == NULL) die("Unable to determine alphabet from motifs");
  if (opts->scan_both_strands && !alph_has_complement(alph)) {
    opts->scan_both_strands = false;
  }
  // GC binning needs an alphabet of exactly 4 core symbols forming exactly
  // 2 complementary pairs, so it is disabled when EITHER property fails
  // (previously both had to fail before it was disabled).
  if (opts->num_gc_bins > 1 &&
      (alph_size_core(alph) != 4 || alph_size_pairs(alph) != 2)) {
    fprintf(stderr, "Warning: The motif alphabet does not have exactly 2 complementary pairs so \"GC binning\" will be disabled.\n");
    opts->num_gc_bins = 1;
  }

  pos_bg_freqs = mread_get_background(mread);
  rev_bg_freqs = NULL;
  if (opts->scan_both_strands) {
    rev_bg_freqs = allocate_array(get_array_length(pos_bg_freqs));
    copy_array(pos_bg_freqs, rev_bg_freqs);
    complement_swap_freqs(alph, rev_bg_freqs, rev_bg_freqs);
  }

  // allocate memory for motifs
  motifs = arraylst_create();
  //
  // Convert motif matrices into log-odds matrices.
  // Scale them.
  // Compute the lookup tables for the PDF of scaled log-odds scores.
  //
  range = 300; // 100 is not very good; 1000 is great but too slow
  neg_pssm = NULL; // stays NULL unless both strands are scanned
  total_motifs = 0;
  while (mread_has_motif(mread)) {
    motif = mread_next_motif(mread);
    total_motifs++;
    if (rbtree_size(opts->selected_motifs) == 0 ||
        rbtree_find(opts->selected_motifs, get_motif_id(motif)) != NULL) {
      if (verbosity >= HIGH_VERBOSE) {
        fprintf(stderr, "Using motif %s of width %d.\n",
            get_motif_id(motif), get_motif_length(motif));
      }
      pos_pssm =
        build_motif_pssm(
          motif,
          pos_bg_freqs,
          pos_bg_freqs,
          NULL, // Priors not used
          0.0L, // alpha not used
          range,
          opts->num_gc_bins,
          true
        );
      //
      // Note: If scanning both strands, we complement the motif frequencies
      // but not the background frequencies so the motif looks the same.
      // However, the given frequencies are used in computing the p-values
      // since they represent the frequencies on the negative strands.
      // (If we instead were to complement the input sequence, keeping the
      // the motif fixed, we would need to use the complemented frequencies
      // in computing the p-values.  Is that any clearer?)
      //
      if (opts->scan_both_strands) {
        motif_rc = dup_rc_motif(motif);
        neg_pssm =
          build_motif_pssm(
            motif_rc,
            rev_bg_freqs,
            pos_bg_freqs,
            NULL, // Priors not used
            0.0L, // alpha not used
            range,
            opts->num_gc_bins,
            true
          );
        destroy_motif(motif_rc);
      }
      arraylst_add(motif_and_pssm_create(motif, pos_pssm, neg_pssm), motifs);
    } else {
      if (verbosity >= HIGH_VERBOSE) fprintf(stderr, "Skipping motif %s.\n",
          get_motif_id(motif));
      destroy_motif(motif);
    }
  }
  mread_destroy(mread);
  free_array(pos_bg_freqs);
  free_array(rev_bg_freqs);
  alph_release(alph);
  if (verbosity >= NORMAL_VERBOSE) {
    fprintf(stderr, "Loaded %d/%d motifs from %s.\n",
        arraylst_size(motifs), total_motifs, opts->motif_filename);
  }
  return motifs;
}
/*
 * Load background file frequencies into the array.
 *
 * alph         IN/OUT alphabet; when *alph is INVALID_ALPH it is guessed
 *              from the number of distinct letters listed in the file
 * bg_filename  background file; lines holding a single letter followed by
 *              a number are used, '#' starts a comment
 * freqs        array to fill, or NULL to have one allocated
 *
 * Returns the filled array. Any problem (unreadable file, read error,
 * illegal or zero probability, unknown or duplicated letter, missing
 * letter) is fatal.
 */
ARRAY_T* get_file_frequencies(ALPH_T *alph, char *bg_filename, ARRAY_T *freqs) {
  regmatch_t matches[4];
  STR_T *line;
  char chunk[BG_CHUNK_SIZE+1], letter[2], *key;
  int size, terminate, offset, i;
  FILE *fp;
  regex_t bgfreq;
  double freq;
  RBTREE_T *letters;
  RBNODE_T *node;

  regcomp_or_die("bg freq", &bgfreq, BGFREQ_RE, REG_EXTENDED);
  // maps each letter seen (case insensitive) to its frequency
  letters = rbtree_create(rbtree_strcasecmp, rbtree_strcpy, free, rbtree_dblcpy, free);
  line = str_create(100);
  if (!(fp = fopen(bg_filename, "r"))) {
    die("Unable to open background file \"%s\" for reading.\n", bg_filename);
  }

  terminate = feof(fp);
  while (!terminate) {
    size = fread(chunk, sizeof(char), BG_CHUNK_SIZE, fp);
    // a read error never sets the end-of-file indicator, so without this
    // check the loop would spin forever (e.g. when given a directory)
    if (ferror(fp)) {
      die("Error reading background file \"%s\".\n", bg_filename);
    }
    chunk[size] = '\0';
    terminate = feof(fp);
    offset = 0;
    while (offset < size) {
      // skip mac newline
      if (str_len(line) == 0 && chunk[offset] == '\r') {
        offset++;
        continue;
      }
      // find next new line
      for (i = offset; i < size; ++i) {
        if (chunk[i] == '\n') break;
      }
      // append portion up to the new line or end of chunk
      str_append(line, chunk+offset, i - offset);
      // read more if we didn't find a new line
      if (i == size && !terminate) break;
      // move the offset past the new line
      offset = i + 1;
      // handle windows new line
      if (str_char(line, -1) == '\r') str_truncate(line, -1);
      // remove everything to the right of a comment character
      for (i = 0; i < str_len(line); ++i) {
        if (str_char(line, i) == '#') {
          str_truncate(line, i);
          break;
        }
      }
      // check the line for a single letter followed by a number
      if (regexec_or_die("bg freq", &bgfreq, str_internal(line), 4, matches, 0)) {
        // parse the letter and frequency value
        regex_strncpy(matches+1, str_internal(line), letter, 2);
        freq = regex_dbl(matches+2, str_internal(line));
        // check the frequency is acceptable
        if (freq < 0 || freq > 1) {
          die("The background file lists the illegal probability %g for "
            "the letter %s.\n", freq, letter);
        } else if (freq == 0) {
          die("The background file lists a probability of zero for the "
            "letter %s\n", letter);
        }
        // also rejects values that fail every comparison above (NaN)
        if (freq >= 0 && freq <= 1) rbtree_put(letters, letter, &freq);
      }
      str_clear(line);
    }
  }
  // finished with the file so clean up file parsing stuff
  fclose(fp);
  str_destroy(line, FALSE);
  regfree(&bgfreq);
  // guess the alphabet
  if (*alph == INVALID_ALPH) {
    switch (rbtree_size(letters)) {
      case PROTEIN_ASIZE:
        *alph = PROTEIN_ALPH;
        break;
      case DNA_ASIZE:
        *alph = DNA_ALPH;
        break;
      default:
        die("Number of single character entries in background does not match "
            "an alphabet.\n");
    }
  }
  // make the background
  if (freqs == NULL) freqs = allocate_array(alph_size(*alph, ALL_SIZE));
  assert(get_array_length(freqs) >= alph_size(*alph, ALL_SIZE));
  init_array(-1, freqs); // -1 marks "not set"
  for (node = rbtree_first(letters); node != NULL; node = rbtree_next(node)) {
    key = (char*)rbtree_key(node);
    i = alph_index(*alph, key[0]);
    freq = *((double*)rbtree_value(node));
    if (i == -1) {
      die("Background contains letter %s which is not in the %s alphabet.\n",
          key, alph_name(*alph));
    }
    if (get_array_item(i, freqs) != -1) {
      die("Background contains letter %s which has the same meaning as an "
          "already listed letter.\n", key);
    }
    set_array_item(i, freq, freqs);
  }
  // check that all items were set
  for (i = 0; i < alph_size(*alph, ALPH_SIZE); i++) {
    if (get_array_item(i, freqs) == -1) {
      die("Background is missing letter %c.\n", alph_char(*alph, i));
    }
  }
  // disabled for backwards compatibility (AMA test was failing)
  //normalize_subarray(0, ALPH_ASIZE[*alph], 0.0, freqs);
  // calculate the values of the ambiguous letters from the concrete ones
  calc_ambigs(*alph, FALSE, freqs);
  // cleanup
  rbtree_destroy(letters);
  // return result
  return freqs;
}
/*****************************************************************************
 * dreme > model > background
 *
 *  type                      is the alphabet DNA or RNA
 *                            (optional when custom alphabet specified)
 *  <symbol>                  frequency of <symbol> from core alphabet
 *  from                      from the negative dataset or a background file
 *  file                      the background file (optional)
 *  last_mod_date             the last modified date of the background file
 *                            (optional)
 *
 * When no custom alphabet element has been seen, the core alphabet
 * (A, C, G and T or U) is registered in ps->alph_ids from the type before
 * the frequencies are parsed.
 ****************************************************************************/
static void start_ele_background(PS_T *ps, const xmlChar **attrs) {
  int type, from;
  char *file, *lastmod;

  // set reasonable defaults
  // (the two constants were previously assigned to the wrong variables)
  type = DREME_ALPH_DNA;
  from = DREME_BG_FROM_DATASET;
  file = NULL;
  lastmod = NULL;

  char* type_options[2] = {"dna", "rna"};
  int type_values[2] = {DREME_ALPH_DNA, DREME_ALPH_RNA};
  MULTI_T type_multi = {.count = 2, .options = type_options,
    .outputs = type_values, .target = &(type)};

  char* from_options[2] = {"dataset", "file"};
  int from_values[2] = {DREME_BG_FROM_DATASET, DREME_BG_FROM_FILE};
  MULTI_T from_multi = {.count = 2, .options = from_options,
    .outputs = from_values, .target = &(from)};

  char* names[4] = {"file", "from", "last_mod_date", "type"};
  int (*parsers[4])(char*, void*) = {ld_str, ld_multi, ld_str, ld_multi};
  void *data[4] = {&file, &from_multi, &lastmod, &type_multi};
  BOOLEAN_T required[4] = {FALSE, TRUE, FALSE, FALSE};
  BOOLEAN_T done[4];

  // the type is only needed when it has to define the alphabet
  required[3] = !ps->seen_alphabet;

  parse_attributes(dreme_attr_parse_error, ps, "background", attrs, 4,
      names, parsers, data, required, done);

  // a background read from a file must say which file and when it changed
  if (from == DREME_BG_FROM_FILE) {
    if (!done[0]) {
      dreme_attr_parse_error(ps, PARSE_ATTR_MISSING, "background", "file", NULL);
    }
    if (!done[2]) {
      dreme_attr_parse_error(ps, PARSE_ATTR_MISSING, "background",
          "last_mod_date", NULL);
    }
  }

  // if we haven't seen the alphabet then we must define it from the type
  if (!ps->seen_alphabet) {
    int idx = 0;
    rbtree_make(ps->alph_ids, "A", &idx);
    idx++;
    rbtree_make(ps->alph_ids, "C", &idx);
    idx++;
    rbtree_make(ps->alph_ids, "G", &idx);
    idx++;
    rbtree_make(ps->alph_ids, (type == DREME_ALPH_DNA ? "T" : "U"), &idx);
  }

  parse_freq_attrs(ps, "background", attrs);

  if (ps->callbacks->handle_background && ps->state != PS_ERROR) {
    ps->callbacks->handle_background(ps->user_data, rbtree_size(ps->alph_ids),
        ps->freqs, from, file, lastmod);
  }
}

/*****************************************************************************
 * dreme > model > stop
 *
 *  evalue          the stopping evalue (returned as log10).
 *  count           the stopping count.
 *  time            the stopping time.
 *
 * All three attributes are optional; the callback always receives pointers
 * to the three values.
 ****************************************************************************/
static void start_ele_stop(PS_T *ps, const xmlChar **attrs) {
  // Give the values a defined state: the attributes are all optional, so
  // without this the callback could be handed pointers to indeterminate
  // values when an attribute is absent.
  int count = 0, time = 0;
  double log10evalue = 0.0;

  char* names[3] = {"count", "evalue", "time"};
  int (*parsers[3])(char*, void*) = {ld_int, ld_log10_ev, ld_int};
  void *data[3] = {&count, &log10evalue, &time};
  BOOLEAN_T required[3] = {FALSE, FALSE, FALSE};
  BOOLEAN_T done[3];

  parse_attributes(dreme_attr_parse_error, ps, "stop", attrs, 3,
      names, parsers, data, required, done);

  if (ps->callbacks->handle_stop && ps->state != PS_ERROR) {
    ps->callbacks->handle_stop(ps->user_data, &log10evalue, &count, &time);
  }
}

/*****************************************************************************
 * dreme > model > /ngen
 *
 *  the number of generations to check (or something like that).
 *
 * The value is parsed from the character data collected for the element.
 ****************************************************************************/
static void end_ele_ngen(PS_T *ps) {
  int ngen;

  // ld_int returns non-zero when the text is not a valid integer
  if (ld_int(ps->characters.buffer, &ngen)) {
    error(ps, "Bad value \"%s\" for ngen.\n", ps->characters.buffer);
  }
  if (ps->callbacks->handle_ngen && ps->state != PS_ERROR) {
    ps->callbacks->handle_ngen(ps->user_data, ngen);
  }
}
/*****************************************************************************
 * dreme > model > negatives
 *
 *  name            the name of the negative dataset
 *  count           the number of sequences in the negative dataset
 *  from            the source of the negative dataset (eg shuffled positives)
 *  file            the file containing the negative dataset (optional)
 *  last_mod_date   the last modified date of the file (optional)
 *
 * file and last_mod_date become mandatory when the negatives come from a
 * file.
 ****************************************************************************/
static void start_ele_negatives(PS_T *ps, const xmlChar **attrs) {
  char *name, *file, *lastmod;
  long count;
  int from;

  // defaults, so nothing is left indeterminate when an attribute is missing
  name = NULL;
  file = NULL;
  lastmod = NULL;
  count = 0;
  from = DREME_NEG_SHUFFLED;

  char* from_options[2] = {"file", "shuffled"};
  int from_values[2] = {DREME_NEG_FILE, DREME_NEG_SHUFFLED};
  MULTI_T from_multi = {.count = 2, .options = from_options,
    .outputs = from_values, .target = &(from)};

  char* names[5] = {"count", "file", "from", "last_mod_date", "name"};
  int (*parsers[5])(char*, void*) = {ld_long, ld_str, ld_multi, ld_str, ld_str};
  void *data[5] = {&count, &file, &from_multi, &lastmod, &name};
  BOOLEAN_T required[5] = {TRUE, FALSE, TRUE, FALSE, TRUE};
  BOOLEAN_T done[5];

  parse_attributes(dreme_attr_parse_error, ps, "negatives", attrs, 5,
      names, parsers, data, required, done);

  if (ps->state != PS_ERROR && from == DREME_NEG_FILE) {
    if (file == NULL) {
      dreme_attr_parse_error(ps, PARSE_ATTR_MISSING, "negatives", "file", NULL);
    }
    if (lastmod == NULL) {
      dreme_attr_parse_error(ps, PARSE_ATTR_MISSING, "negatives",
          "last_mod_date", NULL);
    }
  }

  if (ps->callbacks->handle_negatives && ps->state != PS_ERROR) {
    ps->callbacks->handle_negatives(ps->user_data, name, count,
        (DREME_NEG_EN)from, file, lastmod);
  }
}

/*****************************************************************************
 * DREME > model > alphabet
 *
 *  like            the standard alphabet this one extends (optional)
 *  name            the name of the alphabet (optional)
 *
 * Records that a custom alphabet was seen and then expects one or more
 * letter elements.
 ****************************************************************************/
static void start_ele_alphabet(PS_T *ps, const xmlChar **attrs) {
  char *name;
  int extends;

  char* extends_options[3] = {"dna", "protein", "rna"};
  int extends_values[3] = {ALPH_FLAG_EXTENDS_DNA, ALPH_FLAG_EXTENDS_PROTEIN,
    ALPH_FLAG_EXTENDS_RNA};
  MULTI_T extends_multi = {.count = 3, .options = extends_options,
    .outputs = extends_values, .target = &(extends)};

  char* names[2] = {"like", "name"};
  int (*parsers[2])(char*, void*) = {ld_multi, ld_str};
  void *data[2] = {&extends_multi, &name};
  BOOLEAN_T required[2] = {FALSE, FALSE};
  BOOLEAN_T done[2];

  // just so we know later on when reading the background which used to set the alphabet
  ps->seen_alphabet = true;
  // defaults
  name = NULL;
  extends = 0;

  parse_attributes(dreme_attr_parse_error, ps, "alphabet", attrs, 2,
      names, parsers, data, required, done);

  if (ps->callbacks->start_alphabet && ps->state != PS_ERROR) {
    ps->callbacks->start_alphabet(ps->user_data, name, extends);
  }
  dreme_push_es(ps, PS_IN_ALPHABET_LETTER, ES_ONE_OR_MORE);
}

/*****************************************************************************
 * DREME > model > /alphabet
 ****************************************************************************/
static void end_ele_alphabet(PS_T *ps) {
  if (ps->callbacks->end_alphabet && ps->state != PS_ERROR) {
    ps->callbacks->end_alphabet(ps->user_data);
  }
}

/*****************************************************************************
 * DREME > model > alphabet > letter
 *
 * A letter without an "equals" attribute is a core symbol: its id is
 * registered in ps->alph_ids with the next free index. The first letter
 * with "equals" marks the start of the ambiguous symbols, after which no
 * further core symbols may appear.
 ****************************************************************************/
static void start_ele_alphabet_letter(PS_T *ps, const xmlChar **attrs) {
  char *aliases, *id, *name, *equals, symbol, complement;
  int colour, idx;

  char* names[7] = {"aliases", "colour", "complement", "equals", "id", "name",
    "symbol"};
  int (*parsers[7])(char*, void*) = {ld_str, ld_hex, ld_char, ld_str, ld_str,
    ld_str, ld_char};
  void *data[7] = {&aliases, &colour, &complement, &equals, &id, &name,
    &symbol};
  BOOLEAN_T required[7] = {FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE};
  BOOLEAN_T done[7];

  // defaults; id and symbol are required but are still given a defined
  // value so a missing attribute cannot leave them indeterminate
  aliases = NULL;
  id = NULL;
  name = NULL;
  equals = NULL;
  symbol = '\0';
  complement = '\0';
  colour = -1;

  parse_attributes(dreme_attr_parse_error, ps, "letter", attrs, 7,
      names, parsers, data, required, done);

  if (ps->seen_ambig) {
    if (equals == NULL) {
      error(ps, "All core symbols must appear before any ambigous symbols.\n");
    }
  } else if (equals == NULL) {
    // core symbol: only register it when the id was actually supplied,
    // otherwise an unset pointer would be used as the tree key
    if (id != NULL) {
      idx = rbtree_size(ps->alph_ids);
      rbtree_make(ps->alph_ids, id, &idx);
    }
  } else {
    ps->seen_ambig = true;
  }

  if (ps->callbacks->handle_alphabet_letter && ps->state != PS_ERROR) {
    ps->callbacks->handle_alphabet_letter(ps->user_data, id, symbol, aliases,
        complement, equals, name, colour);
  }
}