/*****************************************************************************
 * dreme
 * contains: model, motifs, run_time
 *
 *  release         the release date.
 *  version         the program version.
 *
 * Parses the two required attributes, hands them to the start_dreme
 * callback (skipped when the parser is in the error state) and then pushes
 * the expected child states in the reverse of the order listed above.
 ****************************************************************************/
static void start_ele_dreme(PS_T *ps, const xmlChar **attrs) {
  enum { ATTR_COUNT = 2 };
  char *release_date;

  // parallel tables: entry i of every array describes the same attribute
  char* attr_names[ATTR_COUNT] = {"release", "version"};
  int (*attr_parsers[ATTR_COUNT])(char*, void*) = {ld_str, ld_version};
  void *attr_targets[ATTR_COUNT] = {&release_date, &(ps->ver)};
  BOOLEAN_T attr_required[ATTR_COUNT] = {TRUE, TRUE};
  BOOLEAN_T attr_done[ATTR_COUNT];

  parse_attributes(dreme_attr_parse_error, ps, "dreme", attrs, ATTR_COUNT,
      attr_names, attr_parsers, attr_targets, attr_required, attr_done);

  // release_date is only read when the parser is not in the error state
  if (ps->state != PS_ERROR && ps->callbacks->start_dreme) {
    ps->callbacks->start_dreme(ps->user_data, ps->ver.major, ps->ver.minor,
        ps->ver.patch, release_date);
  }

  dreme_push_es(ps, PS_IN_RUN_TIME, ES_ONCE);
  dreme_push_es(ps, PS_IN_MOTIFS, ES_ONCE);
  dreme_push_es(ps, PS_IN_MODEL, ES_ONCE);
}
/*****************************************************************************
 * dreme > motifs > motif
 * contains: pos+, match*
 *
 *  id                the identifier used by DREME
 *  seq               the DNA iupac sequence representing the motif.
 *  length            the length of the motif
 *  nsites            the number of sites used to create the motif
 *  p                 the number of sequences in the positive set with the motif
 *  n                 the number of sequences in the negative set with the motif
 *  pvalue            the pvalue of the motif after erasing (returned as log10)
 *  evalue            the evalue of the motif after erasing (returned as log10)
 *  unerased_evalue   the evalue of the motif without erasing (returned as log10)
 *
 * All nine attributes are required. When the parser is not in the error
 * state the motif id is copied into the parser state, the position
 * bookkeeping is reset and the start_motif callback (if set) is invoked.
 ****************************************************************************/
static void start_ele_motif(PS_T *ps, const xmlChar **attrs) {
  enum { ATTR_COUNT = 9 };
  char *id_attr, *iupac_seq;
  int width;
  long site_count, pos_hits, neg_hits;
  double log_pv, log_ev, log_unerased_ev;

  // parallel tables: entry i of every array describes the same attribute;
  // the names are kept in the same (alphabetical) order as the original
  char* attr_names[ATTR_COUNT] = {"evalue", "id", "length", "n", "nsites",
    "p", "pvalue", "seq", "unerased_evalue"};
  int (*attr_parsers[ATTR_COUNT])(char*, void*) = {ld_log10_ev, ld_str,
    ld_int, ld_long, ld_long, ld_long, ld_log10_pv, ld_str, ld_log10_ev};
  void *attr_targets[ATTR_COUNT] = {&log_ev, &id_attr, &width, &neg_hits,
    &site_count, &pos_hits, &log_pv, &iupac_seq, &log_unerased_ev};
  BOOLEAN_T attr_required[ATTR_COUNT] = {TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
    TRUE, TRUE, TRUE};
  BOOLEAN_T attr_done[ATTR_COUNT];

  parse_attributes(dreme_attr_parse_error, ps, "motif", attrs, ATTR_COUNT,
      attr_names, attr_parsers, attr_targets, attr_required, attr_done);

  if (ps->state != PS_ERROR) {
    // keep a private copy of the motif id so it can be used in any error
    // messages
    int id_len = strlen(id_attr);
    ps->motif_id = mm_malloc(sizeof(char) * (id_len + 1));
    strcpy(ps->motif_id, id_attr);
    ps->last_pos = 0;
    ps->motif_len = width;

    if (ps->callbacks->start_motif) {
      ps->callbacks->start_motif(ps->user_data, id_attr, iupac_seq, width,
          site_count, pos_hits, neg_hits, log_pv, log_ev, log_unerased_ev);
    }
  }

  dreme_push_es(ps, PS_IN_MATCH, ES_ANY);
  dreme_push_es(ps, PS_IN_POS, ES_ONE_OR_MORE);
}
/*****************************************************************************
 * Handle the document start
 *
 * ctx is the parser state (PS_T). Resets the state to PS_START and pushes
 * the end-of-document state followed by the top level dreme element state.
 ****************************************************************************/
void handle_dreme_start_doc(void *ctx) {
  PS_T *parser = ctx;

  parser->state = PS_START;
  dreme_push_es(parser, PS_END, ES_ONCE);
  dreme_push_es(parser, PS_IN_DREME, ES_ONCE);
}
/*****************************************************************************
 * dreme > motifs
 * contains: motif*
 *
 * The element has no attributes of interest (attrs is not read). Notifies
 * the start_motifs callback unless the parser is in the error state, then
 * allows any number of motif children.
 ****************************************************************************/
static void start_ele_motifs(PS_T *ps, const xmlChar **attrs) {
  if (ps->state != PS_ERROR && ps->callbacks->start_motifs) {
    ps->callbacks->start_motifs(ps->user_data);
  }

  dreme_push_es(ps, PS_IN_MOTIF, ES_ANY);
}
/*****************************************************************************
 * dreme > model
 * contains: command_line, positives, negatives, background, stop, ngen,
 *           add_pv_thresh, seed, host, when, description
 *
 * The element has no attributes of interest (attrs is not read). Notifies
 * the start_model callback unless the parser is in the error state, then
 * pushes one expected state per child element.
 ****************************************************************************/
static void start_ele_model(PS_T *ps, const xmlChar **attrs) {
  // {state, expected occurrences}, listed in push order, which is the
  // reverse of the order in which the children are named above
  const int expected[][2] = {
    {PS_IN_DESCRIPTION, ES_ZERO_OR_ONE},
    {PS_IN_WHEN, ES_ONCE},
    {PS_IN_HOST, ES_ONCE},
    {PS_IN_SEED, ES_ONCE},
    {PS_IN_ADD_PV_THRESH, ES_ONCE},
    {PS_IN_NGEN, ES_ONCE},
    {PS_IN_STOP, ES_ONCE},
    {PS_IN_BACKGROUND, ES_ONCE},
    {PS_IN_NEGATIVES, ES_ONCE},
    {PS_IN_POSITIVES, ES_ONCE},
    {PS_IN_COMMAND_LINE, ES_ONCE}
  };
  const int expected_count = (int)(sizeof(expected) / sizeof(expected[0]));
  int i;

  if (ps->state != PS_ERROR && ps->callbacks->start_model) {
    ps->callbacks->start_model(ps->user_data);
  }

  for (i = 0; i < expected_count; i++) {
    dreme_push_es(ps, expected[i][0], expected[i][1]);
  }
}
/*****************************************************************************
 * dreme > model > negatives
 *
 *  name            the name of the negative dataset
 *  count           the number of sequences in the negative dataset
 *  from            the source of the negative dataset (eg shuffled positives)
 *  file            the file containing the negative dataset (optional)
 *  last_mod_date   the last modified date of the file (optional)
 *
 * file and last_mod_date are optional for the attribute parser, but when
 * from is "file" both must be present; a missing one is reported as
 * PARSE_ATTR_MISSING. The handle_negatives callback is only invoked when the
 * parser is not in the error state.
 ****************************************************************************/
static void start_ele_negatives(PS_T *ps, const xmlChar **attrs) {
  char *name, *file, *lastmod;
  long count;
  int from;

  // mapping from the text of the "from" attribute to its enum value
  char* from_options[2] = {"file", "shuffled"};
  int from_values[2] = {DREME_NEG_FILE, DREME_NEG_SHUFFLED};
  MULTI_T from_multi = {.count = 2, .options = from_options,
    .outputs = from_values, .target = &(from)};

  // parallel tables: entry i of every array describes the same attribute
  char* names[5] = {"count", "file", "from", "last_mod_date", "name"};
  int (*parsers[5])(char*, void*) = {ld_long, ld_str, ld_multi, ld_str, ld_str};
  void *data[5] = {&count, &file, &from_multi, &lastmod, &name};
  BOOLEAN_T required[5] = {TRUE, FALSE, TRUE, FALSE, TRUE};
  BOOLEAN_T done[5];

  // defaults for the optional attributes so their absence can be detected
  file = NULL;
  lastmod = NULL;

  parse_attributes(dreme_attr_parse_error, ps, "negatives", attrs, 5, names,
      parsers, data, required, done);

  // "from" is only meaningful when parsing succeeded, so test the state first
  if (ps->state != PS_ERROR && from == DREME_NEG_FILE) {
    if (file == NULL) {
      dreme_attr_parse_error(ps, PARSE_ATTR_MISSING, "negatives", "file", NULL);
    }
    if (lastmod == NULL) {
      dreme_attr_parse_error(ps, PARSE_ATTR_MISSING, "negatives",
          "last_mod_date", NULL);
    }
  }

  if (ps->callbacks->handle_negatives && ps->state != PS_ERROR) {
    ps->callbacks->handle_negatives(ps->user_data, name, count,
        (DREME_NEG_EN)from, file, lastmod);
  }
}

/*****************************************************************************
 * DREME > model > alphabet
 *
 *  name            optional; passed to the callback as NULL when absent
 *  like            optional; one of "dna", "protein" or "rna", translated to
 *                  the matching ALPH_FLAG_EXTENDS_* value (0 when absent)
 *
 * Records that an alphabet element was seen, notifies the start_alphabet
 * callback (unless the parser is in the error state) and then expects one or
 * more letter children.
 ****************************************************************************/
static void start_ele_alphabet(PS_T *ps, const xmlChar **attrs) {
  char *name;
  int extends;

  // mapping from the text of the "like" attribute to its flag value
  char* extends_options[3] = {"dna", "protein", "rna"};
  int extends_values[3] = {ALPH_FLAG_EXTENDS_DNA, ALPH_FLAG_EXTENDS_PROTEIN,
    ALPH_FLAG_EXTENDS_RNA};
  MULTI_T extends_multi = {.count = 3, .options = extends_options,
    .outputs = extends_values, .target = &(extends)};

  // parallel tables: entry i of every array describes the same attribute
  char* names[2] = {"like", "name"};
  int (*parsers[2])(char*, void*) = {ld_multi, ld_str};
  void *data[2] = {&extends_multi, &name};
  BOOLEAN_T required[2] = {FALSE, FALSE};
  BOOLEAN_T done[2];

  // remember that an explicit alphabet element was seen; per the original
  // note this is consulted later on when reading the background
  ps->seen_alphabet = true;

  // defaults for the optional attributes
  name = NULL;
  extends = 0;

  parse_attributes(dreme_attr_parse_error, ps, "alphabet", attrs, 2, names,
      parsers, data, required, done);

  if (ps->callbacks->start_alphabet && ps->state != PS_ERROR) {
    ps->callbacks->start_alphabet(ps->user_data, name, extends);
  }

  dreme_push_es(ps, PS_IN_ALPHABET_LETTER, ES_ONE_OR_MORE);
}

/*****************************************************************************
 * DREME > model > /alphabet
 *
 * Notifies the end_alphabet callback unless the parser is in the error state.
 ****************************************************************************/
static void end_ele_alphabet(PS_T *ps) {
  if (ps->callbacks->end_alphabet && ps->state != PS_ERROR) {
    ps->callbacks->end_alphabet(ps->user_data);
  }
}

/*****************************************************************************
 * DREME > model > alphabet > letter
 *
 *  id              required
 *  symbol          required
 *  aliases, colour, complement, equals, name
 *                  optional; default to NULL, -1, '\0', NULL and NULL
 *
 * A letter without an "equals" attribute is treated as a core symbol and is
 * registered in ps->alph_ids under its id together with the number of
 * entries registered so far. The first letter carrying "equals" switches
 * the parser into ambiguous-symbol mode, after which a further core symbol
 * is reported as an error.
 *
 * Nothing is registered and no callback is made when the parser is in the
 * error state, because the required attributes may not have been set.
 ****************************************************************************/
static void start_ele_alphabet_letter(PS_T *ps, const xmlChar **attrs) {
  // every target gets a default so that no indeterminate value can be read
  // if attribute parsing fails part way through
  char *aliases = NULL;
  char *id = NULL;
  char *name = NULL;
  char *equals = NULL;
  char symbol = '\0';
  char complement = '\0';
  int colour = -1;
  int idx;

  // parallel tables: entry i of every array describes the same attribute
  char* names[7] = {"aliases", "colour", "complement", "equals", "id", "name",
    "symbol"};
  int (*parsers[7])(char*, void*) = {ld_str, ld_hex, ld_char, ld_str, ld_str,
    ld_str, ld_char};
  void *data[7] = {&aliases, &colour, &complement, &equals, &id, &name,
    &symbol};
  BOOLEAN_T required[7] = {FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE};
  BOOLEAN_T done[7];

  parse_attributes(dreme_attr_parse_error, ps, "letter", attrs, 7, names,
      parsers, data, required, done);

  // As in start_ele_motif, the parsed id must not be used once the parser is
  // in the error state: a missing required attribute would otherwise hand an
  // unset id to rbtree_make below.
  if (ps->state == PS_ERROR) {
    return;
  }

  if (ps->seen_ambig) {
    if (equals == NULL) {
      error(ps, "All core symbols must appear before any ambigous symbols.\n");
    }
  } else if (equals == NULL) {
    // core symbol: its index is the number of ids registered before it
    idx = rbtree_size(ps->alph_ids);
    rbtree_make(ps->alph_ids, id, &idx);
  } else {
    ps->seen_ambig = true;
  }

  // error() above may have put the parser into the error state
  if (ps->callbacks->handle_alphabet_letter && ps->state != PS_ERROR) {
    ps->callbacks->handle_alphabet_letter(ps->user_data, id, symbol, aliases,
        complement, equals, name, colour);
  }
}