/***************************************************************************** * MEME > training_set > alphabet > letter * Read in a identifier and symbol pair for a letter. ****************************************************************************/ void mxml_alphabet_letter(void *ctx, char *id, char symbol, char* aliases, char complement, char *equals, char *name, int colour) { CTX_T *data; char sym[2]; data = (CTX_T*)ctx; if (data->alph != NULL) { // check that the symbol exists in the alphabet if (!alph_is_concrete(data->alph, symbol)) { local_error(data, "The symbol %c does not exist in the built-in alphabet %s.\n", symbol, alph_name(data->alph)); return; } // Note: don't bother checking complement value as it will not be set } else { if (equals == NULL) { alph_reader_core(data->alph_rdr, symbol, aliases, name, colour, complement); } else { alph_reader_ambig(data->alph_rdr, symbol, aliases, name, colour, equals); } } // create a mapping from the id to the symbol sym[0] = symbol; sym[1] = '\0'; if (!rbtree_make(data->letter_lookup, id, sym)) { local_error(data, "The letter identifier %s has been used before.\n", id); } }
/***************************************************************************** * MEME > training_set > ambigs > letter * not used in modern meme outputs ****************************************************************************/ void mxml_ambigs_letter(void *ctx, char *id, char symbol) { CTX_T *data; char sym[2]; data = (CTX_T*)ctx; // create a mapping from the id to the symbol sym[0] = symbol; sym[1] = '\0'; if (!rbtree_make(data->letter_lookup, id, sym)) { local_error(data, "The letter identifier %s has been used before.\n", id); } }
/*****************************************************************************
 * Parse the command line into the options structure.
 *
 * Every field of options is first set to its default, then the long options
 * are read with getopt_long_only, and finally the positional arguments
 * (motif file, fasta file and an optional background file) are consumed.
 *
 * Problems are reported through usage().
 * NOTE(review): the code assumes usage() does not return - confirm.
 *
 *  argc     the number of command line arguments
 *  argv     the command line arguments
 *  options  the structure to fill in (OUT)
 ****************************************************************************/
static void process_command_line(int argc, char **argv, AMA_OPTIONS_T *options) {
  struct option ama_options[] = {
    {"max-seq-length", required_argument, NULL, OPT_MAX_SEQ_LENGTH},
    {"motif", required_argument, NULL, OPT_MOTIF},
    {"motif-pseudo", required_argument, NULL, OPT_MOTIF_PSEUDO},
    {"rma", no_argument, NULL, OPT_RMA},
    {"pvalues", no_argument, NULL, OPT_PVALUES},
    {"sdbg", required_argument, NULL, OPT_SDBG},
    {"norc", no_argument, NULL, OPT_NORC},
    {"cs", no_argument, NULL, OPT_CS},
    {"o-format", required_argument, NULL, OPT_O_FORMAT},
    {"o", required_argument, NULL, OPT_O},
    {"oc", required_argument, NULL, OPT_OC},
    {"scoring", required_argument, NULL, OPT_SCORING},
    {"verbosity", required_argument, NULL, OPT_VERBOSITY},
    {"gcbins", required_argument, NULL, OPT_GCBINS},
    {"last", required_argument, NULL, OPT_LAST},
    {"version", no_argument, NULL, OPT_VERSION},
    {NULL, 0, NULL, 0} //boundary indicator
  };
  // -o/-oc and -o-format are mutually exclusive; track which was seen first
  bool out_set = false;
  bool format_set = false;

  // set option defaults
  options->max_seq_length = MAX_SEQ;
  options->scan_both_strands = true;
  options->combine_duplicates = false;
  options->selected_motifs = rbtree_create(rbtree_strcmp, NULL, NULL, NULL, NULL);
  options->pseudocount = DEFAULT_PSEUDOCOUNT;
  options->output_format = CISML_FORMAT;
  options->clobber = false;
  options->out_dir = NULL;
  options->scoring = AVG_ODDS;
  options->pvalues = false;
  options->normalize_scores = false;
  options->num_gc_bins = 1;
  options->sdbg_order = -1;
  options->last = 0;
  options->motif_filename = NULL;
  options->fasta_filename = NULL;
  options->bg_filename = NULL;

  // parse command line
  while (1) {
    int opt = getopt_long_only(argc, argv, "", ama_options, NULL);
    if (opt == -1) break;
    switch (opt) {
      case OPT_MAX_SEQ_LENGTH:
        options->max_seq_length = atoi(optarg);
        break;
      case OPT_MOTIF:
        rbtree_make(options->selected_motifs, optarg, NULL);
        break;
      case OPT_MOTIF_PSEUDO:
        options->pseudocount = atof(optarg);
        break;
      case OPT_RMA:
        options->normalize_scores = true;
        break;
      case OPT_PVALUES:
        options->pvalues = true;
        break;
      case OPT_SDBG:
        options->sdbg_order = atoi(optarg); // >=0 means use sequence bkg
        break;
      case OPT_NORC:
        options->scan_both_strands = false;
        break;
      case OPT_CS:
        options->combine_duplicates = true;
        break;
      case OPT_O_FORMAT:
        if (out_set) {
          usage("Option -o-format is incompatible with option -o/-oc");
        } else {
          format_set = true;
          if (strcmp(optarg, "gff") == 0) {
            options->output_format = GFF_FORMAT;
          } else if (strcmp(optarg, "cisml") == 0) {
            options->output_format = CISML_FORMAT;
          } else {
            usage("Output format \"%s\" is not recognised. "
                "Expected \"gff\" or \"cisml\".", optarg);
          }
        }
        break;
      case OPT_OC:
        // -oc is -o plus permission to overwrite
        options->clobber = true;
        /* fallthrough */
      case OPT_O:
        if (format_set) {
          usage("Option -o/-oc is incompatible with option -o-format");
        } else {
          out_set = true;
          options->out_dir = optarg;
          options->output_format = DIRECTORY_FORMAT;
        }
        break;
      case OPT_SCORING:
        if (strcmp(optarg, "max-odds") == 0) {
          options->scoring = MAX_ODDS;
        } else if (strcmp(optarg, "avg-odds") == 0) {
          options->scoring = AVG_ODDS;
        } else if (strcmp(optarg, "sum-odds") == 0) {
          options->scoring = SUM_ODDS;
        } else {
          usage("Scoring method \"%s\" is not recognised. "
              "Expected \"max-odds\", \"avg-odds\" or \"sum-odds\".", optarg);
        }
        break;
      case OPT_VERBOSITY:
        verbosity = atoi(optarg);
        break;
      case OPT_GCBINS:
        // asking for GC bins also turns on p-values
        options->num_gc_bins = atoi(optarg);
        options->pvalues = true;
        if (options->num_gc_bins <= 1) usage("Number of bins in --gcbins must be greater than 1.");
        break;
      case OPT_LAST:
        options->last = atoi(optarg);
        if (options->last < 0) usage("Option --last must not be negative.");
        break;
      case OPT_VERSION:
        fprintf(stdout, VERSION "\n");
        exit(EXIT_SUCCESS);
        break;
      case '?': //unrecognised or ambiguous argument
        usage("Unrecognized or ambiguous option.");
        break;
    }
  }

  // --sdbg overrides --pvalues and --gcbins and --rma
  if (options->sdbg_order >= 0) {
    options->pvalues = false;
    options->normalize_scores = false;
    options->num_gc_bins = 1;
  }

  // positional arguments: <motif file> <fasta file> [<background file>]
  if (argc <= optind) usage("Expected motif file.");
  options->motif_filename = argv[optind++];
  if (argc <= optind) usage("Expected fasta file.");
  options->fasta_filename = argv[optind++];
  if (argc > optind) options->bg_filename = argv[optind++];

  // exactly one of --sdbg and a background file must be given
  if (options->sdbg_order >= 0) {
    if (options->bg_filename) usage("A background file can not be used together with --sdbg.");
  } else {
    if (!options->bg_filename) usage("You must provide a background file unless you specify --sdbg.");
  }
  if (argc > optind) usage("Too many parameters");

  // for now, use uniform to mimic old implementation. I will probably remove this later
  if (!options->bg_filename) options->bg_filename = "--uniform--";

  // Record the command line.
  options->command_line = get_command_line(argc, argv);
}
/*****************************************************************************
 * dreme > model > background
 *
 *  type            is the alphabet DNA or RNA (optional when custom alphabet
 *                  specified)
 *  <symbol>        frequency of <symbol> from core alphabet
 *  from            from the negative dataset or a background file
 *  file            the background file (optional)
 *  last_mod_date   the last modified date of the background file (optional)
 *
 * When no alphabet element was seen beforehand the four core symbols are
 * registered here from the type attribute (A, C, G and then T or U).
 ****************************************************************************/
static void start_ele_background(PS_T *ps, const xmlChar **attrs) {
  int type, from;
  char *file, *lastmod;

  // set reasonable defaults; each variable takes a constant from its own
  // enumeration (type: alphabet, from: background source)
  type = DREME_ALPH_DNA;
  from = DREME_BG_FROM_DATASET;
  file = NULL;
  lastmod = NULL;

  char* type_options[2] = {"dna", "rna"};
  int type_values[2] = {DREME_ALPH_DNA, DREME_ALPH_RNA};
  MULTI_T type_multi = {.count = 2, .options = type_options,
    .outputs = type_values, .target = &(type)};

  char* from_options[2] = {"dataset", "file"};
  int from_values[2] = {DREME_BG_FROM_DATASET, DREME_BG_FROM_FILE};
  MULTI_T from_multi = {.count = 2, .options = from_options,
    .outputs = from_values, .target = &(from)};

  // the four parallel arrays below are indexed by attribute, in names order
  char* names[4] = {"file", "from", "last_mod_date", "type"};
  int (*parsers[4])(char*, void*) = {ld_str, ld_multi, ld_str, ld_multi};
  void *data[4] = {&file, &from_multi, &lastmod, &type_multi};
  BOOLEAN_T required[4] = {FALSE, TRUE, FALSE, FALSE};
  BOOLEAN_T done[4];

  // the type is only needed when there is no alphabet element to define it
  required[3] = !ps->seen_alphabet;

  parse_attributes(dreme_attr_parse_error, ps, "background", attrs, 4,
      names, parsers, data, required, done);

  // a background read from a file must say which file and when it was modified
  if (from == DREME_BG_FROM_FILE) {
    if (!done[0]) dreme_attr_parse_error(ps, PARSE_ATTR_MISSING, "background", "file", NULL);
    if (!done[2]) dreme_attr_parse_error(ps, PARSE_ATTR_MISSING, "background", "last_mod_date", NULL);
  }

  // if we haven't seen the alphabet then we must define it from the type
  if (!ps->seen_alphabet) {
    int idx = 0;
    rbtree_make(ps->alph_ids, "A", &idx);
    idx++;
    rbtree_make(ps->alph_ids, "C", &idx);
    idx++;
    rbtree_make(ps->alph_ids, "G", &idx);
    idx++;
    rbtree_make(ps->alph_ids, (type == DREME_ALPH_DNA ? "T" : "U"), &idx);
  }

  parse_freq_attrs(ps, "background", attrs);

  if (ps->callbacks->handle_background && ps->state != PS_ERROR) {
    ps->callbacks->handle_background(ps->user_data, rbtree_size(ps->alph_ids),
        ps->freqs, from, file, lastmod);
  }
}

/*****************************************************************************
 * dreme > model > stop
 *
 *  evalue  the stopping evalue (returned as log10).
 *  count   the stopping count.
 *  time    the stopping time.
 *
 * All three attributes are optional. Each value is handed to the callback by
 * pointer, in the order evalue, count, time; an attribute that was not
 * present on the element is passed as NULL rather than as a pointer to an
 * unset local.
 ****************************************************************************/
static void start_ele_stop(PS_T *ps, const xmlChar **attrs) {
  int count, time;
  double log10evalue;

  // done[] is indexed in names order: 0 = count, 1 = evalue, 2 = time
  char* names[3] = {"count", "evalue", "time"};
  int (*parsers[3])(char*, void*) = {ld_int, ld_log10_ev, ld_int};
  void *data[3] = {&count, &log10evalue, &time};
  BOOLEAN_T required[3] = {FALSE, FALSE, FALSE};
  BOOLEAN_T done[3];

  parse_attributes(dreme_attr_parse_error, ps, "stop", attrs, 3,
      names, parsers, data, required, done);

  if (ps->callbacks->handle_stop && ps->state != PS_ERROR) {
    ps->callbacks->handle_stop(ps->user_data,
        (done[1] ? &log10evalue : NULL),
        (done[0] ? &count : NULL),
        (done[2] ? &time : NULL));
  }
}

/*****************************************************************************
 * dreme > model > /ngen
 *
 * the number of generations to check (or something like that).
 * The value is parsed from the character data collected for the element.
 ****************************************************************************/
static void end_ele_ngen(PS_T *ps) {
  int ngen = 0; // never left indeterminate, even when parsing fails

  // ld_int returns non-zero when the text is not a valid integer
  if (ld_int(ps->characters.buffer, &ngen)) {
    error(ps, "Bad value \"%s\" for ngen.\n", ps->characters.buffer);
  }
  if (ps->callbacks->handle_ngen && ps->state != PS_ERROR) {
    ps->callbacks->handle_ngen(ps->user_data, ngen);
  }
}
/*****************************************************************************
 * dreme > model > negatives
 *
 *  name            the name of the negative dataset
 *  count           the number of sequences in the negative dataset
 *  from            the source of the negative dataset (eg shuffled positives)
 *  file            the file containing the negative dataset (optional)
 *  last_mod_date   the last modified date of the file (optional)
 *
 * file and last_mod_date become mandatory when from is "file".
 ****************************************************************************/
static void start_ele_negatives(PS_T *ps, const xmlChar **attrs) {
  char *name, *file, *lastmod;
  long count;
  int from;

  // optional attributes default to NULL so their absence can be detected
  file = NULL;
  lastmod = NULL;

  char* from_options[2] = {"file", "shuffled"};
  int from_values[2] = {DREME_NEG_FILE, DREME_NEG_SHUFFLED};
  MULTI_T from_multi = {.count = 2, .options = from_options,
    .outputs = from_values, .target = &(from)};

  // the four parallel arrays below are indexed by attribute, in names order
  char* names[5] = {"count", "file", "from", "last_mod_date", "name"};
  int (*parsers[5])(char*, void*) = {ld_long, ld_str, ld_multi, ld_str, ld_str};
  void *data[5] = {&count, &file, &from_multi, &lastmod, &name};
  BOOLEAN_T required[5] = {TRUE, FALSE, TRUE, FALSE, TRUE};
  BOOLEAN_T done[5];

  parse_attributes(dreme_attr_parse_error, ps, "negatives", attrs, 5,
      names, parsers, data, required, done);

  // from is only examined once the required attributes parsed without error
  if (ps->state != PS_ERROR && from == DREME_NEG_FILE) {
    if (file == NULL) {
      dreme_attr_parse_error(ps, PARSE_ATTR_MISSING, "negatives", "file", NULL);
    }
    if (lastmod == NULL) {
      dreme_attr_parse_error(ps, PARSE_ATTR_MISSING, "negatives", "last_mod_date", NULL);
    }
  }

  if (ps->callbacks->handle_negatives && ps->state != PS_ERROR) {
    ps->callbacks->handle_negatives(ps->user_data, name, count,
        (DREME_NEG_EN)from, file, lastmod);
  }
}

/*****************************************************************************
 * DREME > model > alphabet
 *
 *  name  the name of the alphabet (optional)
 *  like  the standard alphabet this one extends: dna, protein or rna
 *        (optional)
 ****************************************************************************/
static void start_ele_alphabet(PS_T *ps, const xmlChar **attrs) {
  char *name;
  int extends;

  char* extends_options[3] = {"dna", "protein", "rna"};
  int extends_values[3] = {ALPH_FLAG_EXTENDS_DNA, ALPH_FLAG_EXTENDS_PROTEIN,
    ALPH_FLAG_EXTENDS_RNA};
  MULTI_T extends_multi = {.count = 3, .options = extends_options,
    .outputs = extends_values, .target = &(extends)};

  char* names[2] = {"like", "name"};
  int (*parsers[2])(char*, void*) = {ld_multi, ld_str};
  void *data[2] = {&extends_multi, &name};
  BOOLEAN_T required[2] = {FALSE, FALSE};
  BOOLEAN_T done[2];

  // remember that an alphabet element was seen; the background element
  // checks this flag before defining the alphabet from its type attribute
  ps->seen_alphabet = true;

  // defaults
  name = NULL;
  extends = 0;

  parse_attributes(dreme_attr_parse_error, ps, "alphabet", attrs, 2,
      names, parsers, data, required, done);

  if (ps->callbacks->start_alphabet && ps->state != PS_ERROR) {
    ps->callbacks->start_alphabet(ps->user_data, name, extends);
  }
  dreme_push_es(ps, PS_IN_ALPHABET_LETTER, ES_ONE_OR_MORE);
}

/*****************************************************************************
 * DREME > model > /alphabet
 ****************************************************************************/
static void end_ele_alphabet(PS_T *ps) {
  if (ps->callbacks->end_alphabet && ps->state != PS_ERROR) {
    ps->callbacks->end_alphabet(ps->user_data);
  }
}

/*****************************************************************************
 * DREME > model > alphabet > letter
 *
 *  id          the identifier of the letter (required)
 *  symbol      the symbol of the letter (required)
 *  aliases     optional
 *  colour      optional, parsed as hexadecimal; -1 when absent
 *  complement  optional; '\0' when absent
 *  equals      optional; its presence marks the letter as ambiguous
 *  name        optional
 *
 * Core letters (no equals attribute) are given the next free index in
 * ps->alph_ids and must all appear before the first ambiguous letter.
 ****************************************************************************/
static void start_ele_alphabet_letter(PS_T *ps, const xmlChar **attrs) {
  char *aliases, *id, *name, *equals, symbol, complement;
  int colour, idx;

  char* names[7] = {"aliases", "colour", "complement", "equals", "id", "name", "symbol"};
  int (*parsers[7])(char*, void*) = {ld_str, ld_hex, ld_char, ld_str, ld_str, ld_str, ld_char};
  void *data[7] = {&aliases, &colour, &complement, &equals, &id, &name, &symbol};
  BOOLEAN_T required[7] = {FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE};
  BOOLEAN_T done[7];

  // defaults; the required attributes are initialised as well so that a
  // letter missing one of them never leaves an indeterminate value behind
  aliases = NULL;
  id = NULL;
  name = NULL;
  equals = NULL;
  symbol = '\0';
  complement = '\0';
  colour = -1;

  parse_attributes(dreme_attr_parse_error, ps, "letter", attrs, 7,
      names, parsers, data, required, done);

  if (ps->seen_ambig) {
    if (equals == NULL) {
      error(ps, "All core symbols must appear before any ambigous symbols.\n");
    }
  } else if (equals == NULL) {
    // core symbol: give it the next index, unless the id failed to parse
    if (id != NULL) {
      idx = rbtree_size(ps->alph_ids);
      rbtree_make(ps->alph_ids, id, &idx);
    }
  } else {
    ps->seen_ambig = true;
  }

  if (ps->callbacks->handle_alphabet_letter && ps->state != PS_ERROR) {
    ps->callbacks->handle_alphabet_letter(ps->user_data, id, symbol, aliases,
        complement, equals, name, colour);
  }
}