/**
 * Builds one motif for a modification directly from the per-position
 * residue frequencies of all of its sequences, and appends the resulting
 * motif record to mod_info->motifinfos.
 */
void create_simple_motif(SUMMARY_T* summary, MOMO_OPTIONS_T* options, MOD_INFO_T * mod_info) {
  ARRAYLST_T* seq_list = mod_info->seq_list;
  MOTIF_INFO_T* info;
  int idx;

  // Count residues over every sequence, then scale each row into frequencies.
  MATRIX_T* freqs = get_count_matrix(NULL, seq_list, NULL, options, summary);
  normalize_rows(0.0, freqs);

  // Wrap the frequencies in a motif named after the modification.
  info = mm_malloc(sizeof(MOTIF_INFO_T));
  info->motif = allocate_motif(mod_info->mod_name, "", summary->alph, freqs, NULL);
  info->seqs = arraylst_create();

  // Record the raw text of every contributing sequence. When repeats are
  // eliminated the list holds hash table entries rather than sequences.
  for (idx = 0; idx < arraylst_size(seq_list); idx++) {
    void* item = arraylst_get(idx, seq_list);
    SEQ_T* seqobject;
    if (options->eliminate_repeats) {
      seqobject = hash_get_entry_value(item);
    } else {
      seqobject = item;
    }
    arraylst_add(get_raw_sequence(seqobject), info->seqs);
  }
  info->fg_size = arraylst_size(seq_list);

  arraylst_add(info, mod_info->motifinfos);

  // The matrix is released here, immediately after being handed to
  // allocate_motif above.
  free_matrix(freqs);
}
/***********************************************************************
 * Converts every TRANSFAC motif in a list into a MEME motif.
 * The MEME motif is named after the TRANSFAC accession when
 * use_accession is TRUE, and after the TRANSFAC ID otherwise.
 * The program dies if the chosen name is missing from a motif.
 * Caller is responsible for freeing the returned ARRAYLST
 ***********************************************************************/
ARRAYLST_T *convert_transfac_motifs_to_meme_motifs(
  BOOLEAN_T use_accession,
  int pseudocount,
  ARRAY_T *bg,
  ARRAYLST_T *tfac_motifs
) {
  const int total = arraylst_size(tfac_motifs);
  ARRAYLST_T *converted = arraylst_create_sized(total);
  int idx = 0;

  while (idx < total) {
    TRANSFAC_MOTIF_T *source = (TRANSFAC_MOTIF_T *) arraylst_get(idx, tfac_motifs);

    // Pick the label that will become the MEME motif name.
    char *label;
    if (use_accession == TRUE) {
      label = get_transfac_accession(source);
    } else {
      label = get_transfac_id(source);
    }
    if (label == NULL) {
      if (use_accession == TRUE) {
        die("No accession string found in TRANSFAC motif.");
      } else {
        die("No ID string found in TRANSFAC motif.");
      }
    }

    arraylst_add(
      convert_transfac_motif_to_meme_motif(label, pseudocount, bg, source),
      converted
    );
    idx++;
  }

  return converted;
}
/*
 * Drains the motif reader into a list.
 * Every motif that mread_next_motif still yields is appended to the
 * given list; when no list is supplied a new one is created first.
 * Returns the list that received the motifs.
 */
ARRAYLST_T* mread_load(MREAD_T *mread, ARRAYLST_T *motifs) {
  ARRAYLST_T *dest = (motifs != NULL) ? motifs : arraylst_create();

  for (;;) {
    MOTIF_T *next = mread_next_motif(mread);
    if (next == NULL) {
      break;
    }
    arraylst_add(next, dest);
  }

  return dest;
}
/*************************************************************************
 * Read a motif database
 *
 * Opens motif_source, applies the pseudocount and the background (the
 * supplied *bg when it is set, otherwise bg_source), and gathers the
 * motifs into a newly allocated MOTIF_DB_T tagged with id.
 * When selected is non-empty only motifs whose ID is found in it are
 * kept; all others are destroyed.
 * If at least one motif was read, the file's alphabet must equal alph
 * (the program dies otherwise) and an unset *bg is filled in from the
 * reader. A file without motifs only triggers a warning.
 *************************************************************************/
static MOTIF_DB_T* read_motifs(int id, char* motif_source, char* bg_source,
    ARRAY_T** bg, double pseudocount, RBTREE_T *selected, ALPH_T alph) {
  MREAD_T *reader;
  ARRAYLST_T *kept;
  MOTIF_DB_T *db;
  int num_read = 0;

  // open the motif file and configure the reader
  reader = mread_create(motif_source, OPEN_MFILE);
  mread_set_pseudocount(reader, pseudocount);
  if (*bg == NULL) {
    mread_set_bg_source(reader, bg_source);
  } else {
    mread_set_background(reader, *bg);
  }

  // load the motifs, filtering by ID when a selection was given
  if (rbtree_size(selected) > 0) {
    kept = arraylst_create();
    for (; mread_has_motif(reader); num_read++) {
      MOTIF_T *candidate = mread_next_motif(reader);
      if (rbtree_find(selected, get_motif_id(candidate))) {
        arraylst_add(candidate, kept);
        continue;
      }
      DEBUG_FMT(NORMAL_VERBOSE, "Discarding motif %s in %s.\n",
          get_motif_id(candidate), motif_source);
      destroy_motif(candidate);
    }
  } else {
    kept = mread_load(reader, NULL);
    num_read = arraylst_size(kept);
  }
  arraylst_fit(kept);

  if (num_read <= 0) {
    fprintf(stderr, "Warning: Motif file %s contains no motifs.\n", motif_source);
  } else {
    // the motifs must use the expected alphabet
    if (mread_get_alphabet(reader) != alph) {
      die("Expected %s alphabet motifs\n", alph_name(alph));
    }
    // adopt the reader's background when the caller had none
    if (*bg == NULL) {
      *bg = mread_get_background(reader);
    }
  }

  // the reader is no longer needed
  mread_destroy(reader);

  // package everything up
  db = mm_malloc(sizeof(MOTIF_DB_T));
  memset(db, 0, sizeof(MOTIF_DB_T));
  db->motifs = kept;
  db->source = strdup(motif_source);
  db->id = id;
  return db;
}
/***************************************************************************** * MEME > scanned_sites_summary > scanned_sites ****************************************************************************/ void mxml_start_scanned_seq(void *ctx, char *seq_id, double log10pvalue, int site_count) { CTX_T *data; int *length; struct seqinfo *seq; data = (CTX_T*)ctx; if (data->options & SCANNED_SITES) { data->current_site = 0; seq = (struct seqinfo *)rbtree_get(data->sequence_lookup, seq_id); if (seq == NULL) { local_error(data, "Scanned sites references unknown sequence \"%s\".\n", seq_id); return; } arraylst_add(sseq_create(seq->name, seq->length, log10pvalue, site_count), data->fscope.scanned_sites); } }
/*
 * Reads the motifs named by the options and builds their scoring matrices.
 *
 * The motif file (opts->motif_filename) is read with the background from
 * opts->bg_filename and the pseudocount from opts->pseudocount. Each motif
 * that is selected (or every motif when opts->selected_motifs is empty) is
 * turned into a PSSM, plus a PSSM of its reverse complement when both
 * strands are scanned, and stored in the returned list as a
 * motif_and_pssm_create() record. Unselected motifs are destroyed.
 *
 * Side effects on opts:
 *  - scan_both_strands is cleared when the alphabet has no complement;
 *  - num_gc_bins is reset to 1 (with a warning) unless the alphabet has
 *    exactly 4 core symbols forming exactly 2 complementary pairs.
 *
 * Dies if the alphabet cannot be determined from the motif file.
 */
ARRAYLST_T* load_motifs(AMA_OPTIONS_T *opts) {
  ARRAYLST_T *motifs;
  ARRAY_T *pos_bg_freqs, *rev_bg_freqs;
  MREAD_T *mread;
  MOTIF_T *motif, *motif_rc;
  double range;
  PSSM_T *pos_pssm, *neg_pssm;
  int total_motifs;
  ALPH_T *alph;

  //
  // Read the motifs and background model.
  //
  // this reads any meme file, xml, txt and html
  mread = mread_create(opts->motif_filename, OPEN_MFILE);
  mread_set_bg_source(mread, opts->bg_filename);
  mread_set_pseudocount(mread, opts->pseudocount);

  // sanity check, since the rest of the code relies on the motifs being complementable
  alph = alph_hold(mread_get_alphabet(mread));
  if (alph == NULL) die("Unable to determine alphabet from motifs");
  if (opts->scan_both_strands && !alph_has_complement(alph)) {
    opts->scan_both_strands = false;
  }
  // GC binning is only kept for an alphabet with exactly 4 core symbols
  // forming exactly 2 complementary pairs, so failing EITHER test must
  // disable it (the previous "&&" only disabled it when both tests failed).
  if (opts->num_gc_bins > 1 &&
      (alph_size_core(alph) != 4 || alph_size_pairs(alph) != 2)) {
    fprintf(stderr, "Warning: The motif alphabet does not have exactly 2 complementary pairs so \"GC binning\" will be disabled.\n");
    opts->num_gc_bins = 1;
  }

  // Background for the given strand, and its complement-swapped copy for
  // the reverse strand when both strands are scanned.
  pos_bg_freqs = mread_get_background(mread);
  rev_bg_freqs = NULL;
  if (opts->scan_both_strands) {
    rev_bg_freqs = allocate_array(get_array_length(pos_bg_freqs));
    copy_array(pos_bg_freqs, rev_bg_freqs);
    complement_swap_freqs(alph, rev_bg_freqs, rev_bg_freqs);
  }

  // allocate memory for motifs
  motifs = arraylst_create();

  //
  // Convert motif matrices into log-odds matrices.
  // Scale them.
  // Compute the lookup tables for the PDF of scaled log-odds scores.
  //
  range = 300; // 100 is not very good; 1000 is great but too slow
  neg_pssm = NULL;
  total_motifs = 0;
  while (mread_has_motif(mread)) {
    motif = mread_next_motif(mread);
    total_motifs++;

    if (rbtree_size(opts->selected_motifs) == 0 ||
        rbtree_find(opts->selected_motifs, get_motif_id(motif)) != NULL) {
      if (verbosity >= HIGH_VERBOSE) {
        fprintf(stderr, "Using motif %s of width %d.\n",
            get_motif_id(motif), get_motif_length(motif));
      }
      pos_pssm = build_motif_pssm(
        motif,
        pos_bg_freqs,
        pos_bg_freqs,
        NULL, // Priors not used
        0.0L, // alpha not used
        range,
        opts->num_gc_bins,
        true
      );
      //
      // Note: If scanning both strands, we complement the motif frequencies
      // but not the background frequencies so the motif looks the same.
      // However, the given frequencies are used in computing the p-values
      // since they represent the frequencies on the negative strands.
      // (If we instead were to complement the input sequence, keeping the
      // the motif fixed, we would need to use the complemented frequencies
      // in computing the p-values. Is that any clearer?)
      //
      if (opts->scan_both_strands) {
        motif_rc = dup_rc_motif(motif);
        neg_pssm = build_motif_pssm(
          motif_rc,
          rev_bg_freqs,
          pos_bg_freqs,
          NULL, // Priors not used
          0.0L, // alpha not used
          range,
          opts->num_gc_bins,
          true
        );
        destroy_motif(motif_rc);
      }
      arraylst_add(motif_and_pssm_create(motif, pos_pssm, neg_pssm), motifs);
    } else {
      if (verbosity >= HIGH_VERBOSE) {
        fprintf(stderr, "Skipping motif %s.\n", get_motif_id(motif));
      }
      destroy_motif(motif);
    }
  }

  // clean up the reader and the background copies
  mread_destroy(mread);
  free_array(pos_bg_freqs);
  free_array(rev_bg_freqs);
  alph_release(alph);

  if (verbosity >= NORMAL_VERBOSE) {
    fprintf(stderr, "Loaded %d/%d motifs from %s.\n",
        arraylst_size(motifs), total_motifs, opts->motif_filename);
  }
  return motifs;
}
/** * Recursive function. Creates and stores a motif using the motif-x * algorithm until no more are left. */ void create_motifx_motif(ARRAYLST_T* phospho_seqs, ARRAYLST_T* bg_seqs, MOTIFX_STATUS_T** phospho_status, MOTIFX_STATUS_T** bg_status, MATRIX_T* phospho_count, MATRIX_T* bg_count, int* num_active, int* num_bg_active, char* modname, MOD_INFO_T* mod_info, MOMO_OPTIONS_T* options, SUMMARY_T* summary) { int i; int j; const char* alph_letters = summary->alph_letters; // Initialize pattern, sequence count, bg sequence count, and overall score for this motif. char* pattern = mm_malloc(options->width + 1); for (i = 0; i < options->width; ++i) { pattern[i] = 'X'; } pattern[options->width] = '\0'; int* num_active_copy = mm_malloc(sizeof(int)); *num_active_copy = *num_active; int* num_bg_active_copy = mm_malloc(sizeof(int)); *num_bg_active_copy = *num_bg_active; double* motif_score = mm_malloc(sizeof(double)); *motif_score = 0; // Set the pattern, num active copy, num bg active copy, motif score, and get a count of the sequences MATRIX_T* result_count_matrix = add_to_pattern(pattern, phospho_seqs, bg_seqs, phospho_status, bg_status, num_active_copy, num_bg_active_copy, phospho_count, bg_count, motif_score, summary, options); // If any of the characters are not X, then we have found a pattern BOOLEAN_T found_pattern = FALSE; for (i = 0; i < options->width; ++i) { if (pattern[i] != 'X') { found_pattern = TRUE; } } // If there is a pattern, store the pattern and call create_motifx_motif again. if (found_pattern) { // fill out the rest of the pattern (e.g. 
if you have pattern ..ASAAA, and realize the actual pattern is A.ASAAA for (i = 0; i < options->width; i++) { for (j = 0; j < strlen(alph_letters); j++) { if ((int) get_matrix_cell_defcheck(i, j, result_count_matrix) == *num_active_copy) { pattern[i] = alph_letters[j]; } } } // create the pattern name char* pattern_name = mm_malloc(strlen(pattern) + strlen(modname) + 3); pattern_name[0] = '\0'; strncat(pattern_name, pattern, strlen(pattern)/2); strncat(pattern_name, "_", 1); strncat(pattern_name, modname, strlen(modname)); strncat(pattern_name, "_", 1); strncat(pattern_name, pattern + strlen(pattern)/2 + 1, strlen(pattern)/2); // convert this count matrix into frequencies normalize_rows(0.0, result_count_matrix); // Store this motif MOTIF_INFO_T* motifinfo = mm_malloc(sizeof(MOTIF_INFO_T)); MOTIF_T* motif = allocate_motif(pattern_name, "", summary->alph, result_count_matrix, NULL); set_motif_nsites(motif, *num_active_copy); motifinfo->motif = motif; motifinfo->seqs = arraylst_create(); motifinfo->score = *motif_score; motifinfo->fg_match = *num_active_copy; motifinfo->fg_size = *num_active; motifinfo->bg_match = *num_bg_active_copy; motifinfo->bg_size = *num_bg_active; for (i = 0; i < arraylst_size(phospho_seqs); ++i) { MOTIFX_STATUS_T status = (*phospho_status)[i]; if (status == ACTIVE) { SEQ_T* active_sequence = (options->eliminate_repeats) ? hash_get_entry_value(arraylst_get(i, phospho_seqs)) : arraylst_get(i, phospho_seqs); arraylst_add(get_raw_sequence(active_sequence), motifinfo->seqs); } } arraylst_add(motifinfo, mod_info->motifinfos); // delete the sequences from this motif. turn inactive into active. delete_sequences(phospho_status, arraylst_size(phospho_seqs)); delete_sequences(bg_status, arraylst_size(bg_seqs)); // update the count of number of actives *num_active = *num_active - *num_active_copy; *num_bg_active = *num_bg_active - *num_bg_active_copy; // recalculate phospho count and bg count. 
phospho_count = get_count_matrix(phospho_count, phospho_seqs, phospho_status, options, summary); bg_count = get_count_matrix(bg_count, bg_seqs, bg_status, options, summary); // free up space myfree(pattern); myfree(num_active_copy); myfree(num_bg_active_copy); myfree(motif_score); myfree(pattern_name); // try to create another motif. create_motifx_motif(phospho_seqs, bg_seqs, phospho_status, bg_status, phospho_count, bg_count, num_active, num_bg_active, modname, mod_info, options, summary); } // free up space myfree(pattern); myfree(num_active_copy); myfree(num_bg_active_copy); myfree(motif_score); }
/*********************************************************************** * Read TRANSFAC motifs from a TRANSFAC file. * Returns an arraylist of pointers to TRANSFAC_MOTIF_T ***********************************************************************/ ARRAYLST_T *read_motifs_from_transfac_file ( const char* transfac_filename // Name of TRANSFAC file or '-' for stdin IN ) { // Create dynamic storage for motifs ARRAYLST_T *motif_list = arraylst_create(); // Open the TRANFAC file for reading. FILE *transfac_file = NULL; if (open_file( transfac_filename, "r", TRUE, // Allow '-' for stdin "transfac file", "", &transfac_file ) == FALSE) { exit(1); } // Read and parse the TRANFAC file. int num_bases = 4; char *line = NULL; while ((line = getline2(transfac_file)) != NULL) { // Split the line into an initial tag and everything else. char *this_accession = split(line, ' '); char *tag = line; // Have we reached a new matrix? if (strcmp(tag, "AC") == 0) { trim(this_accession); char *this_id = NULL; char *this_name = NULL; char *this_descr = NULL; char *this_species = NULL; char this_consensus[MAX_CONSENSUS_LENGTH]; STRING_LIST_T *species_list = new_string_list(); // Old versions of TRANSFAC use pee-zero; new use pee-oh. while (strcmp(tag, "PO") != 0 && strcmp(tag, "P0") != 0) { line = getline2(transfac_file); if (line == NULL) { die ("Can't find PO line for TRANSFAC matrix %s.\n", this_accession); } char *data = split(line, ' '); if (data != NULL) { trim(data); } tag = line; // Store the id line. if (strcmp(tag, "ID") == 0) { this_id = strdup(data); } // Store the species line. else if (strcmp(tag, "BF") == 0) { add_string(data, species_list); } // Store the name line. else if (strcmp(tag, "NA") == 0) { this_name = strdup(data); } // Store the description line. 
else if (strcmp(tag, "DE") == 0) { this_descr = strdup(data); } } // Check how many positions in the motif // Mark current position in file fpos_t file_position; errno = 0; int status = fgetpos(transfac_file, &file_position); if (status) { die("Error reading file %s: %s", transfac_filename, strerror(errno)); } int num_motif_positions = 0; while (TRUE) { // Read till we reach the end of the counts or the end of the motif line = getline2(transfac_file); if (line == NULL) { break; } char *data = split(line, ' '); if (data != NULL) { trim(data); } tag = line; // Read till we reach the end of the counts or the end of the motif if ((strcmp(tag, "XX\n") == 0) || (strcmp(tag, "//\n") == 0)) { break; } ++num_motif_positions; } // Rewind file errno = 0; status = fsetpos(transfac_file, &file_position); if (status) { die("Error reading file %s: %s", transfac_filename, strerror(errno)); } // Read the motif counts. int num_seqs = 0; this_consensus[0] = 0; MATRIX_T *motif_counts = allocate_matrix(num_motif_positions, 4); int position = 0; while (TRUE) { line = getline2(transfac_file); if (line == NULL) { break; } char *data = split(line, ' '); if (data != NULL) { trim(data); } tag = line; // Look for the end of the motif. if ((strcmp(tag, "XX\n") == 0) || (strcmp(tag, "//\n") == 0)) { break; } position = atoi(tag); if (position > num_motif_positions) { die( "Error reading motif counts at position %d of motif %s in file %s", position, this_accession, transfac_filename ); } // Store the contents of this row. 
int count[4]; char consensus; sscanf( data, "%d %d %d %d %c", &(count[0]), &(count[1]), &(count[2]), &(count[3]), &consensus ); int i_base; for (i_base = 0; i_base < num_bases; i_base++) { set_matrix_cell(position - 1, i_base, count[i_base], motif_counts); } this_consensus[position - 1] = consensus; } this_consensus[position] = 0; TRANSFAC_MOTIF_T *motif = new_transfac_motif( this_accession, this_id, this_name, this_descr, this_consensus, species_list, motif_counts ); arraylst_add(motif, motif_list); } } fclose(transfac_file); return motif_list; }
/************************************************************************* * Entry point for centrimo *************************************************************************/ int main(int argc, char *argv[]) { CENTRIMO_OPTIONS_T options; SEQ_SITES_T seq_sites; SITE_COUNTS_T counts; int seqN, motifN, seqlen, db_i, motif_i, i; double log_pvalue_thresh; SEQ_T** sequences = NULL; ARRAY_T* bg_freqs = NULL; ARRAYLST_T *stats_list; MOTIF_DB_T **dbs, *db; MREAD_T *mread; MOTIF_STATS_T *stats; MOTIF_T *motif, *rev_motif; PSSM_T *pos_pssm, *rev_pssm; char *sites_path, *desc; FILE *sites_file; HTMLWR_T *html; JSONWR_T *json; // COMMAND LINE PROCESSING process_command_line(argc, argv, &options); // load the sequences read_sequences(options.alphabet, options.seq_source, &sequences, &seqN); seqlen = (seqN ? get_seq_length(sequences[0]) : 0); // calculate a sequence background (unless other background is given) if (!options.bg_source) { bg_freqs = calc_bg_from_fastas(options.alphabet, seqN, sequences); } // load the motifs motifN = 0; dbs = mm_malloc(sizeof(MOTIF_DB_T*) * arraylst_size(options.motif_sources)); for (i = 0; i < arraylst_size(options.motif_sources); i++) { char* db_source; db_source = (char*)arraylst_get(i, options.motif_sources); dbs[i] = read_motifs(i, db_source, options.bg_source, &bg_freqs, options.pseudocount, options.selected_motifs, options.alphabet); motifN += arraylst_size(dbs[i]->motifs); } log_pvalue_thresh = log(options.evalue_thresh) - log(motifN); // Setup some things for double strand scanning if (options.scan_both_strands == TRUE) { // Set up hash tables for computing reverse complement setup_hash_alph(DNAB); setalph(0); // Correct background by averaging on freq. for both strands. 
average_freq_with_complement(options.alphabet, bg_freqs); normalize_subarray(0, alph_size(options.alphabet, ALPH_SIZE), 0.0, bg_freqs); calc_ambigs(options.alphabet, FALSE, bg_freqs); } // Create output directory if (create_output_directory(options.output_dirname, options.allow_clobber, (verbosity >= NORMAL_VERBOSE))) { die("Couldn't create output directory %s.\n", options.output_dirname); } // open output files sites_path = make_path_to_file(options.output_dirname, SITES_FILENAME); sites_file = fopen(sites_path, "w"); free(sites_path); // setup html monolith writer json = NULL; if ((html = htmlwr_create(get_meme_etc_dir(), TEMPLATE_FILENAME))) { htmlwr_set_dest_name(html, options.output_dirname, HTML_FILENAME); htmlwr_replace(html, "centrimo_data.js", "data"); json = htmlwr_output(html); if (json == NULL) die("Template does not contain data section.\n"); } else { DEBUG_MSG(QUIET_VERBOSE, "Failed to open html template file.\n"); } if (json) { // output some top level variables jsonwr_str_prop(json, "version", VERSION); jsonwr_str_prop(json, "revision", REVISION); jsonwr_str_prop(json, "release", ARCHIVE_DATE); jsonwr_str_array_prop(json, "cmd", argv, argc); jsonwr_property(json, "options"); jsonwr_start_object_value(json); jsonwr_dbl_prop(json, "motif-pseudo", options.pseudocount); jsonwr_dbl_prop(json, "score", options.score_thresh); jsonwr_dbl_prop(json, "ethresh", options.evalue_thresh); jsonwr_lng_prop(json, "maxbin", options.max_window+1); jsonwr_bool_prop(json, "norc", !options.scan_both_strands); jsonwr_bool_prop(json, "noflip", options.no_flip); jsonwr_end_object_value(json); // output the description desc = prepare_description(&options); if (desc) { jsonwr_str_prop(json, "job_description", desc); free(desc); } // output size metrics jsonwr_lng_prop(json, "seqlen", seqlen); jsonwr_lng_prop(json, "tested", motifN); // output the fasta db jsonwr_property(json, "sequence_db"); jsonwr_start_object_value(json); jsonwr_str_prop(json, "source", 
options.seq_source); jsonwr_lng_prop(json, "count", seqN); jsonwr_end_object_value(json); // output the motif dbs jsonwr_property(json, "motif_dbs"); jsonwr_start_array_value(json); for (db_i = 0; db_i < arraylst_size(options.motif_sources); db_i++) { db = dbs[db_i]; jsonwr_start_object_value(json); jsonwr_str_prop(json, "source", db->source); jsonwr_lng_prop(json, "count", arraylst_size(db->motifs)); jsonwr_end_object_value(json); } jsonwr_end_array_value(json); // start the motif array jsonwr_property(json, "motifs"); jsonwr_start_array_value(json); } /************************************************************** * Tally the positions of the best sites for each of the * selected motifs. **************************************************************/ // prepare the sequence sites memset(&seq_sites, 0, sizeof(SEQ_SITES_T)); // prepare the site counts counts.allocated = ((2 * seqlen) - 1); counts.sites = mm_malloc(sizeof(double) * counts.allocated); // prepare the motifs stats list stats_list = arraylst_create(); // prepare the other vars motif = NULL; pos_pssm = NULL; rev_motif = NULL; rev_pssm = NULL; for (db_i = 0; db_i < arraylst_size(options.motif_sources); db_i++) { db = dbs[db_i]; for (motif_i = 0; motif_i < arraylst_size(db->motifs); motif_i++) { motif = (MOTIF_T *) arraylst_get(motif_i, db->motifs); DEBUG_FMT(NORMAL_VERBOSE, "Using motif %s of width %d.\n", get_motif_id(motif), get_motif_length(motif)); // reset the counts for (i = 0; i < counts.allocated; i++) counts.sites[i] = 0; counts.total_sites = 0; // create the pssm pos_pssm = make_pssm(bg_freqs, motif); // If required, do the same for the reverse complement motif. 
if (options.scan_both_strands) { rev_motif = dup_rc_motif(motif); rev_pssm = make_pssm(bg_freqs, rev_motif); } // scan the sequences for (i = 0; i < seqN; i++) score_sequence(&options, sequences[i], pos_pssm, rev_pssm, &seq_sites, &counts); // DEBUG check that the sum of the sites is close to the site count double sum_check = 0, sum_diff; for (i = 0; i < counts.allocated; i++) sum_check += counts.sites[i]; sum_diff = counts.total_sites - sum_check; if (sum_diff < 0) sum_diff = -sum_diff; if (sum_diff > 0.1) { fprintf(stderr, "Warning: site counts don't sum to accurate value! " "%g != %ld", sum_check, counts.total_sites); } // output the plain text site counts output_site_counts(sites_file, seqlen, db, motif, &counts); // compute the best central window stats = compute_stats(options.max_window, seqlen, db, motif, &counts); // check if it passes the threshold if (json && stats->log_adj_pvalue <= log_pvalue_thresh) { output_motif_json(json, stats, &counts); arraylst_add(stats, stats_list); } else { free(stats); } // Free memory associated with this motif. free_pssm(pos_pssm); free_pssm(rev_pssm); destroy_motif(rev_motif); } } if (json) jsonwr_end_array_value(json); // finish writing sites fclose(sites_file); // finish writing html file if (html) { if (htmlwr_output(html) != NULL) { die("Found another JSON replacement!\n"); } htmlwr_destroy(html); } // write text file output_centrimo_text(&options, motifN, stats_list); // Clean up. for (i = 0; i < seqN; ++i) { free_seq(sequences[i]); } free(sequences); for (i = 0; i < arraylst_size(options.motif_sources); i++) { free_db(dbs[i]); } free(dbs); free_array(bg_freqs); free(counts.sites); free(seq_sites.sites); arraylst_destroy(free, stats_list); cleanup_options(&options); return 0; }
/***********************************************************************
  Process command line options

  Fills in options with defaults, then applies every recognised option
  from the command line. After the options, the first remaining argument
  is the sequence source and every further argument is a motif source;
  at least one of each is required.

  On an option error or missing file names the usage message is printed
  and the program exits with EXIT_FAILURE.

  Option values are stored as pointers into argv, not copied.
 ***********************************************************************/
static void process_command_line(
  int argc,
  char* argv[],
  CENTRIMO_OPTIONS_T *options
) {

  // Define command line options.
  // "motif" is listed here because the parser below handles it; without
  // the table entry that branch could never be reached.
  cmdoption const centrimo_options[] = {
    {"bgfile", REQUIRED_VALUE},
    {"o", REQUIRED_VALUE},
    {"oc", REQUIRED_VALUE},
    {"score", REQUIRED_VALUE},
    {"motif", REQUIRED_VALUE},
    {"motif-pseudo", REQUIRED_VALUE},
    {"ethresh", REQUIRED_VALUE},
    {"maxbin", REQUIRED_VALUE},
    {"norc", NO_VALUE},
    {"noflip", NO_VALUE},
    {"desc", REQUIRED_VALUE},
    {"dfile", REQUIRED_VALUE},
    {"verbosity", REQUIRED_VALUE}
  };
  // Derived from the table so the two cannot drift apart.
  const int num_options =
    (int) (sizeof(centrimo_options) / sizeof(centrimo_options[0]));

  int option_index = 0;

  /* Make sure various options are set to NULL or defaults. */
  options->alphabet = DNA_ALPH;
  options->allow_clobber = TRUE;
  options->scan_both_strands = TRUE;
  options->no_flip = FALSE;

  options->description = NULL;
  options->desc_file = NULL;
  options->bg_source = NULL;
  options->output_dirname = "centrimo_out";
  options->seq_source = NULL;
  options->motif_sources = arraylst_create();

  options->score_thresh = DEFAULT_SCORE_THRESH;
  options->pseudocount = DEFAULT_PSEUDOCOUNT;
  options->evalue_thresh = DEFAULT_EVALUE_THRESH;
  options->max_window = DEFAULT_MAX_WINDOW;

  // no need to copy, as string is declared in argv array
  options->selected_motifs = rbtree_create(rbtree_strcmp, NULL, NULL, NULL, NULL);

  verbosity = NORMAL_VERBOSE;

  simple_setopt(argc, argv, num_options, centrimo_options);

  // Parse the command line.
  while (TRUE) {
    int c = 0;
    char* option_name = NULL;
    char* option_value = NULL;
    const char * message = NULL;

    // Read the next option, and break if we're done.
    c = simple_getopt(&option_name, &option_value, &option_index);
    if (c == 0) {
      break;
    } else if (c < 0) {
      (void) simple_getopterror(&message);
      fprintf(stderr, "Error processing command line options (%s)\n", message);
      fprintf(stderr, CENTRIMO_USAGE, DEFAULT_PSEUDOCOUNT, DEFAULT_SCORE_THRESH,
          DEFAULT_EVALUE_THRESH, NORMAL_VERBOSE);
      exit(EXIT_FAILURE);
    }

    if (strcmp(option_name, "bgfile") == 0) {
      options->bg_source = option_value;
    } else if (strcmp(option_name, "ethresh") == 0) {
      options->evalue_thresh = atof(option_value);
    } else if (strcmp(option_name, "maxbin") == 0) {
      // max_window is one less than the number of places a motif can align
      // within the central window
      options->max_window = atoi(option_value) - 1;
    } else if (strcmp(option_name, "motif") == 0) {
      rbtree_put(options->selected_motifs, option_value, NULL);
    } else if (strcmp(option_name, "motif-pseudo") == 0) {
      options->pseudocount = atof(option_value);
    } else if (strcmp(option_name, "norc") == 0) {
      options->scan_both_strands = FALSE;
    } else if (strcmp(option_name, "noflip") == 0) {
      options->no_flip = TRUE;
    } else if (strcmp(option_name, "o") == 0) {
      // Set output directory with no clobber
      options->output_dirname = option_value;
      options->allow_clobber = FALSE;
    } else if (strcmp(option_name, "oc") == 0) {
      // Set output directory with clobber
      options->output_dirname = option_value;
      options->allow_clobber = TRUE;
    } else if (strcmp(option_name, "score") == 0) {
      options->score_thresh = atof(option_value);
    } else if (strcmp(option_name, "desc") == 0) {
      options->description = option_value;
    } else if (strcmp(option_name, "dfile") == 0) {
      options->desc_file = option_value;
    } else if (strcmp(option_name, "verbosity") == 0) {
      verbosity = atoi(option_value);
    }
  }

  // Must have sequence and motif file names
  if (argc < option_index + 2) {
    fprintf(stderr, "Sequences and motifs are both required\n");
    fprintf(stderr, CENTRIMO_USAGE, DEFAULT_PSEUDOCOUNT, DEFAULT_SCORE_THRESH,
        DEFAULT_EVALUE_THRESH, NORMAL_VERBOSE);
    exit(EXIT_FAILURE);
  }

  // Record the input file names: first the sequences, then every motif file.
  options->seq_source = argv[option_index++];
  for (; option_index < argc; option_index++) {
    arraylst_add(argv[option_index], options->motif_sources);
  }
}