/*********************************************************************** * Discard motifs that are not connected. ***********************************************************************/ static void throw_out_unused_motifs (MATRIX_T* transp_freq, MATRIX_T* spacer_ave, int* num_motifs, MOTIF_T* motifs) { int i_motif, j_motif; ARRAY_T* row_sums; ARRAY_T* col_sums; // Extract the margins of the transition matrix. row_sums = get_matrix_row_sums(transp_freq); col_sums = get_matrix_col_sums(transp_freq); for (i_motif = 0; i_motif < *num_motifs; i_motif++) { // Is this row or column empty? if ((get_array_item(i_motif + 1, row_sums) == 0.0) || (get_array_item(i_motif + 1, col_sums) == 0.0)) { if (verbosity >= NORMAL_VERBOSE) { fprintf(stderr, "Removing unused motif %s. No occurrences of this motif were found.\n", get_motif_id(&(motifs[i_motif]))); } // Remove the row and column from the transition matrix. remove_matrix_row(i_motif + 1, transp_freq); remove_matrix_col(i_motif + 1, transp_freq); assert(get_num_rows(transp_freq) == get_num_cols(transp_freq)); remove_matrix_row(i_motif + 1, spacer_ave); remove_matrix_col(i_motif + 1, spacer_ave); assert(get_num_rows(spacer_ave) == get_num_cols(spacer_ave)); // Remove the motif from the array. for (j_motif = i_motif + 1; j_motif < *num_motifs; j_motif++) { free_motif(&(motifs[j_motif - 1])); copy_motif(&(motifs[j_motif]), &(motifs[j_motif - 1])); } free_motif(&(motifs[j_motif - 1])); (*num_motifs)--; i_motif--; // Recalculate the row and column sums. free_array(row_sums); free_array(col_sums); row_sums = get_matrix_row_sums(transp_freq); col_sums = get_matrix_col_sums(transp_freq); } } free_array(row_sums); free_array(col_sums); }
/*********************************************************************** * Free an array of motifs ***********************************************************************/ void free_motif_array(MOTIF_T *motif_array, int num) { int i; for (i = 0; i < num; ++i) { free_motif(motif_array+i); } free(motif_array); }
/************************************************************************** * Destroys a secondary motif. * It takes a void * pointer so it can be used in the collection objects. **************************************************************************/ void destroy_secondary_motif(void *p) { int i; SECONDARY_MOTIF_T *smotif = (SECONDARY_MOTIF_T*)p; free_motif(smotif->motif); free(smotif->motif); for (i = 0; i < 4; ++i) destroy_spacings((smotif->spacings)+i); if (smotif->sigs) free(smotif->sigs); if (smotif->seqs) free(smotif->seqs); free(smotif); }
/************************************************************************* * int main *************************************************************************/ int main(int argc, char *argv[]) { /* Data structures. */ int num_motifs; /* The number of motifs in the model. */ MOTIF_T motifs[2 * MAX_MOTIFS]; /* The motifs. */ STRING_LIST_T* motif_occurrences = NULL; /* Strings describing occurrences of motifs */ BOOLEAN_T has_reverse_strand = FALSE; /* MEME file contained both strands */ ARRAY_T* background; /* Background probs for alphabet. */ ORDER_T* order_spacing; /* Linear HMM order and spacing. */ MATRIX_T* transp_freq = NULL; /* Matrix of inter-motif transitions freqs. */ MATRIX_T* spacer_ave = NULL; /* Matrix of average spacer lengths. */ MHMM_T * the_hmm = NULL; /* The HMM being constructed. */ /* Command line parameters. */ char * meme_filename; /* Input file containg motifs. */ char * hmm_type_str; /* HMM type. */ HMM_T hmm_type; STRING_LIST_T* requested_motifs; /* Indices of requested motifs. */ int request_n; /* The user asked for the first n motifs. */ double e_threshold; /* E-value threshold for motif inclusion. */ double complexity_threshold; // For eliminating low-complexity motifs. double p_threshold; /* p-value threshold for motif occurences. */ char* order_string; /* Motif order and spacing. */ int spacer_states; /* Number of states in each spacer. */ BOOLEAN_T fim; /* Represent spacers as free insertion modules? */ BOOLEAN_T keep_unused; // Drop unused inter-motif transitions? double trans_pseudo; /* Transition pseudocount. */ double spacer_pseudo; // Spacer (self-loop) pseudocount. */ char* description; // Descriptive text to be stored in model. BOOLEAN_T print_header; /* Print file header? */ BOOLEAN_T print_params; /* Print parameter summary? */ BOOLEAN_T print_time; /* Print timing data (dummy: always false). */ /* Local variables. */ int i_motif; /********************************************** * COMMAND LINE PROCESSING **********************************************/ // Define command line options. cmdoption const options[] = { {"type", OPTIONAL_VALUE}, {"description", REQUIRED_VALUE}, {"motif", REQUIRED_VALUE}, {"nmotifs", REQUIRED_VALUE}, {"ethresh", REQUIRED_VALUE}, {"lowcomp", REQUIRED_VALUE}, {"pthresh", REQUIRED_VALUE}, {"order", REQUIRED_VALUE}, {"nspacer", REQUIRED_VALUE}, {"fim", NO_VALUE}, {"keep-unused", NO_VALUE}, {"transpseudo", REQUIRED_VALUE}, {"spacerpseudo", REQUIRED_VALUE}, {"verbosity", REQUIRED_VALUE}, {"noheader", NO_VALUE}, {"noparams", NO_VALUE}, {"notime", NO_VALUE}, {"quiet", NO_VALUE}, }; int option_count = 18; int option_index = 0; // Define the usage message. char usage[1000] = ""; strcat(usage, "USAGE: mhmm [options] <MEME file>\n"); strcat(usage, "\n"); strcat(usage, " Options:\n"); strcat(usage, " --type [linear|complete|star] (default=linear)\n"); strcat(usage, " --description <string> (may be repeated)\n"); strcat(usage, " --motif <motif #> (may be repeated)\n"); strcat(usage, " --nmotifs <#>\n"); strcat(usage, " --ethresh <E-value>\n"); strcat(usage, " --lowcomp <value>\n"); strcat(usage, " --pthresh <p-value>\n"); strcat(usage, " --order <string>\n"); strcat(usage, " --nspacer <spacer length> (default=1)\n"); strcat(usage, " --fim\n"); strcat(usage, " --keep-unused\n"); strcat(usage, " --transpseudo <pseudocount>\n"); strcat(usage, " --spacerpseudo <pseudocount>\n"); strcat(usage, " --verbosity 1|2|3|4|5 (default=2)\n"); strcat(usage, " --noheader\n"); strcat(usage, " --noparams\n"); strcat(usage, " --notime\n"); strcat(usage, " --quiet\n"); strcat(usage, "\n"); /* Make sure various options are set to NULL or defaults. */ meme_filename = NULL; hmm_type_str = NULL; hmm_type = INVALID_HMM; requested_motifs = new_string_list(); request_n = 0; e_threshold = 0.0; complexity_threshold = 0.0; p_threshold = 0.0; order_string = NULL; spacer_states = DEFAULT_SPACER_STATES, fim = FALSE; keep_unused = FALSE; trans_pseudo = DEFAULT_TRANS_PSEUDO; spacer_pseudo = DEFAULT_SPACER_PSEUDO; description = NULL; print_header = TRUE; print_params = TRUE; print_time = FALSE; simple_setopt(argc, argv, option_count, options); // Parse the command line. while (1) { int c = 0; char* option_name = NULL; char* option_value = NULL; const char * message = NULL; // Read the next option, and break if we're done. c = simple_getopt(&option_name, &option_value, &option_index); if (c == 0) { break; } else if (c < 0) { simple_getopterror(&message); die("Error processing command line options (%s)\n", message); } if (strcmp(option_name, "type") == 0) { if (option_value != NULL) { hmm_type_str = option_value; } } else if (strcmp(option_name, "description") == 0) { description = option_value; } else if (strcmp(option_name, "motif") == 0) { add_string(option_value, requested_motifs); } else if (strcmp(option_name, "nmotifs") == 0) { request_n = atoi(option_value); } else if (strcmp(option_name, "ethresh") == 0) { e_threshold = atof(option_value); } else if (strcmp(option_name, "lowcomp") == 0) { complexity_threshold = atof(option_value); } else if (strcmp(option_name, "pthresh") == 0) { p_threshold = atof(option_value); } else if (strcmp(option_name, "order") == 0) { order_string = option_value; } else if (strcmp(option_name, "nspacer") == 0) { spacer_states = atoi(option_value); } else if (strcmp(option_name, "fim") == 0) { fim = TRUE; } else if (strcmp(option_name, "keep-unused") == 0) { keep_unused = TRUE; } else if (strcmp(option_name, "transpseudo") == 0) { trans_pseudo = atof(option_value); } else if (strcmp(option_name, "spacerpseudo") == 0) { spacer_pseudo = atof(option_value); } else if (strcmp(option_name, "verbosity") == 0) { verbosity = (VERBOSE_T)atoi(option_value); } else if (strcmp(option_name, "noheader") == 0) { print_header = FALSE; } else if (strcmp(option_name, "noparams") == 0) { print_params = FALSE; } else if (strcmp(option_name, "notime") == 0) { print_time = FALSE; } else if (strcmp(option_name, "quiet") == 0) { print_header = print_params = print_time = FALSE; verbosity = QUIET_VERBOSE; } } // Read the single required argument. if (option_index + 1 != argc) { fprintf(stderr, "%s", usage); exit(1); } meme_filename = argv[option_index]; // Set up motif requests. if (request_n != 0) { if (get_num_strings(requested_motifs) != 0) { die("Can't combine the -motif and -nmotifs options.\n"); } else { for (i_motif = 0; i_motif < request_n; i_motif++) { char motif_id[MAX_MOTIF_ID_LENGTH + 1]; sprintf(motif_id, "%d", i_motif + 1); add_string(motif_id, requested_motifs); } } } /* Set the model type. */ hmm_type = convert_enum_type_str(hmm_type_str, LINEAR_HMM, HMM_STRS, NUM_HMM_T); /* Gotta have positive spacer length. */ if (spacer_states <= 0) { die("Negative spacer length (%d).\n", spacer_states); } /* Make sure motifs weren't selected redundantly. */ // FIXME: Add tests for complexity threshold. if ((get_num_strings(requested_motifs) != 0) && (e_threshold != 0.0)) { die("Can't use -motif or -nmotifs with -ethresh."); } if ((get_num_strings(requested_motifs) != 0) && (order_string != NULL)) { die("Can't use -motif or -nmotifs with -order."); } if ((order_string != NULL) && (e_threshold != 0.0)) { die("Can't use -ethresh and -order."); } /* Prevent trying to build a complete or star model with ordering. */ if (order_string != NULL) { if (hmm_type == COMPLETE_HMM) die("Can't specify motif order with a completely connected model."); else if (hmm_type == STAR_HMM) die("Can't specify motif order with a star model."); } // Parse the order string. order_spacing = create_order(order_string); /********************************************** * READING THE MOTIFS **********************************************/ BOOLEAN_T read_file = FALSE; double pseudocount = 0; read_meme_file( meme_filename, "motif-file", // Take bg freq. from motif file. pseudocount, REQUIRE_PSPM, &num_motifs, motifs, &motif_occurrences, &has_reverse_strand, &background ); process_raw_motifs_for_model( &num_motifs, motifs, motif_occurrences, requested_motifs, has_reverse_strand, keep_unused, p_threshold, e_threshold, complexity_threshold, &order_spacing, &transp_freq, &spacer_ave, trans_pseudo, spacer_pseudo ); /********************************************** * BUILDING THE HMM **********************************************/ /* Build the motif-based HMM. */ if (hmm_type == LINEAR_HMM) { if (order_spacing != NULL) { reorder_motifs(order_spacing, &num_motifs, motifs); } else { die("No order specified for the motifs.\n" "For the linear model the motif file must contain motif occurence\n" "data or the motif order must be specified using " "the --order option."); } build_linear_hmm( background, order_spacing, spacer_states, motifs, num_motifs, fim, &the_hmm ); } else if (hmm_type == COMPLETE_HMM) { build_complete_hmm( background, spacer_states, motifs, num_motifs, transp_freq, spacer_ave, fim, &the_hmm ); } else if (hmm_type == STAR_HMM) { build_star_hmm( background, spacer_states, motifs, num_motifs, fim, &the_hmm ); } // Add some global information. copy_string(&(the_hmm->motif_file), meme_filename); /********************************************** * WRITING THE HMM **********************************************/ /* Print the header. */ if (print_header) write_header( program, "", description, meme_filename, NULL, NULL, stdout ); /* Write the HMM. */ write_mhmm(verbosity, the_hmm, stdout); /* Print the program parameters. */ if (print_params) { printf("Program parameters for mhmm\n"); printf(" MEME file: %s\n", meme_filename); printf(" Motifs:"); write_string_list(" ", requested_motifs, stdout); printf("\n"); printf(" Model topology: %s\n", convert_enum_type(hmm_type, HMM_STRS, NUM_HMM_T)); printf(" States per spacer: %d\n", spacer_states); printf(" Spacers are free-insertion modules: %s\n", boolean_to_string(fim)); printf("\n"); } free_array(background); free_string_list(requested_motifs); free_order(order_spacing); free_matrix(transp_freq); free_matrix(spacer_ave); for (i_motif = 0; i_motif < num_motifs; i_motif++) free_motif(&(motifs[i_motif])); free_mhmm(the_hmm); return(0); }
/*********************************************************************** * Free dynamic memory used by a given motif and free the structure. * To be useable by collections it takes a void * but expects * a MOTIF_T *. ***********************************************************************/ void destroy_motif (void * a_motif) { free_motif((MOTIF_T*)a_motif); free((MOTIF_T*)a_motif); }