/**
 * Creates a motif for a given mod using a simple frequency matrix.
 *
 * A count matrix is built over every sequence in mod_info->seq_list,
 * its rows are normalized, and a new MOTIF_INFO_T holding the resulting
 * motif plus the raw sequences is appended to mod_info->motifinfos.
 *
 * @param summary  supplies the alphabet passed to allocate_motif; also
 *                 forwarded to get_count_matrix.
 * @param options  options->eliminate_repeats selects whether list entries
 *                 are unwrapped with hash_get_entry_value; also forwarded
 *                 to get_count_matrix.
 * @param mod_info mod whose seq_list is read and whose motifinfos list
 *                 receives the new entry.
 */
void create_simple_motif(SUMMARY_T* summary,
                         MOMO_OPTIONS_T* options,
                         MOD_INFO_T * mod_info) {
  int i;
  ARRAYLST_T* seq_list = mod_info->seq_list;

  // Create the frequency matrix
  MATRIX_T* freqs = NULL;
  freqs = get_count_matrix(freqs, seq_list, NULL, options, summary);
  normalize_rows(0.0, freqs);

  // Create the motif
  MOTIF_INFO_T* motifinfo = mm_malloc(sizeof(MOTIF_INFO_T));
  motifinfo->motif = allocate_motif(mod_info->mod_name, "", summary->alph, freqs, NULL);
  motifinfo->seqs = arraylst_create();

  // Record the raw sequence of every entry. The list is not modified
  // inside the loop, so its size is read once.
  const int num_seqs = arraylst_size(seq_list);
  for (i = 0; i < num_seqs; ++i) {
    SEQ_T* seqobject = options->eliminate_repeats
      ? hash_get_entry_value(arraylst_get(i, seq_list))
      : arraylst_get(i, seq_list);
    arraylst_add(get_raw_sequence(seqobject), motifinfo->seqs);
  }
  motifinfo->fg_size = num_seqs;

  // These members are only given values by create_motifx_motif. The simple
  // algorithm has nothing to store in them, so set them to zero instead of
  // leaving whatever the allocator returned.
  motifinfo->score = 0.0;
  motifinfo->fg_match = 0;
  motifinfo->bg_match = 0;
  motifinfo->bg_size = 0;

  arraylst_add(motifinfo, mod_info->motifinfos);

  // clean up
  free_matrix(freqs);
}
/***********************************************************************
 * Converts a TRANSFAC motif to a MEME motif.
 *
 * Every row of the TRANSFAC counts matrix (rows are motif positions,
 * columns are bases) is converted to frequencies with
 *
 *   freq = (count + pseudocount * bg[base]) / (row_total + pseudocount)
 *
 * id          - identifier given to the new motif; also reported in the
 *               error message.
 * pseudocount - pseudocount added to each row, split over the bases
 *               in proportion to bg.
 * bg          - background values indexed by base. Read for every cell,
 *               so it must not be NULL.
 * motif       - TRANSFAC motif supplying the counts matrix.
 *
 * Calls die() when the motif has no counts matrix.
 * Caller is responsible for freeing the returned MOTIF_T.
 *
 * NOTE(review): counts and freqs are not released here. Whether
 * get_transfac_counts returns a copy and whether allocate_motif copies
 * freqs cannot be seen from this function - confirm ownership.
 ***********************************************************************/
MOTIF_T *convert_transfac_motif_to_meme_motif(
  char *id,
  int pseudocount,
  ARRAY_T *bg,
  TRANSFAC_MOTIF_T *motif
) {
  MATRIX_T *counts = get_transfac_counts(motif);
  if (counts == NULL) {
    die(
      "Unable to convert TRANSFAC motif %s to MEME motif: "
      "missing counts matrix.",
      id
    );
  }

  // Convert the motif counts to frequencies.
  int num_bases = get_num_cols(counts);
  int motif_width = get_num_rows(counts);
  int motif_position = 0;
  MATRIX_T *freqs = allocate_matrix(motif_width, num_bases);
  for (motif_position = 0; motif_position < motif_width; ++motif_position) {
    int i_base = 0;

    // Motif positions may have different totals, so each row is divided
    // by its own sum. The sum is kept in floating point: an integer
    // accumulator would be truncated after every addition and lose
    // fractional counts.
    double row_total = 0.0;
    for (i_base = 0; i_base < num_bases; i_base++) {
      row_total += get_matrix_cell(motif_position, i_base, counts);
    }
    const double denominator = row_total + pseudocount;

    for (i_base = 0; i_base < num_bases; i_base++) {
      const double bg_freq = get_array_item(i_base, bg);
      double freq;
      if (denominator != 0.0) {
        freq = (get_matrix_cell(motif_position, i_base, counts)
                + (pseudocount * bg_freq)) / denominator;
      } else {
        // Empty row and no pseudocount: the formula would be 0/0.
        // For an empty row any positive pseudocount yields exactly the
        // background value, so use that instead of storing NaN.
        freq = bg_freq;
      }
      set_matrix_cell(motif_position, i_base, freq, freqs);
    }
  }

  MOTIF_T *meme_motif = allocate_motif(id, DNA_ALPH, NULL, freqs);
  calc_motif_ambigs(meme_motif);
  return meme_motif;
}
/** * Recursive function. Creates and stores a motif using the motif-x * algorithm until no more are left. */ void create_motifx_motif(ARRAYLST_T* phospho_seqs, ARRAYLST_T* bg_seqs, MOTIFX_STATUS_T** phospho_status, MOTIFX_STATUS_T** bg_status, MATRIX_T* phospho_count, MATRIX_T* bg_count, int* num_active, int* num_bg_active, char* modname, MOD_INFO_T* mod_info, MOMO_OPTIONS_T* options, SUMMARY_T* summary) { int i; int j; const char* alph_letters = summary->alph_letters; // Initialize pattern, sequence count, bg sequence count, and overall score for this motif. char* pattern = mm_malloc(options->width + 1); for (i = 0; i < options->width; ++i) { pattern[i] = 'X'; } pattern[options->width] = '\0'; int* num_active_copy = mm_malloc(sizeof(int)); *num_active_copy = *num_active; int* num_bg_active_copy = mm_malloc(sizeof(int)); *num_bg_active_copy = *num_bg_active; double* motif_score = mm_malloc(sizeof(double)); *motif_score = 0; // Set the pattern, num active copy, num bg active copy, motif score, and get a count of the sequences MATRIX_T* result_count_matrix = add_to_pattern(pattern, phospho_seqs, bg_seqs, phospho_status, bg_status, num_active_copy, num_bg_active_copy, phospho_count, bg_count, motif_score, summary, options); // If any of the characters are not X, then we have found a pattern BOOLEAN_T found_pattern = FALSE; for (i = 0; i < options->width; ++i) { if (pattern[i] != 'X') { found_pattern = TRUE; } } // If there is a pattern, store the pattern and call create_motifx_motif again. if (found_pattern) { // fill out the rest of the pattern (e.g. 
if you have pattern ..ASAAA, and realize the actual pattern is A.ASAAA for (i = 0; i < options->width; i++) { for (j = 0; j < strlen(alph_letters); j++) { if ((int) get_matrix_cell_defcheck(i, j, result_count_matrix) == *num_active_copy) { pattern[i] = alph_letters[j]; } } } // create the pattern name char* pattern_name = mm_malloc(strlen(pattern) + strlen(modname) + 3); pattern_name[0] = '\0'; strncat(pattern_name, pattern, strlen(pattern)/2); strncat(pattern_name, "_", 1); strncat(pattern_name, modname, strlen(modname)); strncat(pattern_name, "_", 1); strncat(pattern_name, pattern + strlen(pattern)/2 + 1, strlen(pattern)/2); // convert this count matrix into frequencies normalize_rows(0.0, result_count_matrix); // Store this motif MOTIF_INFO_T* motifinfo = mm_malloc(sizeof(MOTIF_INFO_T)); MOTIF_T* motif = allocate_motif(pattern_name, "", summary->alph, result_count_matrix, NULL); set_motif_nsites(motif, *num_active_copy); motifinfo->motif = motif; motifinfo->seqs = arraylst_create(); motifinfo->score = *motif_score; motifinfo->fg_match = *num_active_copy; motifinfo->fg_size = *num_active; motifinfo->bg_match = *num_bg_active_copy; motifinfo->bg_size = *num_bg_active; for (i = 0; i < arraylst_size(phospho_seqs); ++i) { MOTIFX_STATUS_T status = (*phospho_status)[i]; if (status == ACTIVE) { SEQ_T* active_sequence = (options->eliminate_repeats) ? hash_get_entry_value(arraylst_get(i, phospho_seqs)) : arraylst_get(i, phospho_seqs); arraylst_add(get_raw_sequence(active_sequence), motifinfo->seqs); } } arraylst_add(motifinfo, mod_info->motifinfos); // delete the sequences from this motif. turn inactive into active. delete_sequences(phospho_status, arraylst_size(phospho_seqs)); delete_sequences(bg_status, arraylst_size(bg_seqs)); // update the count of number of actives *num_active = *num_active - *num_active_copy; *num_bg_active = *num_bg_active - *num_bg_active_copy; // recalculate phospho count and bg count. 
phospho_count = get_count_matrix(phospho_count, phospho_seqs, phospho_status, options, summary); bg_count = get_count_matrix(bg_count, bg_seqs, bg_status, options, summary); // free up space myfree(pattern); myfree(num_active_copy); myfree(num_bg_active_copy); myfree(motif_score); myfree(pattern_name); // try to create another motif. create_motifx_motif(phospho_seqs, bg_seqs, phospho_status, bg_status, phospho_count, bg_count, num_active, num_bg_active, modname, mod_info, options, summary); } // free up space myfree(pattern); myfree(num_active_copy); myfree(num_bg_active_copy); myfree(motif_score); }