/**
 * Look up a sample by name in the ht_seq_names hash table.
 *
 * Returns the value stored in the matching entry, cast to SAMPLE*,
 * or NULL when the lookup finds no entry for sample_name.
 */
SAMPLE *get_sample_by_name(
  char *sample_name
)
{
  HASH_TABLE_ENTRY *found = hash_lookup_str(sample_name, ht_seq_names);

  // No entry under this name.
  if (found == NULL) {
    return(NULL);
  }

  return((SAMPLE *) hash_get_entry_value(found));
} // get_sample_by_name
/**
 * Creates a motif for a given mod using a simple frequency matrix.
 *
 * Builds a count matrix over mod_info->seq_list, normalizes its rows,
 * wraps it in a motif named after mod_info->mod_name, records the raw
 * sequences that contributed, and appends the resulting MOTIF_INFO_T to
 * mod_info->motifinfos.
 *
 * When options->eliminate_repeats is set, the entries of seq_list are
 * treated as hash table entries whose value is the sequence object;
 * otherwise they are used as the sequence objects directly.
 */
void create_simple_motif(SUMMARY_T* summary, MOMO_OPTIONS_T* options, MOD_INFO_T * mod_info) {
  int i;
  // The list is not modified below, so its size is loop-invariant.
  const int num_seqs = arraylst_size(mod_info->seq_list);

  // Create the frequency matrix
  MATRIX_T* freqs = NULL;
  freqs = get_count_matrix(freqs, mod_info->seq_list, NULL, options, summary);
  normalize_rows(0.0, freqs);

  // Create the motif
  MOTIF_INFO_T* motifinfo = mm_malloc(sizeof(MOTIF_INFO_T));
  motifinfo->motif = allocate_motif(mod_info->mod_name, "", summary->alph, freqs, NULL);
  motifinfo->seqs = arraylst_create();

  // create_motifx_motif fills in these fields for its motifs. Nothing here
  // shows mm_malloc zeroing memory, so give them defined values instead of
  // leaving them indeterminate.
  // NOTE(review): zero is a neutral default; confirm no consumer expects
  // another value for simple motifs.
  motifinfo->score = 0;
  motifinfo->fg_match = 0;
  motifinfo->bg_match = 0;
  motifinfo->bg_size = 0;

  for (i = 0; i < num_seqs; ++i) {
    SEQ_T* seqobject = options->eliminate_repeats
      ? hash_get_entry_value(arraylst_get(i, mod_info->seq_list))
      : arraylst_get(i, mod_info->seq_list);
    arraylst_add(get_raw_sequence(seqobject), motifinfo->seqs);
  }
  motifinfo->fg_size = num_seqs;

  arraylst_add(motifinfo, mod_info->motifinfos);

  // clean up
  // NOTE(review): freqs is released right after being handed to
  // allocate_motif, so allocate_motif is presumed to keep its own copy.
  free_matrix(freqs);
}
/**
 * Deactivate the sequences in a list that do not match a pattern position,
 * then recompute the list's count matrix.
 *
 * Every sequence whose status is ACTIVE and whose character at index `pos`
 * differs from `letter` is marked INACTIVE in *status_array, and
 * *num_active is decremented once for each such sequence. Afterwards the
 * count matrix is rebuilt via get_count_matrix using the updated statuses.
 *
 * When options->eliminate_repeats is set, list items are treated as hash
 * table entries whose value is the sequence; otherwise they are the
 * sequences themselves.
 */
void remove_sequences_and_update_matrix(char letter, int pos, ARRAYLST_T* seqs, MOTIFX_STATUS_T** status_array, int* num_active, MATRIX_T* count, SUMMARY_T* summary, MOMO_OPTIONS_T* options) {
  const int total = arraylst_size(seqs);
  int idx;

  for (idx = 0; idx < total; ++idx) {
    // Resolve the list item to its sequence object.
    SEQ_T* seq;
    if (options->eliminate_repeats) {
      seq = (SEQ_T*) hash_get_entry_value((HASH_TABLE_ENTRY*) arraylst_get(idx, seqs));
    } else {
      seq = (SEQ_T*) arraylst_get(idx, seqs);
    }
    char* residues = get_raw_sequence(seq);

    // Only sequences that are still active can be turned inactive.
    MOTIFX_STATUS_T* slot = &(*status_array)[idx];
    if (*slot != ACTIVE) {
      continue;
    }
    // Matching sequences stay active.
    if (residues[pos] == letter) {
      continue;
    }

    --(*num_active);
    *slot = INACTIVE;
  }

  // The assignment only rebinds the local parameter; the caller's pointer
  // is not changed by it.
  count = get_count_matrix(count, seqs, status_array, options, summary);
}
/** * Recursive function. Creates and stores a motif using the motif-x * algorithm until no more are left. */ void create_motifx_motif(ARRAYLST_T* phospho_seqs, ARRAYLST_T* bg_seqs, MOTIFX_STATUS_T** phospho_status, MOTIFX_STATUS_T** bg_status, MATRIX_T* phospho_count, MATRIX_T* bg_count, int* num_active, int* num_bg_active, char* modname, MOD_INFO_T* mod_info, MOMO_OPTIONS_T* options, SUMMARY_T* summary) { int i; int j; const char* alph_letters = summary->alph_letters; // Initialize pattern, sequence count, bg sequence count, and overall score for this motif. char* pattern = mm_malloc(options->width + 1); for (i = 0; i < options->width; ++i) { pattern[i] = 'X'; } pattern[options->width] = '\0'; int* num_active_copy = mm_malloc(sizeof(int)); *num_active_copy = *num_active; int* num_bg_active_copy = mm_malloc(sizeof(int)); *num_bg_active_copy = *num_bg_active; double* motif_score = mm_malloc(sizeof(double)); *motif_score = 0; // Set the pattern, num active copy, num bg active copy, motif score, and get a count of the sequences MATRIX_T* result_count_matrix = add_to_pattern(pattern, phospho_seqs, bg_seqs, phospho_status, bg_status, num_active_copy, num_bg_active_copy, phospho_count, bg_count, motif_score, summary, options); // If any of the characters are not X, then we have found a pattern BOOLEAN_T found_pattern = FALSE; for (i = 0; i < options->width; ++i) { if (pattern[i] != 'X') { found_pattern = TRUE; } } // If there is a pattern, store the pattern and call create_motifx_motif again. if (found_pattern) { // fill out the rest of the pattern (e.g. 
if you have pattern ..ASAAA, and realize the actual pattern is A.ASAAA for (i = 0; i < options->width; i++) { for (j = 0; j < strlen(alph_letters); j++) { if ((int) get_matrix_cell_defcheck(i, j, result_count_matrix) == *num_active_copy) { pattern[i] = alph_letters[j]; } } } // create the pattern name char* pattern_name = mm_malloc(strlen(pattern) + strlen(modname) + 3); pattern_name[0] = '\0'; strncat(pattern_name, pattern, strlen(pattern)/2); strncat(pattern_name, "_", 1); strncat(pattern_name, modname, strlen(modname)); strncat(pattern_name, "_", 1); strncat(pattern_name, pattern + strlen(pattern)/2 + 1, strlen(pattern)/2); // convert this count matrix into frequencies normalize_rows(0.0, result_count_matrix); // Store this motif MOTIF_INFO_T* motifinfo = mm_malloc(sizeof(MOTIF_INFO_T)); MOTIF_T* motif = allocate_motif(pattern_name, "", summary->alph, result_count_matrix, NULL); set_motif_nsites(motif, *num_active_copy); motifinfo->motif = motif; motifinfo->seqs = arraylst_create(); motifinfo->score = *motif_score; motifinfo->fg_match = *num_active_copy; motifinfo->fg_size = *num_active; motifinfo->bg_match = *num_bg_active_copy; motifinfo->bg_size = *num_bg_active; for (i = 0; i < arraylst_size(phospho_seqs); ++i) { MOTIFX_STATUS_T status = (*phospho_status)[i]; if (status == ACTIVE) { SEQ_T* active_sequence = (options->eliminate_repeats) ? hash_get_entry_value(arraylst_get(i, phospho_seqs)) : arraylst_get(i, phospho_seqs); arraylst_add(get_raw_sequence(active_sequence), motifinfo->seqs); } } arraylst_add(motifinfo, mod_info->motifinfos); // delete the sequences from this motif. turn inactive into active. delete_sequences(phospho_status, arraylst_size(phospho_seqs)); delete_sequences(bg_status, arraylst_size(bg_seqs)); // update the count of number of actives *num_active = *num_active - *num_active_copy; *num_bg_active = *num_bg_active - *num_bg_active_copy; // recalculate phospho count and bg count. 
phospho_count = get_count_matrix(phospho_count, phospho_seqs, phospho_status, options, summary); bg_count = get_count_matrix(bg_count, bg_seqs, bg_status, options, summary); // free up space myfree(pattern); myfree(num_active_copy); myfree(num_bg_active_copy); myfree(motif_score); myfree(pattern_name); // try to create another motif. create_motifx_motif(phospho_seqs, bg_seqs, phospho_status, bg_status, phospho_count, bg_count, num_active, num_bg_active, modname, mod_info, options, summary); } // free up space myfree(pattern); myfree(num_active_copy); myfree(num_bg_active_copy); myfree(motif_score); }