/***********************************************************************
 * Finishes off a freshly read motif so that it carries both a frequency
 * matrix (freqs) and a score matrix (scores), then applies the reader's
 * post-processing options.
 *
 * Steps, in order:
 *   1. normalise the frequency matrix when one is present;
 *   2. derive whichever of freqs/scores is missing from the other one,
 *      using the reader's motif background and the motif's site count
 *      (DEFAULT_SITES when the motif reports no sites);
 *   3. apply the reader's pseudocount settings;
 *   4. compute the motif complexity;
 *   5. optionally calculate ambiguous-symbol values (CALC_AMBIGS option);
 *   6. optionally trim the motif by the reader's bit threshold.
 *
 * mread  the motif reader supplying backgrounds and options.
 * motif  the motif to process; modified in place. May be NULL.
 *
 * Returns the same motif pointer that was passed in, or NULL when motif
 * is NULL.
 ***********************************************************************/
static MOTIF_T* post_process_motif(MREAD_T *mread, MOTIF_T *motif) {
  // tolerance handed to normalize_motif()
  const double normalize_tolerance = 0.00001;
  int site_count;

  if (motif == NULL) return NULL;
  assert(motif->alph != INVALID_ALPH);

  if (motif->freqs != NULL) normalize_motif(motif, normalize_tolerance);

  site_count = (motif->num_sites > 0 ? motif->num_sites : DEFAULT_SITES);

  if (motif->freqs == NULL && motif->scores == NULL) {
    die("Motif with no PSPM or PSSM should not get here!\n");
  } else if (motif->freqs == NULL) {
    // only the scores were supplied: calculate the freqs
    motif->freqs = convert_scores_into_freqs(motif->alph, motif->scores,
        mread->motif_bg, site_count, PSEUDO);
  } else if (motif->scores == NULL) {
    // only the freqs were supplied: calculate the scores
    motif->scores = convert_freqs_into_scores(motif->alph, motif->freqs,
        mread->motif_bg, site_count, PSEUDO);
  }
  // When both matrices were supplied nothing is derived.
  // validate? May not be possible for protein motifs as MEME tweaks the PSSM

  apply_pseudocount_to_motif(motif, mread->pseudo_bg, mread->pseudo_total);
  motif->complexity = compute_motif_complexity(motif);

  if (mread->options & CALC_AMBIGS) calc_motif_ambigs(motif);
  if (mread->trim) trim_motif_by_bit_threshold(motif, mread->trim_bits);

  return motif;
}
/***********************************************************************
 * Converts a TRANSFAC motif to a MEME motif.
 *
 * Each row of the TRANSFAC counts matrix (one row per motif position,
 * one column per base) is turned into frequencies:
 *
 *   freq = (count + pseudocount * bg[base]) / (row_total + pseudocount)
 *
 * The total is computed per row because motif columns may have
 * different counts.
 *
 * id           identifier given to the new motif; also used in the
 *              error message.
 * pseudocount  total pseudocount, distributed according to bg.
 * bg           background frequencies, indexed by base.
 * motif        the TRANSFAC motif to convert.
 *
 * Calls die() if the TRANSFAC motif has no counts matrix.
 *
 * Caller is responsible for freeing the returned MOTIF_T.
 *
 * NOTE(review): the matrices held in `counts` and `freqs` are not
 * released here. Whether they should be depends on whether
 * get_transfac_counts() returns a copy and whether allocate_motif()
 * copies its matrix argument - confirm against those functions.
 ***********************************************************************/
MOTIF_T *convert_transfac_motif_to_meme_motif(
  char *id,
  int pseudocount,
  ARRAY_T *bg,
  TRANSFAC_MOTIF_T *motif
) {
  MATRIX_T *counts = get_transfac_counts(motif);
  if (counts == NULL) {
    die(
      "Unable to convert TRANSFAC motif %s to MEME motif: "
      "missing counts matrix.",
      id
    );
  }

  // Convert the motif counts to frequencies.
  int num_bases = get_num_cols(counts);
  int motif_width = get_num_rows(counts);
  MATRIX_T *freqs = allocate_matrix(motif_width, num_bases);

  int motif_position;
  for (motif_position = 0; motif_position < motif_width; ++motif_position) {
    int i_base;

    // Motif columns may have different counts, so total each one.
    // The total is kept as a double so that non-integer counts are not
    // truncated on every addition (assumes matrix cells are a floating
    // type; for whole-number counts the result is unchanged).
    double num_seqs = 0.0;
    for (i_base = 0; i_base < num_bases; i_base++) {
      num_seqs += get_matrix_cell(motif_position, i_base, counts);
    }

    // The denominator is the same for every base at this position.
    double denominator = num_seqs + pseudocount;

    for (i_base = 0; i_base < num_bases; i_base++) {
      double bg_freq = get_array_item(i_base, bg);
      double freq;
      if (denominator != 0.0) {
        freq = (get_matrix_cell(motif_position, i_base, counts)
            + (pseudocount * bg_freq)) / denominator;
      } else {
        // An empty column with no pseudocount would be a division by
        // zero; use the background frequency, which is what the formula
        // gives for an empty column with any non-zero pseudocount.
        freq = bg_freq;
      }
      set_matrix_cell(motif_position, i_base, freq, freqs);
    }
  }

  MOTIF_T *meme_motif = allocate_motif(id, DNA_ALPH, NULL, freqs);
  calc_motif_ambigs(meme_motif);
  return meme_motif;
}