Exemplo n.º 1
0
/***********************************************************************
 * Finishes a motif in place so that it carries both a frequency matrix
 * (motif->freqs) and a score matrix (motif->scores), then runs the
 * pseudocount, complexity, ambiguity and trimming steps.
 *
 * mread - reader state; supplies motif_bg, pseudo_bg, pseudo_total,
 *         options, trim and trim_bits.
 * motif - motif to finish; may be NULL.
 *
 * Returns the same motif pointer, or NULL when motif is NULL.
 * Calls die() when the motif has neither freqs nor scores.
 ***********************************************************************/
static MOTIF_T* post_process_motif(MREAD_T *mread, MOTIF_T *motif) {
  int site_count;
  if (motif == NULL) return NULL;
  assert(motif->alph != INVALID_ALPH);
  // Normalize before anything is derived from the frequencies.
  if (motif->freqs) normalize_motif(motif, 0.00001);
  // Fall back to DEFAULT_SITES when the motif does not report a positive
  // number of sites.
  site_count = (motif->num_sites > 0 ? motif->num_sites : DEFAULT_SITES);
  if (motif->freqs != NULL && motif->scores != NULL) {
    // Both matrices supplied: nothing to derive.
    // validate? May not be possible for protein motifs as MEME tweaks the PSSM
  } else if (motif->scores != NULL) {
    // Only scores supplied: calculate the freqs.
    motif->freqs = convert_scores_into_freqs(motif->alph, motif->scores, 
        mread->motif_bg, site_count, PSEUDO);
  } else if (motif->freqs != NULL) {
    // Only freqs supplied: calculate the scores.
    motif->scores = convert_freqs_into_scores(motif->alph, motif->freqs,
        mread->motif_bg, site_count, PSEUDO);
  } else {
    die("Motif with no PSPM or PSSM should not get here!\n");
  }

  apply_pseudocount_to_motif(motif, mread->pseudo_bg, mread->pseudo_total);
  motif->complexity = compute_motif_complexity(motif);
  // Ambiguity calculation and trimming are opt-in via the reader settings.
  if (mread->options & CALC_AMBIGS) calc_motif_ambigs(motif);
  if (mread->trim) trim_motif_by_bit_threshold(motif, mread->trim_bits);
  return motif;
}
Exemplo n.º 2
0
/***********************************************************************
 * Converts a TRANSFAC motif to a MEME motif.
 *
 * id          - identifier passed to allocate_motif and used in the
 *               error message.
 * pseudocount - total pseudocount added at each motif position; it is
 *               split across the bases in proportion to bg.
 * bg          - background values, indexed by base.
 * motif       - TRANSFAC motif whose counts matrix is converted.
 *
 * Calls die() when the TRANSFAC motif has no counts matrix.
 * Caller is responsible for freeing the returned MOTIF_T.
 ***********************************************************************/
MOTIF_T *convert_transfac_motif_to_meme_motif(
    char *id,
    int pseudocount,
    ARRAY_T *bg,
    TRANSFAC_MOTIF_T *motif
) {
    MATRIX_T *counts = get_transfac_counts(motif);
    if (counts == NULL) {
        die(
            "Unable to convert TRANSFAC motif %s to MEME motif: "
            "missing counts matrix.",
            id
        );
    }
    // NOTE(review): neither counts nor freqs is released in this function.
    // Whether that leaks depends on the ownership rules of
    // get_transfac_counts and allocate_motif -- confirm against their
    // definitions.

    // Convert the motif counts to frequencies.
    int num_bases = get_num_cols(counts);
    int motif_width = get_num_rows(counts);
    int motif_position = 0;
    MATRIX_T *freqs = allocate_matrix(motif_width, num_bases);
    for (motif_position = 0; motif_position < motif_width; ++motif_position) {
        int i_base = 0;
        // Motif columns may have different counts, so total each position
        // separately. The total is kept as a double: accumulating into an
        // int would truncate after every addition and make the denominator
        // too small whenever the counts are fractional.
        double column_total = 0.0;
        for (i_base = 0; i_base < num_bases; i_base++) {
            column_total += get_matrix_cell(motif_position, i_base, counts);
        }
        // NOTE(review): if a position's counts sum to zero and pseudocount
        // is 0 the division below is by zero -- confirm callers never pass
        // such input.
        double denominator = column_total + pseudocount;
        for (i_base = 0; i_base < num_bases; i_base++) {
            double weighted_count =
                get_matrix_cell(motif_position, i_base, counts)
                + (pseudocount * get_array_item(i_base, bg));
            set_matrix_cell(
                motif_position, i_base, weighted_count / denominator, freqs
            );
        }
    }

    MOTIF_T *meme_motif = allocate_motif(id, DNA_ALPH, NULL, freqs);
    calc_motif_ambigs(meme_motif);
    return meme_motif;
}