예제 #1
0
/**
 * Creates a motif for a given mod using a simple frequency matrix.
 */
void create_simple_motif(SUMMARY_T* summary,
                         MOMO_OPTIONS_T* options,
                         MOD_INFO_T * mod_info) {
  int i;
  int j;
  
  const char* alph_letters = summary->alph_letters;
  
  // Create the frequency matrix
  MATRIX_T* freqs = NULL;
  freqs = get_count_matrix(freqs, mod_info->seq_list, NULL, options, summary);
  normalize_rows(0.0, freqs);
  
  // Create the motif
  MOTIF_INFO_T* motifinfo = mm_malloc(sizeof(MOTIF_INFO_T));
  motifinfo->motif = allocate_motif(mod_info->mod_name, "", summary->alph, freqs, NULL);
  motifinfo->seqs = arraylst_create();
  for (i = 0; i < arraylst_size(mod_info->seq_list); ++i) {
    SEQ_T* seqobject = options->eliminate_repeats ? hash_get_entry_value(arraylst_get(i, mod_info->seq_list)) : arraylst_get(i, mod_info->seq_list);
    arraylst_add(get_raw_sequence(seqobject), motifinfo->seqs);
  }
  motifinfo->fg_size = arraylst_size(mod_info->seq_list);
  arraylst_add(motifinfo, mod_info->motifinfos);
  
  // clean up
  free_matrix(freqs);
}
예제 #2
0
/***********************************************************************
 * Converts a TRANSFAC motif to a MEME motif.
 * Caller is responsible for freeing the returned MOTIF_T.
 ***********************************************************************/
MOTIF_T *convert_transfac_motif_to_meme_motif(
    char *id,
    int pseudocount,
    ARRAY_T *bg,
    TRANSFAC_MOTIF_T *motif
) {
    MATRIX_T *counts = get_transfac_counts(motif);
    if (counts == NULL) {
        die(
            "Unable to convert TRANSFAC motif %s to MEME motif: "
            "missing counts matrix.",
            id
        );
    };

    // Convert the motif counts to frequencies.
    int num_bases = get_num_cols(counts);
    int motif_width = get_num_rows(counts);
    int motif_position = 0;
    MATRIX_T *freqs = allocate_matrix(motif_width, num_bases);
    for (motif_position = 0; motif_position < motif_width; ++motif_position) {
        int i_base = 0;
        int num_seqs = 0; // motif columns may have different counts
        for (i_base = 0; i_base < num_bases; i_base++) {
            num_seqs += get_matrix_cell(motif_position, i_base, counts);
        }
        for (i_base = 0; i_base < num_bases; i_base++) {
            double freq =
                (get_matrix_cell(motif_position, i_base, counts)
                 + (pseudocount * get_array_item(i_base, bg))) / (num_seqs + pseudocount);
            set_matrix_cell(motif_position, i_base, freq, freqs);
        }
    }

    MOTIF_T *meme_motif = allocate_motif(id, DNA_ALPH, NULL, freqs);
    calc_motif_ambigs(meme_motif);
    return meme_motif;
}
예제 #3
0
/**
 * Recursive function. Creates and stores a motif using the motif-x
 * algorithm until no more are left.
 */
void create_motifx_motif(ARRAYLST_T* phospho_seqs,
                         ARRAYLST_T* bg_seqs,
                         MOTIFX_STATUS_T** phospho_status,
                         MOTIFX_STATUS_T** bg_status,
                         MATRIX_T* phospho_count,
                         MATRIX_T* bg_count,
                         int* num_active,
                         int* num_bg_active,
                         char* modname,
                         MOD_INFO_T* mod_info,
                         MOMO_OPTIONS_T* options,
                         SUMMARY_T* summary) {
  int i;
  int j;
  
  const char* alph_letters = summary->alph_letters;
  
  // Initialize pattern, sequence count, bg sequence count, and overall score for this motif.
  char* pattern = mm_malloc(options->width + 1);
  for (i = 0; i < options->width; ++i) {
    pattern[i] = 'X';
  }
  pattern[options->width] = '\0';
  int* num_active_copy = mm_malloc(sizeof(int));
  *num_active_copy = *num_active;
  int* num_bg_active_copy = mm_malloc(sizeof(int));
  *num_bg_active_copy = *num_bg_active;
  double* motif_score = mm_malloc(sizeof(double));
  *motif_score = 0;
  
  // Set the pattern, num active copy, num bg active copy, motif score, and get a count of the sequences
  MATRIX_T* result_count_matrix = add_to_pattern(pattern, phospho_seqs, bg_seqs, phospho_status, bg_status, num_active_copy, num_bg_active_copy, phospho_count, bg_count, motif_score, summary, options);
  
  // If any of the characters are not X, then we have found a pattern
  BOOLEAN_T found_pattern = FALSE;
  for (i = 0; i < options->width; ++i) {
    if (pattern[i] != 'X') {
      found_pattern = TRUE;
    }
  }
  
  // If there is a pattern, store the pattern and call create_motifx_motif again.
  if (found_pattern) {
    // fill out the rest of the pattern (e.g. if you have pattern ..ASAAA, and realize the actual pattern is A.ASAAA
    for (i = 0; i < options->width; i++) {
      for (j = 0; j < strlen(alph_letters); j++) {
        if ((int) get_matrix_cell_defcheck(i, j, result_count_matrix) == *num_active_copy) {
          pattern[i] = alph_letters[j];
        }
      }
    }
    
    // create the pattern name
    char* pattern_name = mm_malloc(strlen(pattern) + strlen(modname) + 3);
    pattern_name[0] = '\0';
    strncat(pattern_name, pattern, strlen(pattern)/2);
    strncat(pattern_name, "_", 1);
    strncat(pattern_name, modname, strlen(modname));
    strncat(pattern_name, "_", 1);
    strncat(pattern_name, pattern + strlen(pattern)/2 + 1, strlen(pattern)/2);
    
    // convert this count matrix into frequencies
    normalize_rows(0.0, result_count_matrix);
    
    // Store this motif
    MOTIF_INFO_T* motifinfo = mm_malloc(sizeof(MOTIF_INFO_T));
    MOTIF_T* motif = allocate_motif(pattern_name, "", summary->alph, result_count_matrix, NULL);
    set_motif_nsites(motif, *num_active_copy);
    motifinfo->motif = motif;
    motifinfo->seqs = arraylst_create();
    motifinfo->score = *motif_score;
    motifinfo->fg_match = *num_active_copy;
    motifinfo->fg_size = *num_active;
    motifinfo->bg_match = *num_bg_active_copy;
    motifinfo->bg_size = *num_bg_active;
    for (i = 0; i < arraylst_size(phospho_seqs); ++i) {
      MOTIFX_STATUS_T status = (*phospho_status)[i];
      if (status == ACTIVE) {
        SEQ_T* active_sequence = (options->eliminate_repeats) ? hash_get_entry_value(arraylst_get(i, phospho_seqs)) : arraylst_get(i, phospho_seqs);
        arraylst_add(get_raw_sequence(active_sequence), motifinfo->seqs);
      }
    }
    arraylst_add(motifinfo, mod_info->motifinfos);
    
    // delete the sequences from this motif. turn inactive into active.
    delete_sequences(phospho_status, arraylst_size(phospho_seqs));
    delete_sequences(bg_status, arraylst_size(bg_seqs));
    
    // update the count of number of actives
    *num_active = *num_active - *num_active_copy;
    *num_bg_active = *num_bg_active - *num_bg_active_copy;
    
    // recalculate phospho count and bg count.
    phospho_count = get_count_matrix(phospho_count, phospho_seqs, phospho_status, options, summary);
    bg_count = get_count_matrix(bg_count, bg_seqs, bg_status, options, summary);
    
    // free up space
    myfree(pattern);
    myfree(num_active_copy);
    myfree(num_bg_active_copy);
    myfree(motif_score);
    myfree(pattern_name);
    
    // try to create another motif.
    create_motifx_motif(phospho_seqs,
                        bg_seqs,
                        phospho_status,
                        bg_status,
                        phospho_count,
                        bg_count,
                        num_active,
                        num_bg_active,
                        modname,
                        mod_info,
                        options,
                        summary);
  }
  // free up space
  myfree(pattern);
  myfree(num_active_copy);
  myfree(num_bg_active_copy);
  myfree(motif_score);
}