Exemplo n.º 1
0
/*
 * Look up a sample by name in the ht_seq_names hash table.
 *
 * Returns the value stored in the matching hash entry, cast to SAMPLE *,
 * or NULL when hash_lookup_str() finds no entry for sample_name.
 */
SAMPLE *get_sample_by_name(
  char *sample_name
)
{
  HASH_TABLE_ENTRY *entry = hash_lookup_str(sample_name, ht_seq_names);

  // No entry under this name: report "not found" to the caller.
  if (entry == NULL) {
    return NULL;
  }

  return (SAMPLE *) hash_get_entry_value(entry);
} // get_sample_by_name
Exemplo n.º 2
0
/**
 * Creates a motif for a given mod using a simple frequency matrix.
 *
 * A count matrix is built from mod_info->seq_list, normalized row by row,
 * and handed to allocate_motif(). The resulting MOTIF_INFO_T also records
 * the raw sequence of every entry in seq_list and is appended to
 * mod_info->motifinfos.
 *
 * summary:  supplies the alphabet (summary->alph) for the motif.
 * options:  options->eliminate_repeats selects how seq_list entries are
 *           interpreted: as hash table entries whose value is the sequence
 *           when set, otherwise as the sequences themselves.
 * mod_info: source of the mod name and sequence list; receives the motif.
 */
void create_simple_motif(SUMMARY_T* summary,
                         MOMO_OPTIONS_T* options,
                         MOD_INFO_T * mod_info) {
  int i;
  // The list is not modified below, so its size is read once.
  int num_seqs = arraylst_size(mod_info->seq_list);

  // Create the frequency matrix
  MATRIX_T* freqs = NULL;
  freqs = get_count_matrix(freqs, mod_info->seq_list, NULL, options, summary);
  normalize_rows(0.0, freqs);

  // Create the motif
  // NOTE(review): only motif, seqs and fg_size are assigned here; any other
  // MOTIF_INFO_T fields keep whatever mm_malloc returned. Confirm that no
  // reader relies on them for motifs built by this function.
  MOTIF_INFO_T* motifinfo = mm_malloc(sizeof(MOTIF_INFO_T));
  motifinfo->motif = allocate_motif(mod_info->mod_name, "", summary->alph, freqs, NULL);
  motifinfo->seqs = arraylst_create();
  for (i = 0; i < num_seqs; ++i) {
    void* item = arraylst_get(i, mod_info->seq_list);
    SEQ_T* seqobject = options->eliminate_repeats ? hash_get_entry_value(item) : item;
    arraylst_add(get_raw_sequence(seqobject), motifinfo->seqs);
  }
  motifinfo->fg_size = num_seqs;
  arraylst_add(motifinfo, mod_info->motifinfos);

  // clean up
  // (freqs is released here; presumably allocate_motif keeps its own copy
  // -- TODO confirm)
  free_matrix(freqs);
}
Exemplo n.º 3
0
/**
 * Deactivates every active sequence that does not have `letter` at position
 * `pos`, then recomputes the count matrix for the sequence list.
 *
 * letter:       character required at position pos.
 * pos:          index into each raw sequence; not range-checked here, so the
 *               caller must pass a valid position.
 * seqs:         list of sequences; entries are hash table entries when
 *               options->eliminate_repeats is set, otherwise SEQ_T objects.
 * status_array: pointer to the per-sequence status array, indexed like seqs.
 *               ACTIVE entries that fail the match become INACTIVE.
 * num_active:   decremented once for each sequence that is deactivated.
 * count:        count matrix passed to get_count_matrix() for refreshing.
 * summary, options: forwarded to get_count_matrix().
 */
void remove_sequences_and_update_matrix(char letter,
                                        int pos,
                                        ARRAYLST_T* seqs,
                                        MOTIFX_STATUS_T** status_array,
                                        int* num_active,
                                        MATRIX_T* count,
                                        SUMMARY_T* summary,
                                        MOMO_OPTIONS_T* options) {

  int i;
  // The list itself is never resized here, only statuses change.
  int num_seqs = arraylst_size(seqs);

  // Look through the sequences and deactivate the ones that do not match.
  for (i = 0; i < num_seqs; ++i) {
    char* curr_seq = get_raw_sequence((SEQ_T*) (options->eliminate_repeats ? hash_get_entry_value((HASH_TABLE_ENTRY*) arraylst_get(i, seqs)) : arraylst_get(i, seqs)));
    // For anything active that does not match the pattern, turn it inactive.
    MOTIFX_STATUS_T status = (*status_array)[i];
    if (status == ACTIVE && curr_seq[pos] != letter) {
      *num_active = *num_active - 1;
      (*status_array)[i] = INACTIVE;
    }
  }

  // The returned pointer is intentionally discarded: `count` is passed by
  // value, so assigning to it could never reach the caller. This presumably
  // relies on get_count_matrix() refilling a non-NULL matrix in place
  // -- TODO confirm against its definition.
  (void) get_count_matrix(count, seqs, status_array, options, summary);
}
Exemplo n.º 4
0
/**
 * Recursive function. Creates and stores a motif using the motif-x
 * algorithm until no more are left.
 *
 * Each call starts from an all-'X' pattern of options->width characters and
 * passes it to add_to_pattern() together with private copies of the active
 * counts and a score accumulator. If any position of the pattern is no
 * longer 'X' afterwards, a motif is built from the returned count matrix,
 * appended to mod_info->motifinfos, the status arrays and active counts are
 * updated, the count matrices are recomputed, and the function calls itself
 * again. Otherwise the recursion ends.
 *
 * phospho_seqs / bg_seqs:     foreground and background sequence lists.
 * phospho_status / bg_status: pointers to the per-sequence status arrays.
 * phospho_count / bg_count:   count matrices for the two lists.
 * num_active / num_bg_active: in/out; reduced by the number of sequences
 *                             matched by each motif that is stored.
 * modname:                    mod name, embedded in the motif name.
 * mod_info:                   receives the created MOTIF_INFO_T objects.
 * options, summary:           width, eliminate_repeats flag and alphabet.
 */
void create_motifx_motif(ARRAYLST_T* phospho_seqs,
                         ARRAYLST_T* bg_seqs,
                         MOTIFX_STATUS_T** phospho_status,
                         MOTIFX_STATUS_T** bg_status,
                         MATRIX_T* phospho_count,
                         MATRIX_T* bg_count,
                         int* num_active,
                         int* num_bg_active,
                         char* modname,
                         MOD_INFO_T* mod_info,
                         MOMO_OPTIONS_T* options,
                         SUMMARY_T* summary) {
  int i;
  int j;

  const char* alph_letters = summary->alph_letters;
  const int width = options->width;

  // Initialize pattern, sequence count, bg sequence count, and overall score for this motif.
  char* pattern = mm_malloc(width + 1);
  for (i = 0; i < width; ++i) {
    pattern[i] = 'X';
  }
  pattern[width] = '\0';

  // Scratch values live on the stack and are passed by address, so they
  // need no release and cannot be freed twice.
  int num_active_copy = *num_active;
  int num_bg_active_copy = *num_bg_active;
  double motif_score = 0;

  // Set the pattern, num active copy, num bg active copy, motif score, and get a count of the sequences
  // NOTE(review): result_count_matrix is never released in this function;
  // confirm whether allocate_motif() takes ownership of it or whether it
  // should be freed here.
  MATRIX_T* result_count_matrix = add_to_pattern(pattern, phospho_seqs, bg_seqs, phospho_status, bg_status, &num_active_copy, &num_bg_active_copy, phospho_count, bg_count, &motif_score, summary, options);

  // If any of the characters are not X, then we have found a pattern
  BOOLEAN_T found_pattern = FALSE;
  for (i = 0; i < width; ++i) {
    if (pattern[i] != 'X') {
      found_pattern = TRUE;
      break;
    }
  }

  // If there is a pattern, store the pattern and prepare the next round.
  if (found_pattern) {
    // fill out the rest of the pattern (e.g. if you have pattern ..ASAAA, and realize the actual pattern is A.ASAAA
    // A position is fixed to a letter when every remaining active sequence
    // has that letter there (cell count equals the active count).
    const int num_letters = (int) strlen(alph_letters);
    for (i = 0; i < width; i++) {
      for (j = 0; j < num_letters; j++) {
        if ((int) get_matrix_cell_defcheck(i, j, result_count_matrix) == num_active_copy) {
          pattern[i] = alph_letters[j];
        }
      }
    }

    // create the pattern name: <left half>_<modname>_<right half>, where the
    // right half starts one character past the midpoint of the pattern.
    const size_t pattern_len = strlen(pattern);
    const size_t half_len = pattern_len / 2;
    const size_t modname_len = strlen(modname);
    char* pattern_name = mm_malloc(pattern_len + modname_len + 3);
    pattern_name[0] = '\0';
    strncat(pattern_name, pattern, half_len);
    strncat(pattern_name, "_", 1);
    strncat(pattern_name, modname, modname_len);
    strncat(pattern_name, "_", 1);
    strncat(pattern_name, pattern + half_len + 1, half_len);

    // convert this count matrix into frequencies
    normalize_rows(0.0, result_count_matrix);

    // Store this motif
    MOTIF_INFO_T* motifinfo = mm_malloc(sizeof(MOTIF_INFO_T));
    MOTIF_T* motif = allocate_motif(pattern_name, "", summary->alph, result_count_matrix, NULL);
    set_motif_nsites(motif, num_active_copy);
    motifinfo->motif = motif;
    motifinfo->seqs = arraylst_create();
    motifinfo->score = motif_score;
    motifinfo->fg_match = num_active_copy;
    motifinfo->fg_size = *num_active;
    motifinfo->bg_match = num_bg_active_copy;
    motifinfo->bg_size = *num_bg_active;
    const int num_phospho_seqs = arraylst_size(phospho_seqs);
    for (i = 0; i < num_phospho_seqs; ++i) {
      MOTIFX_STATUS_T status = (*phospho_status)[i];
      if (status == ACTIVE) {
        SEQ_T* active_sequence = (options->eliminate_repeats) ? hash_get_entry_value(arraylst_get(i, phospho_seqs)) : arraylst_get(i, phospho_seqs);
        arraylst_add(get_raw_sequence(active_sequence), motifinfo->seqs);
      }
    }
    arraylst_add(motifinfo, mod_info->motifinfos);

    // delete the sequences from this motif. turn inactive into active.
    delete_sequences(phospho_status, num_phospho_seqs);
    delete_sequences(bg_status, arraylst_size(bg_seqs));

    // update the count of number of actives
    *num_active = *num_active - num_active_copy;
    *num_bg_active = *num_bg_active - num_bg_active_copy;

    // recalculate phospho count and bg count.
    phospho_count = get_count_matrix(phospho_count, phospho_seqs, phospho_status, options, summary);
    bg_count = get_count_matrix(bg_count, bg_seqs, bg_status, options, summary);

    myfree(pattern_name);
  }

  // free up space: the pattern buffer is released exactly once on every
  // path, and before recursing so that nested calls do not keep it alive.
  myfree(pattern);

  // try to create another motif.
  if (found_pattern) {
    create_motifx_motif(phospho_seqs,
                        bg_seqs,
                        phospho_status,
                        bg_status,
                        phospho_count,
                        bg_count,
                        num_active,
                        num_bg_active,
                        modname,
                        mod_info,
                        options,
                        summary);
  }
}