Esempio n. 1
0
/***********************************************************************
 * Discard motifs that are not connected.
 ***********************************************************************/
static void throw_out_unused_motifs
  (MATRIX_T* transp_freq,
   MATRIX_T* spacer_ave,
   int*      num_motifs,
   MOTIF_T*  motifs)
{
  int i_motif, j_motif;
  ARRAY_T* row_sums;
  ARRAY_T* col_sums;

  // Extract the margins of the transition matrix.
  row_sums = get_matrix_row_sums(transp_freq);
  col_sums = get_matrix_col_sums(transp_freq);

  for (i_motif = 0; i_motif < *num_motifs; i_motif++) {

    // Is this row or column empty?
    if ((get_array_item(i_motif + 1, row_sums) == 0.0) ||
      (get_array_item(i_motif + 1, col_sums) == 0.0)) {

      if (verbosity >= NORMAL_VERBOSE) {
        fprintf(stderr, "Removing unused motif %s. No occurrences of this motif were found.\n",
        get_motif_id(&(motifs[i_motif])));
      }

      // Remove the row and column from the transition matrix.
      remove_matrix_row(i_motif + 1, transp_freq);
      remove_matrix_col(i_motif + 1, transp_freq);
      assert(get_num_rows(transp_freq) == get_num_cols(transp_freq));

      remove_matrix_row(i_motif + 1, spacer_ave);
      remove_matrix_col(i_motif + 1, spacer_ave);
      assert(get_num_rows(spacer_ave) == get_num_cols(spacer_ave));

      // Remove the motif from the array.
      for (j_motif = i_motif + 1; j_motif < *num_motifs; j_motif++) {
        free_motif(&(motifs[j_motif - 1]));
        copy_motif(&(motifs[j_motif]), &(motifs[j_motif - 1]));
      }
      free_motif(&(motifs[j_motif - 1]));
      (*num_motifs)--;
      i_motif--;

      // Recalculate the row and column sums.
      free_array(row_sums);
      free_array(col_sums);
      row_sums = get_matrix_row_sums(transp_freq);
      col_sums = get_matrix_col_sums(transp_freq);

    }
  }

  free_array(row_sums);
  free_array(col_sums);
}
Esempio n. 2
0
File: motif.c Progetto: CPFL/gmeme
/***********************************************************************
 * Free an array of motifs
 ***********************************************************************/
void free_motif_array(MOTIF_T *motif_array, int num) {
  int i;
  for (i = 0; i < num; ++i) {
    free_motif(motif_array+i);
  }
  free(motif_array);
}
Esempio n. 3
0
/**************************************************************************
 * Destroys a secondary motif. 
 * It takes a void * pointer so it can be used in the collection objects.
 **************************************************************************/
void destroy_secondary_motif(void *p) {
  int i;
  SECONDARY_MOTIF_T *smotif = (SECONDARY_MOTIF_T*)p;
  free_motif(smotif->motif);
  free(smotif->motif);
  for (i = 0; i < 4; ++i) destroy_spacings((smotif->spacings)+i);
  if (smotif->sigs) free(smotif->sigs);
  if (smotif->seqs) free(smotif->seqs);
  free(smotif);
}
Esempio n. 4
0
/*************************************************************************
 * int main
 *************************************************************************/
int main(int argc, char *argv[])
{
  /* Data structures. */
  int       num_motifs;         /* The number of motifs in the model. */
  MOTIF_T   motifs[2 * MAX_MOTIFS]; /* The motifs. */
  STRING_LIST_T* motif_occurrences = NULL; /* Strings describing occurrences of
                                              motifs */
  BOOLEAN_T has_reverse_strand = FALSE;    /* MEME file contained both strands */
  ARRAY_T*  background;         /* Background probs for alphabet. */
  ORDER_T*  order_spacing;      /* Linear HMM order and spacing. */
  MATRIX_T* transp_freq = NULL; /* Matrix of inter-motif transitions freqs. */
  MATRIX_T* spacer_ave = NULL;  /* Matrix of average spacer lengths. */
  MHMM_T *  the_hmm = NULL;     /* The HMM being constructed. */

  /* Command line parameters. */
  char *    meme_filename;      /* Input file containg motifs. */
  char *    hmm_type_str;       /* HMM type. */
  HMM_T     hmm_type;
  STRING_LIST_T* requested_motifs; /* Indices of requested motifs. */
  int       request_n;          /* The user asked for the first n motifs. */
  double    e_threshold;        /* E-value threshold for motif inclusion. */
  double    complexity_threshold; // For eliminating low-complexity motifs.
  double    p_threshold;        /* p-value threshold for motif occurences. */
  char*     order_string;       /* Motif order and spacing. */
  int       spacer_states;      /* Number of states in each spacer. */
  BOOLEAN_T fim;                /* Represent spacers as free insertion
				   modules? */
  BOOLEAN_T keep_unused;        // Drop unused inter-motif transitions?
  double    trans_pseudo;       /* Transition pseudocount. */
  double    spacer_pseudo;      // Spacer (self-loop) pseudocount. */
  char*     description;        // Descriptive text to be stored in model.
  BOOLEAN_T print_header;       /* Print file header? */
  BOOLEAN_T print_params;       /* Print parameter summary? */
  BOOLEAN_T print_time;         /* Print timing data (dummy: always false). */

  /* Local variables. */
  int       i_motif;

  /**********************************************
   * COMMAND LINE PROCESSING
   **********************************************/
  // Define command line options.
  cmdoption const options[] = {
    {"type", OPTIONAL_VALUE},
    {"description", REQUIRED_VALUE},
    {"motif", REQUIRED_VALUE},
    {"nmotifs", REQUIRED_VALUE},
    {"ethresh", REQUIRED_VALUE},
    {"lowcomp", REQUIRED_VALUE},
    {"pthresh", REQUIRED_VALUE},
    {"order", REQUIRED_VALUE},
    {"nspacer", REQUIRED_VALUE},
    {"fim", NO_VALUE},
    {"keep-unused", NO_VALUE},
    {"transpseudo", REQUIRED_VALUE},
    {"spacerpseudo", REQUIRED_VALUE},
    {"verbosity", REQUIRED_VALUE},
    {"noheader", NO_VALUE},
    {"noparams", NO_VALUE},
    {"notime", NO_VALUE},
    {"quiet", NO_VALUE},
  };
  int option_count = 18;
  int option_index = 0;

  // Define the usage message.
  char      usage[1000] = "";
  strcat(usage, "USAGE: mhmm [options] <MEME file>\n");
  strcat(usage, "\n");
  strcat(usage, "   Options:\n");
  strcat(usage, "     --type [linear|complete|star] (default=linear)\n");
  strcat(usage, "     --description <string> (may be repeated)\n");
  strcat(usage, "     --motif <motif #> (may be repeated)\n");
  strcat(usage, "     --nmotifs <#>\n");
  strcat(usage, "     --ethresh <E-value>\n");
  strcat(usage, "     --lowcomp <value>\n");
  strcat(usage, "     --pthresh <p-value>\n");
  strcat(usage, "     --order <string>\n");
  strcat(usage, "     --nspacer <spacer length> (default=1)\n");
  strcat(usage, "     --fim\n");
  strcat(usage, "     --keep-unused\n");
  strcat(usage, "     --transpseudo <pseudocount>\n");
  strcat(usage, "     --spacerpseudo <pseudocount>\n");
  strcat(usage, "     --verbosity 1|2|3|4|5 (default=2)\n");
  strcat(usage, "     --noheader\n");
  strcat(usage, "     --noparams\n");
  strcat(usage, "     --notime\n");
  strcat(usage, "     --quiet\n");
  strcat(usage, "\n");

  /* Make sure various options are set to NULL or defaults. */
  meme_filename = NULL;
  hmm_type_str = NULL;
  hmm_type = INVALID_HMM;
  requested_motifs = new_string_list();
  request_n = 0;
  e_threshold = 0.0;
  complexity_threshold = 0.0;
  p_threshold = 0.0;
  order_string = NULL;
  spacer_states = DEFAULT_SPACER_STATES,
  fim = FALSE;
  keep_unused = FALSE;
  trans_pseudo = DEFAULT_TRANS_PSEUDO;
  spacer_pseudo = DEFAULT_SPACER_PSEUDO;
  description = NULL;
  print_header = TRUE;
  print_params = TRUE;
  print_time = FALSE;

	simple_setopt(argc, argv, option_count, options);

  // Parse the command line.
  while (1) { 
    int c = 0;
    char* option_name = NULL;
    char* option_value = NULL;
    const char * message = NULL;


    // Read the next option, and break if we're done.
    c = simple_getopt(&option_name, &option_value, &option_index);
    if (c == 0) {
      break;
    } else if (c < 0) {
    	simple_getopterror(&message);
      die("Error processing command line options (%s)\n", message);
    }

    if (strcmp(option_name, "type") == 0) {
			if (option_value != NULL) {
      	hmm_type_str = option_value;
			}
    } else if (strcmp(option_name, "description") == 0) {
      description = option_value;
    } else if (strcmp(option_name, "motif") == 0) {
      add_string(option_value, requested_motifs);
    } else if (strcmp(option_name, "nmotifs") == 0) {
      request_n = atoi(option_value);
    } else if (strcmp(option_name, "ethresh") == 0) {
      e_threshold = atof(option_value);
    } else if (strcmp(option_name, "lowcomp") == 0) {
      complexity_threshold = atof(option_value);
    } else if (strcmp(option_name, "pthresh") == 0) {
      p_threshold = atof(option_value);
    } else if (strcmp(option_name, "order") == 0) {
      order_string = option_value;
    } else if (strcmp(option_name, "nspacer") == 0) {
      spacer_states = atoi(option_value);
    } else if (strcmp(option_name, "fim") == 0) {
      fim = TRUE;
    } else if (strcmp(option_name, "keep-unused") == 0) {
      keep_unused = TRUE;
    } else if (strcmp(option_name, "transpseudo") == 0) {
      trans_pseudo = atof(option_value);
    } else if (strcmp(option_name, "spacerpseudo") == 0) {
      spacer_pseudo = atof(option_value);
    } else if (strcmp(option_name, "verbosity") == 0) {
      verbosity = (VERBOSE_T)atoi(option_value);
    } else if (strcmp(option_name, "noheader") == 0) {
      print_header = FALSE;
    } else if (strcmp(option_name, "noparams") == 0) {
      print_params = FALSE;
    } else if (strcmp(option_name, "notime") == 0) {
      print_time = FALSE;
    } else if (strcmp(option_name, "quiet") == 0) {
      print_header = print_params = print_time = FALSE;
      verbosity = QUIET_VERBOSE;
    }
  }

  // Read the single required argument.
  if (option_index + 1 != argc) {
    fprintf(stderr, "%s", usage);
    exit(1);
  }
  meme_filename = argv[option_index];

  // Set up motif requests. 
  if (request_n != 0) {
    if (get_num_strings(requested_motifs) != 0) {
      die("Can't combine the -motif and -nmotifs options.\n");
    } else {
      for (i_motif = 0; i_motif < request_n; i_motif++) {
        char motif_id[MAX_MOTIF_ID_LENGTH + 1];
        sprintf(motif_id, "%d", i_motif + 1);
        add_string(motif_id, requested_motifs);
      }
    }
  }

  /* Set the model type. */
  hmm_type = convert_enum_type_str(hmm_type_str, LINEAR_HMM, HMM_STRS, 
				   NUM_HMM_T);

  /* Gotta have positive spacer length. */
  if (spacer_states <= 0) {
    die("Negative spacer length (%d).\n", spacer_states);
  }

  /* Make sure motifs weren't selected redundantly. */
  // FIXME: Add tests for complexity threshold.
  if ((get_num_strings(requested_motifs) != 0) && (e_threshold != 0.0)) {
    die("Can't use -motif or -nmotifs with -ethresh.");
  }
  if ((get_num_strings(requested_motifs) != 0) && (order_string != NULL)) {
    die("Can't use -motif or -nmotifs with -order.");
  }
  if ((order_string != NULL) && (e_threshold != 0.0)) {
    die("Can't use -ethresh and -order.");
  }

  /* Prevent trying to build a complete or star model with ordering. */
  if (order_string != NULL) {
    if (hmm_type == COMPLETE_HMM) 
      die("Can't specify motif order with a completely connected model.");
    else if (hmm_type == STAR_HMM)
      die("Can't specify motif order with a star model.");
  } 

  // Parse the order string. 
  order_spacing = create_order(order_string);

  /**********************************************
   * READING THE MOTIFS
   **********************************************/

  BOOLEAN_T read_file = FALSE;
  double pseudocount = 0;

  read_meme_file(
		 meme_filename,
		 "motif-file", // Take bg freq. from motif file.
		 pseudocount,
     REQUIRE_PSPM,
		 &num_motifs,
		 motifs,
		 &motif_occurrences,
		 &has_reverse_strand,
		 &background
		 );

  process_raw_motifs_for_model(
       &num_motifs,
       motifs,
       motif_occurrences,
       requested_motifs,
       has_reverse_strand,
       keep_unused,
       p_threshold,
       e_threshold, 
       complexity_threshold, 
       &order_spacing,
       &transp_freq,
       &spacer_ave,
       trans_pseudo,
       spacer_pseudo
  );

  /**********************************************
   * BUILDING THE HMM
   **********************************************/

  /* Build the motif-based HMM. */
  if (hmm_type == LINEAR_HMM) {

    if (order_spacing != NULL) {
      reorder_motifs(order_spacing, &num_motifs, motifs);
    }
    else {
      die("No order specified for the motifs.\n"
          "For the linear model the motif file must contain motif occurence\n" 
          "data or the motif order must be specified using "
          "the --order option.");
    }

    build_linear_hmm(
      background,
		  order_spacing,
		  spacer_states,
		  motifs,
		  num_motifs, 
		  fim,
		  &the_hmm
    );

  } else if (hmm_type == COMPLETE_HMM) {

    build_complete_hmm(
      background,
		  spacer_states,
		  motifs,
		  num_motifs,
		  transp_freq,
		  spacer_ave,
		  fim,
		  &the_hmm
    );

  } else if (hmm_type == STAR_HMM) {

    build_star_hmm(
      background,
		  spacer_states,
		  motifs,
		  num_motifs,
		  fim,
		  &the_hmm
    );

  }

  // Add some global information.
  copy_string(&(the_hmm->motif_file), meme_filename);

  /**********************************************
   * WRITING THE HMM
   **********************************************/

  /* Print the header. */
  if (print_header)
    write_header(
     program, 
     "",
		 description,
		 meme_filename,
		 NULL,
		 NULL, 
		 stdout
    );

  /* Write the HMM. */
  write_mhmm(verbosity, the_hmm, stdout);

  /* Print the program parameters. */
  if (print_params) {
    printf("Program parameters for mhmm\n");
    printf("  MEME file: %s\n", meme_filename);
    printf("  Motifs:");
    write_string_list(" ", requested_motifs, stdout);
    printf("\n");
    printf("  Model topology: %s\n",
	   convert_enum_type(hmm_type, HMM_STRS, NUM_HMM_T));
    printf("  States per spacer: %d\n", spacer_states);
    printf("  Spacers are free-insertion modules: %s\n",
	   boolean_to_string(fim));
    printf("\n");
  }

  free_array(background);
  free_string_list(requested_motifs);
  free_order(order_spacing);
  free_matrix(transp_freq);
  free_matrix(spacer_ave);
  for (i_motif = 0; i_motif < num_motifs; i_motif++)
    free_motif(&(motifs[i_motif]));
  free_mhmm(the_hmm);
  return(0);
}
Esempio n. 5
0
File: motif.c Progetto: CPFL/gmeme
/***********************************************************************
 * Free dynamic memory used by a given motif and free the structure.
 * To be useable by collections it takes a void * but expects
 * a MOTIF_T *.
 ***********************************************************************/
void destroy_motif
  (void * a_motif)
{
  free_motif((MOTIF_T*)a_motif);
  free((MOTIF_T*)a_motif);
}