Exemplo n.º 1
0
/*************************************************************************
 * int main
 *************************************************************************/
int main(int argc, char *argv[])
{
  /* Data structures. */
  int       num_motifs;         /* The number of motifs in the model. */
  MOTIF_T   motifs[2 * MAX_MOTIFS]; /* The motifs. */
  STRING_LIST_T* motif_occurrences = NULL; /* Strings describing occurrences of
                                              motifs */
  BOOLEAN_T has_reverse_strand = FALSE;    /* MEME file contained both strands */
  ARRAY_T*  background;         /* Background probs for alphabet. */
  ORDER_T*  order_spacing;      /* Linear HMM order and spacing. */
  MATRIX_T* transp_freq = NULL; /* Matrix of inter-motif transitions freqs. */
  MATRIX_T* spacer_ave = NULL;  /* Matrix of average spacer lengths. */
  MHMM_T *  the_hmm = NULL;     /* The HMM being constructed. */

  /* Command line parameters. */
  char *    meme_filename;      /* Input file containg motifs. */
  char *    hmm_type_str;       /* HMM type. */
  HMM_T     hmm_type;
  STRING_LIST_T* requested_motifs; /* Indices of requested motifs. */
  int       request_n;          /* The user asked for the first n motifs. */
  double    e_threshold;        /* E-value threshold for motif inclusion. */
  double    complexity_threshold; // For eliminating low-complexity motifs.
  double    p_threshold;        /* p-value threshold for motif occurences. */
  char*     order_string;       /* Motif order and spacing. */
  int       spacer_states;      /* Number of states in each spacer. */
  BOOLEAN_T fim;                /* Represent spacers as free insertion
				   modules? */
  BOOLEAN_T keep_unused;        // Drop unused inter-motif transitions?
  double    trans_pseudo;       /* Transition pseudocount. */
  double    spacer_pseudo;      // Spacer (self-loop) pseudocount. */
  char*     description;        // Descriptive text to be stored in model.
  BOOLEAN_T print_header;       /* Print file header? */
  BOOLEAN_T print_params;       /* Print parameter summary? */
  BOOLEAN_T print_time;         /* Print timing data (dummy: always false). */

  /* Local variables. */
  int       i_motif;

  /**********************************************
   * COMMAND LINE PROCESSING
   **********************************************/
  // Define command line options.
  cmdoption const options[] = {
    {"type", OPTIONAL_VALUE},
    {"description", REQUIRED_VALUE},
    {"motif", REQUIRED_VALUE},
    {"nmotifs", REQUIRED_VALUE},
    {"ethresh", REQUIRED_VALUE},
    {"lowcomp", REQUIRED_VALUE},
    {"pthresh", REQUIRED_VALUE},
    {"order", REQUIRED_VALUE},
    {"nspacer", REQUIRED_VALUE},
    {"fim", NO_VALUE},
    {"keep-unused", NO_VALUE},
    {"transpseudo", REQUIRED_VALUE},
    {"spacerpseudo", REQUIRED_VALUE},
    {"verbosity", REQUIRED_VALUE},
    {"noheader", NO_VALUE},
    {"noparams", NO_VALUE},
    {"notime", NO_VALUE},
    {"quiet", NO_VALUE},
  };
  int option_count = 18;
  int option_index = 0;

  // Define the usage message.
  char      usage[1000] = "";
  strcat(usage, "USAGE: mhmm [options] <MEME file>\n");
  strcat(usage, "\n");
  strcat(usage, "   Options:\n");
  strcat(usage, "     --type [linear|complete|star] (default=linear)\n");
  strcat(usage, "     --description <string> (may be repeated)\n");
  strcat(usage, "     --motif <motif #> (may be repeated)\n");
  strcat(usage, "     --nmotifs <#>\n");
  strcat(usage, "     --ethresh <E-value>\n");
  strcat(usage, "     --lowcomp <value>\n");
  strcat(usage, "     --pthresh <p-value>\n");
  strcat(usage, "     --order <string>\n");
  strcat(usage, "     --nspacer <spacer length> (default=1)\n");
  strcat(usage, "     --fim\n");
  strcat(usage, "     --keep-unused\n");
  strcat(usage, "     --transpseudo <pseudocount>\n");
  strcat(usage, "     --spacerpseudo <pseudocount>\n");
  strcat(usage, "     --verbosity 1|2|3|4|5 (default=2)\n");
  strcat(usage, "     --noheader\n");
  strcat(usage, "     --noparams\n");
  strcat(usage, "     --notime\n");
  strcat(usage, "     --quiet\n");
  strcat(usage, "\n");

  /* Make sure various options are set to NULL or defaults. */
  meme_filename = NULL;
  hmm_type_str = NULL;
  hmm_type = INVALID_HMM;
  requested_motifs = new_string_list();
  request_n = 0;
  e_threshold = 0.0;
  complexity_threshold = 0.0;
  p_threshold = 0.0;
  order_string = NULL;
  spacer_states = DEFAULT_SPACER_STATES,
  fim = FALSE;
  keep_unused = FALSE;
  trans_pseudo = DEFAULT_TRANS_PSEUDO;
  spacer_pseudo = DEFAULT_SPACER_PSEUDO;
  description = NULL;
  print_header = TRUE;
  print_params = TRUE;
  print_time = FALSE;

	simple_setopt(argc, argv, option_count, options);

  // Parse the command line.
  while (1) { 
    int c = 0;
    char* option_name = NULL;
    char* option_value = NULL;
    const char * message = NULL;


    // Read the next option, and break if we're done.
    c = simple_getopt(&option_name, &option_value, &option_index);
    if (c == 0) {
      break;
    } else if (c < 0) {
    	simple_getopterror(&message);
      die("Error processing command line options (%s)\n", message);
    }

    if (strcmp(option_name, "type") == 0) {
			if (option_value != NULL) {
      	hmm_type_str = option_value;
			}
    } else if (strcmp(option_name, "description") == 0) {
      description = option_value;
    } else if (strcmp(option_name, "motif") == 0) {
      add_string(option_value, requested_motifs);
    } else if (strcmp(option_name, "nmotifs") == 0) {
      request_n = atoi(option_value);
    } else if (strcmp(option_name, "ethresh") == 0) {
      e_threshold = atof(option_value);
    } else if (strcmp(option_name, "lowcomp") == 0) {
      complexity_threshold = atof(option_value);
    } else if (strcmp(option_name, "pthresh") == 0) {
      p_threshold = atof(option_value);
    } else if (strcmp(option_name, "order") == 0) {
      order_string = option_value;
    } else if (strcmp(option_name, "nspacer") == 0) {
      spacer_states = atoi(option_value);
    } else if (strcmp(option_name, "fim") == 0) {
      fim = TRUE;
    } else if (strcmp(option_name, "keep-unused") == 0) {
      keep_unused = TRUE;
    } else if (strcmp(option_name, "transpseudo") == 0) {
      trans_pseudo = atof(option_value);
    } else if (strcmp(option_name, "spacerpseudo") == 0) {
      spacer_pseudo = atof(option_value);
    } else if (strcmp(option_name, "verbosity") == 0) {
      verbosity = (VERBOSE_T)atoi(option_value);
    } else if (strcmp(option_name, "noheader") == 0) {
      print_header = FALSE;
    } else if (strcmp(option_name, "noparams") == 0) {
      print_params = FALSE;
    } else if (strcmp(option_name, "notime") == 0) {
      print_time = FALSE;
    } else if (strcmp(option_name, "quiet") == 0) {
      print_header = print_params = print_time = FALSE;
      verbosity = QUIET_VERBOSE;
    }
  }

  // Read the single required argument.
  if (option_index + 1 != argc) {
    fprintf(stderr, "%s", usage);
    exit(1);
  }
  meme_filename = argv[option_index];

  // Set up motif requests. 
  if (request_n != 0) {
    if (get_num_strings(requested_motifs) != 0) {
      die("Can't combine the -motif and -nmotifs options.\n");
    } else {
      for (i_motif = 0; i_motif < request_n; i_motif++) {
        char motif_id[MAX_MOTIF_ID_LENGTH + 1];
        sprintf(motif_id, "%d", i_motif + 1);
        add_string(motif_id, requested_motifs);
      }
    }
  }

  /* Set the model type. */
  hmm_type = convert_enum_type_str(hmm_type_str, LINEAR_HMM, HMM_STRS, 
				   NUM_HMM_T);

  /* Gotta have positive spacer length. */
  if (spacer_states <= 0) {
    die("Negative spacer length (%d).\n", spacer_states);
  }

  /* Make sure motifs weren't selected redundantly. */
  // FIXME: Add tests for complexity threshold.
  if ((get_num_strings(requested_motifs) != 0) && (e_threshold != 0.0)) {
    die("Can't use -motif or -nmotifs with -ethresh.");
  }
  if ((get_num_strings(requested_motifs) != 0) && (order_string != NULL)) {
    die("Can't use -motif or -nmotifs with -order.");
  }
  if ((order_string != NULL) && (e_threshold != 0.0)) {
    die("Can't use -ethresh and -order.");
  }

  /* Prevent trying to build a complete or star model with ordering. */
  if (order_string != NULL) {
    if (hmm_type == COMPLETE_HMM) 
      die("Can't specify motif order with a completely connected model.");
    else if (hmm_type == STAR_HMM)
      die("Can't specify motif order with a star model.");
  } 

  // Parse the order string. 
  order_spacing = create_order(order_string);

  /**********************************************
   * READING THE MOTIFS
   **********************************************/

  BOOLEAN_T read_file = FALSE;
  double pseudocount = 0;

  read_meme_file(
		 meme_filename,
		 "motif-file", // Take bg freq. from motif file.
		 pseudocount,
     REQUIRE_PSPM,
		 &num_motifs,
		 motifs,
		 &motif_occurrences,
		 &has_reverse_strand,
		 &background
		 );

  process_raw_motifs_for_model(
       &num_motifs,
       motifs,
       motif_occurrences,
       requested_motifs,
       has_reverse_strand,
       keep_unused,
       p_threshold,
       e_threshold, 
       complexity_threshold, 
       &order_spacing,
       &transp_freq,
       &spacer_ave,
       trans_pseudo,
       spacer_pseudo
  );

  /**********************************************
   * BUILDING THE HMM
   **********************************************/

  /* Build the motif-based HMM. */
  if (hmm_type == LINEAR_HMM) {

    if (order_spacing != NULL) {
      reorder_motifs(order_spacing, &num_motifs, motifs);
    }
    else {
      die("No order specified for the motifs.\n"
          "For the linear model the motif file must contain motif occurence\n" 
          "data or the motif order must be specified using "
          "the --order option.");
    }

    build_linear_hmm(
      background,
		  order_spacing,
		  spacer_states,
		  motifs,
		  num_motifs, 
		  fim,
		  &the_hmm
    );

  } else if (hmm_type == COMPLETE_HMM) {

    build_complete_hmm(
      background,
		  spacer_states,
		  motifs,
		  num_motifs,
		  transp_freq,
		  spacer_ave,
		  fim,
		  &the_hmm
    );

  } else if (hmm_type == STAR_HMM) {

    build_star_hmm(
      background,
		  spacer_states,
		  motifs,
		  num_motifs,
		  fim,
		  &the_hmm
    );

  }

  // Add some global information.
  copy_string(&(the_hmm->motif_file), meme_filename);

  /**********************************************
   * WRITING THE HMM
   **********************************************/

  /* Print the header. */
  if (print_header)
    write_header(
     program, 
     "",
		 description,
		 meme_filename,
		 NULL,
		 NULL, 
		 stdout
    );

  /* Write the HMM. */
  write_mhmm(verbosity, the_hmm, stdout);

  /* Print the program parameters. */
  if (print_params) {
    printf("Program parameters for mhmm\n");
    printf("  MEME file: %s\n", meme_filename);
    printf("  Motifs:");
    write_string_list(" ", requested_motifs, stdout);
    printf("\n");
    printf("  Model topology: %s\n",
	   convert_enum_type(hmm_type, HMM_STRS, NUM_HMM_T));
    printf("  States per spacer: %d\n", spacer_states);
    printf("  Spacers are free-insertion modules: %s\n",
	   boolean_to_string(fim));
    printf("\n");
  }

  free_array(background);
  free_string_list(requested_motifs);
  free_order(order_spacing);
  free_matrix(transp_freq);
  free_matrix(spacer_ave);
  for (i_motif = 0; i_motif < num_motifs; i_motif++)
    free_motif(&(motifs[i_motif]));
  free_mhmm(the_hmm);
  return(0);
}
Exemplo n.º 2
0
static bool JsonValueToProtoField(const BUTIL_RAPIDJSON_NAMESPACE::Value& value,
                                  const google::protobuf::FieldDescriptor* field,
                                  google::protobuf::Message* message,
                                  const Json2PbOptions& options,
                                  std::string* err) {
    if (value.IsNull()) {
        if (field->is_required()) {
            J2PERROR(err, "Missing required field: %s", field->full_name().c_str());
            return false;
        }
        return true;
    }
        
    if (field->is_repeated()) {
        if (!value.IsArray()) {
            J2PERROR(err, "Invalid value for repeated field: %s",
                     field->full_name().c_str());
            return false;
        }
    } 

    const google::protobuf::Reflection* reflection = message->GetReflection();
    switch (field->cpp_type()) {
#define CASE_FIELD_TYPE(cpptype, method, jsontype)                      \
        case google::protobuf::FieldDescriptor::CPPTYPE_##cpptype: {                      \
            if (field->is_repeated()) {                                 \
                const BUTIL_RAPIDJSON_NAMESPACE::SizeType size = value.Size();          \
                for (BUTIL_RAPIDJSON_NAMESPACE::SizeType index = 0; index < size; ++index) { \
                    const BUTIL_RAPIDJSON_NAMESPACE::Value & item = value[index];       \
                    if (TYPE_MATCH == J2PCHECKTYPE(item, cpptype, jsontype)) { \
                        reflection->Add##method(message, field, item.Get##jsontype()); \
                    }                                                   \
                }                                                       \
            } else if (TYPE_MATCH == J2PCHECKTYPE(value, cpptype, jsontype)) { \
                reflection->Set##method(message, field, value.Get##jsontype()); \
            }                                                           \
            break;                                                      \
        }                                                           
        CASE_FIELD_TYPE(INT32,  Int32,  Int);
        CASE_FIELD_TYPE(UINT32, UInt32, Uint);
        CASE_FIELD_TYPE(BOOL,   Bool,   Bool);
        CASE_FIELD_TYPE(INT64,  Int64,  Int64);
        CASE_FIELD_TYPE(UINT64, UInt64, Uint64);
#undef CASE_FIELD_TYPE

    case google::protobuf::FieldDescriptor::CPPTYPE_FLOAT:  
        if (field->is_repeated()) {
            const BUTIL_RAPIDJSON_NAMESPACE::SizeType size = value.Size();
            for (BUTIL_RAPIDJSON_NAMESPACE::SizeType index = 0; index < size; ++index) {
                const BUTIL_RAPIDJSON_NAMESPACE::Value & item = value[index];
                if (!convert_float_type(item, true, message, field,
                                        reflection, err)) {
                    return false;
                }
            }
        } else if (!convert_float_type(value, false, message, field,
                                       reflection, err)) {
            return false;
        }
        break;

    case google::protobuf::FieldDescriptor::CPPTYPE_DOUBLE: 
        if (field->is_repeated()) {
            const BUTIL_RAPIDJSON_NAMESPACE::SizeType size = value.Size();
            for (BUTIL_RAPIDJSON_NAMESPACE::SizeType index = 0; index < size; ++index) {
                const BUTIL_RAPIDJSON_NAMESPACE::Value & item = value[index];
                if (!convert_double_type(item, true, message, field,
                                         reflection, err)) {
                    return false;
                }
            }
        } else if (!convert_double_type(value, false, message, field,
                                        reflection, err)) {
            return false;
        }
        break;
        
    case google::protobuf::FieldDescriptor::CPPTYPE_STRING:
        if (field->is_repeated()) {
            const BUTIL_RAPIDJSON_NAMESPACE::SizeType size = value.Size();
            for (BUTIL_RAPIDJSON_NAMESPACE::SizeType index = 0; index < size; ++index) {
                const BUTIL_RAPIDJSON_NAMESPACE::Value & item = value[index];
                if (TYPE_MATCH == J2PCHECKTYPE(item, string, String)) { 
                    std::string str(item.GetString(), item.GetStringLength());
                    if (field->type() == google::protobuf::FieldDescriptor::TYPE_BYTES &&
                        options.base64_to_bytes) {
                        std::string str_decoded;
                        if (!butil::Base64Decode(str, &str_decoded)) {
                            J2PERROR(err, "Fail to decode base64 string=%s", str.c_str());
                            return false;
                        }
                        str = str_decoded;
                    }
                    reflection->AddString(message, field, str);
                }  
            }
        } else if (TYPE_MATCH == J2PCHECKTYPE(value, string, String)) {
            std::string str(value.GetString(), value.GetStringLength());
            if (field->type() == google::protobuf::FieldDescriptor::TYPE_BYTES &&
                options.base64_to_bytes) {
                std::string str_decoded;
                if (!butil::Base64Decode(str, &str_decoded)) {
                    J2PERROR(err, "Fail to decode base64 string=%s", str.c_str());
                    return false;
                }
                str = str_decoded;
            }
            reflection->SetString(message, field, str);
        }
        break;

    case google::protobuf::FieldDescriptor::CPPTYPE_ENUM:
        if (field->is_repeated()) {
            const BUTIL_RAPIDJSON_NAMESPACE::SizeType size = value.Size();
            for (BUTIL_RAPIDJSON_NAMESPACE::SizeType index = 0; index < size; ++index) {
                const BUTIL_RAPIDJSON_NAMESPACE::Value & item = value[index];
                if (!convert_enum_type(item, true, message, field,
                                       reflection, err)) {
                    return false;
                }
            }
        } else if (!convert_enum_type(value, false, message, field,
                                      reflection, err)) {
            return false;
        }
        break;
        
    case google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE:
        if (field->is_repeated()) {
            const BUTIL_RAPIDJSON_NAMESPACE::SizeType size = value.Size();
            for (BUTIL_RAPIDJSON_NAMESPACE::SizeType index = 0; index < size; ++index) {
                const BUTIL_RAPIDJSON_NAMESPACE::Value& item = value[index];
                if (TYPE_MATCH == J2PCHECKTYPE(item, message, Object)) { 
                    if (!JsonValueToProtoMessage(
                            item, reflection->AddMessage(message, field), options, err)) {
                        return false;
                    }
                } 
            }
        } else if (!JsonValueToProtoMessage(
            value, reflection->MutableMessage(message, field), options, err)) {
            return false;
        }
        break;
    }
    return true;
}