Esempio n. 1
0
/*****************************************************************************
 * dreme > model > negatives
 *
 *  name            the name of the negative dataset
 *  count           the number of sequences in the negative dataset
 *  from            the source of the negative dataset (eg shuffled positives)
 *  file            the file containing the negative dataset (optional)
 *  last_mod_date   the last modified date of the file (optional)
 ****************************************************************************/
static void start_ele_negatives(PS_T *ps, const xmlChar **attrs) {
  char *name, *file, *lastmod;
  long count;
  int from;

  file = NULL;
  lastmod = NULL;

  char* from_options[2] = {"file", "shuffled"};
  int from_values[2] = {DREME_NEG_FILE, DREME_NEG_SHUFFLED};
  MULTI_T from_multi = {.count = 2, .options = from_options, .outputs = from_values, .target = &(from)};

  char* names[5] = {"count", "file", "from", "last_mod_date", "name"};
  int (*parsers[5])(char*, void*) = {ld_long, ld_str, ld_multi, ld_str, ld_str};
  void *data[5] = {&count, &file, &from_multi, &lastmod, &name};
  BOOLEAN_T required[5] = {TRUE, FALSE, TRUE, FALSE, TRUE};
  BOOLEAN_T done[5];

  parse_attributes(dreme_attr_parse_error, ps, "negatives", attrs, 5, names, parsers, data, required, done);

  if (ps->state != PS_ERROR && from == DREME_NEG_FILE) {
    if (file == NULL) {
      dreme_attr_parse_error(ps, PARSE_ATTR_MISSING, "negatives", "file", NULL);
    } 
    if (lastmod == NULL) {
      dreme_attr_parse_error(ps, PARSE_ATTR_MISSING, "negatives", "last_mod_date", NULL);
    }
  }

  if (ps->callbacks->handle_negatives && ps->state != PS_ERROR) {
    ps->callbacks->handle_negatives(ps->user_data, name, count, (DREME_NEG_EN)from, file, lastmod);
  }
}

/*****************************************************************************
 * dreme > model > background
 *
 *  type            is the alphabet DNA or RNA
 *  A               frequency of A
 *  C               frequency of C
 *  G               frequency of G
 *  T/U             frequency of T or U depending on type
 *  from            from the negative dataset or a background file
 *  file            the background file (optional)
 *  last_mod_date   the last modified date of the background file (optional)
 ****************************************************************************/
static void start_ele_background(PS_T *ps, const xmlChar **attrs) {
  int type, from;
  double A, C, G, T;
  char *file, *lastmod;

  // set reasonable defaults
  type = DREME_BG_FROM_DATASET;
  from = DREME_ALPH_DNA;
  file = NULL;
  lastmod = NULL;

  char* type_options[2] = {"dna", "rna"};
  int type_values[2] = {DREME_ALPH_DNA, DREME_ALPH_RNA};
  MULTI_T type_multi = {.count = 2, .options = type_options, 
    .outputs = type_values, .target = &(type)};

  char* from_options[2] = {"dataset", "file"};
  int from_values[2] = {DREME_BG_FROM_DATASET, DREME_BG_FROM_FILE};
  MULTI_T from_multi = {.count = 2, .options = from_options, 
    .outputs = from_values, .target = &(from)};

  char* names[9] = {"A", "C", "G", "T", "U", "file", "from", 
    "last_mod_date", "type"};
  int (*parsers[9])(char*, void*) = {ld_pvalue, ld_pvalue, ld_pvalue, 
    ld_pvalue, ld_pvalue, ld_str, ld_multi, ld_str, ld_multi};
  void *data[9] = {&A, &C, &G, &T, &T, &file, &from_multi, &lastmod, &type_multi};
  BOOLEAN_T required[9] = {TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE};
  BOOLEAN_T done[9];

  parse_attributes(dreme_attr_parse_error, ps, "background", attrs, 9, names, parsers, data, required, done);

  if (type == DREME_ALPH_DNA) {
    if (!done[3]) dreme_attr_parse_error(ps, PARSE_ATTR_MISSING, "background", "T", NULL);
    if (done[4]) error(ps, "Attribute background::U should not exist because alphabet is DNA.\n");
  } else {
    if (!done[4]) dreme_attr_parse_error(ps, PARSE_ATTR_MISSING, "background", "U", NULL);
    if (done[3]) error(ps, "Attribute background::T should not exist because alphabet is RNA.\n");
  }
  if (from == DREME_BG_FROM_FILE) {
    if (!done[5]) dreme_attr_parse_error(ps, PARSE_ATTR_MISSING, "background", "file", NULL);
    if (!done[7]) dreme_attr_parse_error(ps, PARSE_ATTR_MISSING, "background", "last_mod_date", NULL);
  }

  if (ps->state != PS_ERROR) {
    // check sums to 1
    double delta = A + C + G + T - 1;
    delta = (delta < 0 ? -delta : delta);
    if (delta > 0.004) {
      // dreme writes background probabilities to 3 decimal places so assuming 
      // the error on each is at maximum 0.001 then the total error for the 
      // sum must be less than or equal to 0.004
      error(ps, "Probabilities of background do not sum to 1, got %g .\n",
          A + C + G + T);
    }
  }

  if (ps->state != PS_ERROR) {
    ps->alphabet = (DREME_ALPH_EN)type;
  }

  if (ps->callbacks->handle_background && ps->state != PS_ERROR) {
    ps->callbacks->handle_background(ps->user_data, (DREME_ALPH_EN)type, A, C, G, T, from, file, lastmod);
  }
}

/*****************************************************************************
 * dreme > model > stop
 *
 *  evalue          the stopping evalue (returned as log10).
 *  count           the stopping count.
 *  time            the stopping time.
 ****************************************************************************/
static void start_ele_stop(PS_T *ps, const xmlChar **attrs) {
  int count, time;
  double log10evalue;

  char* names[3] = {"count", "evalue", "time"};
  int (*parsers[3])(char*, void*) = {ld_int, ld_log10_ev, ld_int};
  void *data[3] = {&count, &log10evalue, &time};
  BOOLEAN_T required[3] = {FALSE, FALSE, FALSE};
  BOOLEAN_T done[3];

  parse_attributes(dreme_attr_parse_error, ps, "stop", attrs, 3, names, parsers, data, required, done);

  if (ps->callbacks->handle_stop && ps->state != PS_ERROR) {
    ps->callbacks->handle_stop(ps->user_data, &log10evalue, &count, &time);
  }
}

/*****************************************************************************
 * dreme > model > /ngen
 * the number of generations to check (or something like that).
 ****************************************************************************/
static void end_ele_ngen(PS_T *ps) {
  int ngen;

  if (ld_int(ps->characters.buffer, &ngen)) {
    error(ps, "Bad value \"%s\" for ngen.\n", ps->characters.buffer);
  }

  if (ps->callbacks->handle_ngen && ps->state != PS_ERROR) {
    ps->callbacks->handle_ngen(ps->user_data, ngen);
  }
}
Esempio n. 2
0
/*****************************************************************************
 * dreme > model > background
 *
 *  type            is the alphabet DNA or RNA (optional when custom alphabet specified)
 *  <symbol>        frequency of <symbol> from core alphabet
 *  from            from the negative dataset or a background file
 *  file            the background file (optional)
 *  last_mod_date   the last modified date of the background file (optional)
 ****************************************************************************/
static void start_ele_background(PS_T *ps, const xmlChar **attrs) {
  int type, from;
  char *file, *lastmod;

  // set reasonable defaults
  type = DREME_BG_FROM_DATASET;
  from = DREME_ALPH_DNA;
  file = NULL;
  lastmod = NULL;

  char* type_options[2] = {"dna", "rna"};
  int type_values[2] = {DREME_ALPH_DNA, DREME_ALPH_RNA};
  MULTI_T type_multi = {.count = 2, .options = type_options, 
    .outputs = type_values, .target = &(type)};

  char* from_options[2] = {"dataset", "file"};
  int from_values[2] = {DREME_BG_FROM_DATASET, DREME_BG_FROM_FILE};
  MULTI_T from_multi = {.count = 2, .options = from_options, 
    .outputs = from_values, .target = &(from)};

  char* names[4] = {"file", "from", "last_mod_date", "type"};
  int (*parsers[4])(char*, void*) = {ld_str, ld_multi, ld_str, ld_multi};
  void *data[4] = {&file, &from_multi, &lastmod, &type_multi};
  BOOLEAN_T required[4] = {FALSE, TRUE, FALSE, FALSE};
  BOOLEAN_T done[4];

  required[3] = !ps->seen_alphabet;
  parse_attributes(dreme_attr_parse_error, ps, "background", attrs, 4, names, parsers, data, required, done);

  if (from == DREME_BG_FROM_FILE) {
    if (!done[0]) dreme_attr_parse_error(ps, PARSE_ATTR_MISSING, "background", "file", NULL);
    if (!done[2]) dreme_attr_parse_error(ps, PARSE_ATTR_MISSING, "background", "last_mod_date", NULL);
  }

  // if we haven't seen the alphabet then we must define it from the type
  if (!ps->seen_alphabet) {
    int idx = 0;
    rbtree_make(ps->alph_ids, "A", &idx); idx++;
    rbtree_make(ps->alph_ids, "C", &idx); idx++;
    rbtree_make(ps->alph_ids, "G", &idx); idx++;
    rbtree_make(ps->alph_ids, (type == DREME_ALPH_DNA ? "T" : "U"), &idx);
  }
  parse_freq_attrs(ps, "background", attrs);

  if (ps->callbacks->handle_background && ps->state != PS_ERROR) {
    ps->callbacks->handle_background(ps->user_data, rbtree_size(ps->alph_ids), ps->freqs, from, file, lastmod);
  }
}

/*****************************************************************************
 * dreme > model > stop
 *
 *  evalue          the stopping evalue (returned as log10).
 *  count           the stopping count.
 *  time            the stopping time.
 ****************************************************************************/
static void start_ele_stop(PS_T *ps, const xmlChar **attrs) {
  int count, time;
  double log10evalue;

  char* names[3] = {"count", "evalue", "time"};
  int (*parsers[3])(char*, void*) = {ld_int, ld_log10_ev, ld_int};
  void *data[3] = {&count, &log10evalue, &time};
  BOOLEAN_T required[3] = {FALSE, FALSE, FALSE};
  BOOLEAN_T done[3];

  parse_attributes(dreme_attr_parse_error, ps, "stop", attrs, 3, names, parsers, data, required, done);

  if (ps->callbacks->handle_stop && ps->state != PS_ERROR) {
    ps->callbacks->handle_stop(ps->user_data, &log10evalue, &count, &time);
  }
}

/*****************************************************************************
 * dreme > model > /ngen
 * the number of generations to check (or something like that).
 ****************************************************************************/
static void end_ele_ngen(PS_T *ps) {
  int ngen;

  if (ld_int(ps->characters.buffer, &ngen)) {
    error(ps, "Bad value \"%s\" for ngen.\n", ps->characters.buffer);
  }

  if (ps->callbacks->handle_ngen && ps->state != PS_ERROR) {
    ps->callbacks->handle_ngen(ps->user_data, ngen);
  }
}