Esempio n. 1
0
static int quoted_word(GtStr *word, GtIO *bed_file, GtError *err)
{
  bool break_while = false;
  int had_err;
  gt_error_check(err);
  gt_str_reset(word);
  had_err = gt_io_expect(bed_file, QUOTE_CHAR, err);
  while (!had_err) {
    switch (gt_io_peek(bed_file)) {
      case QUOTE_CHAR:
      case GT_CARRIAGE_RETURN:
      case GT_END_OF_LINE:
      case GT_END_OF_FILE:
        break_while = true;
        break;
      default:
        gt_str_append_char(word, gt_io_next(bed_file));
    }
    if (break_while)
      break;
  }
  if (!had_err)
    had_err = gt_io_expect(bed_file, QUOTE_CHAR, err);
  return had_err;
}
Esempio n. 2
0
static int tag_line(GtIO *obo_file, GtStr *tag, GtStr *value, GtError *err)
{
  int had_err;
  gt_error_check(err);
  gt_assert(obo_file && tag && value);
  do {
    had_err = proc_any_char(obo_file, tag, false, err);
  } while (!had_err && any_char(obo_file, false));
  if (!had_err)
    had_err = gt_io_expect(obo_file, OBO_SEPARATOR_CHAR, err);
  while (!had_err && gt_io_peek(obo_file) == OBO_BLANK_CHAR)
    gt_io_next(obo_file);
  if (!had_err) {
    do {
      had_err = proc_any_char(obo_file, value, true, err);
    } while (!had_err && any_char(obo_file, true));
  }
  if (!had_err) {
    if (gt_io_peek(obo_file) == OBO_COMMENT_CHAR)
      had_err = comment_line(obo_file, err);
    else
      had_err = gt_io_expect(obo_file, GT_END_OF_LINE, err);
  }
  return had_err;
}
static int gt_xrf_abbr_parse_tree_blank_line(GtIO *xrf_abbr_file, GtError *err)
{
  int had_err;
  gt_error_check(err);
  gt_log_log("blank");
  had_err = gt_io_expect(xrf_abbr_file, XRF_BLANK_CHAR, err);
  while (!had_err) {
    char cc = gt_io_peek(xrf_abbr_file);
    if (cc == XRF_COMMENT_CHAR)
      return gt_xrf_abbr_parse_tree_comment_line(xrf_abbr_file, err);
    else if (cc == GT_CARRIAGE_RETURN) {
      gt_io_next(xrf_abbr_file);
      if (gt_io_peek(xrf_abbr_file) == GT_END_OF_LINE)
        gt_io_next(xrf_abbr_file);
      break;
    }
    else if ((cc == GT_END_OF_LINE) || (cc == GT_END_OF_FILE)) {
      gt_io_next(xrf_abbr_file);
      break;
    }
    else
      had_err = gt_io_expect(xrf_abbr_file, XRF_BLANK_CHAR, err);
  }
  return had_err;
}
Esempio n. 4
0
static int parse_bed_file(GtBEDParser *bed_parser, GtIO *bed_file, GtError *err)
{
  int had_err = 0;
  gt_error_check(err);
  gt_assert(bed_file);
  while (!had_err && gt_io_has_char(bed_file)) {
    switch (gt_io_peek(bed_file)) {
      case BLANK_CHAR:
        had_err = bed_parser_blank_line(bed_file, err);
        break;
      case COMMENT_CHAR:
        had_err = bed_parser_comment_line(bed_file, err);
        break;
      case GT_CARRIAGE_RETURN:
        gt_io_next(bed_file);
        if (gt_io_peek(bed_file) == GT_END_OF_LINE)
          gt_io_next(bed_file);
        break;
      case GT_END_OF_LINE:
        gt_io_next(bed_file);
        break;
      default:
        had_err = bed_line(bed_parser, bed_file, err);
    }
  }
  if (!had_err)
    had_err = gt_io_expect(bed_file, GT_END_OF_FILE, err);
  return had_err;
}
static int gt_xrf_abbr_parse_tree_comment_line(GtIO *xrf_abbr_file,
                                               GtError *err)
{
  int had_err;
  gt_error_check(err);
  gt_log_log("comment");
  had_err = gt_io_expect(xrf_abbr_file, XRF_COMMENT_CHAR, err);
  while (!had_err) {
    switch (gt_io_peek(xrf_abbr_file)) {
      case GT_CARRIAGE_RETURN:
        gt_io_next(xrf_abbr_file);
        if (gt_io_peek(xrf_abbr_file) == GT_END_OF_LINE)
          gt_io_next(xrf_abbr_file);
        return had_err;
      case GT_END_OF_LINE:
        gt_io_next(xrf_abbr_file);
        /*@fallthrough@*/
      case GT_END_OF_FILE:
        return had_err;
      default:
        gt_io_next(xrf_abbr_file);
    }
  }
  return had_err;
}
Esempio n. 6
0
static int stanza_line(GtIO *obo_file, GtStr *type, GtError *err)
{
  int had_err;
  gt_error_check(err);
  gt_assert(obo_file && type);
  had_err = gt_io_expect(obo_file, OBO_STANZA_OPEN_CHAR, err);
  if (!had_err) {
    do {
      had_err = proc_any_char(obo_file, type, false, err);
    } while (!had_err && any_char(obo_file, false));
  }
  if (!had_err)
    had_err = gt_io_expect(obo_file, OBO_STANZA_CLOSE_CHAR, err);
  if (!had_err)
    had_err = gt_io_expect(obo_file, GT_END_OF_LINE, err);
  return had_err;
}
Esempio n. 7
0
static int bed_parser_comment_line(GtIO *bed_file, GtError *err)
{
  int had_err;
  gt_error_check(err);
  had_err = gt_io_expect(bed_file, COMMENT_CHAR, err);
  if (!had_err)
    rest_line(bed_file);
  return had_err;
}
Esempio n. 8
0
static int track_rest(GtBEDParser *bed_parser, GtIO *bed_file, GtError *err)
{
  char cc;
  int had_err = 0;
  gt_error_check(err);
  bed_parser->offset = 0; /* reset offset for new track line */
  if (bed_separator(bed_file)) /* skip to first attribute=value pair */
    had_err = skip_blanks(bed_file, err);
  while (!had_err &&
         (cc = gt_io_peek(bed_file)) != GT_END_OF_LINE &&
         cc != GT_CARRIAGE_RETURN) {
    /* parse attribute */
    word(bed_parser->word, bed_file);
    had_err = gt_io_expect(bed_file, PAIR_SEPARATOR, err);
    /* parse value */
    if (!had_err) {
      if (gt_io_peek(bed_file) == QUOTE_CHAR)
        had_err = quoted_word(bed_parser->another_word, bed_file, err);
      else
        word(bed_parser->another_word, bed_file);
    }
    /* process offset if necessary */
    if (!had_err && !strcmp(gt_str_get(bed_parser->word), OFFSET_KEYWORD)) {
      if (gt_parse_word(&bed_parser->offset,
                         gt_str_get(bed_parser->another_word))) {
        gt_error_set(err,
                     "file \"%s\": line "GT_WU": could not parse offset value "
                     "'%s'", gt_io_get_filename(bed_file),
                     gt_io_get_line_number(bed_file),
                     gt_str_get(bed_parser->another_word));
        had_err = -1;
      }
    }
    /* skip blanks up to next attribute or end-of-line */
    if (!had_err && bed_separator(bed_file))
      had_err = skip_blanks(bed_file, err);
  }
  /* the end of the line should now be reached */
  if (!had_err)
    had_err = gt_io_expect(bed_file, GT_END_OF_LINE, err);
  return had_err;
}
Esempio n. 9
0
static int bed_parser_blank_line(GtIO *bed_file, GtError *err)
{
  int had_err = 0;
  gt_error_check(err);
  had_err = gt_io_expect(bed_file, BLANK_CHAR, err);
  while (!had_err) {
    char cc = gt_io_peek(bed_file);
    if (cc == GT_CARRIAGE_RETURN) {
      gt_io_next(bed_file);
      if (gt_io_peek(bed_file) == GT_END_OF_LINE)
        gt_io_next(bed_file);
      break;
    }
    else if ((cc == GT_END_OF_LINE) || (cc == GT_END_OF_FILE)) {
      gt_io_next(bed_file);
      break;
    }
    else
      had_err = gt_io_expect(bed_file, BLANK_CHAR, err);
  }
  return had_err;
}
Esempio n. 10
0
static int blank_line(GtIO *obo_file, GtError *err)
{
  int had_err;
  gt_error_check(err);
  had_err = gt_io_expect(obo_file, OBO_BLANK_CHAR, err);
  while (!had_err) {
    char cc = gt_io_peek(obo_file);
    if (cc == OBO_COMMENT_CHAR)
      return comment_line(obo_file, err);
    else if (cc == GT_CARRIAGE_RETURN) {
      gt_io_next(obo_file);
      if (gt_io_peek(obo_file) == GT_END_OF_LINE)
        gt_io_next(obo_file);
      break;
    }
    else if ((cc == GT_END_OF_LINE) || (cc == GT_END_OF_FILE)) {
      gt_io_next(obo_file);
      break;
    }
    else
      had_err = gt_io_expect(obo_file, OBO_BLANK_CHAR, err);
  }
  return had_err;
}
static int gt_xrf_abbr_parse_tree_tag_line(GtIO *xrf_abbr_file, GtStr *tag,
                                           GtStr *value, GtError *err)
{
  int had_err = 0;
  gt_error_check(err);
  gt_log_log("tag");
  gt_assert(xrf_abbr_file && tag && value);
  do {
    had_err = gt_xrf_abbr_parse_tree_proc_any_char(xrf_abbr_file, tag,
                                                   false, err);
  } while (!had_err && gt_xrf_abbr_parse_tree_any_char(xrf_abbr_file, false));
  if (!had_err)
    had_err = gt_io_expect(xrf_abbr_file, XRF_SEPARATOR_CHAR, err);
  while (!had_err && gt_io_peek(xrf_abbr_file) == XRF_BLANK_CHAR)
    gt_io_next(xrf_abbr_file);
  if (!had_err) {
    do {
      had_err = gt_xrf_abbr_parse_tree_proc_any_char(xrf_abbr_file, value,
                                                     true, err);
    } while (!had_err && gt_xrf_abbr_parse_tree_any_char(xrf_abbr_file, true));
  }
  if (!had_err) {
    if (gt_io_peek(xrf_abbr_file) == XRF_COMMENT_CHAR)
      had_err = gt_xrf_abbr_parse_tree_comment_line(xrf_abbr_file, err);
    else
      had_err = gt_io_expect(xrf_abbr_file, GT_END_OF_LINE, err);
  }
  if (!had_err && !gt_xrf_abbr_parse_tree_valid_label(gt_str_get(tag))) {
    gt_warning("file \"%s\": line "GT_WU": unknown label \"%s\"",
                gt_io_get_filename(xrf_abbr_file),
                gt_io_get_line_number(xrf_abbr_file),
                gt_str_get(tag));
  }
  gt_log_log("parsed line %s/%s", gt_str_get(tag), gt_str_get(value));
  return had_err;
}
Esempio n. 12
0
static int comment_line(GtIO *obo_file, GtError *err)
{
  int had_err;
  gt_error_check(err);
  had_err = gt_io_expect(obo_file, OBO_COMMENT_CHAR, err);
  while (!had_err) {
    switch (gt_io_peek(obo_file)) {
      case GT_CARRIAGE_RETURN:
        gt_io_next(obo_file);
        if (gt_io_peek(obo_file) == GT_END_OF_LINE)
          gt_io_next(obo_file);
        return had_err;
      case GT_END_OF_LINE:
        gt_io_next(obo_file);
        /*@fallthrough@*/
      case GT_END_OF_FILE:
        return had_err;
      default:
        gt_io_next(obo_file);
    }
  }
  return had_err;
}
Esempio n. 13
0
static int parse_obo_file(GtOBOParseTree *obo_parse_tree,
                          GtIO *obo_file, GtError *err)
{
  int had_err = 0;
  gt_error_check(err);
  gt_assert(obo_parse_tree && obo_file);
  while (!had_err && ignored_char(obo_file)) {
    had_err = ignored_line(obo_file, err);
  }
  if (!had_err)
    had_err = header(obo_parse_tree, obo_file, err);
  while (!had_err && gt_io_has_char(obo_file)) {
    switch (gt_io_peek(obo_file)) {
      case OBO_BLANK_CHAR:
        had_err = blank_line(obo_file, err);
        break;
      case OBO_COMMENT_CHAR:
        had_err = comment_line(obo_file, err);
        break;
      case GT_CARRIAGE_RETURN:
        gt_io_next(obo_file);
        if (gt_io_peek(obo_file) == GT_END_OF_LINE)
          gt_io_next(obo_file);
        break;
      case GT_END_OF_LINE:
        gt_io_next(obo_file);
        break;
      default:
        had_err = stanza(obo_parse_tree, obo_file, err);
    }
  }
  if (!had_err)
    had_err = gt_io_expect(obo_file, GT_END_OF_FILE, err);
  if (!had_err)
    had_err = gt_obo_parse_tree_validate_stanzas(obo_parse_tree, err);
  return had_err;
}
static int parse_xrf_abbr_file(GtXRFAbbrParseTree *xrf_abbr_parse_tree,
                               GtIO *xrf_abbr_file, GtError *err)
{
  int had_err = 0;
  gt_error_check(err);
  gt_assert(xrf_abbr_parse_tree && xrf_abbr_file);
  while (!had_err && gt_xrf_abbr_parse_tree_ignored_char(xrf_abbr_file)) {
    had_err = gt_xrf_abbr_parse_tree_ignored_line(xrf_abbr_file, err);
  }
  while (!had_err && gt_io_has_char(xrf_abbr_file)) {
    switch (gt_io_peek(xrf_abbr_file)) {
      case XRF_BLANK_CHAR:
        had_err = gt_xrf_abbr_parse_tree_blank_line(xrf_abbr_file, err);
        break;
      case XRF_COMMENT_CHAR:
        had_err = gt_xrf_abbr_parse_tree_comment_line(xrf_abbr_file, err);
        break;
      case GT_CARRIAGE_RETURN:
        gt_io_next(xrf_abbr_file);
        if (gt_io_peek(xrf_abbr_file) == GT_END_OF_LINE)
          gt_io_next(xrf_abbr_file);
        break;
      case GT_END_OF_LINE:
        gt_io_next(xrf_abbr_file);
        break;
      default:
        had_err = gt_xrf_abbr_parse_tree_entry(xrf_abbr_parse_tree,
                                               xrf_abbr_file, err);
    }
  }
  if (!had_err)
    had_err = gt_io_expect(xrf_abbr_file, GT_END_OF_FILE, err);
  if (!had_err)
    had_err = gt_xrf_abbr_parse_tree_validate_entries(xrf_abbr_parse_tree,
                                                      err);
  return had_err;
}
Esempio n. 15
0
static int bed_rest(GtBEDParser *bed_parser, GtIO *bed_file, GtError *err)
{
  GtUword block_count = 0;
  GtGenomeNode *gn = NULL;
  GtRange range;
  GtStr *seqid;
  int had_err;
  gt_error_check(err);
  /* column 1.: chrom */
  seqid = get_seqid(bed_parser);
  had_err = skip_blanks(bed_file, err);
  /* column 2.: chromStart */
  if (!had_err) {
    word(bed_parser->word, bed_file);
    had_err = skip_blanks(bed_file, err);
  }
  /* column 3.: chromEnd */
  if (!had_err) {
    word(bed_parser->another_word, bed_file);
    had_err = parse_bed_range(&range, bed_parser->word,
                              bed_parser->another_word, bed_parser->offset,
                              bed_file, false, err);
  }
  if (!had_err) {
    /* add region */
    gt_region_node_builder_add_region(bed_parser->region_node_builder,
                                      gt_str_get(seqid), range);
    /* create feature */
    gn = gt_feature_node_new(seqid,
                             bed_parser->feature_type
                             ? bed_parser->feature_type
                             : BED_FEATURE_TYPE,
                             range.start, range.end, GT_STRAND_BOTH);
    gt_queue_add(bed_parser->feature_nodes, gn);
    if (bed_separator(bed_file))
      had_err = skip_blanks(bed_file, err);
  }
  /* optional column 4.: name */
  if (!had_err) {
    word(bed_parser->word, bed_file);
    if (gt_str_length(bed_parser->word)) {
      gt_feature_node_add_attribute((GtFeatureNode*) gn, GT_GFF_NAME,
                                    gt_str_get(bed_parser->word));
    }
    if (bed_separator(bed_file))
      had_err = skip_blanks(bed_file, err);
  }
  /* optional column 5.: score */
  if (!had_err) {
    word(bed_parser->word, bed_file);
    if (gt_str_length(bed_parser->word)) {
      bool score_is_defined;
      float score_value;
      had_err = gt_parse_score(&score_is_defined, &score_value,
                               gt_str_get(bed_parser->word),
                               gt_io_get_line_number(bed_file),
                               gt_io_get_filename(bed_file), err);
      if (!had_err && score_is_defined)
        gt_feature_node_set_score((GtFeatureNode*) gn, score_value);
    }
  }
  if (!had_err && bed_separator(bed_file))
    had_err = skip_blanks(bed_file, err);
  /* optional column 6.: strand */
  if (!had_err) {
    word(bed_parser->word, bed_file);
    if (gt_str_length(bed_parser->word)) {
      GtStrand strand;
      had_err = gt_parse_strand(&strand, gt_str_get(bed_parser->word),
                                gt_io_get_line_number(bed_file),
                                gt_io_get_filename(bed_file), err);
      if (!had_err)
        gt_feature_node_set_strand((GtFeatureNode*) gn, strand);
    }
  }
  if (!had_err && bed_separator(bed_file))
    had_err = skip_blanks(bed_file, err);
  /* optional column 7.: thickStart */
  if (!had_err) {
    word(bed_parser->word, bed_file);
    if (bed_separator(bed_file))
      had_err = skip_blanks(bed_file, err);
  }
  /* optional column 8.: thickEnd */
  if (!had_err) {
    word(bed_parser->another_word, bed_file);
    if (gt_str_length(bed_parser->another_word)) {
      gt_assert(gt_str_length(bed_parser->word));
      /* got a thickStart and a thickEnd -> construct corresponding feature */
      had_err = parse_bed_range(&range, bed_parser->word,
                                bed_parser->another_word, bed_parser->offset,
                                bed_file, true, err);
      if (!had_err && range.start <= range.end)
        construct_thick_feature(bed_parser, (GtFeatureNode*) gn, range);
    }
  }
  if (!had_err && bed_separator(bed_file))
    had_err = skip_blanks(bed_file, err);
  /* optional column 9.: itemRgb */
  if (!had_err) {
    word(bed_parser->word, bed_file);
    /* we do not use the RGB values */
    if (bed_separator(bed_file))
      had_err = skip_blanks(bed_file, err);
  }
  /* optional column 10.: blockCount */
  if (!had_err) {
    word(bed_parser->word, bed_file);
    if (gt_str_length(bed_parser->word)) {
      if (gt_parse_uword(&block_count, gt_str_get(bed_parser->word))) {
        gt_error_set(err,
                     "file \"%s\": line "GT_WU": could not parse blockCount",
                     gt_io_get_filename(bed_file),
                     gt_io_get_line_number(bed_file));
        had_err = -1;
      }
      else {
        /* reset to parse/process blockSizes and blockStarts properly */
        gt_str_reset(bed_parser->word);
        gt_str_reset(bed_parser->another_word);
      }
    }
  }
  if (!had_err && bed_separator(bed_file))
    had_err = skip_blanks(bed_file, err);
  /* optional column 11.: blockSizes */
  if (!had_err) {
    word(bed_parser->word, bed_file);
    if (bed_separator(bed_file))
      had_err = skip_blanks(bed_file, err);
  }
  /* optional column 12.: blockStarts */
  if (!had_err) {
    word(bed_parser->another_word, bed_file);
    if (bed_separator(bed_file))
      had_err = skip_blanks(bed_file, err);
  }
  /* process blocks if necessary */
  if (!had_err && block_count) {
    had_err = process_blocks(bed_parser, (GtFeatureNode*) gn, block_count,
                             bed_parser->word, bed_parser->another_word,
                             bed_file, err);
  }
  /* the end of the line should now be reached */
  if (!had_err)
    had_err = gt_io_expect(bed_file, GT_END_OF_LINE, err);
  return had_err;
}