Esempio n. 1
0
/*
 * Consume a run of column separators at the current position of <bed_file>.
 *
 * At least one separator has to be present (as reported by bed_separator());
 * if it is not, <err> is set to a message naming the file, the line and the
 * offending character, and nothing is consumed.
 *
 * Returns 0 on success and -1 (with <err> set) on failure.
 */
static int skip_blanks(GtIO *bed_file, GtError *err)
{
  gt_error_check(err);
  if (!bed_separator(bed_file)) {
    /* the line number is printed with GT_WU, like in the other error messages
       of this parser, instead of a hard-coded "%lu" */
    gt_error_set(err, "file \"%s\": line "GT_WU": expected blank or tabulator, "
                      "got '%c'", gt_io_get_filename(bed_file),
                      gt_io_get_line_number(bed_file), gt_io_peek(bed_file));
    return -1;
  }
  /* swallow every following separator as well */
  while (bed_separator(bed_file))
    gt_io_next(bed_file);
  return 0;
}
Esempio n. 2
0
/*
 * Parse the remainder of a track line: a sequence of attribute=value pairs
 * which runs up to the end of the line.
 *
 * The offset stored in <bed_parser> is reset to 0 first. Whenever an attribute
 * equal to OFFSET_KEYWORD is seen, its value is parsed into
 * <bed_parser>->offset. The other attributes are read but not stored.
 *
 * Returns 0 on success and a non-zero value (with <err> set) on failure.
 */
static int track_rest(GtBEDParser *bed_parser, GtIO *bed_file, GtError *err)
{
  int had_err = 0;
  gt_error_check(err);

  /* every track line starts out without an offset */
  bed_parser->offset = 0;

  /* move to the first attribute=value pair */
  if (bed_separator(bed_file))
    had_err = skip_blanks(bed_file, err);

  while (!had_err) {
    char next = gt_io_peek(bed_file);
    if (next == GT_END_OF_LINE || next == GT_CARRIAGE_RETURN)
      break; /* no more pairs on this line */

    /* attribute name, followed by the pair separator */
    word(bed_parser->word, bed_file);
    had_err = gt_io_expect(bed_file, PAIR_SEPARATOR, err);
    if (had_err)
      break;

    /* value, which may be quoted */
    if (gt_io_peek(bed_file) == QUOTE_CHAR) {
      had_err = quoted_word(bed_parser->another_word, bed_file, err);
      if (had_err)
        break;
    }
    else
      word(bed_parser->another_word, bed_file);

    /* the offset attribute is the only one which gets evaluated */
    if (strcmp(gt_str_get(bed_parser->word), OFFSET_KEYWORD) == 0 &&
        gt_parse_word(&bed_parser->offset,
                      gt_str_get(bed_parser->another_word)) != 0) {
      gt_error_set(err,
                   "file \"%s\": line "GT_WU": could not parse offset value "
                   "'%s'", gt_io_get_filename(bed_file),
                   gt_io_get_line_number(bed_file),
                   gt_str_get(bed_parser->another_word));
      had_err = -1;
      break;
    }

    /* advance to the next pair or to the end of the line */
    if (bed_separator(bed_file))
      had_err = skip_blanks(bed_file, err);
  }

  /* nothing but the end-of-line may be left now */
  if (had_err)
    return had_err;
  return gt_io_expect(bed_file, GT_END_OF_LINE, err);
}
Esempio n. 3
0
/* Skip a run of separators if <bed_file> is positioned at one, otherwise do
   nothing. Returns 0 or the error code reported by skip_blanks(). */
static int skip_optional_blanks(GtIO *bed_file, GtError *err)
{
  return bed_separator(bed_file) ? skip_blanks(bed_file, err) : 0;
}

/*
 * Parse the remainder of a BED data line and turn it into a feature node.
 *
 * The sequence ID is taken from get_seqid(<bed_parser>); the separator after
 * it and columns 2 and 3 (chromStart, chromEnd) are mandatory. Columns 4-12
 * are optional and are read in order for as long as the line provides them.
 *
 * On success the region has been added to the region node builder of
 * <bed_parser> and the new feature node has been appended to
 * <bed_parser>->feature_nodes. Note that both happen right after column 3 was
 * parsed, i.e. they remain in place if a later column fails to parse.
 *
 * Returns 0 on success and a non-zero value (with <err> set) on failure.
 */
static int bed_rest(GtBEDParser *bed_parser, GtIO *bed_file, GtError *err)
{
  GtUword num_blocks = 0;
  GtGenomeNode *genome_node = NULL;
  GtFeatureNode *feature_node;
  GtRange range;
  GtStr *chrom;
  int had_err;
  gt_error_check(err);

  /* column 1 (chrom): obtained from the parser, a separator must follow */
  chrom = get_seqid(bed_parser);
  had_err = skip_blanks(bed_file, err);
  if (had_err)
    return had_err;

  /* column 2 (chromStart) */
  word(bed_parser->word, bed_file);
  had_err = skip_blanks(bed_file, err);
  if (had_err)
    return had_err;

  /* column 3 (chromEnd) */
  word(bed_parser->another_word, bed_file);
  had_err = parse_bed_range(&range, bed_parser->word, bed_parser->another_word,
                            bed_parser->offset, bed_file, false, err);
  if (had_err)
    return had_err;

  /* register the region and create the feature for the parsed range */
  gt_region_node_builder_add_region(bed_parser->region_node_builder,
                                    gt_str_get(chrom), range);
  genome_node = gt_feature_node_new(chrom,
                                    bed_parser->feature_type
                                    ? bed_parser->feature_type
                                    : BED_FEATURE_TYPE,
                                    range.start, range.end, GT_STRAND_BOTH);
  gt_queue_add(bed_parser->feature_nodes, genome_node);
  feature_node = (GtFeatureNode*) genome_node;
  had_err = skip_optional_blanks(bed_file, err);
  if (had_err)
    return had_err;

  /* optional column 4 (name) */
  word(bed_parser->word, bed_file);
  if (gt_str_length(bed_parser->word) > 0) {
    gt_feature_node_add_attribute(feature_node, GT_GFF_NAME,
                                  gt_str_get(bed_parser->word));
  }
  had_err = skip_optional_blanks(bed_file, err);
  if (had_err)
    return had_err;

  /* optional column 5 (score) */
  word(bed_parser->word, bed_file);
  if (gt_str_length(bed_parser->word) > 0) {
    bool score_is_defined;
    float score_value;
    had_err = gt_parse_score(&score_is_defined, &score_value,
                             gt_str_get(bed_parser->word),
                             gt_io_get_line_number(bed_file),
                             gt_io_get_filename(bed_file), err);
    if (had_err)
      return had_err;
    if (score_is_defined)
      gt_feature_node_set_score(feature_node, score_value);
  }
  had_err = skip_optional_blanks(bed_file, err);
  if (had_err)
    return had_err;

  /* optional column 6 (strand) */
  word(bed_parser->word, bed_file);
  if (gt_str_length(bed_parser->word) > 0) {
    GtStrand strand;
    had_err = gt_parse_strand(&strand, gt_str_get(bed_parser->word),
                              gt_io_get_line_number(bed_file),
                              gt_io_get_filename(bed_file), err);
    if (had_err)
      return had_err;
    gt_feature_node_set_strand(feature_node, strand);
  }
  had_err = skip_optional_blanks(bed_file, err);
  if (had_err)
    return had_err;

  /* optional column 7 (thickStart): kept in ->word for column 8 */
  word(bed_parser->word, bed_file);
  had_err = skip_optional_blanks(bed_file, err);
  if (had_err)
    return had_err;

  /* optional column 8 (thickEnd) */
  word(bed_parser->another_word, bed_file);
  if (gt_str_length(bed_parser->another_word) > 0) {
    gt_assert(gt_str_length(bed_parser->word));
    /* thickStart and thickEnd are both available -> build the thick feature,
       but only for a non-inverted range */
    had_err = parse_bed_range(&range, bed_parser->word,
                              bed_parser->another_word, bed_parser->offset,
                              bed_file, true, err);
    if (had_err)
      return had_err;
    if (range.start <= range.end)
      construct_thick_feature(bed_parser, feature_node, range);
  }
  had_err = skip_optional_blanks(bed_file, err);
  if (had_err)
    return had_err;

  /* optional column 9 (itemRgb): read, but the RGB values are not used */
  word(bed_parser->word, bed_file);
  had_err = skip_optional_blanks(bed_file, err);
  if (had_err)
    return had_err;

  /* optional column 10 (blockCount) */
  word(bed_parser->word, bed_file);
  if (gt_str_length(bed_parser->word) > 0) {
    if (gt_parse_uword(&num_blocks, gt_str_get(bed_parser->word)) != 0) {
      gt_error_set(err,
                   "file \"%s\": line "GT_WU": could not parse blockCount",
                   gt_io_get_filename(bed_file),
                   gt_io_get_line_number(bed_file));
      return -1;
    }
    /* start from empty strings so that blockSizes and blockStarts get
       parsed/processed properly */
    gt_str_reset(bed_parser->word);
    gt_str_reset(bed_parser->another_word);
  }
  had_err = skip_optional_blanks(bed_file, err);
  if (had_err)
    return had_err;

  /* optional column 11 (blockSizes) */
  word(bed_parser->word, bed_file);
  had_err = skip_optional_blanks(bed_file, err);
  if (had_err)
    return had_err;

  /* optional column 12 (blockStarts) */
  word(bed_parser->another_word, bed_file);
  had_err = skip_optional_blanks(bed_file, err);
  if (had_err)
    return had_err;

  /* blocks are only processed for a non-zero blockCount */
  if (num_blocks) {
    had_err = process_blocks(bed_parser, feature_node, num_blocks,
                             bed_parser->word, bed_parser->another_word,
                             bed_file, err);
    if (had_err)
      return had_err;
  }

  /* nothing but the end-of-line may be left now */
  return gt_io_expect(bed_file, GT_END_OF_LINE, err);
}