Ejemplo n.º 1
0
/* Validates the blockSizes/blockStarts columns of the current BED line and,
   if both contain exactly <block_count> comma separated fields, hands them to
   create_block_features(). Returns 0 on success and -1 (with <err> set) if a
   column is empty or has the wrong number of fields; otherwise the result of
   create_block_features() is returned. */
static int process_blocks(GtBEDParser *bed_parser, GtFeatureNode *fn,
                          unsigned long block_count, GtStr *block_sizes,
                          GtStr *block_starts, GtIO *bed_file, GtError *err)
{
  GtSplitter *sizes = NULL, *starts = NULL;
  int rval = 0;

  gt_error_check(err);
  gt_assert(fn && block_count && block_sizes && block_starts);

  /* a block count without the corresponding lists is an error */
  if (gt_str_length(block_sizes) == 0) {
    gt_error_set(err,
                 "file \"%s\": line %lu: blockCount given without blockSizes",
                 gt_io_get_filename(bed_file),
                 gt_io_get_line_number(bed_file));
    rval = -1;
  }
  else if (gt_str_length(block_starts) == 0) {
    gt_error_set(err,
                 "file \"%s\": line %lu: blockCount given without blockStarts",
                 gt_io_get_filename(bed_file),
                 gt_io_get_line_number(bed_file));
    rval = -1;
  }

  if (rval == 0) {
    /* real-world BED files sometimes end the lists with a comma */
    remove_terminal_comma(block_sizes);
    remove_terminal_comma(block_starts);

    /* the size list must have one field per block */
    sizes = gt_splitter_new();
    gt_splitter_split(sizes, gt_str_get(block_sizes),
                      gt_str_length(block_sizes), ',');
    if (gt_splitter_size(sizes) != block_count) {
      gt_error_set(err, "file \"%s\": line %lu: blockSizes column does not "
                        "have blockCount=%lu many comma separated fields",
                   gt_io_get_filename(bed_file),
                   gt_io_get_line_number(bed_file), block_count);
      rval = -1;
    }
  }

  if (rval == 0) {
    /* ...and so must the start list */
    starts = gt_splitter_new();
    gt_splitter_split(starts, gt_str_get(block_starts),
                      gt_str_length(block_starts), ',');
    if (gt_splitter_size(starts) != block_count) {
      gt_error_set(err, "file \"%s\": line %lu: blockStarts column does not "
                        "have blockCount=%lu many comma separated fields",
                   gt_io_get_filename(bed_file),
                   gt_io_get_line_number(bed_file), block_count);
      rval = -1;
    }
  }

  if (rval == 0) {
    rval = create_block_features(bed_parser, fn, block_count, sizes, starts,
                                 bed_file, err);
  }

  /* splitters that were never created are still NULL here */
  gt_splitter_delete(starts);
  gt_splitter_delete(sizes);
  return rval;
}
Ejemplo n.º 2
0
/* Looks up every target listed in the comma separated <target> string in all
   sequence files given in <seqfiles>. For each list entry only the part
   before the first blank is used; it is unescaped and then searched for in
   the description of every sequence, with show_target() invoked as the match
   callback. Returns 0 on success and a non-zero value (with <err> set) if
   unescaping fails or a sequence file cannot be opened. */
static int extracttarget_from_seqfiles(const char *target,
                                       GtStrArray *seqfiles,
                                       GtError *err)
{
  GtSplitter *target_splitter;
  GtStr *unescaped_target;
  char *target_copy;
  unsigned long entry_idx;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(target && seqfiles);

  target_splitter = gt_splitter_new();
  unescaped_target = gt_str_new();
  target_copy = gt_cstr_dup(target); /* the splitter needs a writable string */
  gt_splitter_split(target_splitter, target_copy, strlen(target_copy), ',');

  for (entry_idx = 0;
       !had_err && entry_idx < gt_splitter_size(target_splitter);
       entry_idx++) {
    char *entry = gt_splitter_get_token(target_splitter, entry_idx);
    GtSplitter *blank_splitter = gt_splitter_new();
    const char *target_id;
    unsigned long file_idx;

    /* only the first blank separated field of the entry is the target ID */
    gt_splitter_split(blank_splitter, entry, strlen(entry), ' ');
    target_id = gt_splitter_get_token(blank_splitter, 0);

    /* NOTE(review): unescaped_target is not reset between entries; confirm
       whether gt_gff3_unescape() overwrites or appends to it */
    had_err = gt_gff3_unescape(unescaped_target, target_id, strlen(target_id),
                               err);

    for (file_idx = 0;
         !had_err && file_idx < gt_str_array_size(seqfiles);
         file_idx++) {
      GtBioseq *bioseq = gt_bioseq_new(gt_str_array_get(seqfiles, file_idx),
                                       err);
      if (!bioseq)
        had_err = -1;
      else {
        unsigned long seq_idx;
        /* scan all sequence descriptions of this file for the target */
        for (seq_idx = 0;
             seq_idx < gt_bioseq_number_of_sequences(bioseq);
             seq_idx++) {
          const char *desc = gt_bioseq_get_description(bioseq, seq_idx);
          TargetInfo target_info;
          target_info.bioseq = bioseq;
          target_info.seqnum = seq_idx;
          gt_string_matching_bmh(desc, strlen(desc),
                                 gt_str_get(unescaped_target),
                                 gt_str_length(unescaped_target), show_target,
                                 &target_info);
        }
        gt_bioseq_delete(bioseq);
      }
    }
    gt_splitter_delete(blank_splitter);
  }

  gt_free(target_copy);
  gt_str_delete(unescaped_target);
  gt_splitter_delete(target_splitter);
  return had_err;
}
Ejemplo n.º 3
0
/* Creates a new GtXRFChecker from the XRF abbreviation file <file_path>.
   Every parsed entry is registered in a string-keyed hashmap under its
   "abbreviation" value and, if present, also under its "synonym" value.
   Returns NULL if the abbreviation file could not be parsed (the parse tree
   constructor is given <err>). */
GtXRFChecker* gt_xrf_checker_new(const char *file_path, GtError *err)
{
  GtXRFChecker *checker;
  GtUword entry_idx = 0;

  gt_error_check(err);
  gt_assert(file_path);

  checker = gt_calloc(1UL, sizeof *checker);
  checker->xpt = gt_xrf_abbr_parse_tree_new(file_path, err);
  if (checker->xpt == NULL) {
    /* parsing failed: release the partially set up checker */
    gt_xrf_checker_delete(checker);
    return NULL;
  }

  /* keys and values are borrowed from the parse tree, hence no free funcs */
  checker->abbrvs = gt_hashmap_new(GT_HASH_STRING, NULL, NULL);
  while (entry_idx < gt_xrf_abbr_parse_tree_num_of_entries(checker->xpt)) {
    const GtXRFAbbrEntry *entry =
                      gt_xrf_abbr_parse_tree_get_entry(checker->xpt, entry_idx);
    const char *synonym;

    gt_hashmap_add(checker->abbrvs,
                   (void*) gt_xrf_abbr_entry_get_value(entry, "abbreviation"),
                   (void*) entry);
    synonym = gt_xrf_abbr_entry_get_value(entry, "synonym");
    if (synonym != NULL)
      gt_hashmap_add(checker->abbrvs, (void*) synonym, (void*) entry);
    entry_idx++;
  }

  checker->splitter = gt_splitter_new();
  return checker;
}
Ejemplo n.º 4
0
Archivo: init.c Proyecto: 9beckert/TIR
static void proc_env_options(void)
{
  int argc;
  char *env_options, **argv;
  GtSplitter *splitter;
  GtError *err;
  /* construct argument vector from $GT_ENV_OPTIONS */
  env_options = getenv("GT_ENV_OPTIONS");
  if (!env_options)
    return;
  env_options = gt_cstr_dup(env_options); /* make writeable copy */
  splitter = gt_splitter_new();
  gt_splitter_split(splitter, env_options, strlen(env_options), ' ');
  argc = gt_splitter_size(splitter);
  argv = gt_cstr_array_preprend((const char**) gt_splitter_get_tokens(splitter),
                             "env");
  argc++;
  /* parse options contained in $GT_ENV_OPTIONS */
  err = gt_error_new();
  switch (parse_env_options(argc, (const char**) argv, err)) {
    case GT_OPTION_PARSER_OK: break;
    case GT_OPTION_PARSER_ERROR:
      fprintf(stderr, "error parsing $GT_ENV_OPTIONS: %s\n", gt_error_get(err));
      gt_error_unset(err);
      break;
    case GT_OPTION_PARSER_REQUESTS_EXIT: break;
  }
  gt_error_delete(err);
  gt_free(env_options);
  gt_splitter_delete(splitter);
  gt_cstr_array_delete(argv);
}
Ejemplo n.º 5
0
/* Determines the directory in which <file> resides and stores it in <path>.
   If <file> already contains a directory part, that part is stored. Otherwise
   the directories listed in the environment variable <env> are tried in order
   and the first one for which <file_exists> accepts "<dir><sep><file>" is
   stored. If no directory matches, <path> is left empty. Returns -1 (with
   <err> set) only if <env> is not defined, 0 otherwise. */
static int file_find_in_env_generic(GtStr *path, const char *file,
                                    const char *env, FileExistsFunc file_exists,
                                    GtError *err)
{
  const char *env_value;
  char *dir_list = NULL, *dir = NULL;
  GtSplitter *dir_splitter = NULL;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(file);
  gt_assert(file_exists);

  /* a file name with a directory part needs no lookup */
  gt_file_dirname(path, file);
  if (gt_str_length(path) != 0)
    return had_err;

  /* no directory part: go through the directories listed in $env */
  env_value = getenv(env);
  if (env_value == NULL) {
    gt_error_set(err, "environment variable $%s is not defined", env);
    had_err = -1;
  }
  else {
    GtUword idx, num_dirs;

    dir_list = gt_cstr_dup(env_value); /* the splitter needs a writable copy */
    dir_splitter = gt_splitter_new();
    gt_splitter_split(dir_splitter, dir_list, (GtUword) strlen(dir_list),
                      GT_PATH_VAR_SEPARATOR);
    num_dirs = gt_splitter_size(dir_splitter);

    for (idx = 0; idx < num_dirs; idx++) {
      dir = gt_splitter_get_token(dir_splitter, idx);
      /* assemble the candidate "<dir><sep><file>" in path */
      gt_str_reset(path);
      gt_str_append_cstr(path, dir);
      gt_str_append_char(path, GT_PATH_SEPARATOR);
      gt_str_append_cstr(path, file);
      if (file_exists(gt_str_get(path)))
        break;
    }

    /* report only the directory on success, nothing otherwise */
    gt_str_reset(path);
    if (idx < num_dirs)
      gt_str_append_cstr(path, dir);
  }

  /* free */
  gt_free(dir_list);
  gt_splitter_delete(dir_splitter);

  return had_err;
}
Ejemplo n.º 6
0
/* Unit test for the GtSplitter class. A single splitter object is reused to
   split six fixed strings; after each split the number of tokens and the
   content of every token are checked with gt_ensure(). Returns <had_err>,
   which starts out as 0.
   NOTE(review): gt_ensure() is a macro defined elsewhere; presumably it sets
   <had_err> and <err> when a check fails -- confirm against its definition. */
int gt_splitter_unit_test(GtError *err)
{
  /* the inputs are arrays (not string literals), so they can be passed to
     gt_splitter_split() as non-const buffers */
  static char string_1[]  = "a bb ccc dddd eeeee",
              string_2[]  = "a\tbb\tccc\tdddd\teeeee",
              string_3[]  = "",
              string_4[]  = "a  b",
              string_5[]  = "ac bc ",
              string_6[]  = "test";
  GtSplitter *s;
  int had_err = 0;
  gt_error_check(err);
  s = gt_splitter_new();

  /* string_1: five blank separated words; a new splitter must be empty */
  gt_ensure(!gt_splitter_size(s));
  gt_splitter_split(s, string_1, strlen(string_1), ' ');
  gt_ensure(gt_splitter_size(s) == 5);
  gt_ensure(strcmp(gt_splitter_get_token(s, 0), "a") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 1), "bb") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 2), "ccc") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 3), "dddd") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 4), "eeeee") == 0);
  gt_splitter_reset(s);

  /* string_2: same words, tab as delimiter; the splitter must be empty again
     after the reset */
  gt_ensure(!gt_splitter_size(s));
  gt_splitter_split(s, string_2, strlen(string_2), '\t');
  gt_ensure(gt_splitter_size(s) == 5);
  gt_ensure(strcmp(gt_splitter_get_token(s, 0), "a") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 1), "bb") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 2), "ccc") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 3), "dddd") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 4), "eeeee") == 0);
  gt_splitter_reset(s);

  /* string_3: the empty string is expected to yield one empty token */
  gt_ensure(!gt_splitter_size(s));
  gt_splitter_split(s, string_3, strlen(string_3), '\t');
  gt_ensure(gt_splitter_size(s) == 1);
  gt_ensure(strcmp(gt_splitter_get_token(s, 0), "") == 0);
  gt_splitter_reset(s);

  /* string_4: two adjacent delimiters are expected to enclose an empty
     token */
  gt_ensure(!gt_splitter_size(s));
  gt_splitter_split(s, string_4, strlen(string_4), ' ');
  gt_ensure(gt_splitter_size(s) == 3);
  gt_ensure(strcmp(gt_splitter_get_token(s, 0), "a") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 1), "") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 2), "b") == 0);
  gt_splitter_reset(s);

  /* string_5: a trailing delimiter is expected to produce a final empty
     token */
  gt_ensure(!gt_splitter_size(s));
  gt_splitter_split(s, string_5, strlen(string_5), ' ');
  gt_ensure(gt_splitter_size(s) == 3);
  gt_ensure(strcmp(gt_splitter_get_token(s, 0), "ac") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 1), "bc") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 2), "") == 0);
  gt_splitter_reset(s);

  /* string_6: a delimiter that does not occur leaves the input as the only
     token */
  gt_ensure(!gt_splitter_size(s));
  gt_splitter_split(s, string_6, strlen(string_6), ';');
  gt_ensure(gt_splitter_size(s) == 1);
  gt_ensure(strcmp(gt_splitter_get_token(s, 0), "test") == 0);

  /* free */
  gt_splitter_delete(s);

  return had_err;
}
/* Checks and completes the parsed arguments of the read compression tool.
   <tool_arguments> must point to a GtCsrHcrEncodeArguments object.
   - At least one input file is required.
   - If no name was given, it is derived from the basename of the (single)
     input file by removing everything from the last '.' on. If that would
     leave the name empty (basename without '.', or with only a leading '.'),
     the complete basename is used instead. With more than one input file an
     explicit name is mandatory.
   - The sampling method ("page", "regular" or "none") is translated into the
     corresponding flags and an undefined sampling rate is replaced by the
     default of the chosen method; contradicting rates are rejected.
   - A given quality range is copied to <qrng> if both bounds fit into an
     unsigned int.
   Returns 0 on success and -1 (with <err> set) otherwise. */
static int gt_compreads_compress_arguments_check(GT_UNUSED int rest_argc,
                                       void *tool_arguments,
                                       GtError *err)
{
  int had_err = 0;
  GtCsrHcrEncodeArguments *arguments = tool_arguments;
  gt_error_check(err);
  gt_assert(arguments);

  if (gt_str_array_size(arguments->files) == 0) {
    gt_error_set(err, "option \"-files\" is mandatory and requires"
                      " at least one filename as argument!");
    had_err = -1;
  }

  if (!had_err && gt_str_length(arguments->name) == 0) {
    if (gt_str_array_size(arguments->files) > 1UL) {
      gt_error_set(err, "option \"-name\" needs to be specified"
                        " if more than one file is given");
      had_err = -1;
    }
    else {
      GtUword i, num_tokens;
      char *basename = gt_basename(gt_str_array_get(arguments->files, 0));
      /* split a copy, so that <basename> stays available as fallback */
      GtStr *buffer = gt_str_new_cstr(basename);
      GtSplitter *splitter = gt_splitter_new();

      gt_splitter_split(splitter, gt_str_get(buffer), gt_str_length(buffer),
                        '.');
      num_tokens = gt_splitter_size(splitter);
      /* join all tokens but the last one (the file suffix) with '.';
         "i + 1 < num_tokens" avoids unsigned wrap-around for small counts */
      for (i = 0; i + 1 < num_tokens; i++) {
        if (i > 0)
          gt_str_append_char(arguments->name, '.');
        gt_str_append_cstr(arguments->name,
                           gt_splitter_get_token(splitter, i));
      }
      /* no suffix to strip (or nothing left after stripping): do not leave
         the name empty, use the whole basename */
      if (gt_str_length(arguments->name) == 0)
        gt_str_append_cstr(arguments->name, basename);

      gt_free(basename);
      gt_splitter_delete(splitter);
      gt_str_delete(buffer);
    }
  }

  if (!had_err) {
    char *sampling_type = gt_str_get(arguments->method);
    static const char *methods[] = { "page", "regular", "none" };

    if (!strcmp(methods[0], sampling_type)) {
      arguments->pagewise = true;
      if (arguments->srate == GT_UNDEF_UWORD)
        arguments->srate = GT_SAMPLING_DEFAULT_PAGE_RATE;
      else if (arguments->srate == 0) {
        gt_error_set(err, "page sampling was chosen, but sampling"
                          " rate was set to "GT_WU"! this seems wrong.",
                     arguments->srate);
        had_err = -1;
      }
    }
    else if (!strcmp(methods[1], sampling_type)) {
      arguments->regular = true;
      if (arguments->srate == GT_UNDEF_UWORD)
        arguments->srate = GT_SAMPLING_DEFAULT_REGULAR_RATE;
      else if (arguments->srate == 0) {
        gt_error_set(err, "regular sampling was chosen, but sampling rate "
                          " was set to "GT_WU"! this seems wrong.",
                     arguments->srate);
        had_err = -1;
      }
    }
    else if (!strcmp(methods[2], sampling_type)) {
      /* without sampling only a rate of 0 makes sense */
      if (arguments->srate == GT_UNDEF_UWORD)
        arguments->srate = 0;
      else if (arguments->srate != 0) {
        gt_error_set(err, "no sampling was chosen, but sampling rate was"
                          " set to "GT_WU"! this seems wrong.",
                          arguments->srate);
        had_err = -1;
      }
    }
    else {
      gt_error_set(err, "somethings wrong with the stype option");
      had_err = -1;
    }
  }

  if (!had_err) {
    /* an undefined range start means that no quality range was given */
    if (arguments->arg_range.start != GT_UNDEF_UWORD) {
      if (arguments->arg_range.start <= (GtUword) UINT_MAX) {
        gt_safe_assign(arguments->qrng.start, arguments->arg_range.start);
        if (arguments->arg_range.end <= (GtUword) UINT_MAX)
          gt_safe_assign(arguments->qrng.end, arguments->arg_range.end);
        else
          had_err = -1;
      }
      else
        had_err = -1;
    }
    if (had_err)
      gt_error_set(err, "Range for qualities: value to large! larger than %u",
                   UINT_MAX);
  }
  return had_err;
}
/* Reads the hit table <table_filename> written by the coarse search line by
   line. Lines starting with '#' are skipped, all other lines are split at
   blanks: column 0 is parsed as the number of the target sequence, column 3
   is the query name. For every hit, hmmsearch_process_seq() is applied to
   the redundant sequences of the target in <ces>, collecting them in a tree.
   Whenever the number of distinct consecutive queries reaches
   <arguments->max_queries>, and once more after the last line, the collected
   sequences are written via hmmsearch_create_fine_fas() and the fine search
   is started with hmmsearch_call_fine_search(). If an output table name was
   given, a running file number is appended to it for each fine search.
   Lines with too few columns to contain the query name are reported as an
   error instead of being accessed out of bounds.
   Returns 0 on success and a non-zero value (with <err> set) otherwise. */
static int hmmsearch_process_coarse_hits(
                                       char *table_filename,
                                       GtCondenseq *ces,
                                       GtCondenseqHmmsearchArguments *arguments,
                                       GtLogger *logger,
                                       GtError *err) {
  int had_err = 0;
  GtStr *line = gt_str_new();
  FILE *table = NULL;
  GtSplitter *splitter = gt_splitter_new();
  GtStr *query = gt_str_new(),
        *fine_fasta_filename = gt_str_new_cstr("condenseq");
  GtRBTree *sequences = NULL;
  GtUword filecount = (GtUword) 1;
  unsigned int querycount = 0;
  /* original lengths, used to cut off whatever gets appended per batch */
  const GtUword fine_fasta_name_length = gt_str_length(fine_fasta_filename);
  const GtUword table_name_length = gt_str_length(arguments->outtable_filename);

  table = gt_xfopen(table_filename, "r");

  sequences = gt_rbtree_new(hmmsearch_cmp_seqnum,
                            hmmsearch_tree_free_node, NULL);

  while (!had_err && gt_str_read_next_line(line, table) == 0) {
    char *c_line = gt_str_get(line);
    GtUword uid = 0;
    const GtUword target_column = 0,
          query_column = (GtUword) 3;

    if (c_line[0] != '#') {
      gt_splitter_split_non_empty(splitter, c_line, gt_str_length(line), ' ');
      if (gt_splitter_size(splitter) <= query_column) {
        /* the table is external input: never index columns which are not
           there, even if assertions are compiled out */
        gt_error_set(err, "line in hit table \"%s\" has only " GT_WU
                     " columns, at least " GT_WU " are required",
                     table_filename, gt_splitter_size(splitter),
                     query_column + 1);
        had_err = -1;
      }
      else {
        /* expected layout of a regular hit line */
        gt_assert(gt_splitter_size(splitter) == (GtUword) 23);
        if (sscanf(gt_splitter_get_token(splitter, target_column),
                   GT_WU, &uid) != 1) {
          gt_error_set(err, "couldn't parse target number: %s",
                       gt_splitter_get_token(splitter, target_column));
          had_err = -1;
        }
        /* hits of one query are consecutive: a differing name starts a new
           query */
        if (gt_str_length(query) == 0 ||
            strcmp(gt_str_get(query),
                   gt_splitter_get_token(splitter, query_column)) != 0) {
          gt_str_set(query, gt_splitter_get_token(splitter, query_column));
          gt_logger_log(logger, "new query: %s", gt_str_get(query));
          querycount++;
        }
        if (!had_err && querycount == arguments->max_queries) {
          /* batch is full: search in the sequences collected so far */
          hmmsearch_create_fine_fas(fine_fasta_filename, sequences, ces);
          if (table_name_length != 0)
            gt_str_append_uword(arguments->outtable_filename, filecount++);
          had_err =
            hmmsearch_call_fine_search(table_name_length != 0 ?
                                         arguments->outtable_filename :
                                         NULL,
                                       gt_str_get(fine_fasta_filename),
                                       gt_str_get(arguments->hmmsearch_path),
                                       gt_str_get(arguments->hmm),
                                       logger, err);
          /* start the next batch with empty tree and original file names */
          gt_rbtree_clear(sequences);
          gt_str_set_length(fine_fasta_filename, fine_fasta_name_length);
          if (table_name_length != 0)
            gt_str_set_length(arguments->outtable_filename, table_name_length);
          querycount = 0;
        }
        if (!had_err) {
          /* a result of 0 is treated as failure here */
          if (gt_condenseq_each_redundant_seq(ces, uid,
                                              hmmsearch_process_seq,
                                              sequences, err) == 0) {
            had_err = -1;
          }
        }
      }
      gt_splitter_reset(splitter);
    }
    gt_str_reset(line);
  }
  gt_splitter_delete(splitter);
  gt_str_delete(line);
  gt_str_delete(query);
  gt_xfclose(table);

  if (!had_err) {
    /* search in the sequences of the last (possibly incomplete) batch */
    hmmsearch_create_fine_fas(fine_fasta_filename, sequences, ces);
    if (table_name_length != 0)
      gt_str_append_uword(arguments->outtable_filename, filecount++);
    had_err =
      hmmsearch_call_fine_search(table_name_length != 0 ?
                                 arguments->outtable_filename :
                                 NULL,
                                 gt_str_get(fine_fasta_filename),
                                 gt_str_get(arguments->hmmsearch_path),
                                 gt_str_get(arguments->hmm),
                                 logger, err);
  }
  gt_log_log("created " GT_WU " files", filecount);
  gt_rbtree_delete(sequences);
  gt_str_delete(fine_fasta_filename);
  return had_err;
}
Ejemplo n.º 9
0
/* Shows the help of <progname> by executing the corresponding Lua doc file
   "<gtdata path>/doc/<name>.lua". <progname> is split at blanks: if it
   consists of a single word, <name> is the basename of <progname>, otherwise
   it is the last word. Before the doc file is run, the doc directory is made
   available to it as the Lua global "gtdata_doc_dir". Returns 0 on success
   and -1 (with <err> set) otherwise. */
int gt_gtdata_show_help(const char *progname, GT_UNUSED void *unused,
                        GtError *err)
{
  lua_State *lua = NULL;
  GtSplitter *word_splitter;
  GtStr *doc_path;
  char *progname_copy;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(progname);

  /* the splitter needs a modifiable string */
  progname_copy = gt_cstr_dup(progname);
  word_splitter = gt_splitter_new();
  gt_splitter_split(word_splitter, progname_copy, strlen(progname_copy), ' ');

  /* the first word is used to locate the gtdata directory */
  doc_path = gt_get_gtdata_path(gt_splitter_get_token(word_splitter, 0), err);
  if (doc_path == NULL)
    had_err = -1;
  else {
    gt_str_append_cstr(doc_path, "/doc/");
    /* set up the Lua interpreter which will run the doc file */
    lua = luaL_newstate();
    if (lua == NULL) {
      gt_error_set(err, "out of memory (cannot create new Lua state)");
      had_err = -1;
    }
  }

  if (!had_err) {
    /* at this point doc_path is still the doc directory: export it */
    luaL_openlibs(lua);
    lua_pushstring(lua, gt_str_get(doc_path));
    lua_setglobal(lua, "gtdata_doc_dir");

    /* complete doc_path with the name of the doc file */
    if (gt_splitter_size(word_splitter) != 1) {
      /* a tool: named after the last word */
      gt_str_append_cstr(doc_path,
                         gt_splitter_get_token(word_splitter,
                                        gt_splitter_size(word_splitter) - 1));
    }
    else {
      /* `gt` itself: named after the basename of the program */
      char *base = gt_basename(progname);
      gt_str_append_cstr(doc_path, base);
      gt_free(base);
    }
    gt_str_append_cstr(doc_path, ".lua");

    /* load and run the doc file */
    if (luaL_loadfile(lua, gt_str_get(doc_path)) || lua_pcall(lua, 0, 0, 0)) {
      gt_error_set(err, "cannot run doc file: %s", lua_tostring(lua, -1));
      had_err = -1;
    }
  }

  /* free */
  if (lua != NULL)
    lua_close(lua);
  gt_str_delete(doc_path);
  gt_splitter_delete(word_splitter);
  gt_free(progname_copy);

  return had_err;
}
Ejemplo n.º 10
0
/* Parses the GTF data read from <fpin> line by line.
   - Blank lines are skipped with a warning.
   - Lines starting with '#' are added to <genome_nodes> as comment nodes.
   - All other lines must consist of 9 tab separated fields. Lines with an
     unknown feature type are skipped with a message on stderr. For the
     remaining lines range, score, strand, frame and the attributes are
     parsed; "gene_id"-like and "transcript_id"-like attributes (see
     GENE_ID_ATTRIBUTE and TRANSCRIPT_ID_ATTRIBUTE) are mandatory, the
     corresponding name attributes are optional. The resulting feature nodes
     are collected per gene and transcript in the hashes of <parser>.
   After the last line the region nodes are built and the genes are
   constructed from the collected features (construct_genes()), both going
   into <genome_nodes>.
   <filenamestr> is used as origin of the created nodes and in messages.
   If <be_tolerant> is true, a line whose fields cannot be parsed is reported
   on stderr and skipped; otherwise parsing stops at the first such line.
   Returns 0 on success and a non-zero value (with <err> set) on failure. */
int gt_gtf_parser_parse(GtGTFParser *parser, GtQueue *genome_nodes,
                        GtStr *filenamestr, GtFile *fpin, bool be_tolerant,
                        GtError *err)
{
  GtStr *seqid_str, *source_str, *line_buffer;
  char *line;
  size_t line_length;
  GtUword i, line_number = 0;
  GtGenomeNode *gn;
  GtRange range;
  GtPhase phase_value;
  GtStrand gt_strand_value;
  GtSplitter *splitter, *attribute_splitter;
  float score_value;
  char *seqname,
       *source,
       *feature,
       *start,
       *end,
       *score,
       *strand,
       *frame,
       *attributes,
       *token,
       *gene_id,
       *gene_name = NULL,
       *transcript_id,
       *transcript_name = NULL,
       **tokens;
  GtHashmap *transcript_id_hash; /* map from transcript id to array of genome
                                    nodes */
  GtArray *gt_genome_node_array;
  ConstructionInfo cinfo;
  GTF_feature_type gtf_feature_type;
  GT_UNUSED bool gff_type_is_valid = false;
  const char *type = NULL;
  const char *filename;
  bool score_is_defined;
  int had_err = 0;

  gt_assert(parser && genome_nodes);
  gt_error_check(err);

  filename = gt_str_get(filenamestr);

  /* alloc */
  line_buffer = gt_str_new();
  splitter = gt_splitter_new();
  attribute_splitter = gt_splitter_new();

/* To be used directly in the body of the line loop below (not inside a
   nested loop): in tolerant mode the current line is reported and skipped,
   otherwise the line loop is left with the error set. */
#define HANDLE_ERROR                                                \
        if (had_err) {                                              \
          if (be_tolerant) {                                        \
            fprintf(stderr, "skipping line: %s\n", gt_error_get(err)); \
            gt_error_unset(err);                                       \
            gt_str_reset(line_buffer);                                 \
            had_err = 0;                                            \
            continue;                                               \
          }                                                         \
          else {                                                    \
            had_err = -1;                                           \
            break;                                                  \
          }                                                         \
        }

  while (gt_str_read_next_line_generic(line_buffer, fpin) != EOF) {
    line = gt_str_get(line_buffer);
    line_length = gt_str_length(line_buffer);
    line_number++;
    had_err = 0;

    if (line_length == 0) {
      gt_warning("skipping blank line " GT_WU " in file \"%s\"", line_number,
                 filename);
    }
    else if (line[0] == '#') {
      /* storing comment */
      if (line_length >= 2 && line[1] == '#')
        gn = gt_comment_node_new(line+2); /* store '##' line as '#' line */
      else
        gn = gt_comment_node_new(line+1);
      gt_genome_node_set_origin(gn, filenamestr, line_number);
      gt_queue_add(genome_nodes, gn);
    }
    else {
      /* process tab delimited GTF line */
      gt_splitter_reset(splitter);
      gt_splitter_split(splitter, line, line_length, '\t');
      if (gt_splitter_size(splitter) != 9UL) {
        gt_error_set(err, "line " GT_WU " in file \"%s\" contains " GT_WU
                     " tab (\\t) " "separated fields instead of 9", line_number,
                     filename,
                  gt_splitter_size(splitter));
        had_err = -1;
        /* a wrong number of fields stops parsing even in tolerant mode */
        break;
      }
      tokens = gt_splitter_get_tokens(splitter);
      seqname    = tokens[0];
      source     = tokens[1];
      feature    = tokens[2];
      start      = tokens[3];
      end        = tokens[4];
      score      = tokens[5];
      strand     = tokens[6];
      frame      = tokens[7];
      attributes = tokens[8];

      /* parse feature */
      if (GTF_feature_type_get(&gtf_feature_type, feature) == -1) {
        /* we skip unknown features */
        fprintf(stderr, "skipping line " GT_WU " in file \"%s\": unknown "
                "feature: \"%s\"\n", line_number, filename, feature);
        gt_str_reset(line_buffer);
        continue;
      }

      /* translate into GFF3 feature type */
      switch (gtf_feature_type) {
        case GTF_CDS:
        case GTF_stop_codon:
          gff_type_is_valid = gt_type_checker_is_valid(parser->type_checker,
                                                       gt_ft_CDS);
          type = gt_ft_CDS;
          break;
        case GTF_exon:
          gff_type_is_valid = gt_type_checker_is_valid(parser->type_checker,
                                                       gt_ft_exon);
          type = gt_ft_exon;
      }
      gt_assert(gff_type_is_valid);

      /* parse the range */
      had_err = gt_parse_range(&range, start, end, line_number, filename, err);
      HANDLE_ERROR;

      /* process seqname (we have to do it here because we need the range) */
      gt_region_node_builder_add_region(parser->region_node_builder, seqname,
                                        range);

      /* parse the score */
      had_err = gt_parse_score(&score_is_defined, &score_value, score,
                               line_number, filename, err);
      HANDLE_ERROR;

      /* parse the strand */
      had_err = gt_parse_strand(&gt_strand_value, strand, line_number, filename,
                               err);
      HANDLE_ERROR;

      /* parse the frame */
      had_err = gt_parse_phase(&phase_value, frame, line_number, filename, err);
      HANDLE_ERROR;

      /* parse the attributes */
      gt_splitter_reset(attribute_splitter);
      gene_id = NULL;
      transcript_id = NULL;
      /* the optional names are derived from the tokens of the current line,
         they must not be carried over from a previous line */
      gene_name = NULL;
      transcript_name = NULL;
      gt_splitter_split(attribute_splitter, attributes, strlen(attributes),
                        ';');
      /* errors end this loop; they are handled right behind it, because
         HANDLE_ERROR has to act on the line loop */
      for (i = 0; !had_err && i < gt_splitter_size(attribute_splitter); i++) {
        size_t value_length;
        token = gt_splitter_get_token(attribute_splitter, i);
        /* skip leading blanks */
        while (*token == ' ')
          token++;
        /* look for the two mandatory attributes; a value needs at least one
           character behind the attribute name and the separator */
        if (strncmp(token, GENE_ID_ATTRIBUTE, strlen(GENE_ID_ATTRIBUTE)) == 0) {
          if (strlen(token) < strlen(GENE_ID_ATTRIBUTE) + 2) {
            gt_error_set(err, "missing value to attribute \"%s\" on line "
                         GT_WU " in file \"%s\"", GENE_ID_ATTRIBUTE,
                         line_number, filename);
            had_err = -1;
          }
          else
            gene_id = token + strlen(GENE_ID_ATTRIBUTE) + 1;
        }
        else if (strncmp(token, TRANSCRIPT_ID_ATTRIBUTE,
                         strlen(TRANSCRIPT_ID_ATTRIBUTE)) == 0) {
          if (strlen(token) < strlen(TRANSCRIPT_ID_ATTRIBUTE) + 2) {
            gt_error_set(err, "missing value to attribute \"%s\" on line "
                         GT_WU " in file \"%s\"", TRANSCRIPT_ID_ATTRIBUTE,
                         line_number, filename);
            had_err = -1;
          }
          else
            transcript_id = token + strlen(TRANSCRIPT_ID_ATTRIBUTE) + 1;
        }
        else if (strncmp(token, GENE_NAME_ATTRIBUTE,
                         strlen(GENE_NAME_ATTRIBUTE)) == 0) {
          if (strlen(token) < strlen(GENE_NAME_ATTRIBUTE) + 2) {
            gt_error_set(err, "missing value to attribute \"%s\" on line "
                         GT_WU " in file \"%s\"", GENE_NAME_ATTRIBUTE,
                         line_number, filename);
            had_err = -1;
          }
          else {
            gene_name = token + strlen(GENE_NAME_ATTRIBUTE) + 1;
            /* for output we want to strip quotes */
            if (*gene_name == '"')
              gene_name++;
            value_length = strlen(gene_name);
            if (value_length > 0 && gene_name[value_length-1] == '"')
              gene_name[value_length-1] = '\0';
          }
        }
        else if (strncmp(token, TRANSCRIPT_NAME_ATTRIBUTE,
                         strlen(TRANSCRIPT_NAME_ATTRIBUTE)) == 0) {
          if (strlen(token) < strlen(TRANSCRIPT_NAME_ATTRIBUTE) + 2) {
            gt_error_set(err, "missing value to attribute \"%s\" on line "
                         GT_WU " in file \"%s\"", TRANSCRIPT_NAME_ATTRIBUTE,
                         line_number, filename);
            had_err = -1;
          }
          else {
            transcript_name = token + strlen(TRANSCRIPT_NAME_ATTRIBUTE) + 1;
            /* for output we want to strip quotes */
            if (*transcript_name == '"')
              transcript_name++;
            value_length = strlen(transcript_name);
            if (value_length > 0 && transcript_name[value_length-1] == '"')
              transcript_name[value_length-1] = '\0';
          }
        }
      }
      HANDLE_ERROR;

      /* check for the mandatory attributes */
      if (!gene_id) {
        gt_error_set(err, "missing attribute \"%s\" on line " GT_WU
                     " in file \"%s\"", GENE_ID_ATTRIBUTE, line_number,
                     filename);
        had_err = -1;
      }
      HANDLE_ERROR;
      if (!transcript_id) {
        gt_error_set(err, "missing attribute \"%s\" on line " GT_WU
                     " in file \"%s\"", TRANSCRIPT_ID_ATTRIBUTE, line_number,
                     filename);
        had_err = -1;
      }
      HANDLE_ERROR;

      /* process the mandatory attributes */
      if (!(transcript_id_hash = gt_hashmap_get(parser->gene_id_hash,
                                             gene_id))) {
        transcript_id_hash = gt_hashmap_new(GT_HASH_STRING, gt_free_func,
                                            (GtFree) gt_array_delete);
        gt_hashmap_add(parser->gene_id_hash, gt_cstr_dup(gene_id),
                    transcript_id_hash);
      }
      gt_assert(transcript_id_hash);

      if (!(gt_genome_node_array = gt_hashmap_get(transcript_id_hash,
                                            transcript_id))) {
        gt_genome_node_array = gt_array_new(sizeof (GtGenomeNode*));
        gt_hashmap_add(transcript_id_hash, gt_cstr_dup(transcript_id),
                    gt_genome_node_array);
      }
      gt_assert(gt_genome_node_array);

      /* save optional gene_name and transcript_name attributes (the first
         name seen for an ID wins) */
      if (transcript_name
            && !gt_hashmap_get(parser->transcript_id_to_name_mapping,
                             transcript_id)) {
        gt_hashmap_add(parser->transcript_id_to_name_mapping,
                    gt_cstr_dup(transcript_id),
                    gt_cstr_dup(transcript_name));
      }
      if (gene_name && !gt_hashmap_get(parser->gene_id_to_name_mapping,
                                    gene_id)) {
        gt_hashmap_add(parser->gene_id_to_name_mapping,
                    gt_cstr_dup(gene_id),
                    gt_cstr_dup(gene_name));
      }

      /* get seqid */
      seqid_str = gt_hashmap_get(parser->seqid_to_str_mapping, seqname);
      if (!seqid_str) {
        seqid_str = gt_str_new_cstr(seqname);
        gt_hashmap_add(parser->seqid_to_str_mapping, gt_str_get(seqid_str),
                       seqid_str);
      }
      gt_assert(seqid_str);

      /* construct the new feature */
      gn = gt_feature_node_new(seqid_str, type, range.start, range.end,
                                 gt_strand_value);
      gt_genome_node_set_origin(gn, filenamestr, line_number);

      /* set source */
      source_str = gt_hashmap_get(parser->source_to_str_mapping, source);
      if (!source_str) {
        source_str = gt_str_new_cstr(source);
        gt_hashmap_add(parser->source_to_str_mapping, gt_str_get(source_str),
                    source_str);
      }
      gt_assert(source_str);
      gt_feature_node_set_source((GtFeatureNode*) gn, source_str);

      if (score_is_defined)
        gt_feature_node_set_score((GtFeatureNode*) gn, score_value);
      if (phase_value != GT_PHASE_UNDEFINED)
        gt_feature_node_set_phase((GtFeatureNode*) gn, phase_value);
      gt_array_add(gt_genome_node_array, gn);
    }

    gt_str_reset(line_buffer);
  }

  /* process all region nodes */
  if (!had_err)
    gt_region_node_builder_build(parser->region_node_builder, genome_nodes);

  /* process all feature nodes */
  cinfo.genome_nodes = genome_nodes;
  cinfo.gene_id_to_name_mapping = parser->gene_id_to_name_mapping;
  cinfo.transcript_id_to_name_mapping = parser->transcript_id_to_name_mapping;
  if (!had_err) {
    had_err = gt_hashmap_foreach(parser->gene_id_hash, construct_genes,
                              &cinfo, err);
  }

  /* free */
  gt_splitter_delete(splitter);
  gt_splitter_delete(attribute_splitter);
  gt_str_delete(line_buffer);

  return had_err;
}