Exemple #1
0
int gt_mmapandread(int argc, const char **argv, GtError *err)
{
  int i, fd, parsed_args;
  void *map;
  struct stat sb;
  unsigned long long j;
  unsigned int byte = 0;
  gt_error_check(err);

  /* option parsing */
  switch (parse_options(&parsed_args, argc, argv, err)) {
    case GT_OPTION_PARSER_OK: break;
    case GT_OPTION_PARSER_ERROR: return -1;
    case GT_OPTION_PARSER_REQUESTS_EXIT: return 0;
  }

  /* iterate over all files */
  for (i = parsed_args; i < argc; i++) {
    /* open file */
    fd = gt_xopen(argv[i], O_RDONLY, 0);

    /* get file statistics */
    gt_xfstat(fd, &sb);

    if (sb.st_size == 0)
      printf("file \"%s\" is empty\n", argv[i]);
    else if (!(sb.st_mode & S_IFREG))
      printf("\"%s\" is not a regular file\n", argv[i]);
    else {
      /* map file */
      map = gt_xmmap(0, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);

      /* read file */
      printf("reading file \"%s\"\n", argv[i]);
      j = 0;
      gt_progressbar_start(&j, (unsigned long long) sb.st_size);
      for (; j < (unsigned long long) sb.st_size; j++)
        byte |= (unsigned int) ((char*) map)[j];
      gt_progressbar_stop();

      /* unmap file */
      gt_xmunmap(map, sb.st_size);
    }

    /* close file */
    gt_xclose(fd);
  }

  if (!byte)
    printf("all read files contained only null characters\n");

  return 0;
}
Exemple #2
0
unsigned long gt_contfind_bottomup(Sequentialsuffixarrayreader *ssar,
                     bool show_progressbar, GtBitsequence *contained,
                     unsigned long firstrevcompl,
                     unsigned long read_length /* 0 = variable */)
{
  ContfindBUstate state;
  unsigned long totallength;
  GT_UNUSED int retval;

  gt_assert(ssar != NULL);
  gt_assert(contained != NULL);

  state.contained = contained;
  state.encseq = gt_encseqSequentialsuffixarrayreader(ssar);
  totallength = gt_encseq_total_length(state.encseq);
  state.nofsequences = gt_encseq_num_of_sequences(state.encseq);

  if (read_length == 0)
  {
    prepare_sspbittab_and_shortest(totallength, &state);
  }
  else
  {
    state.shortest = read_length;
    state.spacing = read_length + 1;
  }

  state.show_progressbar = show_progressbar;
  state.csize            = 0;
  state.cmin             = 0;
  state.firstrevcompl    = firstrevcompl;
  state.counter          = 0;

  if (show_progressbar)
  {
    state.progress = 0;
    gt_progressbar_start(&(state.progress),
        (unsigned long long)totallength);
  }

  retval = (read_length == 0)
      ? gt_esa_bottomup_rdjcv(ssar, &state, NULL)
      : gt_esa_bottomup_rdjce(ssar, &state, NULL);
  gt_assert(retval == 0);

  if (show_progressbar)
    gt_progressbar_stop();
  if (read_length == 0)
    gt_free(state.sspbittab);
  return state.counter;
}
static inline void rdj_pairwise_generic(bool use_dp, GtOvlfindMode m,
    GtEncseq *encseq, bool revcompl, bool show_progressbar, bool use_kmp,
    double max_error, GtUword min_length, bool find_nonmaximal,
    GtSpmproc proc, GtSpmprocA proc_a, void* procdata, bool cntfilter,
    GtBitsequence *cntreads_in, GtBitsequence **cntreads_out,
    GtUword *nofreads)
{
  GtContfind containment_status;
  GtBitsequence *cntreads = NULL;
  GtUint64 progress = 0;
  GtUword i, j, startpos, v_seqnum, nofsequences, n;
  struct Read u, v;
  struct Data d;
  gt_kmp_t** kmp_values = NULL;

  GT_RDJ_PAIRWISE_INIT_STRUCT_DATA(d, proc, proc_a, procdata, &u, &v, 0);

  gt_assert(encseq != NULL);

  d.mode = m;
  if ((m == GT_OVLFIND_ALL) && cntfilter)
    d.mode = GT_OVLFIND_PROPER_SPM;

  n = gt_encseq_num_of_sequences(encseq);
  if (use_kmp)
    kmp_values = prepare_kmp_values(encseq, n);
  nofsequences = n;
  if (revcompl)
    n = n >> 1;
  if (cntreads_in != NULL)
    cntreads = cntreads_in;
  else if (m != GT_OVLFIND_SPM)
    GT_INITBITTAB(cntreads, n);
  if (show_progressbar) gt_progressbar_start(&progress, (GtUint64)n *
      ((GtUint64)n - 1ULL) / 2ULL);

  for (i = 0; i < n; i++)
  {
    u.seqnum = i;
    u.direct = true;
    u.len = gt_encseq_seqlength(encseq, i);
    u.seq = gt_malloc(sizeof (char) * (u.len + 1));
    startpos = gt_encseq_seqstartpos(encseq, i);
    gt_encseq_extract_decoded(encseq, u.seq, startpos, startpos + u.len - 1);
    u.seq[u.len] = '\0';
    if (use_kmp)
    {
      gt_assert(kmp_values != NULL);
      u.pi = kmp_values[i];
    }

    for (j = i; j < n; j++)
    {
      if (cntfilter)
      {
        gt_assert(cntreads != NULL);
        if ((bool)GT_ISIBITSET(cntreads, i)) break;
        if ((bool)GT_ISIBITSET(cntreads, j)) continue;
      }

      v.seqnum = j;

      /* find overlaps using direct v */
      v.direct = true;
      v.len = gt_encseq_seqlength(encseq, j);
      v.seq = gt_malloc(sizeof (char) * (v.len + 1));
      startpos = gt_encseq_seqstartpos(encseq, j);
      gt_encseq_extract_decoded(encseq, v.seq, startpos,
          startpos + v.len - 1);
      v.seq[v.len] = '\0';
      if (use_kmp)
      {
        gt_assert(kmp_values != NULL);
        v.pi = kmp_values[j];
      }
      containment_status = use_dp
          ? find_approx_overlaps(&d, max_error, min_length, find_nonmaximal)
          : find_exact_overlaps(&d, use_kmp, min_length, find_nonmaximal);
      if (m != GT_OVLFIND_SPM)
        mark_contained(containment_status, u.seqnum, v.seqnum, cntreads);

      /* find overlaps using reverse complement of v */
      if (revcompl)
      {
        v_seqnum =  nofsequences - j - 1;
        v.direct = false;
        gt_assert(gt_encseq_seqlength(encseq, j) ==
            gt_encseq_seqlength(encseq, v_seqnum));
        startpos = gt_encseq_seqstartpos(encseq, v_seqnum);
        gt_encseq_extract_decoded(encseq, v.seq, startpos,
            startpos + v.len - 1);
        if (use_kmp)
        {
          gt_assert(kmp_values != NULL);
          v.pi = kmp_values[v_seqnum];
        }
        containment_status = use_dp
          ? find_approx_overlaps(&d, max_error, min_length, find_nonmaximal)
          : find_exact_overlaps(&d, use_kmp, min_length, find_nonmaximal);
        if (m != GT_OVLFIND_SPM)
          mark_contained(containment_status, u.seqnum, v.seqnum, cntreads);
      }
      gt_free(v.seq);
      progress++;
    }
    gt_free(u.seq);
  }

  if (cntreads_out != NULL)
    *cntreads_out = cntreads;
  else if (cntreads_in == NULL)
    gt_free(cntreads);
  if (nofreads != NULL)
    *nofreads = n;
  if (use_kmp)
    free_kmp_values(kmp_values, revcompl ? n << 1 : n);
  if (show_progressbar)
    gt_progressbar_stop();
}
Exemple #4
0
static int testfullscan(const GtStrArray *filenametab,
                        const Encodedsequence *encseq,
                        Readmode readmode,
                        GtError *err)
{
  Seqpos pos, totallength;
  GtUchar ccscan = 0, ccra, ccsr;
  GtSequenceBuffer *fb = NULL;
  int retval;
  bool haserr = false;
  Encodedsequencescanstate *esr;
  unsigned long long fullscanpbar = 0;

  gt_error_check(err);
  totallength = getencseqtotallength(encseq);
  gt_progressbar_start(&fullscanpbar,(unsigned long long) totallength);
  if (filenametab != NULL)
  {
    fb = gt_sequence_buffer_new_guess_type((GtStrArray*) filenametab, err);
    if (!fb)
      haserr = true;
    if (!haserr)
      gt_sequence_buffer_set_symbolmap(fb, getencseqAlphabetsymbolmap(encseq));
  }
  if (!haserr) {
    esr = newEncodedsequencescanstate();
    initEncodedsequencescanstate(esr,encseq,readmode,0);
    for (pos=0; /* Nothing */; pos++)
    {
      if (filenametab != NULL && readmode == Forwardmode)
      {
        retval = gt_sequence_buffer_next(fb,&ccscan,err);
        if (retval < 0)
        {
          haserr = true;
          break;
        }
        if (retval == 0)
        {
          break;
        }
      } else
      {
        if (pos >= totallength)
        {
          break;
        }
      }
      ccra = getencodedchar(encseq,pos,readmode); /* Random access */
      if (filenametab != NULL && readmode == Forwardmode)
      {
        if (ccscan != ccra)
        {
          gt_error_set(err,"access=%s, position=" FormatSeqpos
                            ": scan (readnextchar) = %u != "
                            "%u = random access",
                            encseqaccessname(encseq),
                            pos,
                            (unsigned int) ccscan,
                            (unsigned int) ccra);
          haserr = true;
          break;
        }
      }
      ccsr = sequentialgetencodedchar(encseq,esr,pos,readmode);
      if (ccra != ccsr)
      {
        gt_error_set(err,"access=%s, mode=%s: position=" FormatSeqpos
                          ": random access = %u != %u = sequential read",
                          encseqaccessname(encseq),
                          showreadmode(readmode),
                          pos,
                          (unsigned int) ccra,
                          (unsigned int) ccsr);
        haserr = true;
        break;
      }
      fullscanpbar++;
    }
    gt_progressbar_stop();
  }
  if (!haserr)
  {
    if (pos != totallength)
    {
      gt_error_set(err,"sequence length must be " FormatSeqpos " but is "
                         FormatSeqpos,totallength,pos);
      haserr = true;
    }
  }
  freeEncodedsequencescanstate(&esr);
  gt_sequence_buffer_delete(fb);
  return haserr ? -1 : 0;
}
Exemple #5
0
static int gt_sequniq_runner(int argc, const char **argv, int parsed_args,
                             void *tool_arguments, GtError *err)
{
  GtSequniqArguments *arguments = tool_arguments;
  GtUint64 duplicates = 0, num_of_sequences = 0;
  int i, had_err = 0;
  GtMD5Set *md5set;

  gt_error_check(err);
  gt_assert(arguments);
  md5set = gt_md5set_new(arguments->nofseqs);
  if (!arguments->seqit) {
    GtUword j;
    GtBioseq *bs;

    for (i = parsed_args; !had_err && i < argc; i++) {
      if (!(bs = gt_bioseq_new(argv[i], err)))
        had_err = -1;
      if (!had_err) {
        GtMD5SetStatus retval;
        for (j = 0; j < gt_bioseq_number_of_sequences(bs) && !had_err; j++) {
          char *seq = gt_bioseq_get_sequence(bs, j);
          retval = gt_md5set_add_sequence(md5set, seq,
                                          gt_bioseq_get_sequence_length(bs, j),
                                          arguments->rev, err);
          if (retval == GT_MD5SET_NOT_FOUND)
            gt_fasta_show_entry(gt_bioseq_get_description(bs, j), seq,
                                gt_bioseq_get_sequence_length(bs, j),
                                arguments->width, arguments->outfp);
          else if (retval != GT_MD5SET_ERROR)
            duplicates++;
          else
            had_err = -1;
          num_of_sequences++;
          gt_free(seq);
        }
        gt_bioseq_delete(bs);
      }
    }
  }
  else {
    GtSeqIterator *seqit;
    GtStrArray *files;
    off_t totalsize;
    const GtUchar *sequence;
    char *desc;
    GtUword len;

    files = gt_str_array_new();
    for (i = parsed_args; i < argc; i++)
      gt_str_array_add_cstr(files, argv[i]);
    totalsize = gt_files_estimate_total_size(files);
    seqit = gt_seq_iterator_sequence_buffer_new(files, err);
    if (!seqit)
      had_err = -1;
    if (!had_err) {
      if (arguments->verbose) {
        gt_progressbar_start(gt_seq_iterator_getcurrentcounter(seqit,
                                                          (GtUint64) totalsize),
                             (GtUint64) totalsize);
      }
      while (!had_err) {
        GtMD5SetStatus retval;
        if ((gt_seq_iterator_next(seqit, &sequence, &len, &desc, err)) != 1)
          break;

        retval = gt_md5set_add_sequence(md5set, (const char*) sequence, len,
                                        arguments->rev, err);
        if (retval == GT_MD5SET_NOT_FOUND)
          gt_fasta_show_entry(desc, (const char*) sequence, len,
                              arguments->width, arguments->outfp);
        else if (retval != GT_MD5SET_ERROR)
          duplicates++;
        else
          had_err = -1;
        num_of_sequences++;
      }
      if (arguments->verbose)
        gt_progressbar_stop();
      gt_seq_iterator_delete(seqit);
    }
    gt_str_array_delete(files);
  }

  /* show statistics */
  if (!had_err) {
    fprintf(stderr,
            "# "GT_WU" out of "GT_WU" sequences have been removed (%.3f%%)\n",
            (GtUword)duplicates, (GtUword)num_of_sequences,
            ((double) duplicates / (double)num_of_sequences) * 100.0);
  }

  gt_md5set_delete(md5set);
  return had_err;
}
Exemple #6
0
int gt_extractkeysfromfastafile(bool verbose,
                                GtFile *outfp,
                                unsigned long width,
                                const GtStr *fileofkeystoextract,
                                GtStrArray *referencefiletab,
                                GtError *err)
{
  GtSeqIterator *seqit;
  const GtUchar *sequence;
  char *desc, *headerbufferspace = NULL, *keyspace = NULL;
  const char *keyptr;
  unsigned long allockeyspace = 0, len, keylen, numofqueries, keyposition,
                countmarkhit = 0;
  int had_err = 0;
  off_t totalsize;
  Fastakeyquery *fastakeyqueries;
  size_t headerbuffersize = 0, headerlength;

  gt_error_check(err);
  fastakeyqueries = readfileofkeystoextract(verbose,&numofqueries,
                                            fileofkeystoextract,err);
  if (fastakeyqueries == NULL)
  {
    return -1;
  }
  totalsize = gt_files_estimate_total_size(referencefiletab);
  if (verbose)
  {
    printf("# estimated total size is " Formatuint64_t "\n",
            PRINTuint64_tcast(totalsize));
  }
  seqit = gt_seqiterator_sequence_buffer_new(referencefiletab, err);
  if (!seqit)
  {
    had_err = -1;
  }
  if (!had_err && verbose)
  {
    gt_progressbar_start(gt_seqiterator_getcurrentcounter(seqit,
                                                          (unsigned long long)
                                                          totalsize),
                                                          (unsigned long long)
                                                          totalsize);
  }
  while (had_err != -1 && countmarkhit < numofqueries)
  {
    had_err = gt_seqiterator_next(seqit, &sequence, &len, &desc, err);
    if (had_err != 1)
    {
      break;
    }
    keyptr = desc2key(&keylen,desc,err);
    if (keyptr == NULL)
    {
      had_err = -1;
    } else
    {
      if (allockeyspace < keylen)
      {
        keyspace = gt_realloc(keyspace,sizeof (*keyspace) * (keylen+1));
        allockeyspace = keylen;
      }
      gt_assert(keyspace != NULL);
      strncpy(keyspace,keyptr,(size_t) keylen);
      keyspace[keylen] = '\0';
      keyposition = searchdesinfastakeyqueries(keyspace,fastakeyqueries,
                                               numofqueries);
      if (keyposition < numofqueries)
      {
        while (keyposition < numofqueries &&
               strcmp(fastakeyqueries[keyposition].fastakey,keyspace) == 0)
        {
#ifndef NDEBUG
          if (fastakeyqueries[keyposition].markhit)
          {
            fprintf(stderr,"key %s was already found before\n",
                     fastakeyqueries[keyposition].fastakey);
            exit(GT_EXIT_PROGRAMMING_ERROR);
          }
#endif
          headerlength = strlen(desc);
          if (headerbuffersize < headerlength + EXTRABUF + 1)
          {
            headerbuffersize = headerlength + EXTRABUF + 1;
            headerbufferspace = gt_realloc(headerbufferspace,
                                           sizeof (*headerbufferspace)
                                           * headerbuffersize);
          }
          if (COMPLETE(fastakeyqueries + keyposition))
          {
            /*
            (void) snprintf(headerbufferspace,headerbuffersize,
                            "%*.*s complete %s",
                            (int) keylen,(int) keylen,keyspace,
                            desc);
            */
            gt_fasta_show_entry(desc, (const char *) sequence, len, width,
                                outfp);
          } else
          {
            (void) snprintf(headerbufferspace,headerbuffersize,
                            "%*.*s %lu %lu %s",
                            (int) keylen,(int) keylen,keyspace,
                            fastakeyqueries[keyposition].frompos,
                            fastakeyqueries[keyposition].topos,
                            desc);
            gt_fasta_show_entry(headerbufferspace,
                                (const char *)
                                (sequence+fastakeyqueries[keyposition].
                                                          frompos - 1),
                                fastakeyqueries[keyposition].topos -
                                fastakeyqueries[keyposition].frompos+1,
                                width, outfp);
          }
          fastakeyqueries[keyposition].markhit = true;
          countmarkhit++;
          keyposition++;
        }
      }
#ifdef SKDEBUG
      printf("%s 1 %lu\n",keyspace, len);
#endif
    }
  }
  gt_free(headerbufferspace);
  gt_free(keyspace);
  if (verbose)
  {
    gt_progressbar_stop();
  }
  if (verbose)
  {
    outputnonmarked(fastakeyqueries,numofqueries);
  }
  fastakeyqueries_delete(fastakeyqueries,numofqueries);
  gt_seqiterator_delete(seqit);
  return had_err;
}
static int gt_convertseq_runner(int argc, const char **argv, int parsed_args,
                              void *tool_arguments, GtError *err)
{
  GtConvertseqArguments *arguments = tool_arguments;
  int had_err = 0, i;
  GtFilelengthvalues *flv;
  GtSeqIterator *seqit;
  GtSequenceBuffer *sb = NULL;
  GtStrArray *files;
  const GtUchar *sequence;
  char *desc;
  GtUword len, j;
  off_t totalsize;
  gt_error_check(err);
  gt_assert(arguments != NULL);

  files = gt_str_array_new();
  for (i = parsed_args; i < argc; i++)
  {
    gt_str_array_add_cstr(files, argv[i]);
  }
  totalsize = gt_files_estimate_total_size(files);

  flv = gt_calloc((size_t) gt_str_array_size(files),
                  sizeof (GtFilelengthvalues));

  sb = gt_sequence_buffer_new_guess_type(files, err);
  if (!sb) {
    had_err = -1;
  }
  if (!had_err) {
    gt_sequence_buffer_set_filelengthtab(sb, flv);
    /* read input using seqiterator */
    seqit = gt_seq_iterator_sequence_buffer_new_with_buffer(sb);
    if (arguments->verbose)
    {
      gt_progressbar_start(gt_seq_iterator_getcurrentcounter(seqit,
                                                           (GtUint64)
                                                           totalsize),
                           (GtUint64) totalsize);
    }
    while (true)
    {
      GtUchar *seq = NULL;
      desc = NULL;
      j = 0UL;
      had_err = gt_seq_iterator_next(seqit, &sequence, &len, &desc, err);
      if (had_err != 1)
        break;
      if (arguments->revcomp) {
        GtUchar *newseq = gt_calloc((size_t) len+1, sizeof (GtUchar));
        memcpy(newseq, sequence, (size_t) len*sizeof (GtUchar));
        had_err = gt_reverse_complement((char*) newseq, len, err);
        if (had_err)
          break;
        seq = newseq;
      } else seq = (GtUchar*) sequence;

      if (!arguments->showseq) {
        bool in_wildcard = false;
        gt_file_xprintf(arguments->outfp, ">%s\n", desc);
        for (i = 0; (GtUword) i < len; i++) {
          if (arguments->reduce_wc_dna) {
            switch (seq[i]) {
              case 'a':
              case 'A':
              case 'c':
              case 'C':
              case 'g':
              case 'G':
              case 't':
              case 'u':
              case 'T':
              case 'U':
                in_wildcard = false;
                gt_file_xfputc((int) seq[i], arguments->outfp);
                j++;
                break;
              default:
                if (!in_wildcard) {
                  in_wildcard = true;
                  if (isupper((int) seq[i]))
                    gt_file_xfputc((int) 'N', arguments->outfp);
                  else
                    gt_file_xfputc((int) 'n', arguments->outfp);
                  j++;
                }
            }
          }
          else if (arguments->reduce_wc_prot) {
            switch (seq[i]) {
              case 'X':
              case 'B':
              case 'Z':
                if (!in_wildcard) {
                  in_wildcard = true;
                  gt_file_xfputc((int) 'N', arguments->outfp);
                  j++;
                }
                break;
              case 'x':
              case 'b':
              case 'z':
                if (!in_wildcard) {
                  in_wildcard = true;
                  gt_file_xfputc((int) 'n', arguments->outfp);
                  j++;
                }
                break;
              default:
                in_wildcard = false;
                gt_file_xfputc((int) seq[i], arguments->outfp);
                j++;
            }
          }
          else {
            gt_file_xfputc((int) seq[i], arguments->outfp);
            j++;
          }
          if (arguments->fastawidth > 0 && j % arguments->fastawidth == 0) {
            j = 0;
            gt_file_xprintf(arguments->outfp, "\n");
          }
        }
        if (arguments->fastawidth == 0 || len % arguments->fastawidth != 0)
            gt_file_xprintf(arguments->outfp, "\n");
      }
      if (arguments->revcomp) {
        gt_free(seq);
      }
    }
    if (arguments->showflv) {
      for (j=0;j<gt_str_array_size(files);j++) {
        fprintf(stderr, "file "GT_WU" (%s): "GT_WU"/"GT_WU"\n",
               j,
               gt_str_array_get(files, j),
               (GtUword) flv[j].length,
               (GtUword) flv[j].effectivelength);
      }
    }
    if (arguments->verbose)
    {
      gt_progressbar_stop();
    }
    gt_sequence_buffer_delete(sb);
    gt_seq_iterator_delete(seqit);
  }
  gt_str_array_delete(files);
  gt_free(flv);

  return had_err;
}
Exemple #8
0
static int gff3_in_stream_plain_next(GtNodeStream *ns, GtGenomeNode **gn,
                                     GtError *err)
{
  GtGFF3InStreamPlain *is = gff3_in_stream_plain_cast(ns);
  GtStr *filenamestr;
  int had_err = 0, status_code;

  gt_error_check(err);

  if (gt_queue_size(is->genome_node_buffer) > 1) {
    /* we still have at least two nodes in the buffer -> serve from there */
    *gn = gt_queue_get(is->genome_node_buffer);
    return 0;
  }

  /* the buffer is empty or has one element */
  gt_assert(gt_queue_size(is->genome_node_buffer) <= 1);

  for (;;) {
    /* open file if necessary */
    if (!is->file_is_open) {
      if (gt_str_array_size(is->files) &&
          is->next_file == gt_str_array_size(is->files)) {
        break;
      }
      if (gt_str_array_size(is->files)) {
        if (strcmp(gt_str_array_get(is->files, is->next_file), "-") == 0) {
          if (is->stdin_argument) {
            gt_error_set(err, "multiple specification of argument file \"-\"");
            had_err = -1;
            break;
          }
          is->fpin = gt_file_xopen(NULL, "r");
          is->file_is_open = true;
          is->stdin_argument = true;
        }
        else {
          is->fpin = gt_file_xopen(gt_str_array_get(is->files,
                                                       is->next_file), "r");
          is->file_is_open = true;
        }
        is->next_file++;
      }
      else {
        if (is->stdin_processed)
          break;
        is->fpin = NULL;
        is->file_is_open = true;
      }
      is->line_number = 0;

      if (!had_err && is->progress_bar) {
        printf("processing file \"%s\"\n", gt_str_array_size(is->files)
               ? gt_str_array_get(is->files, is->next_file-1) : "stdin");
      }
      if (!had_err && is->fpin && is->progress_bar) {
        gt_progressbar_start(&is->line_number,
                            gt_file_number_of_lines(gt_str_array_get(is->files,
                                                             is->next_file-1)));
      }
    }

    gt_assert(is->file_is_open);

    filenamestr = gt_str_array_size(is->files)
                  ? gt_str_array_get_str(is->files, is->next_file-1)
                  : is->stdinstr;
    /* read two nodes */
    had_err = gt_gff3_parser_parse_genome_nodes(is->gff3_parser, &status_code,
                                                is->genome_node_buffer,
                                                is->used_types, filenamestr,
                                                &is->line_number, is->fpin,
                                                err);
    if (had_err)
      break;
    if (status_code != EOF) {
      had_err = gt_gff3_parser_parse_genome_nodes(is->gff3_parser, &status_code,
                                                  is->genome_node_buffer,
                                                  is->used_types, filenamestr,
                                                  &is->line_number, is->fpin,
                                                  err);
      if (had_err)
        break;
    }

    if (status_code == EOF) {
      /* end of current file */
      if (is->progress_bar) gt_progressbar_stop();
      gt_file_delete(is->fpin);
      is->fpin = NULL;
      is->file_is_open = false;
      gt_gff3_parser_reset(is->gff3_parser);
      if (!gt_str_array_size(is->files)) {
        is->stdin_processed = true;
        break;
      }
      continue;
    }

    gt_assert(gt_queue_size(is->genome_node_buffer));

    /* make sure the parsed nodes are sorted */
    if (is->ensure_sorting && gt_queue_size(is->genome_node_buffer) > 1) {
      GtGenomeNode *last_node = NULL;
      /* a sorted stream can have at most one input file */
      gt_assert(gt_str_array_size(is->files) == 0 ||
                gt_str_array_size(is->files) == 1);
      had_err = gt_queue_iterate(is->genome_node_buffer, buffer_is_sorted,
                                 &last_node, err);
    }
    if (!had_err) {
      *gn = gt_queue_get(is->genome_node_buffer);
    }
    return had_err;
  }
  gt_assert(!gt_queue_size(is->genome_node_buffer));
  *gn = NULL;
  return had_err;
}