Esempio n. 1
0
/* Runner of the seqmutate tool. Walks over all sequence files given in <argv>
   behind the <parsed_args> already consumed options. Every sequence of every
   file is handed to gt_mutate_seq() together with <arguments->rate>; the
   resulting GtSeq is shown with gt_fasta_show_entry() on <arguments->outfp>
   using line width <arguments->width> and deleted afterwards.
   Returns the first non-zero code delivered by gt_bioseq_iterator_next(),
   or 0 if the iterator ran dry without reporting a problem. */
static int gt_seqmutate_runner(int argc, const char **argv, int parsed_args,
                            void *tool_arguments, GtError *err)
{
  MutateArguments *arguments = tool_arguments;
  GtBioseqIterator *seqfile_iter;
  GtBioseq *current;
  int had_err;

  gt_error_check(err);
  gt_assert(arguments);

  seqfile_iter = gt_bioseq_iterator_new(argc - parsed_args,
                                        argv + parsed_args);

  for (;;) {
    unsigned long seqnum;

    had_err = gt_bioseq_iterator_next(seqfile_iter, &current, err);
    if (had_err || !current)
      break; /* iterator reported an error or delivered no further file */

    for (seqnum = 0; seqnum < gt_bioseq_number_of_sequences(current);
         seqnum++) {
      GtSeq *mutated;
      mutated = gt_mutate_seq(gt_bioseq_get_description(current, seqnum),
                              gt_bioseq_get_sequence(current, seqnum),
                              gt_bioseq_get_sequence_length(current, seqnum),
                              gt_bioseq_get_alphabet(current),
                              arguments->rate);
      gt_fasta_show_entry(gt_seq_get_description(mutated),
                          gt_seq_get_orig(mutated),
                          gt_seq_length(mutated),
                          arguments->width, arguments->outfp);
      gt_seq_delete(mutated);
    }
    gt_bioseq_delete(current);
  }

  gt_bioseq_iterator_delete(seqfile_iter);

  return had_err;
}
Esempio n. 2
0
/* Allocates a new GtBioseq for <sequence_file>. The file name "-" selects
   standard input; any other name must pass gt_file_exists(). The object is
   then populated by bioseq_fill(), to which <recreate> is forwarded.
   Returns the new object, or NULL if the file check or bioseq_fill() failed
   (the partially built object is deleted in that case; for a missing file
   <err> is set here). */
static GtBioseq* bioseq_new_with_recreate_and_type(GtStr *sequence_file,
                                                   bool recreate, GtError *err)
{
  GtBioseq *bs;
  const char *path;

  gt_error_check(err);

  /* zero-initialised, hence use_stdin starts out as false */
  bs = gt_calloc(1, sizeof *bs);
  path = gt_str_get(sequence_file);
  if (strcmp(path, "-") == 0)
    bs->use_stdin = true;

  if (!bs->use_stdin && !gt_file_exists(path)) {
    gt_error_set(err, "sequence file \"%s\" does not exist or is not readable",
                 path);
    gt_bioseq_delete(bs);
    return NULL;
  }

  bs->sequence_file = gt_str_ref(sequence_file);
  if (bioseq_fill(bs, recreate, err)) {
    gt_bioseq_delete(bs);
    return NULL;
  }

  gt_assert(bs->encseq);
  /* one (initially NULL) description slot per sequence */
  bs->descriptions = gt_calloc(gt_encseq_num_of_sequences(bs->encseq),
                               sizeof (char*));
  return bs;
}
Esempio n. 3
0
/* Runner of the extractseq tool. If a FASTA key file was given, the complete
   job is delegated to process_fastakeyfile(). Otherwise all sequence files
   named behind the parsed options are visited one after another and each is
   passed either to extractseq_pos() (if <arguments->frompos> is non-zero) or
   to extractseq_match() with <arguments->pattern>.
   Returns 0 on success, otherwise the first non-zero code of a callee. */
static int gt_extractseq_runner(int argc, const char **argv, int parsed_args,
                                void *tool_arguments, GtError *err)
{
  ExtractSeqArguments *arguments = tool_arguments;
  GtBioseqIterator *seqfile_iter;
  GtBioseq *current;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  if (gt_str_length(arguments->fastakeyfile)) {
    return process_fastakeyfile(arguments->fastakeyfile, argc - parsed_args,
                                argv + parsed_args, arguments->width,
                                arguments->outfp, err);
  }

  seqfile_iter = gt_bioseq_iterator_new(argc - parsed_args,
                                        argv + parsed_args);
  while (!had_err) {
    had_err = gt_bioseq_iterator_next(seqfile_iter, &current, err);
    if (had_err || !current)
      break; /* iterator failed or no further sequence file */

    had_err = arguments->frompos
                ? extractseq_pos(arguments->outfp, current,
                                 arguments->frompos, arguments->topos,
                                 arguments->width, err)
                : extractseq_match(arguments->outfp, current,
                                   gt_str_get(arguments->pattern),
                                   arguments->width, err);
    gt_bioseq_delete(current);
  }
  gt_bioseq_iterator_delete(seqfile_iter);

  return had_err;
}
Esempio n. 4
0
/* Makes sure that <rm->bioseq> belongs to <seqid>. Nothing is done if a
   sequence file is already stored and the stored sequence name compares equal
   to <seqid>. Otherwise the sequence file is looked up again with
   region_mapping_map(), the stored name is replaced by <seqid> and the bioseq
   is re-created from the new file.
   Returns 0 on success and -1 if the lookup or the bioseq creation failed. */
static int update_bioseq_if_necessary(GtRegionMapping *rm, GtStr *seqid,
                                      GtError *err)
{
  gt_error_check(err);
  gt_assert(rm && seqid);

  /* stored state already matches the requested sequence ID */
  if (rm->sequence_file && !gt_str_cmp(rm->sequence_name, seqid))
    return 0;

  gt_str_delete(rm->sequence_file);
  rm->sequence_file = region_mapping_map(rm, gt_str_get(seqid), err);
  if (!rm->sequence_file)
    return -1;

  /* remember for which sequence ID the file was mapped */
  if (rm->sequence_name)
    gt_str_reset(rm->sequence_name);
  else
    rm->sequence_name = gt_str_new();
  gt_str_append_str(rm->sequence_name, seqid);

  /* replace the old bioseq by one for the freshly mapped file */
  gt_bioseq_delete(rm->bioseq);
  rm->bioseq = gt_bioseq_new_str(rm->sequence_file, err);

  return rm->bioseq ? 0 : -1;
}
Esempio n. 5
0
/* Releases the members of the GtBioseqCol behind <sc>: the grep cache, every
   bioseq stored in the <bioseqs> array and finally the array itself. Does
   nothing if gt_bioseq_col_cast() yields NULL. The GtBioseqCol object itself
   is not freed here. */
static void gt_bioseq_col_delete(GtSeqCol *sc)
{
  GtBioseqCol *bsc = gt_bioseq_col_cast(sc);
  GtUword filenum = 0;

  if (bsc == NULL)
    return;

  gt_seq_info_cache_delete(bsc->grep_cache);
  while (filenum < bsc->num_of_seqfiles) {
    gt_bioseq_delete(bsc->bioseqs[filenum]);
    filenum++;
  }
  gt_free(bsc->bioseqs);
}
Esempio n. 6
0
/* Searches the sequence files <seqfiles> for the targets listed in <target>.
   <target> is split at ','; of every list entry only the part up to the first
   blank is used, after it has been passed through gt_gff3_unescape(). For
   each entry every sequence file is opened and every sequence description is
   searched for the unescaped entry with gt_string_matching_bmh(), which gets
   show_target() and a TargetInfo (bioseq and sequence number) as callback and
   callback data.
   Returns 0 on success and a non-zero value if unescaping failed or a
   sequence file could not be opened (<err> is passed to the failing callee). */
static int extracttarget_from_seqfiles(const char *target,
                                       GtStrArray *seqfiles,
                                       GtError *err)
{
  GtStr *unescaped_target;
  char *escaped_target;
  GtSplitter *splitter;
  unsigned long i;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(target && seqfiles);
  splitter = gt_splitter_new();
  unescaped_target = gt_str_new();
  /* the splitter gets a non-const buffer, hence work on a private copy */
  escaped_target = gt_cstr_dup(target);
  gt_splitter_split(splitter, escaped_target, strlen(escaped_target), ',');
  for (i = 0; !had_err && i < gt_splitter_size(splitter); i++) {
    GtSplitter *blank_splitter;
    const char *target_id;
    char *token = gt_splitter_get_token(splitter, i);
    blank_splitter = gt_splitter_new();
    gt_splitter_split(blank_splitter, token, strlen(token), ' ');
    target_id = gt_splitter_get_token(blank_splitter, 0);
    /* Start every list entry with an empty pattern buffer.
       NOTE(review): gt_gff3_unescape() appears to append to its output
       string; without the reset the pattern of entry i would then contain
       the patterns of all previous entries. The reset is harmless should the
       function overwrite instead. */
    gt_str_reset(unescaped_target);
    had_err = gt_gff3_unescape(unescaped_target, target_id, strlen(target_id),
                               err);
    if (!had_err) {
      unsigned long j;
      for (j = 0; j < gt_str_array_size(seqfiles); j++) {
        unsigned long k;
        GtBioseq *bioseq;
        if (!(bioseq =  gt_bioseq_new(gt_str_array_get(seqfiles, j), err))) {
          had_err = -1;
          break;
        }
        for (k = 0; k < gt_bioseq_number_of_sequences(bioseq); k++) {
          TargetInfo target_info;
          const char *desc = gt_bioseq_get_description(bioseq, k);
          target_info.bioseq = bioseq;
          target_info.seqnum = k;
          gt_string_matching_bmh(desc, strlen(desc),
                                 gt_str_get(unescaped_target),
                                 gt_str_length(unescaped_target), show_target,
                                 &target_info);
        }
        gt_bioseq_delete(bioseq);
      }
    }
    gt_splitter_delete(blank_splitter);
  }
  gt_free(escaped_target);
  gt_str_delete(unescaped_target);
  gt_splitter_delete(splitter);
  return had_err;
}
Esempio n. 7
0
/* Drops one reference to <rm>; NULL is accepted and ignored. While the
   reference counter is non-zero it is merely decremented. Once it has reached
   zero, the three stored strings, the mapping, the bioseq and finally <rm>
   itself are released. */
void gt_region_mapping_delete(GtRegionMapping *rm)
{
  if (rm == NULL)
    return;

  if (rm->reference_count != 0) {
    /* other holders remain, only give up our reference */
    rm->reference_count--;
    return;
  }

  gt_str_delete(rm->sequence_filename);
  gt_str_delete(rm->sequence_file);
  gt_str_delete(rm->sequence_name);
  gt_mapping_delete(rm->mapping);
  gt_bioseq_delete(rm->bioseq);
  gt_free(rm);
}
Esempio n. 8
0
/* Runner of the seqfilter tool. Visits every sequence of every sequence file
   named behind the parsed options. A sequence passes if its length is within
   [<minlength>, <maxlength>] and the number of already passed sequences is
   below <maxseqnum>; each of the three limits is ignored if it equals
   GT_UNDEF_ULONG. Passing sequences are shown with gt_fasta_show_entry() on
   <arguments->outfp>, all others are counted as filtered.
   If no error occurred a statistics line is written to stderr.
   Returns 0 on success, otherwise the non-zero code delivered by
   gt_bioseq_iterator_next(). */
static int gt_seqfilter_runner(int argc, const char **argv, int parsed_args,
                               void *tool_arguments, GtError *err)
{
  SeqFilterArguments *arguments = tool_arguments;
  GtBioseqIterator *bsi;
  GtBioseq *bioseq;
  unsigned long i;
  unsigned long long passed = 0, filtered = 0, num_of_sequences = 0;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(tool_arguments);

  bsi = gt_bioseq_iterator_new(argc - parsed_args, argv + parsed_args);

  while (!(had_err = gt_bioseq_iterator_next(bsi, &bioseq, err)) && bioseq) {
    for (i = 0; i < gt_bioseq_number_of_sequences(bioseq); i++) {
      if ((arguments->minlength == GT_UNDEF_ULONG ||
           gt_bioseq_get_sequence_length(bioseq, i) >= arguments->minlength) &&
          (arguments->maxlength == GT_UNDEF_ULONG ||
           gt_bioseq_get_sequence_length(bioseq, i) <= arguments->maxlength) &&
          (arguments->maxseqnum == GT_UNDEF_ULONG ||
           passed + 1 <= arguments->maxseqnum)) {
        gt_fasta_show_entry(gt_bioseq_get_description(bioseq, i),
                            gt_bioseq_get_sequence(bioseq, i),
                            gt_bioseq_get_sequence_length(bioseq, i),
                            arguments->width, arguments->outfp);
        passed++;
      }
      else
        filtered++;
      num_of_sequences++;
    }
    gt_bioseq_delete(bioseq);
  }

  /* show statistics */
  if (!had_err) {
    double removed_percent = 0.0;
    gt_assert(passed + filtered == num_of_sequences);
    /* without any input sequence the ratio would be 0/0 (printed as NaN);
       report 0% in that case */
    if (num_of_sequences > 0)
      removed_percent = ((double) filtered / num_of_sequences) * 100.0;
    fprintf(stderr, "# %llu out of %llu sequences have been removed (%.3f%%)\n",
            filtered, num_of_sequences, removed_percent);
  }

  gt_bioseq_iterator_delete(bsi);

  return had_err;
}
Esempio n. 9
0
/* Runner of the shredder tool. For every sequence file named behind the
   parsed options <arguments->coverage> shredders are created one after
   another (fragment lengths between <minlength> and <maxlength>, overlap and
   sample probability taken from <arguments>). Every fragment delivered by
   gt_shredder_shred() is shown with gt_fasta_show_entry(); its description,
   which gt_shredder_shred() receives as output string, is extended by
   " [shreddered fragment]" before.
   Returns 0 on success, otherwise the non-zero code delivered by
   gt_bioseq_iterator_next(). */
static int gt_shredder_runner(GT_UNUSED int argc, const char **argv,
                              int parsed_args, void *tool_arguments,
                              GtError *err)
{
  GtShredderArguments *arguments = tool_arguments;
  GtBioseqIterator *seqfile_iter;
  GtBioseq *current;
  GtStr *fragment_desc;
  int had_err;

  gt_error_check(err);
  gt_assert(arguments);

  /* set up the description buffer and the file iterator */
  fragment_desc = gt_str_new();
  seqfile_iter = gt_bioseq_iterator_new(argc - parsed_args,
                                        argv + parsed_args);

  /* shred file by file */
  for (;;) {
    unsigned long round;

    had_err = gt_bioseq_iterator_next(seqfile_iter, &current, err);
    if (had_err || !current)
      break;

    for (round = 0; round < arguments->coverage; round++) {
      unsigned long fragment_length;
      const char *fragment;
      GtShredder *shredder;

      shredder = gt_shredder_new(current, arguments->minlength,
                                 arguments->maxlength);
      gt_shredder_set_overlap(shredder, arguments->overlap);
      gt_shredder_set_sample_probability(shredder,
                                         arguments->sample_probability);
      for (fragment = gt_shredder_shred(shredder, &fragment_length,
                                        fragment_desc);
           fragment != NULL;
           fragment = gt_shredder_shred(shredder, &fragment_length,
                                        fragment_desc)) {
        gt_str_append_cstr(fragment_desc, " [shreddered fragment]");
        gt_fasta_show_entry(gt_str_get(fragment_desc), fragment,
                            fragment_length, 0);
      }
      gt_shredder_delete(shredder);
    }
    gt_bioseq_delete(current);
  }

  /* tear down */
  gt_bioseq_iterator_delete(seqfile_iter);
  gt_str_delete(fragment_desc);

  return had_err;
}
Esempio n. 10
0
/* Runner of the seqtransform tool. Shows every sequence of every sequence
   file named behind the parsed options via gt_fasta_show_entry_with_suffix()
   on <arguments->outfp>. If <arguments->addstopaminos> is set, the alphabet of
   the file is a protein alphabet and a non-empty sequence does not end with
   GT_STOP_AMINO, then GT_STOP_AMINO_CSTR is passed as suffix; otherwise the
   suffix is NULL.
   Returns 0 on success, otherwise the non-zero code delivered by
   gt_bioseq_iterator_next(). */
static int gt_seqtransform_runner(int argc, const char **argv, int parsed_args,
                            void *tool_arguments, GtError *err)
{
  SeqtransformArguments *arguments = tool_arguments;
  GtBioseqIterator *seqfile_iter;
  GtBioseq *current;
  int had_err;

  gt_error_check(err);
  gt_assert(arguments);

  seqfile_iter = gt_bioseq_iterator_new(argc - parsed_args,
                                        argv + parsed_args);

  for (;;) {
    unsigned long seqnum;
    bool is_protein;

    had_err = gt_bioseq_iterator_next(seqfile_iter, &current, err);
    if (had_err || !current)
      break;

    is_protein = gt_alphabet_is_protein(gt_bioseq_get_alphabet(current));
    for (seqnum = 0; seqnum < gt_bioseq_number_of_sequences(current);
         seqnum++) {
      const char *desc = gt_bioseq_get_description(current, seqnum);
      char *seq = gt_bioseq_get_sequence(current, seqnum);
      unsigned long seqlen = gt_bioseq_get_sequence_length(current, seqnum);
      /* only protein sequences lacking a terminal stop get the suffix */
      const char *suffix = (arguments->addstopaminos && is_protein && seqlen &&
                            seq[seqlen-1] != GT_STOP_AMINO)
                             ? GT_STOP_AMINO_CSTR
                             : NULL;
      gt_fasta_show_entry_with_suffix(desc, seq, seqlen, suffix,
                                      arguments->width, arguments->outfp);
      gt_free(seq);
    }
    gt_bioseq_delete(current);
  }

  gt_bioseq_iterator_delete(seqfile_iter);

  return had_err;
}
Esempio n. 11
0
/* Writes every sequence of the sequence file <filename> into a file of its
   own. The output file name is composed of <splitdesc>, a '/', the sequence
   description and the result of gt_file_suffix(<filename>). The file is
   opened with gt_output_file_xopen_forcecheck() (to which <force> is passed)
   and the sequence is shown with gt_fasta_show_entry() using line width
   <width>.
   Returns 0 on success and -1 if the sequence file or an output file could
   not be opened. */
static int split_description(const char *filename, GtStr *splitdesc,
                             unsigned long width, bool force, GtError *err)
{
  GtStr *outname;
  GtBioseq *bioseq;
  unsigned long seqnum = 0;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(filename && splitdesc && gt_str_length(splitdesc));

  outname = gt_str_new();
  bioseq = gt_bioseq_new(filename, err);
  if (!bioseq)
    had_err = -1;

  while (!had_err && seqnum < gt_bioseq_number_of_sequences(bioseq)) {
    GtFile *outfp;

    /* <splitdesc>/<description><suffix of input file> */
    gt_str_reset(outname);
    gt_str_append_str(outname, splitdesc);
    gt_str_append_char(outname, '/');
    gt_str_append_cstr(outname, gt_bioseq_get_description(bioseq, seqnum));
    gt_str_append_cstr(outname, gt_file_suffix(filename));

    outfp = gt_output_file_xopen_forcecheck(gt_str_get(outname), "w", force,
                                            err);
    if (!outfp) {
      had_err = -1; /* terminates the loop via its condition */
    }
    else {
      char *seq = gt_bioseq_get_sequence(bioseq, seqnum);
      gt_fasta_show_entry(gt_bioseq_get_description(bioseq, seqnum), seq,
                          gt_bioseq_get_sequence_length(bioseq, seqnum), width,
                          outfp);
      gt_free(seq);
      gt_file_delete(outfp);
      seqnum++;
    }
  }

  gt_bioseq_delete(bioseq);
  gt_str_delete(outname);

  return had_err;
}
int main(int argc, char *argv[])
{
  const char *style_file, *png_file, *gff3_file;
  char *seqid;
  GtStyle *style;
  GtBioseq *bioseq;
  GtFeatureIndex *feature_index;
  GtRange range;
  GtDiagram *diagram;
  GtLayout *layout;
  GtCanvas *canvas;
  GtCustomTrack *custom;
  GtUword height, windowsize;
  GtError *err;

  if (argc != 9) {
    fprintf(stderr, "Usage: %s style_file PNG_file GFF3_file Seq_file seqid"
                    " start end windowsize\n",
                    argv[0]);
    return EXIT_FAILURE;
  }

  style_file = argv[1];
  png_file = argv[2];
  gff3_file = argv[3];

  /* initialize */
  gt_lib_init();

  /* create error object */
  err = gt_error_new();

  /* create style */
  if (!(style = gt_style_new(err)))
    handle_error(err);

  /* load style file */
  if (gt_style_load_file(style, style_file, err))
    handle_error(err);

  /* create feature index */
  feature_index = gt_feature_index_memory_new();

  /* add GFF3 file to index */
  if (gt_feature_index_add_gff3file(feature_index, gff3_file, err))
    handle_error(err);

  /* create diagram for first sequence ID in feature index */
  seqid = argv[5];
  if (gt_feature_index_get_range_for_seqid(feature_index, &range, seqid, err))
    handle_error(err);
  sscanf(argv[6], "%lu", &range.start);
  sscanf(argv[7], "%lu", &range.end);
  sscanf(argv[8], "%lu", &windowsize);

  diagram = gt_diagram_new(feature_index, seqid, &range, style, err);
  if (gt_error_is_set(err))
    handle_error(err);

  /* load sequence for GC plot */
  bioseq = gt_bioseq_new(argv[4], err);
  if (gt_error_is_set(err))
    handle_error(err);

  /* create custom track with GC plot for first sequence in file,
     window size 1000, 40px height and average line at 16.5% */
  custom = gt_custom_track_gc_content_new(gt_bioseq_get_sequence(bioseq, 0),
                                          gt_bioseq_get_sequence_length(bioseq,
                                                                        0),
                                          windowsize,
                                          70,
                                          0.165,
                                          true);
  gt_diagram_add_custom_track(diagram, custom);

  /* create layout with given width, determine resulting image height */
  layout = gt_layout_new(diagram, 600, style, err);
  if (gt_error_is_set(err))
    handle_error(err);
  if (gt_layout_get_height(layout, &height, err))
    handle_error(err);

  /* create PNG canvas */
  canvas = gt_canvas_cairo_file_new(style, GT_GRAPHICS_PNG, 600, height,
                                    NULL, err);
  if (!canvas)
    handle_error(err);

  /* sketch layout on canvas */
  if (gt_layout_sketch(layout, canvas, err))
    handle_error(err);

  /* write canvas to file */
  if (gt_canvas_cairo_file_to_file((GtCanvasCairoFile*) canvas, png_file, err))
    handle_error(err);

  /* free */
  gt_custom_track_delete(custom);
  gt_bioseq_delete(bioseq);
  gt_canvas_delete(canvas);
  gt_layout_delete(layout);
  gt_diagram_delete(diagram);
  gt_feature_index_delete(feature_index);
  gt_style_delete(style);
  gt_error_delete(err);

  /* perform static data cleanup */
  gt_lib_clean();
  return EXIT_SUCCESS;
}
Esempio n. 13
0
/* Runner of the ltrdigest tool (variant working directly on a GtEncseq).
   argv[parsed_args] is used as GFF3 input and argv[parsed_args+1] as name of
   the encoded sequence index. The node stream chain is
     GFF3 input -> ltrdigest stream [-> tabular file output] -> GFF3 output
   where the tabular output stream is only attached if <arguments->prefix> is
   not empty. The PPT test is always requested, the PBS test if a tRNA
   library was given and (with HAVE_HMMER) the protein domain test if HMM
   files were given.
   Returns 0 on success and a non-zero value otherwise. */
static int gt_ltrdigest_runner(GT_UNUSED int argc, const char **argv,
                               int parsed_args, void *tool_arguments,
                               GtError *err)
{
  GtLTRdigestOptions *arguments = tool_arguments;
  GtNodeStream *gff3_in_stream   = NULL,
               *gff3_out_stream  = NULL,
               *ltrdigest_stream = NULL,
               *tab_out_stream   = NULL,
               *last_stream      = NULL;
  int had_err      = 0,
      tests_to_run = 0,
      arg = parsed_args;
  const char *indexname = argv[arg+1];
  GtLogger *logger;
  GtEncseqLoader *el;
  GtEncseq *encseq;
  gt_error_check(err);
  gt_assert(arguments);

  /* created only after <arguments> has been checked */
  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stdout);

  /* Set sequence encoder options. Defaults are ok. */
  el = gt_encseq_loader_new();
  gt_encseq_loader_set_logger(el, logger);

  /* Open sequence file */
  encseq = gt_encseq_loader_load(el, indexname, err);
  if (!encseq)
    had_err = -1;

  /* Always search for PPT. */
  tests_to_run |= GT_LTRDIGEST_RUN_PPT;

  /* Open tRNA library if given. */
  if (!had_err && arguments->trna_lib
        && gt_str_length(arguments->trna_lib) > 0)
  {
    tests_to_run |= GT_LTRDIGEST_RUN_PBS;
    arguments->pbs_opts.trna_lib =
                           gt_bioseq_new(gt_str_get(arguments->trna_lib), err);
    if (gt_error_is_set(err))
      had_err = -1;
  }

#ifdef HAVE_HMMER
  /* Open HMMER files if given. */
  if (!had_err && gt_str_array_size(arguments->pdom_opts.hmm_files) > 0)
  {
    tests_to_run |= GT_LTRDIGEST_RUN_PDOM;
    if (!strcmp(gt_str_get(arguments->cutoffs), "GA")) {
      arguments->pdom_opts.cutoff = GT_PHMM_CUTOFF_GA;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "TC")) {
      arguments->pdom_opts.cutoff = GT_PHMM_CUTOFF_TC;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "NONE")) {
      arguments->pdom_opts.cutoff = GT_PHMM_CUTOFF_NONE;
    } else {
      gt_error_set(err, "invalid cutoff setting!");
      had_err = -1;
    }
  }
#endif

  if (!had_err)
  {
    /* set up stream flow
     * ------------------*/
    last_stream = gff3_in_stream  = gt_gff3_in_stream_new_sorted(argv[arg]);

    last_stream = ltrdigest_stream = gt_ltrdigest_stream_new(last_stream,
                                                  tests_to_run,
                                                  encseq,
                                                  &arguments->pbs_opts,
                                                  &arguments->ppt_opts,
#ifdef HAVE_HMMER
                                                  &arguments->pdom_opts,
#endif
                                                  err);
    if (!ltrdigest_stream)
      had_err = -1;
  }

  if (!had_err)
  {
    /* attach tabular output stream, if requested */
    if (gt_str_length(arguments->prefix) > 0)
    {
      last_stream = tab_out_stream = gt_ltr_fileout_stream_new(last_stream,
                                              tests_to_run,
                                              encseq,
                                              gt_str_get(arguments->prefix),
                                              &arguments->ppt_opts,
                                              &arguments->pbs_opts,
#ifdef HAVE_HMMER
                                              &arguments->pdom_opts,
#endif
                                              gt_str_get(arguments->trna_lib),
                                              argv[arg+1],
                                              argv[arg],
                                              arguments->seqnamelen,
                                              err);
      /* do not touch or chain a stream which could not be created */
      if (!tab_out_stream)
        had_err = -1;
#ifdef HAVE_HMMER
      if (!had_err) {
        /* test the option values; the address of a member is never NULL and
           would switch on both outputs unconditionally */
        if (arguments->pdom_opts.write_alignments)
          gt_ltr_fileout_stream_enable_pdom_alignment_output(tab_out_stream);
        if (arguments->pdom_opts.write_aaseqs)
          gt_ltr_fileout_stream_enable_aa_sequence_output(tab_out_stream);
      }
#endif
    }

    if (!had_err) {
      last_stream = gff3_out_stream = gt_gff3_out_stream_new(last_stream,
                                                             arguments->outfp);

      /* pull the features through the stream and free them afterwards */
      had_err = gt_node_stream_pull(last_stream, err);
    }
  }

  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(ltrdigest_stream);
  if (tab_out_stream != NULL)
    gt_node_stream_delete(tab_out_stream);
  gt_node_stream_delete(gff3_in_stream);

  gt_encseq_loader_delete(el);
  gt_encseq_delete(encseq);
  encseq = NULL;
  gt_bioseq_delete(arguments->pbs_opts.trna_lib);
  gt_logger_delete(logger);

  return had_err;
}
Esempio n. 14
0
/* Runner of the ltrdigest tool (variant based on a GtRegionMapping and node
   visitors). The sequence source is either the region mapping created from
   the seqid2file options or, as legacy fallback, the encoded sequence named
   by the last command line argument. argv[parsed_args] is used as GFF3 input.
   Depending on the options, visitor streams for protein domains (if HMM files
   were given), PBS (if a tRNA library was given), PPT and strand assignment
   are chained behind the GFF3 input, optionally followed by the tabular file
   output stream (if <arguments->prefix> is not empty) and finally by the
   GFF3 output stream.
   Returns 0 on success and a non-zero value otherwise. */
static int gt_ltrdigest_runner(GT_UNUSED int argc, const char **argv,
                               int parsed_args, void *tool_arguments,
                               GtError *err)
{
  GtLTRdigestOptions *arguments = tool_arguments;
  GtNodeStream *gff3_in_stream  = NULL,
               *gff3_out_stream = NULL,
               *pdom_stream     = NULL,
               *ppt_stream      = NULL,
               *pbs_stream      = NULL,
               *tab_out_stream  = NULL,
               *sa_stream       = NULL,
               *last_stream     = NULL;
  int had_err      = 0,
      tests_to_run = 0,
      arg = parsed_args;
  GtRegionMapping *rmap = NULL;
  GtPdomModelSet *ms = NULL;
  gt_error_check(err);
  gt_assert(arguments);

  /* determine and open sequence source */
  if (gt_seqid2file_option_used(arguments->s2fi)) {
    /* create region mapping */
    rmap = gt_seqid2file_region_mapping_new(arguments->s2fi, err);
    if (!rmap)
      had_err = -1;
  } else {
    GtEncseqLoader *el;
    GtEncseq *encseq;
    /* no new-style sequence source option given, fall back to legacy syntax */
    if (argc < 3) {
      gt_error_set(err, "missing mandatory argument(s)");
      had_err = -1;
    }
    if (!had_err) {
      el = gt_encseq_loader_new();
      gt_encseq_loader_disable_autosupport(el);
      gt_encseq_loader_require_md5_support(el);
      gt_encseq_loader_require_description_support(el);
      encseq = gt_encseq_loader_load(el, argv[argc-1], err);
      /* XXX: clip off terminal argument */
      gt_free((char*) argv[argc-1]);
      argv[argc-1] = NULL;
      argc--;
      gt_encseq_loader_delete(el);
      if (!encseq)
        had_err = -1;
      else {
        rmap = gt_region_mapping_new_encseq_seqno(encseq);
        gt_encseq_delete(encseq);
      }
    }
  }
  gt_assert(had_err || rmap);

  /* Always search for PPT. */
  tests_to_run |= GT_LTRDIGEST_RUN_PPT;

  /* Open tRNA library if given. */
  if (!had_err && arguments->trna_lib
        && gt_str_length(arguments->trna_lib) > 0)
  {
    tests_to_run |= GT_LTRDIGEST_RUN_PBS;
    arguments->trna_lib_bs = gt_bioseq_new(gt_str_get(arguments->trna_lib),
                                           err);
    if (gt_error_is_set(err))
      had_err = -1;
  }

  /* Set HMMER cutoffs. */
  if (!had_err && gt_str_array_size(arguments->hmm_files) > 0)
  {
    tests_to_run |= GT_LTRDIGEST_RUN_PDOM;
    if (!strcmp(gt_str_get(arguments->cutoffs), "GA")) {
      arguments->cutoff = GT_PHMM_CUTOFF_GA;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "TC")) {
      arguments->cutoff = GT_PHMM_CUTOFF_TC;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "NONE")) {
      arguments->cutoff = GT_PHMM_CUTOFF_NONE;
    } else {
      gt_error_set(err, "invalid cutoff setting!");
      had_err = -1;
    }
  }

  if (!had_err) {
    last_stream = gff3_in_stream  = gt_gff3_in_stream_new_sorted(argv[arg]);
  }

  /* protein domain visitor, only if HMM files were given */
  if (!had_err && gt_str_array_size(arguments->hmm_files) > 0) {
    GtNodeVisitor *pdom_v;
    ms = gt_pdom_model_set_new(arguments->hmm_files, err);
    if (ms != NULL) {
      pdom_v = gt_ltrdigest_pdom_visitor_new(ms, arguments->evalue_cutoff,
                                             arguments->chain_max_gap_length,
                                             arguments->cutoff, rmap, err);
      if (pdom_v == NULL)
        had_err = -1;
      if (!had_err) {
        gt_ltrdigest_pdom_visitor_set_source_tag((GtLTRdigestPdomVisitor*)
                                                                        pdom_v,
                                                 GT_LTRDIGEST_TAG);
        if (arguments->output_all_chains)
          gt_ltrdigest_pdom_visitor_output_all_chains((GtLTRdigestPdomVisitor*)
                                                                        pdom_v);
        last_stream = pdom_stream = gt_visitor_stream_new(last_stream, pdom_v);
      }
    } else had_err = -1;
  }

  /* PBS visitor, only if a tRNA library has been loaded */
  if (!had_err && arguments->trna_lib_bs) {
    GtNodeVisitor *pbs_v;
    pbs_v = gt_ltrdigest_pbs_visitor_new(rmap, arguments->pbs_radius,
                                         arguments->max_edist,
                                         arguments->alilen,
                                         arguments->offsetlen,
                                         arguments->trnaoffsetlen,
                                         arguments->ali_score_match,
                                         arguments->ali_score_mismatch,
                                         arguments->ali_score_insertion,
                                         arguments->ali_score_deletion,
                                         arguments->trna_lib_bs, err);
    if (pbs_v != NULL)
      last_stream = pbs_stream = gt_visitor_stream_new(last_stream, pbs_v);
    else
      had_err = -1;
  }

  /* PPT visitor, always attached */
  if (!had_err) {
    GtNodeVisitor *ppt_v;
    ppt_v = gt_ltrdigest_ppt_visitor_new(rmap, arguments->ppt_len,
                                         arguments->ubox_len,
                                         arguments->ppt_pyrimidine_prob,
                                         arguments->ppt_purine_prob,
                                         arguments->bkg_a_prob,
                                         arguments->bkg_g_prob,
                                         arguments->bkg_t_prob,
                                         arguments->bkg_c_prob,
                                         arguments->ubox_u_prob,
                                         arguments->ppt_radius,
                                         arguments->max_ubox_dist, err);
    if (ppt_v != NULL)
      last_stream = ppt_stream = gt_visitor_stream_new(last_stream, ppt_v);
    else
      had_err = -1;
  }

  /* strand assignment visitor */
  if (!had_err) {
    GtNodeVisitor *sa_v;
    sa_v = gt_ltrdigest_strand_assign_visitor_new();
    gt_assert(sa_v);
    last_stream = sa_stream = gt_visitor_stream_new(last_stream, sa_v);
  }

  if (!had_err)
  {
    /* attach tabular output stream, if requested */
    if (gt_str_length(arguments->prefix) > 0)
    {
      last_stream = tab_out_stream = gt_ltrdigest_file_out_stream_new(
                                                  last_stream,
                                                  tests_to_run,
                                                  rmap,
                                                  gt_str_get(arguments->prefix),
                                                  arguments->seqnamelen,
                                                  err);
      if (!tab_out_stream)
        had_err = -1;
      if (!had_err && arguments->print_metadata)
      {
        had_err = gt_ltrdigest_file_out_stream_write_metadata(
                                           (GtLTRdigestFileOutStream*)
                                                                 tab_out_stream,
                                           tests_to_run,
                                           gt_str_get(arguments->trna_lib),
                                           argv[arg],
                                           arguments->ppt_len,
                                           arguments->ubox_len,
                                           arguments->ppt_radius,
                                           arguments->alilen,
                                           arguments->max_edist,
                                           arguments->offsetlen,
                                           arguments->trnaoffsetlen,
                                           arguments->pbs_radius,
                                           arguments->hmm_files,
                                           arguments->chain_max_gap_length,
                                           arguments->evalue_cutoff,
                                           err);
      }
      if (!had_err)
      {
        if (arguments->write_alignments)
          gt_ltrdigest_file_out_stream_enable_pdom_alignment_output(
                                                                tab_out_stream);
        if (arguments->write_aaseqs)
          gt_ltrdigest_file_out_stream_enable_aa_sequence_output(
                                                                tab_out_stream);
      }
    }

    /* Only continue if the tabular output stage succeeded. Otherwise
       <last_stream> may be NULL and pulling would overwrite the error code
       recorded above. */
    if (!had_err) {
      last_stream = gff3_out_stream = gt_gff3_out_stream_new(last_stream,
                                                             arguments->outfp);

      /* pull the features through the stream and free them afterwards */
      had_err = gt_node_stream_pull(last_stream, err);
    }
  }

  gt_pdom_model_set_delete(ms);
  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(ppt_stream);
  gt_node_stream_delete(pbs_stream);
  gt_node_stream_delete(sa_stream);
  gt_node_stream_delete(pdom_stream);
  gt_node_stream_delete(tab_out_stream);
  gt_node_stream_delete(gff3_in_stream);
  gt_bioseq_delete(arguments->trna_lib_bs);
  gt_region_mapping_delete(rmap);

  return had_err;
}
Esempio n. 15
0
/* Runner of the seqfilter tool. Visits the sequences of every sequence file
   named behind the parsed options; the scan of a file stops as soon as
   <maxseqnum> sequences have passed (unless <maxseqnum> equals
   GT_UNDEF_UWORD). A visited sequence passes if
   - it is the <step>-th sequence since the last step hit (or <step> is 1),
   - gt_rand_0_to_1() does not exceed <sample_prob> (or <sample_prob> is 1.0),
   - its length lies within [<minlength>, <maxlength>], where a limit equal
     to GT_UNDEF_UWORD is ignored.
   Passing sequences are shown with gt_fasta_show_entry() on
   <arguments->outfp>; all other sequences, including those not visited
   because of <maxseqnum>, are counted as filtered.
   If no error occurred a statistics line is written to stderr.
   Returns 0 on success, otherwise the non-zero code delivered by
   gt_bioseq_iterator_next(). */
static int gt_seqfilter_runner(int argc, const char **argv, int parsed_args,
                               void *tool_arguments, GtError *err)
{
  SeqFilterArguments *arguments = tool_arguments;
  GtBioseqIterator *bsi;
  GtBioseq *bioseq;
  GtUint64 passed = 0, filtered = 0, num_of_sequences = 0, steps = 0;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(tool_arguments);

  bsi = gt_bioseq_iterator_new(argc - parsed_args, argv + parsed_args);

  while (!(had_err = gt_bioseq_iterator_next(bsi, &bioseq, err)) &&
         bioseq != NULL) {
    GtUword i;
    GtUint64 current_num = gt_bioseq_number_of_sequences(bioseq);
    for (i = 0;
         i < current_num &&
         (arguments->maxseqnum == GT_UNDEF_UWORD ||
          passed + 1 <= arguments->maxseqnum);
         i++) {
      char *seq;
      if ((arguments->step == 1 ||
           steps + 1 == arguments->step) &&
          (arguments->sample_prob == 1.0 ||
           gt_rand_0_to_1() <= arguments->sample_prob) &&
          (arguments->minlength == GT_UNDEF_UWORD ||
           gt_bioseq_get_sequence_length(bioseq, i) >= arguments->minlength) &&
          (arguments->maxlength == GT_UNDEF_UWORD ||
           gt_bioseq_get_sequence_length(bioseq, i) <= arguments->maxlength)) {
        seq = gt_bioseq_get_sequence(bioseq, i);
        gt_fasta_show_entry(gt_bioseq_get_description(bioseq, i),
                            seq,
                            gt_bioseq_get_sequence_length(bioseq, i),
                            arguments->width, arguments->outfp);
        gt_free(seq);
        passed++;
      }
      else {
        filtered++;
      }
      /* position within the current step window, wraps around at <step> */
      steps = (steps + 1 == arguments->step) ? 0 : steps + 1;
    }
    /* sequences skipped because the maximum was reached count as filtered */
    filtered += current_num - i;
    num_of_sequences += current_num;
    gt_bioseq_delete(bioseq);
  }

  /* show statistics */
  if (!had_err) {
    double removed_percent = 0.0;
    gt_assert(passed + filtered == num_of_sequences);
    /* without any input sequence the ratio would be 0/0 (printed as NaN);
       report 0% in that case */
    if (num_of_sequences > 0)
      removed_percent = ((double) filtered / num_of_sequences) * 100.0;
    fprintf(stderr, "# " GT_LLU " out of " GT_LLU
            " sequences have been removed (%.3f%%)\n",
            filtered, num_of_sequences, removed_percent);
  }

  gt_bioseq_iterator_delete(bsi);

  return had_err;
}
Esempio n. 16
0
/* Runner of the sequniq tool.

   Reads the sequence files named in <argv>[<parsed_args>] up to
   <argv>[<argc>-1] and adds every sequence to an MD5 set. Sequences for which
   gt_md5set_add_sequence() reports GT_MD5SET_NOT_FOUND are written via
   gt_fasta_show_entry() to <arguments>->outfp; all other non-error results are
   counted as duplicates. A summary line is printed to stderr on success.

   Depending on <arguments>->seqit the input is either loaded file by file as
   GtBioseq objects or streamed through a GtSeqIterator.

   Returns 0 on success and -1 if a file could not be opened, the sequence
   iterator failed, or the MD5 set reported an error (the failing callee
   receives <err> and is expected to set it). */
static int gt_sequniq_runner(int argc, const char **argv, int parsed_args,
                             void *tool_arguments, GtError *err)
{
  GtSequniqArguments *arguments = tool_arguments;
  GtUint64 duplicates = 0, num_of_sequences = 0;
  int i, had_err = 0;
  GtMD5Set *md5set;

  gt_error_check(err);
  gt_assert(arguments);
  md5set = gt_md5set_new(arguments->nofseqs);
  if (!arguments->seqit) {
    GtUword j;
    GtBioseq *bs;

    for (i = parsed_args; !had_err && i < argc; i++) {
      if (!(bs = gt_bioseq_new(argv[i], err)))
        had_err = -1;
      if (!had_err) {
        GtMD5SetStatus retval;
        for (j = 0; j < gt_bioseq_number_of_sequences(bs) && !had_err; j++) {
          /* the sequence copy is released with gt_free() below */
          char *seq = gt_bioseq_get_sequence(bs, j);
          retval = gt_md5set_add_sequence(md5set, seq,
                                          gt_bioseq_get_sequence_length(bs, j),
                                          arguments->rev, err);
          if (retval == GT_MD5SET_NOT_FOUND)
            gt_fasta_show_entry(gt_bioseq_get_description(bs, j), seq,
                                gt_bioseq_get_sequence_length(bs, j),
                                arguments->width, arguments->outfp);
          else if (retval != GT_MD5SET_ERROR)
            duplicates++;
          else
            had_err = -1;
          num_of_sequences++;
          gt_free(seq);
        }
        gt_bioseq_delete(bs);
      }
    }
  }
  else {
    GtSeqIterator *seqit;
    GtStrArray *files;
    off_t totalsize;
    const GtUchar *sequence;
    char *desc;
    GtUword len;

    files = gt_str_array_new();
    for (i = parsed_args; i < argc; i++)
      gt_str_array_add_cstr(files, argv[i]);
    totalsize = gt_files_estimate_total_size(files);
    seqit = gt_seq_iterator_sequence_buffer_new(files, err);
    if (!seqit)
      had_err = -1;
    if (!had_err) {
      if (arguments->verbose) {
        gt_progressbar_start(gt_seq_iterator_getcurrentcounter(seqit,
                                                          (GtUint64) totalsize),
                             (GtUint64) totalsize);
      }
      while (!had_err) {
        GtMD5SetStatus retval;
        int rval = gt_seq_iterator_next(seqit, &sequence, &len, &desc, err);

        /* a negative value signals a read error; it must not be mistaken for
           the regular end of the input */
        if (rval < 0) {
          had_err = -1;
          break;
        }
        if (rval != 1)
          break;

        retval = gt_md5set_add_sequence(md5set, (const char*) sequence, len,
                                        arguments->rev, err);
        if (retval == GT_MD5SET_NOT_FOUND)
          gt_fasta_show_entry(desc, (const char*) sequence, len,
                              arguments->width, arguments->outfp);
        else if (retval != GT_MD5SET_ERROR)
          duplicates++;
        else
          had_err = -1;
        num_of_sequences++;
      }
      if (arguments->verbose)
        gt_progressbar_stop();
      gt_seq_iterator_delete(seqit);
    }
    gt_str_array_delete(files);
  }

  /* show statistics */
  if (!had_err) {
    /* avoid 0/0 (NaN) when the input contained no sequence at all */
    double removed_percent = 0.0;
    if (num_of_sequences > 0) {
      removed_percent = ((double) duplicates / (double) num_of_sequences)
                        * 100.0;
    }
    fprintf(stderr,
            "# "GT_WU" out of "GT_WU" sequences have been removed (%.3f%%)\n",
            (GtUword)duplicates, (GtUword)num_of_sequences,
            removed_percent);
  }

  gt_md5set_delete(md5set);
  return had_err;
}
Esempio n. 17
0
/* Unit test for the PBS (primer binding site) search.

   Writes a two-entry tRNA library to a temporary file, builds a 600 base test
   sequence (and its reverse complement) from <fullseq>, runs gt_pbs_find() on
   it and checks the two ranked hits against the expected alignment length,
   edit distance, offset, tRNA start, tRNA name, coordinates and strand.

   Returns 0 if all checks pass, a non-zero value otherwise. Once a check has
   failed, every later step that would use a possibly missing object (tRNA
   library, result set, hit, hit range) is skipped, so a failing test reports
   an error instead of dereferencing NULL or copying an unchecked range. */
int gt_pbs_unit_test(GtError *err)
{
  int had_err = 0;
  GtLTRElement element;
  GtPBSOptions o;
  GtStr *tmpfilename;
  FILE *tmpfp;
  GtPBSResults *res = NULL;
  GtPBSHit *hit = NULL;
  double score1 = 0.0, score2;
  GtRange rng;
  char *rev_seq,
       *seq,
       tmp[BUFSIZ];
  const char *fullseq =                           "aaaaaaaaaaaaaaaaaaaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "acatactaggatgctag" /* <- PBS forward */
                                     "aatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatag"
                                   /* PBS reverse -> */ "gatcctaaggctac"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "aaaaaaaaaaaaaaaaaaaa";

  /* notice previous errors */
  gt_error_check(err);

  rng.start = 0;
  rng.end = 0;

  /* create temporary tRNA library file */
  tmpfilename = gt_str_new();
  tmpfp = gt_xtmpfp(tmpfilename);
  fprintf(tmpfp, ">test1\nccccccccccccccctagcatcctagtatgtccc\n"
                 ">test2\ncccccccccgatcctagggctaccctttc\n");
  gt_fa_xfclose(tmpfp);
  ensure(had_err, gt_file_exists(gt_str_get(tmpfilename)));

  /* setup testing parameters */
  o.radius = 30;
  o.max_edist = 1;
  o.alilen.start = 11;
  o.alilen.end = 30;
  o.offsetlen.start = 0;
  o.offsetlen.end = 5;
  o.trnaoffsetlen.start = 0;
  o.trnaoffsetlen.end =  40;
  o.ali_score_match = 5;
  o.ali_score_mismatch = -10;
  o.ali_score_insertion = o.ali_score_deletion = -20;
  o.trna_lib = NULL;
  if (!had_err) {
    /* the library may fail to load; do not query a NULL bioseq below */
    o.trna_lib = gt_bioseq_new(gt_str_get(tmpfilename), err);
    if (!o.trna_lib)
      had_err = -1;
  }
  if (!had_err) {
    ensure(had_err, gt_bioseq_number_of_sequences(o.trna_lib) == 2);
  }

  element.leftLTR_5 = 20;
  element.leftLTR_3 = 119;
  element.rightLTR_5 = 520;
  element.rightLTR_3 = 619;

  /* setup sequences: both buffers hold positions 20..619 of <fullseq> */
  seq     = gt_malloc(600 * sizeof (char));
  rev_seq = gt_malloc(600 * sizeof (char));
  memcpy(seq,     fullseq + 20, 600);
  memcpy(rev_seq, fullseq + 20, 600);
  if (!had_err) {
    gt_reverse_complement(rev_seq, 600, err);
  }

  /* try to find PBS in sequences */
  if (!had_err) {
    res = gt_pbs_find(seq, rev_seq, &element, &o, err);
    ensure(had_err, res != NULL);
  }
  if (!had_err) {
    ensure(had_err, gt_pbs_results_get_number_of_hits(res) == 2);
  }

  /* check first hit on forward strand */
  if (!had_err) {
    hit = gt_pbs_results_get_ranked_hit(res, 0);
    ensure(had_err, hit != NULL);
  }
  if (!had_err) {
    ensure(had_err, gt_pbs_hit_get_alignment_length(hit) == 17);
    ensure(had_err, gt_pbs_hit_get_edist(hit) == 0);
    ensure(had_err, gt_pbs_hit_get_offset(hit) == 0);
    ensure(had_err, gt_pbs_hit_get_tstart(hit) == 3);
    ensure(had_err, strcmp(gt_pbs_hit_get_trna(hit), "test1") == 0);
    rng = gt_pbs_hit_get_coords(hit);
    ensure(had_err, rng.start == 120);
    ensure(had_err, rng.end == 136);
    score1 = gt_pbs_hit_get_score(hit);
    ensure(had_err, gt_pbs_hit_get_strand(hit) == GT_STRAND_FORWARD);
  }
  if (!had_err) {
    /* the range has been verified above, so the copy fits into <tmp> */
    memset(tmp, 0, sizeof tmp);
    memcpy(tmp, fullseq + (rng.start * sizeof (char)),
           (rng.end - rng.start + 1) * sizeof (char));
    ensure(had_err, strcmp(tmp, "acatactaggatgctag" ) == 0);
  }

  /* check second hit on reverse strand */
  if (!had_err) {
    hit = gt_pbs_results_get_ranked_hit(res, 1);
    ensure(had_err, hit != NULL);
  }
  if (!had_err) {
    ensure(had_err, gt_pbs_hit_get_alignment_length(hit) == 14);
    ensure(had_err, gt_pbs_hit_get_edist(hit) == 1);
    ensure(had_err, gt_pbs_hit_get_offset(hit) == 0);
    ensure(had_err, gt_pbs_hit_get_tstart(hit) == 6);
    ensure(had_err, strcmp(gt_pbs_hit_get_trna(hit), "test2") == 0);
    rng = gt_pbs_hit_get_coords(hit);
    ensure(had_err, rng.start == 506);
    ensure(had_err, rng.end == 519);
    score2 = gt_pbs_hit_get_score(hit);
    ensure(had_err, gt_double_compare(score1, score2) > 0);
    ensure(had_err, gt_pbs_hit_get_strand(hit) == GT_STRAND_REVERSE);
  }
  if (!had_err) {
    memset(tmp, 0, sizeof tmp);
    memcpy(tmp, fullseq + (rng.start * sizeof (char)),
           (rng.end - rng.start + 1) * sizeof (char));
    ensure(had_err, strcmp(tmp, "gatcctaaggctac" ) == 0);
  }

  /* clean up */
  gt_xremove(gt_str_get(tmpfilename));
  ensure(had_err, !gt_file_exists(gt_str_get(tmpfilename)));
  gt_str_delete(tmpfilename);
  if (o.trna_lib)
    gt_bioseq_delete(o.trna_lib);
  gt_free(rev_seq);
  gt_free(seq);
  if (res)
    gt_pbs_results_delete(res);

  return had_err;
}
Esempio n. 18
0
/* Fill the model of <bssm_param> selected by <termtype> from training files.

   For each of the NUMOFFILES entries of <filenames>, the file
   <path>/<subdir>/<filename> (with ".gz" appended if <gzip> is true) is
   loaded, where <subdir> is "GT_donor", "GC_donor" or "AG_acceptor" depending
   on <termtype>. Every sequence in a file must have length STRINGSIZE and
   carry the dinucleotide matching <termtype> (GT, GC or AG) at the 0-based
   positions 50 and 51; otherwise <err> is set and processing stops. Valid
   files are passed to build_bssm() together with their file index.

   Returns 0 on success and -1 on failure. */
int gth_bssm_param_parameterize(GthBSSMParam *bssm_param, const char *path,
                                Termtype termtype, bool gzip, GtError *err)
{
    GtAlphabet *alphabet = NULL;
    GtBioseq *bioseq;
    GtStr *file2proc;
    GtUword i, j;
    int had_err = 0;
    gt_error_check(err);

    file2proc = gt_str_new();

    /* set version number */
    bssm_param->version_num = (unsigned char) MYVERSION;

    /* set model to true and set window sizes */
    switch (termtype) {
    case GT_DONOR_TYPE:
        bssm_param->gt_donor_model_set = true;
        set_window_sizes_in_Bssmmodel(&bssm_param->gt_donor_model);
        break;
    case GC_DONOR_TYPE:
        bssm_param->gc_donor_model_set = true;
        set_window_sizes_in_Bssmmodel(&bssm_param->gc_donor_model);
        break;
    case AG_ACCEPTOR_TYPE:
        bssm_param->ag_acceptor_model_set = true;
        set_window_sizes_in_Bssmmodel(&bssm_param->ag_acceptor_model);
        break;
    default:
        gt_assert(0);
    }

    for (i = 0; !had_err && i < NUMOFFILES; i++) {
        /* process datafile */
        gt_str_append_cstr(file2proc, path);
        switch (termtype) {
        case GT_DONOR_TYPE:
            gt_str_append_cstr(file2proc, "/GT_donor/");
            gt_str_append_cstr(file2proc, filenames[i]);
            break;
        case GC_DONOR_TYPE:
            gt_str_append_cstr(file2proc, "/GC_donor/");
            gt_str_append_cstr(file2proc, filenames[i]);
            break;
        case AG_ACCEPTOR_TYPE:
            gt_str_append_cstr(file2proc, "/AG_acceptor/");
            gt_str_append_cstr(file2proc, filenames[i]);
            break;
        default:
            gt_assert(0);
        }

        if (gzip)
            gt_str_append_cstr(file2proc, ".gz");

        if (!(bioseq = gt_bioseq_new(gt_str_get(file2proc), err)))
            had_err = -1;

        if (!had_err)
            alphabet = gt_bioseq_get_alphabet(bioseq);

        /* check here if all sequences have the length 102 and correct bases at
           positions 51 and 52 (i.e., GT, GC, or AG) */
        for (j = 0; !had_err && j < gt_bioseq_number_of_sequences(bioseq); j++) {
            /* check length */
            if (gt_bioseq_get_sequence_length(bioseq, j) != STRINGSIZE) {
                gt_error_set(err,
                             "sequence "GT_WU" in file \"%s\" does not have length %u",
                             j, gt_str_get(file2proc), STRINGSIZE);
                had_err = -1;
            }
            if (!had_err) {
                GtUchar encoded_seq[2];
                /* only read the two bases once the length is known to be valid;
                   a shorter sequence would otherwise be read out of bounds */
                encoded_seq[0] = gt_bioseq_get_encoded_char(bioseq, j, 50);
                encoded_seq[1] = gt_bioseq_get_encoded_char(bioseq, j, 51);
                /* check base correctness */
                switch (termtype) {
                case GT_DONOR_TYPE:
                    if (encoded_seq[0] != gt_alphabet_encode(alphabet, 'G') ||
                            encoded_seq[1] != gt_alphabet_encode(alphabet, 'T')) {
                        gt_error_set(err, "sequence "GT_WU" in file \"%s\" is not a GT "
                                     "sequence", j, gt_str_get(file2proc));
                        had_err = -1;
                    }
                    break;
                case GC_DONOR_TYPE:
                    if (encoded_seq[0] != gt_alphabet_encode(alphabet, 'G') ||
                            encoded_seq[1] != gt_alphabet_encode(alphabet, 'C')) {
                        gt_error_set(err, "sequence "GT_WU" in file \"%s\" is not a GC "
                                     "sequence", j, gt_str_get(file2proc));
                        had_err = -1;
                    }
                    break;
                case AG_ACCEPTOR_TYPE:
                    if (encoded_seq[0] != gt_alphabet_encode(alphabet, 'A') ||
                            encoded_seq[1] != gt_alphabet_encode(alphabet, 'G')) {
                        gt_error_set(err, "sequence "GT_WU" in file \"%s\" is not a AG "
                                     "sequence", j, gt_str_get(file2proc));
                        had_err = -1;
                    }
                    break;
                default:
                    gt_assert(0);
                }
            }
        }

        if (!had_err) {
            switch (termtype) {
            case GT_DONOR_TYPE:
                build_bssm(bioseq, &bssm_param->gt_donor_model, i);
                break;
            case GC_DONOR_TYPE:
                build_bssm(bioseq, &bssm_param->gc_donor_model, i);
                break;
            case AG_ACCEPTOR_TYPE:
                build_bssm(bioseq, &bssm_param->ag_acceptor_model, i);
                break;
            default:
                gt_assert(0);
            }
        }

        /* reset the file name buffer for the next iteration */
        gt_str_reset(file2proc);

        /* free space (bioseq is NULL here if loading the file failed) */
        gt_bioseq_delete(bioseq);
    }
    gt_str_delete(file2proc);

    return had_err;
}
Esempio n. 19
0
/* Runner of the sketch_page tool.

   <argv>[<parsed_args>] is used as the output file name and
   <argv>[<parsed_args>+1] as the GFF3 input file. The features of the chosen
   sequence region are drawn line by line (each line covering
   <arguments>->width positions) onto a cairo PDF or PostScript surface; a new
   page is started whenever the next line would not fit into the usable page
   height, and every page receives a header via draw_header(). If a sequence
   file is given, a GC content custom track is added to every line.

   Returns 0 on success and -1 on failure. The style, the style file name and
   the feature index are released on every path, including early failures. */
static int gt_sketch_page_runner(GT_UNUSED int argc,
                                 const char **argv,
                                 int parsed_args,
                                 void *tool_arguments,
                                 GtError *err)
{
  SketchPageArguments *arguments = tool_arguments;
  int had_err = 0;
  GtFeatureIndex *features = NULL;
  GtRange qry_range, sequence_region_range;
  GtStyle *sty = NULL;
  GtStr *prog, *gt_style_file = NULL;
  GtBioseq *bioseq = NULL;
  const char *seqid = NULL, *outfile;
  unsigned long start, height, num_pages = 0;
  double offsetpos, usable_height;
  cairo_surface_t *surf = NULL;
  cairo_t *cr = NULL;
  GtTextWidthCalculator *twc;
  gt_error_check(err);

  features = gt_feature_index_memory_new();

  if (cairo_version() < CAIRO_VERSION_ENCODE(1, 8, 6))
    gt_warning("Your cairo library (version %s) is older than version 1.8.6! "
               "These versions contain a bug which may result in "
               "corrupted PDF output!", cairo_version_string());

  /* get style */
  sty = gt_style_new(err);
  if (!sty)
    had_err = -1;
  if (!had_err)
  {
    if (gt_str_length(arguments->stylefile) == 0)
    {
      /* no style file given: fall back to the default style below the gtdata
         directory, located via the program name (argv[0] up to the first
         blank) */
      prog = gt_str_new();
      gt_str_append_cstr_nt(prog, argv[0],
                            gt_cstr_length_up_to_char(argv[0], ' '));
      gt_style_file = gt_get_gtdata_path(gt_str_get(prog), err);
      gt_str_delete(prog);
      if (gt_style_file)
        gt_str_append_cstr(gt_style_file, "/sketch/default.style");
      else
        had_err = -1;
    }
    else
    {
      gt_style_file = gt_str_ref(arguments->stylefile);
    }
  }
  if (!had_err)
    had_err = gt_style_load_file(sty, gt_str_get(gt_style_file), err);

  outfile = argv[parsed_args];
  if (!had_err)
  {
    /* get features */
    had_err = gt_feature_index_add_gff3file(features, argv[parsed_args+1], err);
    if (!had_err)
    {
      if (gt_str_length(arguments->seqid) == 0)
      {
        seqid = gt_feature_index_get_first_seqid(features);
        if (seqid == NULL)
        {
          gt_error_set(err, "GFF input file must contain a sequence region!");
          had_err = -1;
        }
      }
      else if (!gt_feature_index_has_seqid(features,
                                           gt_str_get(arguments->seqid)))
      {
        gt_error_set(err,
                     "sequence region '%s' does not exist in GFF input file",
                     gt_str_get(arguments->seqid));
        had_err = -1;
      }
      else
        seqid = gt_str_get(arguments->seqid);
    }
  }

  /* set text */
  if (gt_str_length(arguments->text) == 0)
  {
    gt_str_delete(arguments->text);
    arguments->text = gt_str_new_cstr(argv[parsed_args+1]);
  }

  if (!had_err)
  {
    /* set display range */
    gt_feature_index_get_range_for_seqid(features, &sequence_region_range,
                                         seqid);
    qry_range.start = (arguments->range.start == GT_UNDEF_ULONG ?
                         sequence_region_range.start :
                         arguments->range.start);
    qry_range.end   = (arguments->range.end == GT_UNDEF_ULONG ?
                         sequence_region_range.end :
                         arguments->range.end);

    /* set output format */
    /* NOTE(review): surf stays NULL for any other format value; presumably
       the option parser only admits "pdf" and "ps" -- confirm */
    if (strcmp(gt_str_get(arguments->format), "pdf") == 0)
    {
      surf = cairo_pdf_surface_create(outfile,
                                      mm_to_pt(arguments->pwidth),
                                      mm_to_pt(arguments->pheight));
    }
    else if (strcmp(gt_str_get(arguments->format), "ps") == 0)
    {
      surf =  cairo_ps_surface_create(outfile,
                                      mm_to_pt(arguments->pwidth),
                                      mm_to_pt(arguments->pheight));
    }
    gt_log_log("created page with %.2f:%.2f dimensions\n",
                                                  mm_to_pt(arguments->pwidth),
                                                  mm_to_pt(arguments->pheight));

    offsetpos = TEXT_SPACER + arguments->theight + TEXT_SPACER;
    usable_height = mm_to_pt(arguments->pheight)
                              - arguments->theight
                              - arguments->theight
                              - 4*TEXT_SPACER;

    if (gt_str_length(arguments->seqfile) > 0) {
      bioseq = gt_bioseq_new(gt_str_get(arguments->seqfile), err);
      /* a sequence file that cannot be read is an error, not "no file" */
      if (!bioseq)
        had_err = -1;
    }

    cr = cairo_create(surf);
    cairo_set_font_size(cr, 8);
    twc = gt_text_width_calculator_cairo_new(cr, sty);
    for (start = qry_range.start; !had_err && start <= qry_range.end;
         start += arguments->width)
    {
      GtRange single_range;
      GtCustomTrack *ct = NULL;
      GtDiagram *d;
      GtLayout *l;
      /* per-iteration objects: starting from NULL each time guarantees that
         the cleanup below never touches an object of an earlier iteration */
      GtCanvas *canvas = NULL;
      const char *seq;
      single_range.start = start;
      single_range.end = start + arguments->width;

      d = gt_diagram_new(features, seqid, &single_range, sty, err);
      if (!d) {
        had_err = -1;
        break;
      }
      if (bioseq) {
        seq = gt_bioseq_get_sequence(bioseq, 0);
        ct = gt_custom_track_gc_content_new(seq,
                                      gt_bioseq_get_sequence_length(bioseq, 0),
                                      800, 70, 0.4, true);
        gt_diagram_add_custom_track(d, ct);
      }

      l = gt_layout_new_with_twc(d, mm_to_pt(arguments->width), sty, twc, err);
      if (!l)
        had_err = -1;
      if (!had_err)
        had_err = gt_layout_get_height(l, &height, err);
      if (!had_err) {
        /* start a new page if this line does not fit on the current one */
        if (gt_double_smaller_double(usable_height - 10 - 2*TEXT_SPACER
              - arguments->theight, offsetpos + height))
        {
          draw_header(cr, gt_str_get(arguments->text), argv[parsed_args+1],
                      seqid, num_pages, mm_to_pt(arguments->pwidth),
                      mm_to_pt(arguments->pheight),
                      arguments->theight);
          cairo_show_page(cr);
          offsetpos = TEXT_SPACER + arguments->theight + TEXT_SPACER;
          num_pages++;
        }
        canvas = gt_canvas_cairo_context_new(sty,
                                             cr,
                                             offsetpos,
                                             mm_to_pt(arguments->pwidth),
                                             height,
                                             NULL,
                                             err);
        if (!canvas)
          had_err = -1;
        offsetpos += height;
        if (!had_err)
          had_err = gt_layout_sketch(l, canvas, err);
      }
      if (canvas)
        gt_canvas_delete(canvas);
      if (l)
        gt_layout_delete(l);
      gt_diagram_delete(d);
      if (ct)
        gt_custom_track_delete(ct);
    }
    draw_header(cr, gt_str_get(arguments->text), argv[parsed_args+1], seqid,
                num_pages, mm_to_pt(arguments->pwidth),
                mm_to_pt(arguments->pheight),
                arguments->theight);
    cairo_show_page(cr);
    num_pages++;
    gt_log_log("finished, should be %lu pages\n", num_pages);
    gt_text_width_calculator_delete(twc);
    cairo_destroy(cr);
    cairo_surface_flush(surf);
    cairo_surface_finish(surf);
    cairo_surface_destroy(surf);
    cairo_debug_reset_static_data();
    if (bioseq)
      gt_bioseq_delete(bioseq);
  }

  /* release the objects created before the drawing stage on every path */
  if (sty)
    gt_style_delete(sty);
  if (gt_style_file)
    gt_str_delete(gt_style_file);
  gt_feature_index_delete(features);
  return had_err;
}