Exemplo n.º 1
0
/*
 * Create a new HCR decoder for the encoded data identified by <name>.
 *
 * If <timer> is not NULL, progress is reported on it. If <descs> is true the
 * encoded descriptions are loaded with gt_encdesc_load(); otherwise the
 * decoder carries no description decoder (encdesc == NULL).
 *
 * Returns the new decoder, or NULL if loading the descriptions or creating
 * the sequence decoder left <err> set. On failure the partially built decoder
 * is released with gt_hcr_decoder_delete().
 */
GtHcrDecoder *gt_hcr_decoder_new(const char *name, GtAlphabet *alpha,
                                 bool descs, GtTimer *timer, GtError *err)
{
    GtHcrDecoder *hcr_dec;
    int had_err = 0;

    gt_error_check(err);
    if (timer != NULL)
        gt_timer_show_progress(timer, "initialize hcr decoder", stdout);

    hcr_dec = gt_malloc(sizeof (GtHcrDecoder));
    /* Set both pointers before anything can fail, so the cleanup path below
       never hands an indeterminate pointer to gt_hcr_decoder_delete(). */
    hcr_dec->encdesc = NULL;
    hcr_dec->seq_dec = NULL;

    if (descs) {
        hcr_dec->encdesc = gt_encdesc_load(name, err);
        if (gt_error_is_set(err))
            had_err = -1;
    }

    if (!had_err) {
        hcr_dec->seq_dec = hcr_seq_decoder_new(alpha, name, err);
        if (!gt_error_is_set(err))
            return hcr_dec;
    }
    gt_hcr_decoder_delete(hcr_dec);
    return NULL;
}
Exemplo n.º 2
0
// Build the union of the sequence IDs known to the reference and the
// prediction feature index.
//
// A failure while fetching either ID list is reported through `logger`.
// Returns the array produced by agn_gt_str_array_union(), or NULL if the
// logger reports an error after both lists have been fetched.
GtStrArray* agn_seq_union(GtFeatureIndex *refrfeats, GtFeatureIndex *predfeats,
                          AgnLogger *logger)
{
  GtError *error = gt_error_new();
  GtStrArray *ref_ids, *pred_ids, *merged = NULL;

  // Sequence IDs of the reference annotation
  ref_ids = gt_feature_index_get_seqids(refrfeats, error);
  if(gt_error_is_set(error))
  {
    agn_logger_log_error(logger, "error fetching seqids for reference: %s",
                         gt_error_get(error));
    gt_error_unset(error);
  }

  // Sequence IDs of the prediction annotation
  pred_ids = gt_feature_index_get_seqids(predfeats, error);
  if(gt_error_is_set(error))
  {
    agn_logger_log_error(logger, "error fetching seqids for prediction: %s",
                         gt_error_get(error));
    gt_error_unset(error);
  }
  gt_error_delete(error);

  // Only merge when the logger is error free; the inputs are released on
  // both paths.
  if(!agn_logger_has_error(logger))
    merged = agn_gt_str_array_union(ref_ids, pred_ids);

  gt_str_array_delete(ref_ids);
  gt_str_array_delete(pred_ids);
  return merged;
}
Exemplo n.º 3
0
/*
 * Load the BWT sequence index <indexname> and return it as an opaque FMindex.
 *
 * The alphabet is taken from the encoded sequence metadata of <indexname>.
 * If <withpckbt> is true and a pck bucket table exists for the index, it is
 * mapped and attached to the result; otherwise the bucket table pointer is
 * set to NULL.
 *
 * Returns NULL if any step fails; each failing step is expected to have set
 * <err> (asserted).
 */
FMindex *gt_loadvoidBWTSeqForSA(const char *indexname,
                                bool withpckbt,
                                GtError *err)
{
  BWTSeq *seq = NULL;
  GtAlphabet *alpha = NULL;
  GtEncseqMetadata *metadata;
  bool failed = false;

  metadata = gt_encseq_metadata_new(indexname, err);
  if (metadata == NULL)
  {
    gt_assert(gt_error_is_set(err));
    failed = true;
  } else
  {
    alpha = gt_encseq_metadata_alphabet(metadata);
    if (alpha == NULL)
    {
      gt_assert(gt_error_is_set(err));
      failed = true;
    }
  }

  if (!failed)
  {
    seq = gt_loadBWTSeqForSA(indexname,
                             BWT_ON_BLOCK_ENC,
                             BWTDEFOPT_MULTI_QUERY,
                             alpha,
                             err);
    if (seq == NULL)
    {
      gt_assert(gt_error_is_set(err));
      failed = true;
    } else if (withpckbt && gt_pckbuckettable_exists(indexname))
    {
      unsigned int nchars = gt_alphabet_num_of_chars(alpha);
      seq->pckbuckettable = gt_pckbuckettable_map(indexname,nchars,err);
      if (seq->pckbuckettable == NULL)
      {
        gt_assert(gt_error_is_set(err));
        failed = true;
      }
    } else
    {
      seq->pckbuckettable = NULL;
    }
  }

  /* the metadata object is released on every path */
  gt_encseq_metadata_delete(metadata);

  if (failed)
  {
    /* drop a partially initialised index */
    if (seq != NULL)
      gt_deletevoidBWTSeq((FMindex *) seq);
    return NULL;
  }
  return (FMindex *) seq;
}
Exemplo n.º 4
0
/*
 * Create a sequence decoder for the HCR file <name> + HCRFILESUFFIX.
 *
 * The file is opened, its file info and base/quality distribution are read,
 * the Huffman decoder is initialised, and finally the sampling and the file
 * info tree are set up. Returns NULL if opening the file or initialising the
 * Huffman decoder left <err> set; in that case the partially built decoder
 * is deleted.
 */
static GtHcrSeqDecoder *hcr_seq_decoder_new(GtAlphabet *alpha, const char *name,
        GtError *err)
{
    GtHcrSeqDecoder *seq_dec;
    GtBaseQualDistr *bqd = NULL;
    GtWord end_enc_start_sampling = 0;
    FILE *fp;
    GT_UNUSED size_t read,
              one = (size_t) 1;

    seq_dec = gt_malloc(sizeof (GtHcrSeqDecoder));

    /* scalar members */
    seq_dec->alpha = alpha;
    seq_dec->alphabet_size = gt_alphabet_size(alpha);
    seq_dec->cur_read = 0;

    /* members that are filled in later (or stay NULL on failure) */
    seq_dec->data_iter = NULL;
    seq_dec->file_info_rbt = NULL;
    seq_dec->fileinfos = NULL;
    seq_dec->huff_dec = NULL;
    seq_dec->huffman = NULL;
    seq_dec->sampling = NULL;
    seq_dec->symbols = NULL;

    /* full file name: <name> followed by the HCR suffix */
    seq_dec->filename = gt_str_new_cstr(name);
    gt_str_append_cstr(seq_dec->filename, HCRFILESUFFIX);

    fp = gt_fa_fopen_with_suffix(name, HCRFILESUFFIX, "rb", err);
    if (!gt_error_is_set(err)) {
        hcr_read_file_info(seq_dec, fp);

        bqd = hcr_base_qual_distr_new_from_file(fp, seq_dec->alpha);
        seq_dec->qual_offset = bqd->qual_offset;

        read = gt_xfread_one(&end_enc_start_sampling, fp);
        gt_assert(read == one);

        seq_dec->start_of_encoding = decoder_calc_start_of_encoded_data(fp);

        seq_decoder_init_huffman(seq_dec, end_enc_start_sampling, bqd, err);
    }

    if (gt_error_is_set(err)) {
        /* either the open or the Huffman initialisation failed */
        hcr_seq_decoder_delete(seq_dec);
        seq_dec = NULL;
    }
    else {
        /* the sampling is read from the position stored in the file */
        gt_xfseek(fp, end_enc_start_sampling, SEEK_SET);
        seq_dec->sampling = gt_sampling_read(fp);

        seq_dec->file_info_rbt = seq_decoder_init_file_info(seq_dec->fileinfos,
                                 seq_dec->num_of_files);
    }

    hcr_base_qual_distr_delete(bqd);
    gt_fa_fclose(fp);
    return seq_dec;
}
Exemplo n.º 5
0
/*
 * Lua constructor shared by the Cairo file canvas bindings.
 *
 * Lua arguments: 1 = width, 2 = height, 3 = nil or an image info object.
 * Pushes a userdata holding the new GtCanvas (output type <t>) with the
 * CANVAS_METATABLE attached and returns 1. If canvas creation sets an error,
 * it is raised as a Lua error via gt_lua_error().
 */
static int canvas_cairo_file_lua_new_generic(lua_State *L, GtGraphicsOutType t)
{
  GtCanvas **canvas;
  GtImageInfo *image_info = NULL;
  GtStyle *sty;
  GtError *error;
  unsigned int w, h;

  w = luaL_checkint(L, 1);
  h = luaL_checkint(L, 2);

  /* create canvas */
  sty = gt_lua_get_style_from_registry(L);
  canvas = lua_newuserdata(L, sizeof (GtCanvas*));
  gt_assert(canvas);

  /* if an image info object is passed, it must be of the correct type; the
     type check runs before the error object is allocated */
  if (!lua_isnil(L, 3))
    image_info = *check_imageinfo(L, 3);

  error = gt_error_new();
  *canvas = gt_canvas_cairo_file_new(sty, t, w, h, image_info, error);
  if (gt_error_is_set(error))
    return gt_lua_error(L, error);
  gt_error_delete(error);

  luaL_getmetatable(L, CANVAS_METATABLE);
  lua_setmetatable(L, -2);
  return 1;
}
Exemplo n.º 6
0
/*
 * Perform I/O on an integer set using <fp>.
 *
 * If <intset> is not NULL, the work is delegated to the io_func of its class.
 * If <intset> is NULL, the type is read from <fp> with
 * gt_intset_read_type_rewind() and the matching 8/16/32 bit I/O function is
 * called with a NULL set. If the type cannot be read or matches none of the
 * three variants, NULL is returned and <err> is set (unless it already is).
 */
GtIntset *gt_intset_io(GtIntset *intset, FILE *fp, GtError *err)
{
  GtUword type;

  if (intset != NULL) {
    gt_assert(intset->c_class != NULL);
    gt_assert(intset->c_class->io_func != NULL);
    return intset->c_class->io_func(intset, fp, err);
  }

  /* no set given: dispatch on the type stored in the file */
  if (gt_intset_read_type_rewind(fp, &type, err) == 0) {
    if (gt_intset_8_file_is_type(type))
      return gt_intset_8_io(intset, fp, err);
    if (gt_intset_16_file_is_type(type))
      return gt_intset_16_io(intset, fp, err);
    if (gt_intset_32_file_is_type(type))
      return gt_intset_32_io(intset, fp, err);
  }

  /* reading the type failed or no variant matched */
  if (!gt_error_is_set(err))
    gt_error_set(err, "could not identify intset type from file");
  return intset;
}
Exemplo n.º 7
0
/*
 * Create an alphabet from the definition text <alphadef> of length <len>.
 *
 * The text is split into lines at '\n' and the lines are handed to
 * read_symbolmap_from_lines(). Returns the new alphabet, or NULL with <err>
 * set if the lines could not be parsed; in that case the partially built
 * alphabet is deleted.
 *
 * <alphadef> must not be NULL and <len> must be greater than 0.
 */
GtAlphabet* gt_alphabet_new_from_string(const char *alphadef, unsigned long len,
                                        GtError *err)
{
  unsigned long i, j = 0;
  GtStrArray *sa;
  GtAlphabet *alpha;
  gt_assert(alphadef && len > 0);
  gt_error_check(err);

  alpha = gt_alphabet_new_empty();

  /* collect the lines of the definition; j marks the start of the current
     line */
  sa = gt_str_array_new();
  for (i = 0; i < len; i++) {
    /* NOTE(review): if the last character is not '\n', the final line is
       added with length i - j, i.e. without that last character. This looks
       intended for input ending in a newline or terminator; confirm against
       the callers. */
    if (alphadef[i] == '\n' || i == len - 1) {
      gt_str_array_add_cstr_nt(sa, alphadef+j, i - j);
      j = i+1;
    }
  }

  if (read_symbolmap_from_lines(alpha, NULL, sa, err) != 0) {
    gt_assert(gt_error_is_set(err));
    /* do not leak the half-initialised alphabet on failure */
    gt_alphabet_delete(alpha);
    alpha = NULL;
  }
  gt_str_array_delete(sa);
  return alpha;
}
Exemplo n.º 8
0
/*
 * Raise the error stored in <err> as a Lua error.
 *
 * The Lua error message is the result of luaL_where(L, 1) concatenated with
 * the message of <err>. <err> must be set; it is deleted by this function.
 * The return value is that of lua_error() so callers can write
 * `return gt_lua_error(L, err);`.
 */
int gt_lua_error(lua_State *L, GtError *err)
{
  const char *message;

  gt_assert(L && err);
  gt_assert(gt_error_is_set(err));

  luaL_where(L, 1);
  message = gt_error_get(err);
  lua_pushstring(L, message);
  /* the message has been pushed onto the Lua stack, <err> can go now */
  gt_error_delete(err);
  lua_concat(L, 2);
  return lua_error(L);
}
Exemplo n.º 9
0
/*
 * Allocate a metadata object for the encoded sequence <indexname> and fill it
 * with readfirstvaluesfromfile(). Returns NULL (with <err> set, asserted) if
 * the values could not be read; the object is freed in that case.
 */
GtEncseqMetadata* gt_encseq_metadata_new(const char *indexname, GtError *err)
{
  GtEncseqMetadata *md;

  gt_assert(indexname);

  md = gt_malloc(sizeof *md);
  md->alpha = NULL;
  md->customalphabet = false;

  if (readfirstvaluesfromfile(md, indexname, err) == 0)
    return md;

  /* reading failed: release the object again */
  gt_assert(gt_error_is_set(err));
  gt_free(md);
  return NULL;
}
Exemplo n.º 10
0
/*
 * Lua binding: push the first sequence ID of the feature index given as
 * argument 1, or nil if there is none. A failure of the lookup is raised as a
 * Lua error. Returns the number of pushed values (1).
 */
static int feature_index_lua_get_first_seqid(lua_State *L)
{
  GtFeatureIndex **fi = check_feature_index(L, 1);
  GtError *error = gt_error_new();
  char *first = gt_feature_index_get_first_seqid(*fi, error);

  if (gt_error_is_set(error))
    return gt_lua_error(L, error);
  gt_error_delete(error);

  if (first == NULL) {
    lua_pushnil(L);
    return 1;
  }

  /* the string is handed to Lua first, then our copy is released */
  lua_pushstring(L, first);
  gt_free(first);
  return 1;
}
Exemplo n.º 11
0
/*
 * Create an alphabet by letting assign_protein_or_dna_alphabet() decide on a
 * protein or DNA alphabet for the files in <filenametab>. Returns NULL with
 * <err> set (asserted) if that fails; the empty alphabet is deleted then.
 */
GtAlphabet* gt_alphabet_new_from_sequence(const GtStrArray *filenametab,
                                          GtError *err)
{
  GtAlphabet *result;

  gt_error_check(err);

  result = gt_alphabet_new_empty();
  if (assign_protein_or_dna_alphabet(result,filenametab,err) == 0)
    return result;

  gt_assert(gt_error_is_set(err));
  gt_alphabet_delete(result);
  return NULL;
}
Exemplo n.º 12
0
/*
 * Run the tool function <tool> with the given command line.
 *
 * Sets up the allocators and an error object named after argv[0], runs the
 * tool and prints a set error to stderr. Returns 2 if gt_allocators_clean()
 * reports a problem, EXIT_FAILURE if the tool returned non-zero, and
 * EXIT_SUCCESS otherwise.
 */
int gt_tooldriver(int(*tool)(int argc, const char **argv, GtError*),
               int argc, char *argv[])
{
  GtError *error;
  int had_err;

  gt_allocators_init();
  error = gt_error_new();
  gt_error_set_progname(error, argv[0]);

  had_err = tool(argc, (const char**) argv, error);
  if (gt_error_is_set(error)) {
    fprintf(stderr, "%s: error: %s\n", gt_error_get_progname(error),
            gt_error_get(error));
    /* a set error must come with a non-zero tool result */
    gt_assert(had_err);
  }
  gt_error_delete(error);

  if (gt_allocators_clean())
    return 2; /* programmer error */
  return had_err ? EXIT_FAILURE : EXIT_SUCCESS;
}
Exemplo n.º 13
0
/*
 * Lua constructor for layouts.
 *
 * Lua arguments: 1 = diagram, 2 = width. Pushes a userdata holding the new
 * GtLayout with the LAYOUT_METATABLE attached and returns 1. If layout
 * creation sets an error, it is raised as a Lua error.
 */
static int layout_lua_new(lua_State *L)
{
    GtLayout **layout;
    GtDiagram **dia = check_diagram(L, 1);
    unsigned int w = luaL_checkint(L, 2);
    GtStyle *sty;
    GtError *error;

    /* create layout */
    sty = gt_lua_get_style_from_registry(L);
    layout = lua_newuserdata(L, sizeof (GtLayout*));
    gt_assert(layout);

    error = gt_error_new();
    *layout = gt_layout_new(*dia, w, sty, error);
    if (gt_error_is_set(error))
        return gt_lua_error(L, error);
    gt_error_delete(error);

    luaL_getmetatable(L, LAYOUT_METATABLE);
    lua_setmetatable(L, -2);
    return 1;
}
Exemplo n.º 14
0
/*
 * Construct a tool object with <tool_constructor> and run it with the given
 * command line.
 *
 * Sets up the allocators and an error object named after argv[0], runs and
 * deletes the tool, and prints a set error to stderr. Returns 2 if
 * gt_allocators_clean() reports a problem, EXIT_FAILURE if the tool returned
 * non-zero, and EXIT_SUCCESS otherwise.
 */
int gt_toolobjdriver(GtToolConstructor tool_constructor, int argc, char *argv[])
{
  GtTool *instance;
  GtError *error;
  int had_err;

  gt_allocators_init();
  error = gt_error_new();
  gt_error_set_progname(error, argv[0]);

  instance = tool_constructor();
  had_err = gt_tool_run(instance, argc, (const char**) argv, error);
  gt_tool_delete(instance);

  if (gt_error_is_set(error)) {
    fprintf(stderr, "%s: error: %s\n", gt_error_get_progname(error),
            gt_error_get(error));
    /* a set error must come with a non-zero run result */
    gt_assert(had_err);
  }
  gt_error_delete(error);

  if (gt_allocators_clean())
    return 2; /* programmer error */
  return had_err ? EXIT_FAILURE : EXIT_SUCCESS;
}
Exemplo n.º 15
0
/*
 * Decode read number <readnum> into <seq> and <qual> and, if the decoder has
 * a description decoder, its description into <desc>.
 *
 * If the decoder is already positioned at <readnum>, the next read is decoded
 * directly. Otherwise the decoder is advanced from its current position, or
 * reset to the nearest sample (when a sampling is available) or to the start
 * of the data, and the reads in between are decoded and overwritten in
 * <seq>/<qual> until <readnum> is reached.
 *
 * <hcr_dec>, <seq> and <qual> must not be NULL and <readnum> must be smaller
 * than the number of reads. Returns 0 on success and -1 on failure.
 */
int gt_hcr_decoder_decode(GtHcrDecoder *hcr_dec, GtUword readnum,
                          char *seq, char *qual, GtStr *desc, GtError *err)
{
    GtUword nearestsample = 0,
            reads_to_read = 0,
            idx,
            current_read;
    size_t startofnearestsample = 0;
    GtSampling *sampling;
    HcrHuffDataIterator *data_iter;
    GtHuffmanDecoder *huff_dec;

    gt_error_check(err);
    gt_assert(hcr_dec);
    gt_assert(readnum < hcr_dec->seq_dec->num_of_reads);
    gt_assert(seq != NULL && qual != NULL);

    /* read the position only after hcr_dec has been checked */
    current_read = hcr_dec->seq_dec->cur_read;

    if (current_read == readnum) {
        if (hcr_next_seq_qual(hcr_dec->seq_dec, seq, qual, err) == -1) {
            gt_assert(gt_error_is_set(err));
            return -1;
        }
    }
    else {
        sampling = hcr_dec->seq_dec->sampling;
        data_iter = hcr_dec->seq_dec->data_iter;
        huff_dec = hcr_dec->seq_dec->huff_dec;

        if (sampling != NULL) {
            gt_sampling_get_page(sampling,
                                 readnum,
                                 &nearestsample,
                                 &startofnearestsample);
            /* nearestsample <= cur_read < readnum: current sample is the right one */
            if (nearestsample <= current_read && current_read <= readnum)
                reads_to_read = readnum - current_read;
            else { /* reset decoder to new sample */
                reset_data_iterator_to_pos(data_iter, startofnearestsample);
                (void) gt_huffman_decoder_get_new_mem_chunk(huff_dec, err);
                if (gt_error_is_set(err))
                    return -1;
                reads_to_read = readnum - nearestsample;
                hcr_dec->seq_dec->cur_read = nearestsample;
            }
            gt_log_log("reads to read: "GT_WU", nearest sample: "GT_WU"",
                       reads_to_read,nearestsample);
            gt_log_log("start of nearest: "GT_WU"", (GtUword) startofnearestsample);
        }
        else {
            /* no sampling: either continue forward or restart from read 0 */
            if (current_read <= readnum)
                reads_to_read = readnum - current_read;
            else {
                reset_data_iterator_to_start(data_iter);
                (void) gt_huffman_decoder_get_new_mem_chunk(huff_dec, err);
                if (gt_error_is_set(err))
                    return -1;
                reads_to_read = readnum;
                hcr_dec->seq_dec->cur_read = 0;
            }
        }

        /* decode and discard the reads preceding the requested one */
        for (idx = 0; idx < reads_to_read; idx++) {
            if (hcr_next_seq_qual(hcr_dec->seq_dec, seq,qual, err) == -1) {
                gt_assert(gt_error_is_set(err));
                return -1;
            }
            gt_log_log("seq:\n%s\nqual:\n%s", seq, qual);
        }

        /* this call decodes the requested read itself */
        if (hcr_next_seq_qual(hcr_dec->seq_dec, seq, qual, err) == -1) {
            gt_assert(gt_error_is_set(err));
            return -1;
        }
    }

    if (hcr_dec->encdesc != NULL) {
        if (gt_encdesc_decode(hcr_dec->encdesc, readnum, desc, err) == -1) {
            gt_error_set(err, "cannot retrieve description with number "GT_WU"."
                         "(%d)", readnum, __LINE__);
            return -1;
        }
    }
    return 0;
}
Exemplo n.º 16
0
/*
 * Runner of the LTRdigest tool.
 *
 * argv[parsed_args] is used as the GFF3 input file and argv[parsed_args+1] as
 * the name of the encoded sequence index. The function loads the encoded
 * sequence, optionally opens the tRNA library (and, with HAVE_HMMER,
 * evaluates the cutoff setting), builds the stream chain
 *   GFF3 in -> LTRdigest -> [tabular output] -> GFF3 out
 * and pulls all nodes through it.
 *
 * Returns 0 on success and a non-zero value on failure.
 */
static int gt_ltrdigest_runner(GT_UNUSED int argc, const char **argv,
                               int parsed_args, void *tool_arguments,
                               GtError *err)
{
  GtLTRdigestOptions *arguments = tool_arguments;
  GtNodeStream *gff3_in_stream   = NULL,
               *gff3_out_stream  = NULL,
               *ltrdigest_stream = NULL,
               *tab_out_stream   = NULL,
               *last_stream      = NULL;
  int had_err      = 0,
      tests_to_run = 0,
      arg = parsed_args;
  const char *indexname = argv[arg+1];
  GtLogger *logger;
  GtEncseqLoader *el;
  GtEncseq *encseq;
  gt_error_check(err);
  gt_assert(arguments);

  /* created only after <arguments> has been checked */
  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stdout);

  /* Set sequence encoder options. Defaults are ok. */
  el = gt_encseq_loader_new();
  gt_encseq_loader_set_logger(el, logger);

  /* Open sequence file */
  encseq = gt_encseq_loader_load(el, indexname, err);
  if (!encseq)
    had_err = -1;

  /* Always search for PPT. */
  tests_to_run |= GT_LTRDIGEST_RUN_PPT;

  /* Open tRNA library if given. */
  if (!had_err && arguments->trna_lib
        && gt_str_length(arguments->trna_lib) > 0)
  {
    tests_to_run |= GT_LTRDIGEST_RUN_PBS;
    arguments->pbs_opts.trna_lib = gt_bioseq_new(gt_str_get(arguments->trna_lib),
                                                 err);
    if (gt_error_is_set(err))
      had_err = -1;
  }

#ifdef HAVE_HMMER
  /* Open HMMER files if given. */
  if (!had_err && gt_str_array_size(arguments->pdom_opts.hmm_files) > 0)
  {
    tests_to_run |= GT_LTRDIGEST_RUN_PDOM;
    if (!strcmp(gt_str_get(arguments->cutoffs), "GA")) {
      arguments->pdom_opts.cutoff = GT_PHMM_CUTOFF_GA;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "TC")) {
      arguments->pdom_opts.cutoff = GT_PHMM_CUTOFF_TC;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "NONE")) {
      arguments->pdom_opts.cutoff = GT_PHMM_CUTOFF_NONE;
    } else {
      gt_error_set(err, "invalid cutoff setting!");
      had_err = -1;
    }
  }
#endif

  if (!had_err)
  {
    /* set up stream flow
     * ------------------*/
    last_stream = gff3_in_stream  = gt_gff3_in_stream_new_sorted(argv[arg]);

    last_stream = ltrdigest_stream = gt_ltrdigest_stream_new(last_stream,
                                                  tests_to_run,
                                                  encseq,
                                                  &arguments->pbs_opts,
                                                  &arguments->ppt_opts,
#ifdef HAVE_HMMER
                                                  &arguments->pdom_opts,
#endif
                                                  err);
    if (!ltrdigest_stream)
      had_err = -1;
  }

  /* attach tabular output stream, if requested */
  if (!had_err && gt_str_length(arguments->prefix) > 0)
  {
    tab_out_stream = gt_ltr_fileout_stream_new(last_stream,
                                            tests_to_run,
                                            encseq,
                                            gt_str_get(arguments->prefix),
                                            &arguments->ppt_opts,
                                            &arguments->pbs_opts,
#ifdef HAVE_HMMER
                                            &arguments->pdom_opts,
#endif
                                            gt_str_get(arguments->trna_lib),
                                            argv[arg+1],
                                            argv[arg],
                                            arguments->seqnamelen,
                                            err);
    /* do not continue with a missing stream (same handling as for the
       LTRdigest stream above) */
    if (!tab_out_stream)
      had_err = -1;
    else
      last_stream = tab_out_stream;
#ifdef HAVE_HMMER
    /* test the option values; the address of a member is never NULL */
    if (!had_err && arguments->pdom_opts.write_alignments)
      gt_ltr_fileout_stream_enable_pdom_alignment_output(tab_out_stream);
    if (!had_err && arguments->pdom_opts.write_aaseqs)
      gt_ltr_fileout_stream_enable_aa_sequence_output(tab_out_stream);
#endif
  }

  if (!had_err)
  {
    last_stream = gff3_out_stream = gt_gff3_out_stream_new(last_stream,
                                                           arguments->outfp);

    /* pull the features through the stream and free them afterwards */
    had_err = gt_node_stream_pull(last_stream, err);
  }

  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(ltrdigest_stream);
  if (tab_out_stream != NULL)
    gt_node_stream_delete(tab_out_stream);
  gt_node_stream_delete(gff3_in_stream);

  gt_encseq_loader_delete(el);
  gt_encseq_delete(encseq);
  encseq = NULL;
  gt_bioseq_delete(arguments->pbs_opts.trna_lib);
  gt_logger_delete(logger);

  return had_err;
}
Exemplo n.º 17
0
/*
 * Make sure the encoded sequence of <bs> is available and load it.
 *
 * The bioseq files are (re)constructed if <recreate> is set, if the sequence
 * comes from stdin, if any of the index files is missing, or if the sequence
 * file is newer than the index file. Afterwards the encoded sequence is
 * loaded into bs->encseq. Returns 0 on success and a non-zero value (with
 * <err> set) on failure. bs->encseq must be NULL on entry.
 */
static int bioseq_fill(GtBioseq *bs, bool recreate, GtError *err)
{
  GtStr *base,
        *esq_name,
        *ois_name,
        *sds_name,
        *md5_name,
        *des_name;
  bool must_construct;
  int had_err = 0;

  gt_assert(!bs->encseq);

  /* for stdin a temporary base name is used, which is freed below */
  base = bs->use_stdin ? gt_str_new_cstr("stdin") : bs->sequence_file;

  /* construct file names */
  esq_name = gt_str_clone(base);
  gt_str_append_cstr(esq_name, GT_ENCSEQFILESUFFIX);
  ois_name = gt_str_clone(base);
  gt_str_append_cstr(ois_name, GT_OISTABFILESUFFIX);
  sds_name = gt_str_clone(base);
  gt_str_append_cstr(sds_name, GT_SDSTABFILESUFFIX);
  md5_name = gt_str_clone(base);
  gt_str_append_cstr(md5_name, GT_MD5TABFILESUFFIX);
  des_name = gt_str_clone(base);
  gt_str_append_cstr(des_name, GT_DESTABFILESUFFIX);

  /* construct the bioseq files if necessary */
  must_construct = recreate || bs->use_stdin ||
                   !gt_file_exists(gt_str_get(esq_name)) ||
                   !gt_file_exists(gt_str_get(ois_name)) ||
                   !gt_file_exists(gt_str_get(sds_name)) ||
                   !gt_file_exists(gt_str_get(md5_name)) ||
                   !gt_file_exists(gt_str_get(des_name)) ||
                   gt_file_is_newer(gt_str_get(bs->sequence_file),
                                    gt_str_get(esq_name));
  if (must_construct)
    had_err = construct_bioseq_files(bs, base, err);

  /* load the encoded sequence with all tables the bioseq relies on */
  if (!had_err) {
    GtEncseqLoader *loader = gt_encseq_loader_new();
    gt_encseq_loader_disable_autosupport(loader);
    gt_encseq_loader_require_lossless_support(loader);
    gt_encseq_loader_require_description_support(loader);
    gt_encseq_loader_require_md5_support(loader);
    gt_encseq_loader_require_multiseq_support(loader);
    bs->encseq = gt_encseq_loader_load(loader, gt_str_get(base), err);
    if (bs->encseq == NULL) {
      had_err = -1;
      gt_assert(gt_error_is_set(err));
    }
    gt_encseq_loader_delete(loader);
  }
  if (!had_err) {
    gt_assert(bs->encseq);
  }

  /* free */
  if (bs->use_stdin)
    gt_str_delete(base);
  gt_str_delete(esq_name);
  gt_str_delete(ois_name);
  gt_str_delete(md5_name);
  gt_str_delete(sds_name);
  gt_str_delete(des_name);

  return had_err;
}
Exemplo n.º 18
0
static void* gt_feature_index_unit_test_query(void *data)
{
  GtFeatureIndexTestShared *shm = (GtFeatureIndexTestShared*) data;
  GtRange rng;
  GtError *err = shm->err;
  GtUword i;
  int had_err = 0;
  GtArray *arr, *arr_ref;

  gt_mutex_lock(shm->mutex);
  if (gt_error_is_set(shm->err)) {
    gt_mutex_unlock(shm->mutex);
    return NULL;
  }
  gt_mutex_unlock(shm->mutex);

  arr = gt_array_new(sizeof (GtFeatureNode*));
  arr_ref = gt_array_new(sizeof (GtFeatureNode*));
  rng.start = random() % (GT_FI_TEST_END - GT_FI_TEST_QUERY_WIDTH);
  rng.end = rng.start + random() % (GT_FI_TEST_QUERY_WIDTH);

  /* get reference set by linear search */
  gt_mutex_lock(shm->mutex);
  for (i=0; i<GT_FI_TEST_FEATURES_PER_THREAD * gt_jobs; i++) {
    GtRange rng2;
    GtFeatureNode *fn;
    fn = *(GtFeatureNode**) gt_array_get(shm->nodes, i);
    rng2 = gt_genome_node_get_range((GtGenomeNode*) fn);
    if (gt_range_overlap(&rng, &rng2)) {
      gt_array_add(arr_ref, fn);
    }
  }
  gt_mutex_unlock(shm->mutex);

  /* query feature index */
  gt_feature_index_get_features_for_range(shm->fi, arr, GT_FI_TEST_SEQID, &rng,
                                          err);

  /* result size must be equal */
  if (gt_array_size(arr) != gt_array_size(arr_ref))
    had_err = -1;

  /* nodes must be the same (note that we should not rely on ptr equality) */
  if (!had_err) {
    gt_array_sort(arr_ref, cmp_range_start);
    gt_array_sort(arr    , cmp_range_start);

    for (i=0;i<gt_array_size(arr);i++) {
      if (had_err)
        break;
      if (!gt_feature_node_is_similar(*(GtFeatureNode**) gt_array_get(arr, i),
                                      *(GtFeatureNode**)
                                      gt_array_get(arr_ref, i))) {
        had_err = -1;
      }
    }
  }

  if (had_err) {
    gt_mutex_lock(shm->mutex);
    shm->error_count++;
    gt_mutex_unlock(shm->mutex);
  }

  gt_array_delete(arr);
  gt_array_delete(arr_ref);
  return NULL;
}
Exemplo n.º 19
0
/*
 * Draw a single element <elem> onto the Cairo canvas <canvas>.
 *
 * The function queries the style for bar height, arrow width, stroke and
 * fill settings of the element's type, converts the element range to drawing
 * coordinates and draws the element with the graphics object of the canvas,
 * in the shape selected by the "style" setting ("box", "rectangle", "caret",
 * "dashes" or "line"; anything else is drawn like "box"). If the canvas has
 * an image info object, the element's rectangle is registered in it.
 *
 * Returns -1 if the element does not overlap the view range of the canvas or
 * if a style query reports GT_STYLE_QUERY_ERROR; otherwise returns had_err,
 * which is never changed from 0 in this function.
 */
int gt_canvas_cairo_visit_element(GtCanvas *canvas, GtElement *elem,
                                  GtError *err)
{
  int had_err = 0, arrow_status = ARROW_NONE;
  GtRange elem_range = gt_element_get_range(elem);
  GtDrawingRange draw_range;
  double elem_start = GT_UNDEF_DOUBLE,
         elem_width = GT_UNDEF_DOUBLE,
         stroke_width = STROKE_WIDTH_DEFAULT,
         bar_height = BAR_HEIGHT_DEFAULT,
         arrow_width = ARROW_WIDTH_DEFAULT;
  GtColor elem_color, grey, fill_color;
  const char *type;
  GtStyleQueryStatus rval;
  GtStr *style;
  GtStrand strand = gt_element_get_strand(elem);

  /* NOTE(review): <elem> has already been used in the initialisers above, so
     this assertion comes too late to guard those calls. */
  gt_assert(canvas && elem);

  /* This shouldn't happen. */
  if (!gt_range_overlap(&elem_range, &canvas->pvt->viewrange))
    return -1;

  type = gt_element_get_type(elem);
  /* semi-transparent grey, used for the clipping arrowheads at the end */
  grey.red = grey.green = grey.blue = .85;
  grey.alpha = 0.5;

  /* get default or image-wide bar height */
  if (gt_style_get_num(canvas->pvt->sty, "format", "bar_height", &bar_height,
                       NULL, err) == GT_STYLE_QUERY_ERROR) {
    return -1;
  }
  /* try to get type-specific bar height */
  if (gt_style_get_num_with_track(canvas->pvt->sty, type, "bar_height",
                       &bar_height,
                       gt_element_get_node_ref(elem),
                       gt_track_get_title(canvas->pvt->current_track),
                       err) == GT_STYLE_QUERY_ERROR) {
    return -1;
  }
  /* get default or image-wide arrow width */
  if (gt_style_get_num(canvas->pvt->sty, "format", "arrow_width", &arrow_width,
                        NULL, err)== GT_STYLE_QUERY_ERROR) {
    return -1;
  }
  /* try to get type-specific arrow width */
  if (gt_style_get_num_with_track(canvas->pvt->sty, type, "arrow_width",
                       &arrow_width,
                       gt_element_get_node_ref(elem),
                       gt_track_get_title(canvas->pvt->current_track),
                       err) == GT_STYLE_QUERY_ERROR) {
    return -1;
  }

  /* derive the arrow status from the strand: reverse -> left,
     forward -> right, both -> both */
  if ((strand == GT_STRAND_REVERSE || strand == GT_STRAND_BOTH)
         /*&& delem == gt_dlist_first(elems)*/)
    arrow_status = ARROW_LEFT;
  if ((strand == GT_STRAND_FORWARD || strand == GT_STRAND_BOTH)
         /*&& gt_dlistelem_next(delem) == NULL*/)
    arrow_status = (arrow_status == ARROW_LEFT ? ARROW_BOTH : ARROW_RIGHT);

  gt_log_log("processing element from %lu to %lu, strand %d\n",
             elem_range.start, elem_range.end, (int) strand);

  /* convert the element range to drawing coordinates: the range is scaled by
     the canvas width without both margins and shifted by the left margin
     (presumably gt_coords_calc_generic_range() yields relative coordinates
     -- TODO confirm) */
  draw_range = gt_coords_calc_generic_range(elem_range, canvas->pvt->viewrange);
  draw_range.start *= (canvas->pvt->width-2*canvas->pvt->margins);
  draw_range.end *= (canvas->pvt->width-2*canvas->pvt->margins);
  elem_start = draw_range.start + canvas->pvt->margins;
  elem_width = draw_range.end - draw_range.start;
  gt_assert(elem_start != GT_UNDEF_DOUBLE && elem_width != GT_UNDEF_DOUBLE);

  /* stroke color and width: marked elements use the "stroke_marked"
     settings, all others the regular "stroke" settings */
  if (gt_element_is_marked(elem)) {
    if (gt_style_get_color_with_track(canvas->pvt->sty, type, "stroke_marked",
                           &elem_color, gt_element_get_node_ref(elem),
                           gt_track_get_title(canvas->pvt->current_track),
                           err) == GT_STYLE_QUERY_ERROR) {
      return -1;
    }
    if (gt_style_get_num_with_track(canvas->pvt->sty, "format",
                          "stroke_marked_width",
                          &stroke_width, gt_element_get_node_ref(elem),
                          gt_track_get_title(canvas->pvt->current_track),
                          err) == GT_STYLE_QUERY_ERROR) {
    return -1;
    }
  }
  else {
    if (gt_style_get_color_with_track(canvas->pvt->sty, type, "stroke",
                              &elem_color,
                              gt_element_get_node_ref(elem),
                              gt_track_get_title(canvas->pvt->current_track),
                              err) == GT_STYLE_QUERY_ERROR) {
      return -1;
    }
    /* image-wide stroke width first, then the type-specific one */
    if (gt_style_get_num_with_track(canvas->pvt->sty, "format", "stroke_width",
                         &stroke_width,
                         gt_element_get_node_ref(elem),
                         gt_track_get_title(canvas->pvt->current_track),
                         err) == GT_STYLE_QUERY_ERROR) {
      return -1;
    }
    if (gt_style_get_num_with_track(canvas->pvt->sty, type, "stroke_width",
                         &stroke_width,
                         gt_element_get_node_ref(elem),
                         gt_track_get_title(canvas->pvt->current_track),
                         err) == GT_STYLE_QUERY_ERROR) {
      return -1;
    }
  }
  if (gt_style_get_color_with_track(canvas->pvt->sty, type, "fill",
                                 &fill_color,
                                 gt_element_get_node_ref(elem),
                                 gt_track_get_title(canvas->pvt->current_track),
                                 err) == GT_STYLE_QUERY_ERROR) {
    return -1;
  }

  /* elements narrower than 1.1 drawing units are drawn as a single vertical
     line; the bittab records at which start positions such a line has been
     drawn already, so that each position is drawn only once */
  if (canvas->pvt->bt &&
          gt_double_smaller_double(draw_range.end-draw_range.start, 1.1))
  {
    /* NOTE(review): a start position equal to gt_bittab_size() passes this
       check and is used as bit index below; confirm whether `>=` is
       intended. */
    if ((unsigned long) draw_range.start > gt_bittab_size(canvas->pvt->bt))
      return had_err;
    if (gt_bittab_bit_is_set(canvas->pvt->bt, (unsigned long) draw_range.start))
      return had_err;
    gt_graphics_draw_vertical_line(canvas->pvt->g,
                                   elem_start,
                                   canvas->pvt->y - bar_height/2,
                                   elem_color,
                                   bar_height,
                                   stroke_width);
    gt_bittab_set_bit(canvas->pvt->bt, (unsigned long) draw_range.start);
  }

  /* register coordinates in GtImageInfo object if available */
  if (canvas->pvt->ii)
  {
    GtRecMap *rm = gt_rec_map_new(elem_start, canvas->pvt->y - bar_height/2,
                                  elem_start+elem_width,
                                  canvas->pvt->y+bar_height/2,
                                  (GtFeatureNode*)
                                    gt_element_get_node_ref(elem));
    gt_image_info_add_rec_map(canvas->pvt->ii, rm);
    }

  /* narrow elements were handled by the vertical line above; nothing more to
     draw for them */
  if (canvas->pvt->bt && draw_range.end-draw_range.start <= 1.1)
  {
    return had_err;
  }

  gt_log_log("drawing element from %f to %f, arrow status: %d",
             draw_range.start, draw_range.end, arrow_status);

  /* draw each element according to the "style" option set in the style */
  style = gt_str_new();
  rval = gt_style_get_str_with_track(canvas->pvt->sty, type, "style", style,
                          gt_element_get_node_ref(elem),
                          gt_track_get_title(canvas->pvt->current_track),
                          err);
  switch (rval) {
    case GT_STYLE_QUERY_NOT_SET:
      gt_str_set(style, "box");    /* default style */
      break;
    case GT_STYLE_QUERY_ERROR:
      gt_str_delete(style);
      gt_assert(gt_error_is_set(err));
      return -1;
    default:
      break;
  }

  if (strcmp(gt_str_get(style), "box") == 0)
  {
    gt_graphics_draw_box(canvas->pvt->g,
                         elem_start,
                         canvas->pvt->y - bar_height/2,
                         elem_width,
                         bar_height,
                         fill_color,
                         arrow_status,
                         arrow_width,
                         stroke_width,
                         elem_color,
                         false);
  }
  else if (strcmp(gt_str_get(style), "rectangle") == 0)
  {
    /* same as "box", but never with an arrow */
    gt_graphics_draw_box(canvas->pvt->g,
                         elem_start,
                         canvas->pvt->y - bar_height/2,
                         elem_width,
                         bar_height,
                         fill_color,
                         ARROW_NONE,
                         arrow_width,
                         stroke_width,
                         elem_color,
                         false);
  }
  else if (strcmp(gt_str_get(style), "caret") == 0)
  {
    gt_graphics_draw_caret(canvas->pvt->g,
                           elem_start,
                           canvas->pvt->y - bar_height/2,
                           elem_width,
                           bar_height,
                           ARROW_NONE,
                           arrow_width,
                           stroke_width,
                           elem_color);
  }
  else if (strcmp(gt_str_get(style), "dashes") == 0)
  {
    gt_graphics_draw_dashes(canvas->pvt->g,
                            elem_start,
                            canvas->pvt->y - bar_height/2,
                            elem_width,
                            bar_height,
                            arrow_status,
                            arrow_width,
                            stroke_width,
                            elem_color);
  }
  else if (strcmp(gt_str_get(style), "line") == 0)
  {
    /* the line is drawn with a fixed stroke width of 1.0, not stroke_width */
    gt_graphics_draw_horizontal_line(canvas->pvt->g,
                                     elem_start,
                                     canvas->pvt->y - bar_height/2,
                                     elem_color,
                                     elem_width,
                                     1.0);
  }
  else
  {
     /* unknown style names fall back to the same drawing call as "box" */
     gt_graphics_draw_box(canvas->pvt->g,
                          elem_start,
                          canvas->pvt->y - bar_height/2,
                          elem_width,
                          bar_height,
                          fill_color,
                          arrow_status,
                          arrow_width,
                          stroke_width,
                          elem_color,
                          false);
  }
  gt_str_delete(style);

  /* draw arrowheads at clipped margins */
  if (draw_range.clip == CLIPPED_LEFT || draw_range.clip == CLIPPED_BOTH)
      gt_graphics_draw_arrowhead(canvas->pvt->g,
                                 canvas->pvt->margins - 10,
                                 canvas->pvt->y - 4,
                                 grey,
                                 ARROW_LEFT);
  if (draw_range.clip == CLIPPED_RIGHT || draw_range.clip == CLIPPED_BOTH)
      gt_graphics_draw_arrowhead(canvas->pvt->g,
                                 canvas->pvt->width-canvas->pvt->margins + 10,
                                 canvas->pvt->y - 4,
                                 grey,
                                 ARROW_RIGHT);
  return had_err;
}
Exemplo n.º 20
0
/* Runner of the LTRdigest tool.

   Sets up a sequence source (region mapping), then builds a chain of node
   streams on top of a sorted GFF3 input stream reading <argv[parsed_args]>:
     - a protein domain visitor stream (only if HMM files were given),
     - a PBS visitor stream (only if a tRNA library was loaded),
     - a PPT visitor stream (always),
     - a strand assignment visitor stream (always),
     - a tabular file output stream (only if an output prefix was given),
     - a GFF3 output stream writing to <arguments->outfp>.
   Finally all nodes are pulled through the chain.

   <tool_arguments> must point to a GtLTRdigestOptions object.
   Returns 0 on success; on failure a non-zero value is returned and <err> is
   expected to be set by the failing call.

   NOTE(review): <argc> is tagged GT_UNUSED although it is read (and locally
   modified) in the legacy sequence source branch below. */
static int gt_ltrdigest_runner(GT_UNUSED int argc, const char **argv,
                               int parsed_args, void *tool_arguments,
                               GtError *err)
{
  GtLTRdigestOptions *arguments = tool_arguments;
  GtNodeStream *gff3_in_stream  = NULL,
               *gff3_out_stream = NULL,
               *pdom_stream     = NULL,
               *ppt_stream      = NULL,
               *pbs_stream      = NULL,
               *tab_out_stream  = NULL,
               *sa_stream       = NULL,
               *last_stream     = NULL;
  int had_err      = 0,
      tests_to_run = 0,
      arg = parsed_args;
  GtRegionMapping *rmap = NULL;
  GtPdomModelSet *ms = NULL;
  gt_error_check(err);
  gt_assert(arguments);

  /* determine and open sequence source */
  if (gt_seqid2file_option_used(arguments->s2fi)) {
    /* create region mapping */
    rmap = gt_seqid2file_region_mapping_new(arguments->s2fi, err);
    if (!rmap)
      had_err = -1;
  } else {
    GtEncseqLoader *el;
    GtEncseq *encseq;
    /* no new-style sequence source option given, fall back to legacy syntax */
    if (argc < 3) {
      gt_error_set(err, "missing mandatory argument(s)");
      had_err = -1;
    }
    if (!had_err) {
      el = gt_encseq_loader_new();
      gt_encseq_loader_disable_autosupport(el);
      gt_encseq_loader_require_md5_support(el);
      gt_encseq_loader_require_description_support(el);
      /* legacy syntax: the last argument names the encoded sequence */
      encseq = gt_encseq_loader_load(el, argv[argc-1], err);
      /* XXX: clip off terminal argument */
      gt_free((char*) argv[argc-1]);
      argv[argc-1] = NULL;
      argc--;
      gt_encseq_loader_delete(el);
      if (!encseq)
        had_err = -1;
      else {
        rmap = gt_region_mapping_new_encseq_seqno(encseq);
        /* drop our own handle on the encseq once the mapping was created */
        gt_encseq_delete(encseq);
      }
    }
  }
  gt_assert(had_err || rmap);

  /* Always search for PPT. */
  tests_to_run |= GT_LTRDIGEST_RUN_PPT;

  /* Open tRNA library if given. */
  if (!had_err && arguments->trna_lib
        && gt_str_length(arguments->trna_lib) > 0)
  {
    tests_to_run |= GT_LTRDIGEST_RUN_PBS;
    arguments->trna_lib_bs = gt_bioseq_new(gt_str_get(arguments->trna_lib),
                                           err);
    if (gt_error_is_set(err))
      had_err = -1;
  }

  /* Set HMMER cutoffs. */
  if (!had_err && gt_str_array_size(arguments->hmm_files) > 0)
  {
    tests_to_run |= GT_LTRDIGEST_RUN_PDOM;
    if (!strcmp(gt_str_get(arguments->cutoffs), "GA")) {
      arguments->cutoff = GT_PHMM_CUTOFF_GA;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "TC")) {
      arguments->cutoff = GT_PHMM_CUTOFF_TC;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "NONE")) {
      arguments->cutoff = GT_PHMM_CUTOFF_NONE;
    } else {
      gt_error_set(err, "invalid cutoff setting!");
      had_err = -1;
    }
  }

  /* head of the stream chain: sorted GFF3 input */
  if (!had_err) {
    last_stream = gff3_in_stream  = gt_gff3_in_stream_new_sorted(argv[arg]);
  }

  /* protein domain search, only if HMM files were given */
  if (!had_err && gt_str_array_size(arguments->hmm_files) > 0) {
    GtNodeVisitor *pdom_v;
    ms = gt_pdom_model_set_new(arguments->hmm_files, err);
    if (ms != NULL) {
      pdom_v = gt_ltrdigest_pdom_visitor_new(ms, arguments->evalue_cutoff,
                                             arguments->chain_max_gap_length,
                                             arguments->cutoff, rmap, err);
      if (pdom_v == NULL)
        had_err = -1;
      if (!had_err) {
        gt_ltrdigest_pdom_visitor_set_source_tag((GtLTRdigestPdomVisitor*)
                                                                        pdom_v,
                                                 GT_LTRDIGEST_TAG);
        if (arguments->output_all_chains)
          gt_ltrdigest_pdom_visitor_output_all_chains((GtLTRdigestPdomVisitor*)
                                                                        pdom_v);
        last_stream = pdom_stream = gt_visitor_stream_new(last_stream, pdom_v);
      }
    } else had_err = -1;
  }

  /* PBS search, only if a tRNA library was loaded above */
  if (!had_err && arguments->trna_lib_bs) {
    GtNodeVisitor *pbs_v;
    pbs_v = gt_ltrdigest_pbs_visitor_new(rmap, arguments->pbs_radius,
                                         arguments->max_edist,
                                         arguments->alilen,
                                         arguments->offsetlen,
                                         arguments->trnaoffsetlen,
                                         arguments->ali_score_match,
                                         arguments->ali_score_mismatch,
                                         arguments->ali_score_insertion,
                                         arguments->ali_score_deletion,
                                         arguments->trna_lib_bs, err);
    if (pbs_v != NULL)
      last_stream = pbs_stream = gt_visitor_stream_new(last_stream, pbs_v);
    else
      had_err = -1;
  }

  /* PPT search */
  if (!had_err) {
    GtNodeVisitor *ppt_v;
    ppt_v = gt_ltrdigest_ppt_visitor_new(rmap, arguments->ppt_len,
                                         arguments->ubox_len,
                                         arguments->ppt_pyrimidine_prob,
                                         arguments->ppt_purine_prob,
                                         arguments->bkg_a_prob,
                                         arguments->bkg_g_prob,
                                         arguments->bkg_t_prob,
                                         arguments->bkg_c_prob,
                                         arguments->ubox_u_prob,
                                         arguments->ppt_radius,
                                         arguments->max_ubox_dist, err);
    if (ppt_v != NULL)
      last_stream = ppt_stream = gt_visitor_stream_new(last_stream, ppt_v);
    else
      had_err = -1;
  }

  /* strand assignment */
  if (!had_err) {
    GtNodeVisitor *sa_v;
    sa_v = gt_ltrdigest_strand_assign_visitor_new();
    gt_assert(sa_v);
    last_stream = sa_stream = gt_visitor_stream_new(last_stream, sa_v);
  }

  if (!had_err)
  {
    /* attach tabular output stream, if requested */
    if (gt_str_length(arguments->prefix) > 0)
    {
      last_stream = tab_out_stream = gt_ltrdigest_file_out_stream_new(
                                                  last_stream,
                                                  tests_to_run,
                                                  rmap,
                                                  gt_str_get(arguments->prefix),
                                                  arguments->seqnamelen,
                                                  err);
      if (!tab_out_stream)
        had_err = -1;
      if (!had_err && arguments->print_metadata)
      {
        had_err = gt_ltrdigest_file_out_stream_write_metadata(
                                           (GtLTRdigestFileOutStream*)
                                                                 tab_out_stream,
                                           tests_to_run,
                                           gt_str_get(arguments->trna_lib),
                                           argv[arg],
                                           arguments->ppt_len,
                                           arguments->ubox_len,
                                           arguments->ppt_radius,
                                           arguments->alilen,
                                           arguments->max_edist,
                                           arguments->offsetlen,
                                           arguments->trnaoffsetlen,
                                           arguments->pbs_radius,
                                           arguments->hmm_files,
                                           arguments->chain_max_gap_length,
                                           arguments->evalue_cutoff,
                                           err);
      }
      if (!had_err)
      {
        if (arguments->write_alignments)
          gt_ltrdigest_file_out_stream_enable_pdom_alignment_output(
                                                                tab_out_stream);
        if (arguments->write_aaseqs)
          gt_ltrdigest_file_out_stream_enable_aa_sequence_output(
                                                                tab_out_stream);
      }
    }

    /* Only finish and run the chain if the tabular output stage (if any) was
       set up successfully. Otherwise <last_stream> may be NULL, <err> is
       already set, and the earlier failure must not be overwritten by the
       result of the pull. */
    if (!had_err)
    {
      last_stream = gff3_out_stream = gt_gff3_out_stream_new(last_stream,
                                                            arguments->outfp);

      /* pull the features through the stream and free them afterwards */
      had_err = gt_node_stream_pull(last_stream, err);
    }
  }

  /* clean up; streams that were never created are still NULL here */
  gt_pdom_model_set_delete(ms);
  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(ppt_stream);
  gt_node_stream_delete(pbs_stream);
  gt_node_stream_delete(sa_stream);
  gt_node_stream_delete(pdom_stream);
  gt_node_stream_delete(tab_out_stream);
  gt_node_stream_delete(gff3_in_stream);
  gt_bioseq_delete(arguments->trna_lib_bs);
  gt_region_mapping_delete(rmap);

  return had_err;
}
Exemplo n.º 21
0
// Determine the sequence IDs present in both the reference and the prediction
// feature index.
//
// Failures while fetching seqids are reported through the logger; if the
// logger holds an error afterwards, NULL is returned. Otherwise a warning is
// logged for every seqid that occurs in only one of the two indexes, an error
// is logged if the intersection is empty, and the intersection is returned.
GtStrArray* agn_seq_intersection(GtFeatureIndex *refrfeats,
                                 GtFeatureIndex *predfeats, AgnLogger *logger)
{
  GtError *error = gt_error_new();

  // Collect the seqids of both annotation sets
  GtStrArray *refr_ids = gt_feature_index_get_seqids(refrfeats, error);
  if(gt_error_is_set(error))
  {
    agn_logger_log_error(logger, "error fetching seqids for reference: %s",
                         gt_error_get(error));
    gt_error_unset(error);
  }

  GtStrArray *pred_ids = gt_feature_index_get_seqids(predfeats, error);
  if(gt_error_is_set(error))
  {
    agn_logger_log_error(logger, "error fetching seqids for prediction: %s",
                         gt_error_get(error));
    gt_error_unset(error);
  }
  gt_error_delete(error);

  // Give up if anything went wrong so far
  if(agn_logger_has_error(logger))
  {
    gt_str_array_delete(refr_ids);
    gt_str_array_delete(pred_ids);
    return NULL;
  }

  GtStrArray *common = agn_gt_str_array_intersection(refr_ids, pred_ids);
  GtUword outer, inner;

  // Warn about reference sequences lacking prediction annotations
  for(outer = 0; outer < gt_str_array_size(refr_ids); outer++)
  {
    const char *candidate = gt_str_array_get(refr_ids, outer);
    int found = 0;
    for(inner = 0; !found && inner < gt_str_array_size(common); inner++)
      found = (strcmp(candidate, gt_str_array_get(common, inner)) == 0);

    if(!found)
    {
      agn_logger_log_warning(logger, "no prediction annotations found for "
                             "sequence '%s'", candidate);
    }
  }

  // Warn about prediction sequences lacking reference annotations
  for(outer = 0; outer < gt_str_array_size(pred_ids); outer++)
  {
    const char *candidate = gt_str_array_get(pred_ids, outer);
    int found = 0;
    for(inner = 0; !found && inner < gt_str_array_size(common); inner++)
      found = (strcmp(candidate, gt_str_array_get(common, inner)) == 0);

    if(!found)
    {
      agn_logger_log_warning(logger, "no reference annotations found for "
                             "sequence '%s'", candidate);
    }
  }

  // An empty intersection is reported as an error, but still returned
  if(gt_str_array_size(common) == 0)
  {
    agn_logger_log_error(logger, "no sequences in common between reference and "
                         "prediction");
  }

  gt_str_array_delete(pred_ids);
  gt_str_array_delete(refr_ids);
  return common;
}
Exemplo n.º 22
0
/* Encodes all reads of the FASTQ files in <hcr_enc->files> and writes them to
   <fp>, beginning at <hcr_enc->seq_encoder->start_of_encoding>.

   For every read, hcr_write_seq() is first called in "dry run" mode to learn
   how many bits the read needs; the sampling object then decides whether a
   new sample has to be recorded before the read is actually written. After
   the last read the position of the sampling table is stored in
   <startofsamplingtab> and the sampling table is written to <fp>.

   Requires <hcr_enc->seq_encoder->sampling> to be set.
   Returns 0 on success, a negative value on error (<err> is set). */
static int hcr_write_seqs(FILE *fp, GtHcrEncoder *hcr_enc, GtError *err)
{
    int had_err = 0, seqit_err;
    GtUword bits_to_write = 0,
            len,
            read_counter = 0,
            page_counter = 0,
            bits_left_in_page,
            cur_read = 0;
    GtWord filepos;
    GtSeqIterator *seqit;
    const GtUchar *seq,
          *qual;
    char *desc;
    GtBitOutStream *bitstream;

    gt_error_check(err);
    gt_assert(hcr_enc->seq_encoder->sampling);

    /* a fresh page offers pagesize bytes, i.e. pagesize * 8 bits */
    gt_safe_assign(bits_left_in_page, (hcr_enc->pagesize * 8));

    gt_xfseek(fp, hcr_enc->seq_encoder->start_of_encoding, SEEK_SET);
    bitstream = gt_bitoutstream_new(fp);

    seqit = gt_seq_iterator_fastq_new(hcr_enc->files, err);
    if (!seqit) {
        gt_assert(gt_error_is_set(err));
        had_err = -1;
    }

    if (!had_err) {
        gt_seq_iterator_set_quality_buffer(seqit, &qual);
        gt_seq_iterator_set_symbolmap(seqit,
                                      gt_alphabet_symbolmap(hcr_enc->seq_encoder->alpha));
        hcr_enc->seq_encoder->total_num_of_symbols = 0;
        /* the iterator returns 1 as long as another read was delivered */
        while (!had_err &&
                (seqit_err = gt_seq_iterator_next(seqit,
                             &seq,
                             &len,
                             &desc, err)) == 1) {

            /* count the bits */
            bits_to_write = hcr_write_seq(hcr_enc->seq_encoder, seq, qual, len,
                                          bitstream, true);

            /* check if a new sample has to be added */
            if (gt_sampling_is_next_element_sample(hcr_enc->seq_encoder->sampling,
                                                   page_counter,
                                                   read_counter,
                                                   bits_to_write,
                                                   bits_left_in_page)) {
                gt_bitoutstream_flush_advance(bitstream);

                filepos = gt_bitoutstream_pos(bitstream);
                if (filepos < 0) {
                    had_err = -1;
                    gt_error_set(err, "error by ftell: %s", strerror(errno));
                }
                else {
                    gt_sampling_add_sample(hcr_enc->seq_encoder->sampling,
                                           (size_t) filepos,
                                           cur_read);

                    /* the new sample starts with empty counters */
                    read_counter = 0;
                    page_counter = 0;
                    gt_safe_assign(bits_left_in_page, (hcr_enc->pagesize * 8));
                }
            }

            if (!had_err) {
                /* do the writing */
                bits_to_write = hcr_write_seq(hcr_enc->seq_encoder,
                                              seq, qual, len, bitstream, false);

                /* update counter for sampling */
                while (bits_left_in_page < bits_to_write) {
                    page_counter++;
                    bits_to_write -= bits_left_in_page;
                    gt_safe_assign(bits_left_in_page, (hcr_enc->pagesize * 8));
                }
                bits_left_in_page -= bits_to_write;
                /* always set first page as written */
                if (page_counter == 0)
                    page_counter++;
                read_counter++;
                hcr_enc->seq_encoder->total_num_of_symbols += len;
                cur_read++;
            }
        }
        /* propagate an iterator failure before checking the read count */
        if (!had_err && seqit_err) {
            had_err = seqit_err;
            gt_assert(gt_error_is_set(err));
        }
        /* The read count can only be expected to match if all reads were
           processed; after an error fewer reads were seen, which must be
           reported through <err> instead of tripping this assertion. */
        gt_assert(had_err || hcr_enc->num_of_reads == cur_read);
    }

    if (!had_err) {
        gt_bitoutstream_flush(bitstream);
        filepos = gt_bitoutstream_pos(bitstream);
        if (filepos < 0) {
            had_err = -1;
            gt_error_set(err, "error by ftell: %s", strerror(errno));
        }
        else {
            /* the sampling table directly follows the encoded reads */
            hcr_enc->seq_encoder->startofsamplingtab = filepos;
            gt_log_log("start of samplingtab: "GT_WU"",
                       hcr_enc->seq_encoder->startofsamplingtab);
            if (hcr_enc->seq_encoder->sampling != NULL)
                gt_sampling_write(hcr_enc->seq_encoder->sampling, fp);
        }
    }
    gt_bitoutstream_delete(bitstream);
    gt_seq_iterator_delete(seqit);
    return had_err;
}
Exemplo n.º 23
0
/* Decodes read number <readnum> from <hcr_dec> into <seq> and <qual> and, if
   the decoder holds encoded descriptions, its description into <desc>.

   If the decoder is already positioned at <readnum>, that read is decoded
   directly. Otherwise the decoder is moved forward from its current position
   when possible, or reset (to the nearest sample if sampling information is
   available, else to the start of the data), and the intermediate reads are
   decoded into <seq>/<qual> and overwritten until <readnum> is reached.

   <readnum> must be smaller than the number of reads; <seq> and <qual> must
   not be NULL.
   Returns 0 on success and a non-zero value on error, in which case <err> is
   set. */
int gt_hcr_decoder_decode(GtHcrDecoder *hcr_dec, GtUword readnum,
                          char *seq, char *qual, GtStr *desc, GtError *err)
{
  int had_err = 0;
  GtUword nearestsample = 0,
                reads_to_read = 0,
                idx,
                current_read;
  size_t startofnearestsample = 0;
  GtSampling *sampling;
  HcrHuffDataIterator *data_iter;
  GtHuffmanDecoder *huff_dec;

  gt_error_check(err);
  gt_assert(hcr_dec);
  gt_assert(readnum < hcr_dec->seq_dec->num_of_reads);
  gt_assert(seq != NULL && qual != NULL);

  /* read the decoder state only after <hcr_dec> has been validated */
  current_read = hcr_dec->seq_dec->cur_read;

  if (current_read == readnum)
    had_err = hcr_next_seq_qual(hcr_dec->seq_dec, seq, qual, err) == -1 ?
      -1 : 0;
  else {
    sampling = hcr_dec->seq_dec->sampling;
    data_iter = hcr_dec->seq_dec->data_iter;
    huff_dec = hcr_dec->seq_dec->huff_dec;

    if (sampling != NULL) {
      gt_sampling_get_page(sampling,
                           readnum,
                           &nearestsample,
                           &startofnearestsample);
      /* nearestsample <= cur_read < readnum: current sample is the right one */
      if (nearestsample <= current_read && current_read <= readnum)
        reads_to_read = readnum - current_read;
      else { /* reset decoder to new sample */
        reset_data_iterator_to_pos(data_iter, startofnearestsample);
        had_err = gt_huffman_decoder_get_new_mem_chunk(huff_dec, err);
        reads_to_read = readnum - nearestsample;
        hcr_dec->seq_dec->cur_read = nearestsample;
      }
      gt_log_log("reads to read: "GT_WU", nearest sample: "GT_WU"",
                 reads_to_read,nearestsample);
      gt_log_log("start of nearest: "GT_WU"", (GtUword) startofnearestsample);
    }
    else {
      /* no sampling: continue from here or restart at the very beginning */
      if (current_read <= readnum)
        reads_to_read = readnum - current_read;
      else {
        reset_data_iterator_to_start(data_iter);
        had_err = gt_huffman_decoder_get_new_mem_chunk(huff_dec, err);
        reads_to_read = readnum;
        hcr_dec->seq_dec->cur_read = 0;
      }
    }

    /* skip the reads in front of the requested one */
    for (idx = 0; !had_err && idx < reads_to_read; idx++)
      had_err = hcr_next_seq_qual(hcr_dec->seq_dec, seq,qual, err) == -1 ?
        -1 : 0;

    /* decode the requested read itself */
    if (!had_err)
      had_err = hcr_next_seq_qual(hcr_dec->seq_dec, seq, qual, err) == -1 ?
        -1 : 0;
  }
  if (had_err)
    gt_assert(gt_error_is_set(err));

  if (!had_err && hcr_dec->encdesc != NULL)
    had_err = gt_encdesc_decode(hcr_dec->encdesc, readnum, desc, err);
  if (had_err)
    gt_assert(gt_error_is_set(err));

  return had_err;
}
int main(int argc, char *argv[])
{
  const char *style_file, *png_file, *gff3_file;
  char *seqid;
  GtStyle *style;
  GtBioseq *bioseq;
  GtFeatureIndex *feature_index;
  GtRange range;
  GtDiagram *diagram;
  GtLayout *layout;
  GtCanvas *canvas;
  GtCustomTrack *custom;
  GtUword height, windowsize;
  GtError *err;

  if (argc != 9) {
    fprintf(stderr, "Usage: %s style_file PNG_file GFF3_file Seq_file seqid"
                    " start end windowsize\n",
                    argv[0]);
    return EXIT_FAILURE;
  }

  style_file = argv[1];
  png_file = argv[2];
  gff3_file = argv[3];

  /* initialize */
  gt_lib_init();

  /* create error object */
  err = gt_error_new();

  /* create style */
  if (!(style = gt_style_new(err)))
    handle_error(err);

  /* load style file */
  if (gt_style_load_file(style, style_file, err))
    handle_error(err);

  /* create feature index */
  feature_index = gt_feature_index_memory_new();

  /* add GFF3 file to index */
  if (gt_feature_index_add_gff3file(feature_index, gff3_file, err))
    handle_error(err);

  /* create diagram for first sequence ID in feature index */
  seqid = argv[5];
  if (gt_feature_index_get_range_for_seqid(feature_index, &range, seqid, err))
    handle_error(err);
  sscanf(argv[6], "%lu", &range.start);
  sscanf(argv[7], "%lu", &range.end);
  sscanf(argv[8], "%lu", &windowsize);

  diagram = gt_diagram_new(feature_index, seqid, &range, style, err);
  if (gt_error_is_set(err))
    handle_error(err);

  /* load sequence for GC plot */
  bioseq = gt_bioseq_new(argv[4], err);
  if (gt_error_is_set(err))
    handle_error(err);

  /* create custom track with GC plot for first sequence in file,
     window size 1000, 40px height and average line at 16.5% */
  custom = gt_custom_track_gc_content_new(gt_bioseq_get_sequence(bioseq, 0),
                                          gt_bioseq_get_sequence_length(bioseq,
                                                                        0),
                                          windowsize,
                                          70,
                                          0.165,
                                          true);
  gt_diagram_add_custom_track(diagram, custom);

  /* create layout with given width, determine resulting image height */
  layout = gt_layout_new(diagram, 600, style, err);
  if (gt_error_is_set(err))
    handle_error(err);
  if (gt_layout_get_height(layout, &height, err))
    handle_error(err);

  /* create PNG canvas */
  canvas = gt_canvas_cairo_file_new(style, GT_GRAPHICS_PNG, 600, height,
                                    NULL, err);
  if (!canvas)
    handle_error(err);

  /* sketch layout on canvas */
  if (gt_layout_sketch(layout, canvas, err))
    handle_error(err);

  /* write canvas to file */
  if (gt_canvas_cairo_file_to_file((GtCanvasCairoFile*) canvas, png_file, err))
    handle_error(err);

  /* free */
  gt_custom_track_delete(custom);
  gt_bioseq_delete(bioseq);
  gt_canvas_delete(canvas);
  gt_layout_delete(layout);
  gt_diagram_delete(diagram);
  gt_feature_index_delete(feature_index);
  gt_style_delete(style);
  gt_error_delete(err);

  /* perform static data cleanup */
  gt_lib_clean();
  return EXIT_SUCCESS;
}
Exemplo n.º 25
0
/* Decodes the read at <seq_dec->cur_read> and advances the decoder by one
   read. The decoded bases are written to <seq> and the quality values to
   <qual>, each '\0'-terminated; either buffer may be NULL to skip it.

   Returns 1 if a read was processed, 0 if <cur_read> is beyond the number of
   reads, and -1 on a decoding error (<err> is set). */
static int hcr_next_seq_qual(GtHcrSeqDecoder *seq_dec, char *seq, char *qual,
                             GtError *err)
{
    enum state {
        HCR_ERROR = -1,
        END,
        SUCCESS
    };
    enum state result = SUCCESS;
    GtUword sample_elem;
    size_t sample_pos = 0;
    FastqFileInfo lookup_key;
    FastqFileInfo *info = NULL;

    /* nothing left to decode */
    if (seq_dec->cur_read > seq_dec->num_of_reads)
        return (int) END;

    /* start with an empty symbol buffer, creating it on first use */
    if (seq_dec->symbols != NULL)
        gt_array_reset(seq_dec->symbols);
    else
        seq_dec->symbols = gt_array_new(sizeof (GtUword));

    /* look up the file info entry (and thereby the read length) that applies
       to the current read */
    lookup_key.readnum = seq_dec->cur_read;
    gt_log_log("cur_read: "GT_WU"",seq_dec->cur_read);
    info = (FastqFileInfo *) gt_rbtree_next_key(seq_dec->file_info_rbt,
                                                &lookup_key,
                                                hcr_cmp_FastqFileInfo,
                                                NULL);
    gt_assert(info);

    /* a sampled read starts at a known position: re-sync the decoder there */
    if (seq_dec->cur_read ==
            gt_sampling_get_next_elementnum(seq_dec->sampling)) {
        gt_log_log("reset because sampled read is next");
        (void) gt_sampling_get_next_sample(seq_dec->sampling,
                                           &sample_elem,
                                           &sample_pos);
        reset_data_iterator_to_pos(seq_dec->data_iter, sample_pos);
        (void) gt_huffman_decoder_get_new_mem_chunk(seq_dec->huff_dec, err);
        if (gt_error_is_set(err))
            result = HCR_ERROR;
    }

    if (result != HCR_ERROR) {
        /* anything but 1 is a failure; 0 means the data ended too early */
        int rc = gt_huffman_decoder_next(seq_dec->huff_dec, seq_dec->symbols,
                                         info->readlength, err);
        if (rc == 0)
            gt_error_set(err, "reached end of file");
        if (rc != 1)
            result = HCR_ERROR;
    }

    if (qual != NULL || seq != NULL) {
        GtUword pos;
        const GtUword nof_symbols = gt_array_size(seq_dec->symbols);

        gt_log_log("set strings");
        /* every symbol carries both a quality value and a base */
        for (pos = 0; pos < nof_symbols; pos++) {
            GtUword *sym = (GtUword*) gt_array_get(seq_dec->symbols, pos);

            if (qual != NULL)
                qual[pos] = get_qual_from_symbol(seq_dec, *sym);
            if (seq != NULL) {
                unsigned char base_code = get_base_from_symbol(seq_dec, *sym);
                seq[pos] = (char)toupper(gt_alphabet_decode(seq_dec->alpha,
                                         (GtUchar) base_code));
            }
        }
        if (qual != NULL)
            qual[nof_symbols] = '\0';
        if (seq != NULL)
            seq[nof_symbols] = '\0';
    }

    seq_dec->cur_read++;
    return (int) result;
}
Exemplo n.º 26
0
/* Unit test for the GtBlock class: checks element insertion, range, caption
   and strand accessors, and the maximum height computation under different
   "bar_height" style settings.

   Returns 0 if all checks pass; otherwise a non-zero value is returned via
   <had_err>, which gt_ensure() is expected to set together with <err>. */
int gt_block_unit_test(GtError *err)
{
  GtRange r1, r2, r_temp, b_range;
  GtStrand s;
  GtGenomeNode *gn1, *gn2;
  GtElement *e1, *e2;
  double height;
  GtBlock *b;
  GtStr *seqid, *caption1, *caption2;
  int had_err = 0;
  GtStyle *sty;
  GtError *testerr;
  gt_error_check(err);

  seqid = gt_str_new_cstr("seqid");
  caption1 = gt_str_new_cstr("foo");
  caption2 = gt_str_new_cstr("bar");
  /* separate error object for the calls under test, so that their errors can
     be inspected without touching <err> */
  testerr = gt_error_new();

  r1.start = 10UL;
  r1.end = 50UL;

  r2.start = 40UL;
  r2.end = 50UL;

  gn1 = gt_feature_node_new(seqid, gt_ft_gene, r1.start, r1.end,
                            GT_STRAND_FORWARD);
  gn2 = gt_feature_node_new(seqid, gt_ft_exon, r2.start, r2.end,
                            GT_STRAND_FORWARD);

  e1 = gt_element_new((GtFeatureNode*) gn1);
  e2 = gt_element_new((GtFeatureNode*) gn2);

  b = gt_block_new();

  /* test gt_block_insert_elements */
  gt_ensure((0UL == gt_block_get_size(b)));
  gt_block_insert_element(b, (GtFeatureNode*) gn1);
  gt_ensure((1UL == gt_block_get_size(b)));
  gt_block_insert_element(b, (GtFeatureNode*) gn2);
  gt_ensure((2UL == gt_block_get_size(b)));

  /* test gt_block_set_range & gt_block_get_range */
  r_temp = gt_range_join(&r1, &r2);
  gt_block_set_range(b, r_temp);
  b_range = gt_block_get_range(b);
  gt_ensure((0 == gt_range_compare(&b_range, &r_temp)));
  gt_ensure((1 == gt_range_compare(&r2, &r_temp)));

  /* tests gt_block_set_caption & gt_block_get_caption */
  gt_block_set_caption(b, caption1);
  gt_ensure((0 == gt_str_cmp(gt_block_get_caption(b), caption1)));
  gt_ensure((0 != gt_str_cmp(gt_block_get_caption(b), caption2)));

  /* tests gt_block_set_strand & gt_block_get_strand */
  s = gt_block_get_strand(b);
  gt_ensure((GT_STRAND_UNKNOWN == s));
  gt_block_set_strand(b, GT_STRAND_FORWARD);
  s = gt_block_get_strand(b);
  gt_ensure((GT_STRAND_FORWARD == s));

  /* test gt_block_get_max_height(); the calls report into <testerr>, which
     is what the subsequent error checks look at */
  sty = gt_style_new(err);
  gt_ensure(gt_block_get_max_height(b, &height, sty, testerr) == 0);
  gt_ensure(!gt_error_is_set(testerr));
  gt_ensure(height == BAR_HEIGHT_DEFAULT);
  /* only the exon height is set: expected to determine the maximum */
  gt_style_set_num(sty, "exon", "bar_height", 42);
  gt_ensure(gt_block_get_max_height(b, &height, sty, testerr) == 0);
  gt_ensure(!gt_error_is_set(testerr));
  gt_ensure(height == 42);
  /* a smaller gene height must not lower the maximum */
  gt_style_set_num(sty, "gene", "bar_height", 23);
  gt_ensure(gt_block_get_max_height(b, &height, sty, testerr) == 0);
  gt_ensure(!gt_error_is_set(testerr));
  gt_ensure(height == 42);
  /* without the exon setting the gene height is the expected maximum */
  gt_style_unset(sty, "exon", "bar_height");
  gt_ensure(gt_block_get_max_height(b, &height, sty, testerr) == 0);
  gt_ensure(!gt_error_is_set(testerr));
  gt_ensure(height == 23);

  /* NOTE(review): caption1 is not deleted here; presumably the block took
     ownership in gt_block_set_caption() -- confirm against GtBlock */
  gt_str_delete(caption2);
  gt_str_delete(seqid);
  gt_element_delete(e1);
  gt_element_delete(e2);
  gt_block_delete(b);
  gt_style_delete(sty);
  gt_error_delete(testerr);
  gt_genome_node_delete(gn1);
  gt_genome_node_delete(gn2);

  return had_err;
}
Exemplo n.º 27
0
/* Unit test for the GtTrack class: inserts four blocks into a track and
   checks the resulting number of lines and the track height under different
   "bar_height" style settings.

   Returns <had_err>, which starts as 0 and is passed to every gt_ensure()
   check. */
int gt_track_unit_test(GtError *err)
{
    int had_err = 0;
    GtBlock *b[4];
    GtRange r[4];
    GtTrack *track;
    GtGenomeNode *parent[4], *gn[4];
    GtStr *title;
    double height, tmp;
    GtStyle *sty;
    unsigned long i;
    GtLineBreaker *lb;
    double t_rest = 0,
           l_rest = 0;
    gt_error_check(err);

    title = gt_str_new_cstr("test");

    /* r[0] and r[1] do not overlap each other; r[2] overlaps both of them;
       r[3] overlaps r[0] only */
    r[0].start=100UL;
    r[0].end=1000UL;
    r[1].start=1001UL;
    r[1].end=1500UL;
    r[2].start=700UL;
    r[2].end=1200UL;
    r[3].start=10UL;
    r[3].end=200UL;

    /* build one gene node with a single exon child per range */
    for (i=0; i<4; i++)
    {
        parent[i] = gt_feature_node_new(title, gt_ft_gene, r[i].start, r[i].end,
                                        GT_STRAND_FORWARD);
        gn[i] = gt_feature_node_new(title, gt_ft_exon, r[i].start, r[i].end,
                                    GT_STRAND_FORWARD);

        gt_feature_node_add_child((GtFeatureNode*) parent[i],
                                  (GtFeatureNode*) gn[i]);

        gt_feature_node_add_attribute((GtFeatureNode*) parent[i], GT_GFF_NAME,
                                      "parent");
        gt_feature_node_add_attribute((GtFeatureNode*) gn[i], GT_GFF_NAME, "child");
    }

    /* one block per gene/exon pair, covering the same range */
    for (i=0; i<4; i++)
    {
        b[i] = gt_block_new();
        gt_block_set_range(b[i], r[i]);
        gt_block_insert_element(b[i], (GtFeatureNode*) parent[i]);
        gt_block_insert_element(b[i], (GtFeatureNode*) gn[i]);
    }

    lb = gt_line_breaker_bases_new();

    sty = gt_style_new(err);

    /* t_rest collects the height components that do not depend on the number
       of lines (caption font size, caption space, track vspace); values not
       set in the style fall back to the compile-time defaults */
    if (gt_style_get_num(sty, "format", "track_caption_font_size", &tmp,
                         NULL, err) == GT_STYLE_QUERY_NOT_SET) {
        tmp = TEXT_SIZE_DEFAULT;
    }
    t_rest += tmp;
    if (gt_style_get_num(sty, "format", "track_caption_space", &tmp,
                         NULL, err) == GT_STYLE_QUERY_NOT_SET) {
        tmp = CAPTION_BAR_SPACE_DEFAULT;
    }
    t_rest += tmp;
    if (gt_style_get_num(sty, "format", "track_vspace", &tmp,
                         NULL, err) == GT_STYLE_QUERY_NOT_SET) {
        tmp = TRACK_VSPACE_DEFAULT;
    }
    t_rest += tmp;
    /* l_rest is the per-line spacing that is added to the bar height */
    if (gt_style_get_num(sty, "format", "bar_vspace", &l_rest,
                         NULL, err) == GT_STYLE_QUERY_NOT_SET) {
        l_rest = BAR_VSPACE_DEFAULT;
    }

    track = gt_track_new(title, GT_UNDEF_ULONG, true, lb);
    gt_ensure(had_err, track);
    gt_ensure(had_err, gt_track_get_title(track) == title);

    /* an empty track has no lines and only the line-independent height */
    gt_ensure(had_err, gt_track_get_number_of_lines(track) == 0);
    gt_ensure(had_err, gt_track_get_height(track, &height, sty, err) == 0);
    gt_ensure(had_err, height == t_rest);
    gt_ensure(had_err, !gt_error_is_set(err));

    /* the first block opens the first line */
    gt_ensure(had_err, gt_track_insert_block(track, b[0], err) == 0);
    gt_ensure(had_err, !gt_error_is_set(err));
    gt_ensure(had_err, gt_track_get_number_of_lines(track) == 1);
    gt_ensure(had_err, gt_track_get_height(track, &height, sty, err) == 0);
    gt_ensure(had_err, height == t_rest + l_rest + BAR_HEIGHT_DEFAULT);
    gt_ensure(had_err, !gt_error_is_set(err));

    /* b[1] does not overlap b[0]: expected to share the first line */
    gt_ensure(had_err, gt_track_insert_block(track, b[1], err) == 0);
    gt_ensure(had_err, !gt_error_is_set(err));
    gt_ensure(had_err, gt_track_get_number_of_lines(track) == 1);
    gt_ensure(had_err, gt_track_get_height(track, &height, sty, err) == 0);
    gt_ensure(had_err, height == t_rest + l_rest + BAR_HEIGHT_DEFAULT);
    gt_ensure(had_err, !gt_error_is_set(err));

    /* b[2] overlaps both earlier blocks: a second line is expected; b[3] is
       then expected to fit without a third line */
    gt_ensure(had_err, gt_track_insert_block(track, b[2], err) == 0);
    gt_ensure(had_err, !gt_error_is_set(err));
    gt_ensure(had_err, gt_track_get_number_of_lines(track) == 2);
    gt_ensure(had_err, gt_track_insert_block(track, b[3], err) == 0);
    gt_ensure(had_err, !gt_error_is_set(err));
    gt_ensure(had_err, gt_track_get_number_of_lines(track) == 2);
    gt_ensure(had_err, gt_track_get_height(track, &height, sty, err) == 0);
    gt_ensure(had_err, height == t_rest + 2*(l_rest + BAR_HEIGHT_DEFAULT));
    gt_ensure(had_err, !gt_error_is_set(err));

    /* type-specific bar heights: the larger of the exon (42) and gene (23)
       settings is expected to determine the line height */
    gt_style_set_num(sty, "exon", "bar_height", 42);
    gt_ensure(had_err, gt_track_get_height(track, &height, sty, err) == 0);
    gt_ensure(had_err, height == t_rest + 2*(l_rest+42));
    gt_ensure(had_err, !gt_error_is_set(err));
    gt_style_set_num(sty, "gene", "bar_height", 23);
    gt_ensure(had_err, gt_track_get_height(track, &height, sty, err) == 0);
    gt_ensure(had_err, height == t_rest + 2*(l_rest+42));
    gt_ensure(had_err, !gt_error_is_set(err));
    gt_style_unset(sty, "exon", "bar_height");
    gt_ensure(had_err, gt_track_get_height(track, &height, sty, err) == 0);
    gt_ensure(had_err, height == t_rest + 2*(l_rest+23));
    gt_ensure(had_err, !gt_error_is_set(err));
    /* with no type-specific setting left, the "format" section value is
       expected to be used */
    gt_style_unset(sty, "gene", "bar_height");
    gt_style_set_num(sty, "format", "bar_height", 99);
    gt_ensure(had_err, gt_track_get_height(track, &height, sty, err) == 0);
    gt_ensure(had_err, height == t_rest + 2*(l_rest+99));
    gt_ensure(had_err, !gt_error_is_set(err));

    gt_ensure(had_err, gt_track_get_number_of_discarded_blocks(track) == 0);

    /* NOTE(review): lb and gn[i] are not deleted explicitly; presumably the
       track owns the line breaker and the parents own their children --
       confirm against GtTrack / GtFeatureNode */
    gt_track_delete(track);
    gt_str_delete(title);
    gt_style_delete(sty);
    for (i=0; i<4; i++)
    {
        gt_block_delete(b[i]);
        gt_genome_node_delete(parent[i]);
    }
    return had_err;
}
/*
 * Tool runner that decodes reads from an HCR-compressed file.
 *
 * <tool_arguments> must point to a GtCsrHcrDecodeArguments. The alphabet is
 * read from <arguments->smap> if that string is non-empty, otherwise the DNA
 * alphabet is used. If <arguments->name> is empty it is set to the basename
 * of <arguments->file>. When <arguments->bench> is non-zero the benchmark is
 * run; otherwise the reads in <arguments->rng> are decoded, or all reads if
 * either range bound is GT_UNDEF_ULONG.
 *
 * If gt_showtime_enabled() is true, a timer is created and progress messages
 * are written to stdout.
 *
 * Returns 0 on success and a non-zero value on failure; on failure <err> is
 * expected to be set (this is asserted before returning).
 */
static int gt_compreads_decompress_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments, GtError *err)
{
  GtCsrHcrDecodeArguments *arguments = tool_arguments;
  int had_err = 0;
  GtAlphabet *alpha = NULL;
  GtHcrDecoder *hcrd = NULL;
  GtTimer *timer = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  if (gt_showtime_enabled()) {
    timer = gt_timer_new_with_progress_description("start");
    /* check the timer before it is used for the first time */
    gt_assert(timer);
    gt_timer_start(timer);
  }

  if (gt_str_length(arguments->smap) > 0)
    alpha = gt_alphabet_new_from_file_no_suffix(gt_str_get(arguments->smap),
                                                err);
  else
    alpha = gt_alphabet_new_dna();
  if (!alpha)
    had_err = -1;

  if (!had_err) {
    if (timer != NULL)
      gt_timer_show_progress(timer, "decoding", stdout);

    /* default the output name to the basename of the input file */
    if (gt_str_length(arguments->name) == 0) {
      char *basenameptr = gt_basename(gt_str_get(arguments->file));
      gt_str_set(arguments->name, basenameptr);
      gt_free(basenameptr);
    }
    hcrd = gt_hcr_decoder_new(gt_str_get(arguments->file), alpha,
                              arguments->descs, timer, err);
    if (hcrd == NULL)
      had_err = -1;
    else if (arguments->bench != 0) {
      had_err = gt_compreads_decompress_benchmark(hcrd,
                                                  arguments->bench,
                                                  timer, err);
    }
    else {
      /* query the read count once instead of once per comparison */
      unsigned long num_of_reads = gt_hcr_decoder_num_of_reads(hcrd),
                    start = 0,
                    /* NOTE(review): wraps around if the file holds zero
                       reads -- confirm that decode_range copes with that */
                    end = num_of_reads - 1;

      if (arguments->rng.start != GT_UNDEF_ULONG
          && arguments->rng.end != GT_UNDEF_ULONG) {
        if (arguments->rng.start >= num_of_reads
              || arguments->rng.end >= num_of_reads) {
          gt_error_set(err, "range %lu-%lu includes a read number exceeding "
                            "the total number of reads (%lu)",
                            arguments->rng.start,
                            arguments->rng.end,
                            num_of_reads);
          had_err = -1;
        }
        else {
          /* only a validated range replaces the "all reads" default */
          start = arguments->rng.start;
          end = arguments->rng.end;
        }
      }
      if (!had_err) {
        gt_log_log("filebasename: %s", gt_str_get(arguments->name));
        if (gt_hcr_decoder_decode_range(hcrd, gt_str_get(arguments->name),
                                        start, end, timer, err)
          != 0)
          had_err = -1;
      }
    }
    gt_hcr_decoder_delete(hcrd);
  }
  gt_alphabet_delete(alpha);
  if (timer != NULL) {
    gt_timer_show_progress_final(timer, stdout);
    gt_timer_delete(timer);
  }
  if (had_err)
    gt_assert(gt_error_is_set(err));
  return had_err;
}
Exemplo n.º 29
0
/*
 * Unit test for the translator. It exercises, in this order:
 *   - 3-frame translation of a valid sequence,
 *   - searching for a start codon, a stop codon and a user-given codon set,
 *   - the error path for an invalid codon set,
 *   - the error path for sequences containing an invalid character,
 *   - searches on a sequence in which nothing is found.
 *
 * All translator and codon iterator calls report into the private error
 * object <test_err>, not into <err>.
 *
 * Returns <had_err>. It is never assigned directly in this function;
 * presumably the gt_ensure macro (defined elsewhere) sets it when a check
 * fails -- confirm against the macro definition.
 */
int gt_translator_unit_test(GtError *err)
{
  int had_err = 0;
  GtTranslatorStatus test_errnum;
  GtTranslator *tr;
  GtCodonIterator *ci;
  GtError *test_err;
  GtStrArray *codons, *invalidcodons;
  /* valid input used for the positive tests */
  const char *seq = "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGT"
                    "GGATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGAACTGGT"
                    "TACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGG";
  /* input for the searches that are expected to end without a hit */
  const char *no_startcodon = "AAAAAAAAAATCATCTCCCCATTTTTTT";
  /* 'Z' as the very first character */
  const char *invalidseq  = "ZAGCTTTTCATTCTGACTGCAAATATGTCTCTGTGT";
  /* 'Z' in the middle of the sequence */
  const char *invalidseq2 = "AGCTTTTCATTCTGACZTGCAAATATGTCTCTGTGT";

  char translated;
  unsigned int frame;
  GtUword pos = 0;
  /* one output string per reading frame, indexed by <frame> */
  GtStr *protein[3];
  gt_error_check(err);

  test_err = gt_error_new();
  ci = gt_codon_iterator_simple_new(seq, (GtUword) strlen(seq), test_err);
  tr = gt_translator_new(ci);
  protein[0] = gt_str_new();
  protein[1] = gt_str_new();
  protein[2] = gt_str_new();
  codons = gt_str_array_new();
  gt_str_array_add_cstr(codons, "ACG");
  gt_str_array_add_cstr(codons, "ACT");
  invalidcodons = gt_str_array_new();
  gt_str_array_add_cstr(invalidcodons, "ACG");
  /* only two characters long; used below for the invalid-codons test */
  gt_str_array_add_cstr(invalidcodons, "AC");

  /* do 3-frame translation */
  gt_error_unset(test_err);
  test_errnum = gt_translator_next(tr, &translated, &frame, test_err);
  while (!test_errnum && translated) {
    gt_str_append_char(protein[frame], translated);
    test_errnum = gt_translator_next(tr, &translated, &frame, test_err);
    gt_ensure(
           test_errnum != GT_TRANSLATOR_ERROR && !gt_error_is_set(test_err));
  }
  gt_ensure(
         test_errnum == GT_TRANSLATOR_END && !gt_error_is_set(test_err));

  /* check 3-frame translation */
  gt_ensure(strcmp(gt_str_get(protein[0]),
                         "SFSF*LQRAICLCVD*KKSV**QLLNWLPAVSKLKFY*LR") == 0);
  gt_ensure(strcmp(gt_str_get(protein[1]),
                         "AFHSDCNGQYVSVWIKKRVSDSSF*TGYLP*VN*NFIDL") == 0);
  gt_ensure(strcmp(gt_str_get(protein[2]),
                         "LFILTATGNMSLCGLKKECLIAASELVTCRE*IKILLT*") == 0);

  /* find start codon -- positive */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_startcodon(tr, &pos, test_err);
  gt_ensure(!test_errnum && !gt_error_is_set(test_err));
  gt_ensure(pos == 11UL);

  /* find stop codon -- positive */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_stopcodon(tr, &pos, test_err);
  gt_ensure(!test_errnum && !gt_error_is_set(test_err));
  gt_ensure(pos == 12UL);

  /* find arbitrary codons -- positive */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_codon(tr, codons, &pos, test_err);
  gt_ensure(!test_errnum && !gt_error_is_set(test_err));
  gt_ensure(pos == 14UL);

  /* find arbitrary codons -- negative (invalid codons) */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_codon(tr, invalidcodons, &pos, test_err);
  gt_ensure(
         test_errnum == GT_TRANSLATOR_ERROR && gt_error_is_set(test_err));

  /* replace the codon iterator with one over <invalidseq> */
  gt_error_unset(test_err);
  gt_codon_iterator_delete(ci);
  ci = gt_codon_iterator_simple_new(invalidseq,
                                    (GtUword) strlen(invalidseq),
                                    test_err);
  gt_ensure(ci && !gt_error_is_set(test_err));
  gt_translator_reset(tr, ci);
  /* check translation of sequence with invalid beginning */
  test_errnum = gt_translator_next(tr, &translated, &frame, test_err);
  gt_ensure(test_errnum && gt_error_is_set(test_err));

  /* check translation of sequence with invalid character within */
  gt_error_unset(test_err);
  gt_codon_iterator_delete(ci);
  ci = gt_codon_iterator_simple_new(invalidseq2,
                                    (GtUword) strlen(invalidseq2),
                                    test_err);
  gt_ensure(ci && !gt_error_is_set(test_err));
  gt_translator_reset(tr, ci);
  test_errnum = gt_translator_next(tr, &translated, &frame, test_err);
  /* characters are appended to protein[] again here, but the strings are
     not inspected any more; only the final status is checked */
  while (!test_errnum && translated) {
    gt_str_append_char(protein[frame], translated);
    test_errnum = gt_translator_next(tr, &translated, &frame, test_err);
  }
  gt_ensure(
         test_errnum == GT_TRANSLATOR_ERROR && gt_error_is_set(test_err));

  /* find start codon -- fail */
  gt_error_unset(test_err);
  gt_codon_iterator_delete(ci);
  ci = gt_codon_iterator_simple_new(no_startcodon,
                                    (GtUword) strlen(no_startcodon),
                                    test_err);
  gt_ensure(ci && !gt_error_is_set(test_err));
  gt_translator_reset(tr, ci);
  test_errnum = gt_translator_find_startcodon(tr, &pos, test_err);
  gt_ensure(
         test_errnum == GT_TRANSLATOR_END && !gt_error_is_set(test_err));

  /* find stop codon -- fail */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_stopcodon(tr, &pos, test_err);
  gt_ensure(
         test_errnum == GT_TRANSLATOR_END && !gt_error_is_set(test_err));

  /* find arbitrary codons -- negative (none there) */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_codon(tr, codons, &pos, test_err);
  gt_ensure(
         test_errnum == GT_TRANSLATOR_END && !gt_error_is_set(test_err));

  /* release everything created by this test */
  gt_codon_iterator_delete(ci);
  gt_translator_delete(tr);
  gt_str_delete(protein[0]);
  gt_str_delete(protein[1]);
  gt_str_delete(protein[2]);
  gt_str_array_delete(codons);
  gt_str_array_delete(invalidcodons);
  gt_error_delete(test_err);

  return had_err;
}
static int gt_readjoiner_cnttest_runner(GT_UNUSED int argc,
    GT_UNUSED const char **argv, GT_UNUSED int parsed_args,
    void *tool_arguments, GT_UNUSED GtError *err)
{
  GtReadjoinerCnttestArguments *arguments = tool_arguments;
  GtEncseqLoader *el = NULL;
  GtEncseq *reads = NULL;
  GtBitsequence *bits = NULL;
  GtUword nofreads;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  if (arguments->test == GT_READJOINER_CNTTEST_SHOWLIST)
  {
    GtStr *fn = NULL;
    fn = gt_str_clone(arguments->readset);
    gt_str_append_cstr(fn, GT_READJOINER_SUFFIX_CNTLIST);
    had_err = gt_cntlist_parse(gt_str_get(fn), true, &bits, &nofreads, err);
    gt_str_delete(fn);
  }
  else if (arguments->test == GT_READJOINER_CNTTEST_BRUTEFORCE ||
      arguments->test == GT_READJOINER_CNTTEST_KMP)
  {
    el = gt_encseq_loader_new();
    gt_encseq_loader_drop_description_support(el);
    gt_encseq_loader_disable_autosupport(el);
    if (!arguments->singlestrand)
      gt_encseq_loader_mirror(el);
    reads = gt_encseq_loader_load(el, gt_str_get(arguments->readset), err);
    if (reads == NULL)
      had_err = -1;
    else
    {
      gt_rdj_pairwise_exact(GT_OVLFIND_CNT, reads, !arguments->singlestrand,
          false, arguments->test == GT_READJOINER_CNTTEST_KMP, 1UL, true,
          NULL, NULL, false, NULL, &bits, &nofreads);
    }
    gt_encseq_delete(reads);
    gt_encseq_loader_delete(el);
  }
  else if (arguments->test == GT_READJOINER_CNTTEST_ESA)
  {
    Sequentialsuffixarrayreader *ssar = NULL;
    GtUword readlength = 0, firstrevcompl = 0;
    GtLogger *verbose_logger = gt_logger_new(arguments->verbose,
        GT_LOGGER_DEFLT_PREFIX, stderr);
    ssar = gt_newSequentialsuffixarrayreaderfromfile(gt_str_get(
          arguments->readset), SARR_LCPTAB | SARR_SUFTAB | SARR_SSPTAB,
        true, verbose_logger, err);
    if (gt_error_is_set(err))
      had_err = -1;
    else
    {
      nofreads = gt_encseq_num_of_sequences(ssar->encseq);
      if (!arguments->singlestrand)
      {
        nofreads = GT_DIV2(nofreads);
        firstrevcompl = nofreads;
      }
      GT_INITBITTAB(bits, nofreads);
      if (!arguments->singlestrand)
      if (gt_encseq_accesstype_get(ssar->encseq) == GT_ACCESS_TYPE_EQUALLENGTH)
        readlength = gt_encseq_seqlength(ssar->encseq, 0);
      (void)gt_contfind_bottomup(ssar, false, bits, arguments->singlestrand ? 0
          : firstrevcompl, readlength);
    }
    if (ssar != NULL)
      gt_freeSequentialsuffixarrayreader(&ssar);
    gt_logger_delete(verbose_logger);
  }
  else
  {
    gt_assert(false);
  }
  if (!had_err)
    had_err = gt_cntlist_show(bits, nofreads, NULL, false, err);
  gt_free(bits);
  return had_err;
}