Esempio n. 1
0
static int verifycodelists(const GtEncseq *encseq,
                           unsigned int kmersize,
                           unsigned int numofchars,
                           const GtArrayGtCodetype *codeliststream,
                           GtError *err)
{
  bool haserr = false;
  GtArrayGtCodetype codeliststring;
  const GtUchar *characters;
  GtUword stringtotallength;

  gt_error_check(err);
  stringtotallength = gt_encseq_total_length(encseq);
  characters = gt_alphabet_characters(gt_encseq_alphabet(encseq));
  GT_INITARRAY(&codeliststring,GtCodetype);
  collectkmercode(&codeliststring,
                  encseq,
                  kmersize,
                  numofchars,
                  stringtotallength);
  if (comparecodelists(codeliststream,
                       &codeliststring,
                       kmersize,
                       numofchars,
                       (const char *) characters,
                       err) != 0)
  {
    haserr = true;
  }
  GT_FREEARRAY(&codeliststring,GtCodetype);
  return haserr ? -1 : 0;
}
Esempio n. 2
0
int gt_test_trieins(bool onlyins,const char *indexname,GtError *err)
{
  Suffixarray suffixarray;
  bool haserr = false;
  unsigned long totallength = 0;

  gt_error_check(err);
  if (streamsuffixarray(&suffixarray,
                        SARR_ESQTAB,
                        indexname,
                        NULL,
                        err) != 0)
  {
    haserr = true;
  } else
  {
    totallength = gt_encseq_total_length(suffixarray.encseq);
  }
  if (!haserr)
  {
    Mergertrierep trierep;
    const GtUchar *characters;

    trierep.encseqreadinfo = gt_malloc(sizeof *trierep.encseqreadinfo);
    trierep.encseqreadinfo->encseqptr = suffixarray.encseq;
    trierep.encseqreadinfo->readmode = suffixarray.readmode;
    characters
      = gt_alphabet_characters(gt_encseq_alphabet(suffixarray.encseq));
    gt_mergertrie_initnodetable(&trierep,totallength,1U);
    maketrie(&trierep,characters,totallength);
    if (onlyins)
    {
#ifdef WITHTRIEIDENT
#ifdef WITHTRIESHOW
      showtrie(&trierep,characters);
#endif
      checktrie(&trierep,totallength+1,totallength,err);
#endif
    } else
    {
#ifdef WITHTRIEIDENT
#ifdef WITHTRIESHOW
      showallnoderelations(trierep.root);
#endif
#endif
      successivelydeletesmallest(&trierep,totallength,characters,err);
    }
    gt_mergertrie_delete(&trierep);
  }
  gt_freesuffixarray(&suffixarray);
  return haserr ? -1 : 0;
}
Esempio n. 3
0
void gth_compute_scores(GthSA *sa,
                        bool proteineop,
                        GthDPParam *dp_param,
                        void *dp_options_est,
                        const unsigned char *gen_seq_tran,
                        const unsigned char *ref_seq_tran,
                        const unsigned char *ref_seq_orig,
                        const GtTransTable *transtable,
                        unsigned long gen_dp_start,
                        unsigned long scoreminexonlen,
                        bool introncutout,
                        bool gs2out,
                        GthSplicedSeq *spliced_seq,
                        unsigned long ref_dp_length,
                        GtAlphabet *gen_alphabet,
                        GtAlphabet *ref_alphabet,
                        GthDPScoresProtein *dp_scores_protein)
{
  Traversealignmentfunctions travfunctions;
  Traversealignmentstate travstate;
  Computebordersandscoresdata data;
  GthFlt score, coverageofgenomicsegment, coverageofreferencesegment;

  gt_assert(!gth_sa_num_of_exons(sa));
  gt_assert(!gth_sa_num_of_introns(sa));

  travfunctions.processmismatch  = computescoresprocmismatch;
  travfunctions.processdeletion  = computescoresprocdeletion;
  travfunctions.processinsertion = computebordersandscoresprocinsertion;
  travfunctions.processmatch     = computebordersandscoresprocmatch;
  travfunctions.processintron    = computebordersandscoresprocintron;
  travfunctions.breakcondition   = NULL;

  /* additional functions for protein edit operations */
  travfunctions.processintron_with_1_base_left  =
    computebordersandscoresprocintron;
  travfunctions.processintron_with_2_bases_left =
    computebordersandscoresprocintron;
  travfunctions.processmismatch_with_1_gap      =
    computescoresprocmismatchordeletionwithgap;
  travfunctions.processmismatch_with_2_gaps     =
    computescoresprocmismatchordeletionwithgap;
  travfunctions.processdeletion_with_1_gap      =
    computescoresprocmismatchordeletionwithgap;
  travfunctions.processdeletion_with_2_gaps     =
    computescoresprocmismatchordeletionwithgap;

  travstate.proteineop = proteineop;
  travstate.processing_intron_with_1_base_left  = false;
  travstate.processing_intron_with_2_bases_left = false;
  travstate.alignment = gth_sa_get_editoperations(sa);
  travstate.alignmentlength =  gth_sa_get_editoperations_length(sa);
  travstate.eopptr       = travstate.alignment + travstate.alignmentlength - 1;
  travstate.genomicptr   = gth_sa_genomiccutoff_start(sa);
  travstate.referenceptr = gth_sa_referencecutoff_start(sa);

  if (travstate.alignmentlength <= 0) {
    /* in this case the alignmentscore is set to 0, which leads to discarding
       this alignment later */
    gth_sa_set_score(sa, 0.0);
    return;
  }

  /* editoperations contain no zero base exons */
  gt_assert(gth_sa_contains_no_zero_base_exons(sa));
  /* editoperations contain no leading or terminal introns or insertions */
  gt_assert(containsnoleadingorterminalintronsorinsertions(travstate.alignment,
                                                        travstate
                                                        .alignmentlength,
                                                        proteineop));
  /* sum of edit operations equals referencelength */
  gt_assert(gt_eops_equal_referencelength(travstate.alignment,
                                       travstate.alignmentlength,
                                       ref_dp_length
                                       - gth_sa_referencecutoff_start(sa)
                                       - gth_sa_referencecutoff_end(sa),
                                       proteineop));

  data.proteineop                     = proteineop;
  data.newexon                        = true;
  data.newintron                      = true;
  data.firstexon                      = true;
  data.introncutout                   = introncutout;
  data.gs2out                         = gs2out;
  data.spliced_seq                    = spliced_seq;
  data.singleexonweight               = (GthFlt) 0.0;
  data.maxsingleexonweight            = (GthFlt) 0.0;
  data.overallexonweight              = (GthFlt) 0.0;
  data.maxoverallexonweight           = (GthFlt) 0.0;
  data.cumulativelengthofscoredexons  = 0;

  data.exon.leftgenomicexonborder     = GT_UNDEF_ULONG;
  data.exon.rightgenomicexonborder    = GT_UNDEF_ULONG;
  data.exon.leftreferenceexonborder   = GT_UNDEF_ULONG;
  data.exon.rightreferenceexonborder  = GT_UNDEF_ULONG;
  data.exon.exonscore                 = GTH_UNDEF_GTHDBL;

  data.intron.donorsiteprobability    = GTH_UNDEF_GTHFLT;
  data.intron.acceptorsiteprobability = GTH_UNDEF_GTHFLT;
  data.intron.donorsitescore          = GTH_UNDEF_GTHDBL;
  data.intron.acceptorsitescore       = GTH_UNDEF_GTHDBL;

  data.sa                             = sa;
  data.dp_param                       = dp_param;
  data.dp_options_est                 = dp_options_est;
  data.gen_seq_tran                   = gen_seq_tran;
  data.ref_seq_tran                   = ref_seq_tran;
  data.ref_seq_orig                   = ref_seq_orig;
  data.transtable                     = transtable;
  data.gen_dp_start                   = gen_dp_start;
  data.scoreminexonlen                = scoreminexonlen;
  data.ref_dp_length                  = ref_dp_length;
  data.gen_alphabet                   = gen_alphabet;
  data.gen_alphabet_characters        = gen_alphabet
                                        ? gt_alphabet_characters(gen_alphabet)
                                        : NULL;
  data.dp_scores_protein              = dp_scores_protein;

  gthtraversealignment(true, &travstate, proteineop, &data, &travfunctions);

  /* this is for saving the last exon */
  evalnewintronifpossible(proteineop, &data.newexon, &data.newintron, true,
                          data.introncutout, data.gs2out, data.spliced_seq,
                          &data.exon, &data.intron, &data.singleexonweight,
                          &data.maxsingleexonweight, &data.overallexonweight,
                          &data.maxoverallexonweight,
                          &data.cumulativelengthofscoredexons, sa, &travstate,
                          gen_alphabet, data.dp_param, data.dp_options_est,
                          data.gen_seq_tran, data.ref_seq_tran,
                          data.gen_dp_start, data.scoreminexonlen);

  /* saving the scores for the whole alignment */
  if (data.maxoverallexonweight > 0.0) {
    score = data.overallexonweight / data.maxoverallexonweight;
    /* XXX: the way the alignmentscore is computed, it is possible to get a
       score > 1.0. Since we don't want this, we cap it */
    if (score > 1.0)
      score = 1.0;
  }
  else
    score = 0.0;
  gth_sa_set_score(sa, score);
  gth_sa_set_cumlen_scored_exons(sa, data.cumulativelengthofscoredexons);

  /* fraction of the gen_dp_length which is scored/weighted */
  coverageofgenomicsegment   = (GthFlt) data.cumulativelengthofscoredexons /
                               (GthFlt) gth_sa_gen_dp_length(sa);
  /* coverage of genomic segment is valid value */
  gt_assert(coverageofgenomicsegment >= 0.0 && coverageofgenomicsegment <= 1.0);

  /* fraction of the referencelength which is scored/weighted */
  coverageofreferencesegment = (GthFlt) data.cumulativelengthofscoredexons /
                               (GthFlt) ((proteineop ? GT_CODON_LENGTH : 1) *
                                         gth_sa_ref_total_length(sa));

  if (coverageofgenomicsegment > coverageofreferencesegment) {
    gth_sa_set_coverage(sa, coverageofgenomicsegment);
    gth_sa_set_highest_cov(sa, true);
  }
  else {
    gth_sa_set_coverage(sa, coverageofreferencesegment);
    gth_sa_set_highest_cov(sa, false);
  }

  /* test the assumption that the coverage is never larger then the default */
  gt_assert(gth_sa_coverage(sa) <= GTH_DEFAULT_MAX_COVERAGE);

  /* compute poly(A) tail position */
  gth_sa_calc_polyAtailpos(sa, ref_seq_tran, ref_alphabet);

  /* determined exons are forward and consecutive */
  gt_assert(gth_sa_exons_are_forward_and_consecutive(sa));
}
Esempio n. 4
0
static int gt_linspace_align_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments,
                                    GtError *err)
{
  GtLinspaceArguments *arguments = tool_arguments;
  int had_err = 0;
  GtAlignment *align;
  GtWord left_dist = 0, right_dist = 0;
  GtSequenceTable *sequence_table1, *sequence_table2;
  GtLinspaceManagement *spacemanager;
  GtScoreHandler *scorehandler = NULL;
  GtTimer *linspacetimer = NULL;
  GtAlphabet *alphabet = NULL;

  gt_error_check(err);
  gt_assert(arguments);
  sequence_table1 = gt_sequence_table_new();
  sequence_table2 = gt_sequence_table_new();
  align = gt_alignment_new();
  spacemanager = gt_linspace_management_new();
  gt_linspace_management_set_TSfactor(spacemanager,arguments->timesquarefactor);

  /* get sequences */
  if (gt_str_array_size(arguments->strings) > 0)
  {
    get_onesequence(sequence_table1, arguments->strings, 0);
    sequence_table1->size++;
    get_onesequence(sequence_table2, arguments->strings, 1);
    sequence_table2->size++;
  }
  else if (gt_str_array_size(arguments->files) > 0)
  {
    had_err = get_fastasequences(sequence_table1,
                                 gt_str_array_get_str(arguments->files,0),err);
    if (!had_err)
    {
      had_err = get_fastasequences(sequence_table2,
                                  gt_str_array_get_str(arguments->files,1),err);
    }
  }
  if (arguments->dna)
  {
    alphabet = gt_alphabet_new_dna();
  } else
  {
    gt_assert(arguments->protein);
    alphabet = gt_alphabet_new_protein();
  }
  gt_encode_sequence_table(alphabet,sequence_table1);
  gt_encode_sequence_table(alphabet,sequence_table2);
  if (!had_err)
  {
    scorehandler = gt_arguments2scorehandler(arguments,err);
    if (scorehandler == NULL)
    {
      had_err = -1;
    } else
    {
      if (arguments->global && arguments->protein && !arguments->has_costmatrix)
      {
        GtScoreHandler *costhandler = gt_scorehandler2costhandler(scorehandler);
        gt_scorehandler_delete(scorehandler);
        scorehandler = costhandler;
      }
    }
  }
  /* get diagonal band */
  if (!had_err && arguments->diagonal)
  {
    if (gt_str_array_size(arguments->diagonalbonds) > 0)
    {
      had_err = gt_parse_score_value(__LINE__,&left_dist,
                                  gt_str_array_get(arguments->diagonalbonds,0),
                                  false, err);
      if (!had_err)
      {
        had_err = gt_parse_score_value(__LINE__,&right_dist,
                                  gt_str_array_get(arguments->diagonalbonds,1),
                                  false, err);
      }
    }
  }
  if (!had_err && arguments->spacetime)
  {
    linspacetimer = gt_timer_new();
  }

  /* alignment functions with linear gap costs */
  if (!had_err)
  {
    bool affine;

    if (gt_str_array_size(arguments->linearcosts) > 0)
    {
      affine = false;
    } else
    {
      gt_assert(gt_str_array_size(arguments->affinecosts) > 0);
      affine = true;
    }
    had_err = gt_all_against_all_alignment_check (
                            affine, align, arguments,
                            spacemanager,
                            scorehandler,
                            gt_alphabet_characters(alphabet),
                            gt_alphabet_wildcard_show(alphabet),
                            sequence_table1,
                            sequence_table2,
                            left_dist,
                            right_dist,
                            linspacetimer,err);
  }
  /*spacetime option*/
  if (!had_err && arguments->spacetime)
  {
    printf("# combined space peak in kilobytes: %f\n",
           GT_KILOBYTES(gt_linspace_management_get_spacepeak(spacemanager)));
    gt_timer_show_formatted(linspacetimer,"# TIME overall " GT_WD ".%02ld\n",
                            stdout);
  }
  gt_timer_delete(linspacetimer);
  gt_linspace_management_delete(spacemanager);
  gt_sequence_table_delete(sequence_table1);
  gt_sequence_table_delete(sequence_table2);
  gt_alignment_delete(align);
  gt_alphabet_delete(alphabet);
  gt_scorehandler_delete(scorehandler);
  return had_err;
}
Esempio n. 5
0
int gt_runidxlocali(const IdxlocaliOptions *idxlocalioptions,GtError *err)
{
  Genericindex *genericindex = NULL;
  bool haserr = false;
  GtLogger *logger;
  const GtEncseq *encseq = NULL;

  logger = gt_logger_new(idxlocalioptions->verbose,
                         GT_LOGGER_DEFLT_PREFIX, stdout);

  if (idxlocalioptions->doonline)
  {
    GtEncseqLoader *el;
    el = gt_encseq_loader_new();
    gt_encseq_loader_require_multiseq_support(el);
    gt_encseq_loader_drop_description_support(el);
    gt_encseq_loader_set_logger(el, logger);
    encseq = gt_encseq_loader_load(el, gt_str_get(idxlocalioptions->indexname),
                                   err);
    gt_encseq_loader_delete(el);
    if (encseq == NULL)
    {
      haserr = true;
    }
  } else
  {
    genericindex = genericindex_new(gt_str_get(idxlocalioptions->indexname),
                                    idxlocalioptions->withesa,
                                    idxlocalioptions->withesa ||
                                    idxlocalioptions->docompare,
                                    false,
                                    true,
                                    0,
                                    logger,
                                    err);
    if (genericindex == NULL)
    {
      haserr = true;
    } else
    {
      encseq = genericindex_getencseq(genericindex);
    }
  }
  if (!haserr)
  {
    GtSeqIterator *seqit;
    const GtUchar *query;
    unsigned long querylen;
    char *desc = NULL;
    int retval;
    Limdfsresources *limdfsresources = NULL;
    const AbstractDfstransformer *dfst;
    SWdpresource *swdpresource = NULL;
    Showmatchinfo showmatchinfo;
    ProcessIdxMatch processmatch;
    GtAlphabet *a;
    void *processmatchinfoonline, *processmatchinfooffline;
    Storematchinfo storeonline, storeoffline;

    a = gt_encseq_alphabet(encseq);
    if (idxlocalioptions->docompare)
    {
      processmatch = storematch;
      gt_initstorematch(&storeonline,encseq);
      gt_initstorematch(&storeoffline,encseq);
      processmatchinfoonline = &storeonline;
      processmatchinfooffline = &storeoffline;
    } else
    {
      processmatch = showmatch;
      showmatchinfo.encseq = encseq;
      showmatchinfo.characters = gt_alphabet_characters(a);
      showmatchinfo.wildcardshow = gt_alphabet_wildcard_show(a);
      showmatchinfo.showalignment = idxlocalioptions->showalignment;
      processmatchinfoonline = processmatchinfooffline = &showmatchinfo;
    }
    if (idxlocalioptions->doonline || idxlocalioptions->docompare)
    {
      swdpresource = gt_newSWdpresource(idxlocalioptions->matchscore,
                                     idxlocalioptions->mismatchscore,
                                     idxlocalioptions->gapextend,
                                     idxlocalioptions->threshold,
                                     idxlocalioptions->showalignment,
                                     processmatch,
                                     processmatchinfoonline);
    }
    dfst = gt_locali_AbstractDfstransformer();
    if (!idxlocalioptions->doonline || idxlocalioptions->docompare)
    {
      gt_assert(genericindex != NULL);
      limdfsresources = gt_newLimdfsresources(genericindex,
                                           true,
                                           0,
                                           0,    /* maxpathlength */
                                           true, /* keepexpandedonstack */
                                           processmatch,
                                           processmatchinfooffline,
                                           NULL, /* processresult */
                                           NULL, /* processresult info */
                                           dfst);
    }
    seqit = gt_seq_iterator_sequence_buffer_new(idxlocalioptions->queryfiles,
                                               err);
    if (!seqit)
      haserr = true;
    if (!haserr)
    {
      gt_seq_iterator_set_symbolmap(seqit, gt_alphabet_symbolmap(a));
      for (showmatchinfo.queryunit = 0; /* Nothing */;
           showmatchinfo.queryunit++)
      {
        retval = gt_seq_iterator_next(seqit,
                                     &query,
                                     &querylen,
                                     &desc,
                                     err);
        if (retval < 0)
        {
          haserr = true;
          break;
        }
        if (retval == 0)
        {
          break;
        }
        printf("process sequence " Formatuint64_t " of length %lu\n",
                PRINTuint64_tcast(showmatchinfo.queryunit),querylen);
        if (idxlocalioptions->doonline || idxlocalioptions->docompare)
        {
          gt_multiapplysmithwaterman(swdpresource,encseq,query,querylen);
        }
        if (!idxlocalioptions->doonline || idxlocalioptions->docompare)
        {
          gt_indexbasedlocali(limdfsresources,
                           idxlocalioptions->matchscore,
                           idxlocalioptions->mismatchscore,
                           idxlocalioptions->gapstart,
                           idxlocalioptions->gapextend,
                           idxlocalioptions->threshold,
                           query,
                           querylen,
                           dfst);
        }
        if (idxlocalioptions->docompare)
        {
          gt_checkandresetstorematch(showmatchinfo.queryunit,
                                  &storeonline,&storeoffline);
        }
      }
      if (limdfsresources != NULL)
      {
        gt_freeLimdfsresources(&limdfsresources,dfst);
      }
      if (swdpresource != NULL)
      {
        gt_freeSWdpresource(swdpresource);
        swdpresource = NULL;
      }
      gt_seq_iterator_delete(seqit);
    }
    if (idxlocalioptions->docompare)
    {
      gt_freestorematch(&storeonline);
      gt_freestorematch(&storeoffline);
    }
  }
  if (genericindex == NULL)
  {
    gt_encseq_delete((GtEncseq *) encseq);
    encseq = NULL;
  } else
  {
    genericindex_delete(genericindex);
  }
  gt_logger_delete(logger);
  logger = NULL;
  return haserr ? -1 : 0;
}
Esempio n. 6
0
static int gt_encseq_info_runner(GT_UNUSED int argc, const char **argv,
                           int parsed_args, void *tool_arguments,
                           GtError *err)
{
  GtEncseqInfoArguments *arguments = tool_arguments;
  int had_err = 0;
  GtAlphabet *alpha;
  const GtUchar *chars;
  gt_error_check(err);
  gt_assert(arguments);

  if (arguments->nomap) {
    GtEncseqMetadata *emd = gt_encseq_metadata_new(argv[parsed_args], err);
    if (!emd)
      had_err = -1;

    if (!had_err) {
      if (!arguments->noindexname) {
        gt_file_xprintf(arguments->outfp, "index name: ");
        gt_file_xprintf(arguments->outfp, "%s\n", argv[parsed_args]);
      }

      gt_file_xprintf(arguments->outfp, "file format version: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU"\n",
                                          gt_encseq_metadata_version(emd));

      gt_file_xprintf(arguments->outfp, "64-bit file: ");
      gt_file_xprintf(arguments->outfp, "%s\n", gt_encseq_metadata_is64bit(emd)
                                                  ? "yes"
                                                  : "no");

      gt_file_xprintf(arguments->outfp, "total length: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU"\n",
                                        gt_encseq_metadata_total_length(emd));

      gt_file_xprintf(arguments->outfp, "number of sequences: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU"\n",
                                      gt_encseq_metadata_num_of_sequences(emd));

      gt_file_xprintf(arguments->outfp, "number of files: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU"\n",
                                        gt_encseq_metadata_num_of_files(emd));

      gt_file_xprintf(arguments->outfp, "length of shortest/longest "
                                        "sequence: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU"/"GT_WU"\n",
                                        gt_encseq_metadata_min_seq_length(emd),
                                        gt_encseq_metadata_max_seq_length(emd));

      gt_file_xprintf(arguments->outfp, "accesstype: ");
      gt_file_xprintf(arguments->outfp, "%s\n",
                 gt_encseq_access_type_str(gt_encseq_metadata_accesstype(emd)));

      alpha = gt_encseq_metadata_alphabet(emd);
      chars = gt_alphabet_characters(alpha);
      gt_file_xprintf(arguments->outfp, "alphabet size: ");
      gt_file_xprintf(arguments->outfp, "%u\n",
                                        gt_alphabet_num_of_chars(alpha));
      gt_file_xprintf(arguments->outfp, "alphabet characters: ");
      gt_file_xprintf(arguments->outfp, "%.*s", gt_alphabet_num_of_chars(alpha),
                                        (char*) chars);
      if (gt_alphabet_is_dna(alpha))
        gt_file_xprintf(arguments->outfp, " (DNA)");
      if (gt_alphabet_is_protein(alpha))
        gt_file_xprintf(arguments->outfp, " (Protein)");
      gt_file_xprintf(arguments->outfp, "\n");
      if (arguments->show_alphabet) {
        GtStr *out = gt_str_new();
        gt_alphabet_to_str(alpha, out);
        gt_file_xprintf(arguments->outfp, "alphabet definition:\n");
        gt_file_xprintf(arguments->outfp, "%s\n", gt_str_get(out));
        gt_str_delete(out);
      }

    }
    gt_encseq_metadata_delete(emd);
  } else {
    GtEncseqLoader *encseq_loader;
    GtEncseq *encseq;

    encseq_loader = gt_encseq_loader_new();
    if (arguments->mirror)
      gt_encseq_loader_mirror(encseq_loader);
    if (!(encseq = gt_encseq_loader_load(encseq_loader,
                                         argv[parsed_args], err)))
      had_err = -1;

    if (!had_err) {
      const GtStrArray *filenames;
      GtUword i;

      if (!arguments->noindexname) {
        gt_file_xprintf(arguments->outfp, "index name: ");
        gt_file_xprintf(arguments->outfp, "%s\n", argv[parsed_args]);
      }

      gt_file_xprintf(arguments->outfp, "file format version: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU"\n", gt_encseq_version(encseq));

      gt_file_xprintf(arguments->outfp, "64-bit file: ");
      gt_file_xprintf(arguments->outfp, "%s\n", gt_encseq_is_64_bit(encseq)
                                                   ? "yes"
                                                   : "no");

      gt_file_xprintf(arguments->outfp, "total length: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU"\n",
                                        gt_encseq_total_length(encseq));

      gt_file_xprintf(arguments->outfp, "compressed size: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU" bytes\n",
                                        gt_encseq_sizeofrep(encseq));

      gt_file_xprintf(arguments->outfp, "number of sequences: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU"\n",
                                        gt_encseq_num_of_sequences(encseq));

      gt_file_xprintf(arguments->outfp, "number of files: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU"\n",
                                        gt_encseq_num_of_files(encseq));

      gt_file_xprintf(arguments->outfp, "length of shortest/longest "
                                        "sequence: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU"/"GT_WU"\n",
                                      gt_encseq_min_seq_length(encseq),
                                      gt_encseq_max_seq_length(encseq));

      filenames = gt_encseq_filenames(encseq);
      gt_file_xprintf(arguments->outfp, "original filenames:\n");
      for (i = 0; i < gt_str_array_size(filenames); i++) {
        gt_file_xprintf(arguments->outfp, "\t%s ("GT_WU" characters)\n",
                                          gt_str_array_get(filenames, i),
                                          (GtUword)
                                     gt_encseq_effective_filelength(encseq, i));
      }

      alpha = gt_encseq_alphabet(encseq);
      chars = gt_alphabet_characters(alpha);
      gt_file_xprintf(arguments->outfp, "alphabet size: ");
      gt_file_xprintf(arguments->outfp, "%u\n",
                                        gt_alphabet_num_of_chars(alpha));
      gt_file_xprintf(arguments->outfp, "alphabet characters: ");
      gt_file_xprintf(arguments->outfp, "%.*s", gt_alphabet_num_of_chars(alpha),
                                        (char*) chars);
      if (gt_alphabet_is_dna(alpha))
        gt_file_xprintf(arguments->outfp, " (DNA)");
      if (gt_alphabet_is_protein(alpha))
        gt_file_xprintf(arguments->outfp, " (Protein)");
      gt_file_xprintf(arguments->outfp, "\n");
      if (arguments->show_alphabet) {
        GtStr *out = gt_str_new();
        gt_alphabet_to_str(alpha, out);
        gt_file_xprintf(arguments->outfp, "alphabet definition:\n");
        gt_file_xprintf(arguments->outfp, "%s\n", gt_str_get(out));
        gt_str_delete(out);
      }

      gt_file_xprintf(arguments->outfp, "character distribution:\n");
      for (i = 0; i < gt_alphabet_num_of_chars(alpha); i++) {
        GtUword cc;
        cc = gt_encseq_charcount(encseq, gt_alphabet_encode(alpha, chars[i]));
        gt_file_xprintf(arguments->outfp, "\t%c: "GT_WU" (%.2f%%)\n",
                                          (char) chars[i],
                                          cc,
                             (cc /(double) (gt_encseq_total_length(encseq)
                                  - gt_encseq_num_of_sequences(encseq)+1))*100);
      }

      gt_file_xprintf(arguments->outfp, "number of wildcards: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU" ("GT_WU" range(s))\n",
                                        gt_encseq_wildcards(encseq),
                                        gt_encseq_realwildcardranges(encseq));

      gt_file_xprintf(arguments->outfp, "number of special characters: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU" ("GT_WU" range(s))\n",
                                        gt_encseq_specialcharacters(encseq),
                                        gt_encseq_realspecialranges(encseq));

      gt_file_xprintf(arguments->outfp, "length of longest non-special "
                                        "character stretch: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU"\n",
                                   gt_encseq_lengthoflongestnonspecial(encseq));

      gt_file_xprintf(arguments->outfp, "accesstype: ");
      gt_file_xprintf(arguments->outfp, "%s\n",
                   gt_encseq_access_type_str(gt_encseq_accesstype_get(encseq)));

      gt_file_xprintf(arguments->outfp, "bits used per character: ");
      gt_file_xprintf(arguments->outfp, "%f\n",
        (double) ((uint64_t) CHAR_BIT *
                  (uint64_t) gt_encseq_sizeofrep(encseq)) /
        (double) gt_encseq_total_length(encseq));

      gt_file_xprintf(arguments->outfp, "has special ranges: ");
      gt_file_xprintf(arguments->outfp, "%s\n",
                                        gt_encseq_has_specialranges(encseq)
                                          ? "yes"
                                          : "no");

      gt_file_xprintf(arguments->outfp, "has description support: ");
      gt_file_xprintf(arguments->outfp, "%s\n",
                                       gt_encseq_has_description_support(encseq)
                                          ? "yes"
                                          : "no");

      if (gt_encseq_has_description_support(encseq)) {
        gt_file_xprintf(arguments->outfp, "length of longest description: ");
        gt_file_xprintf(arguments->outfp, ""GT_WU"\n",
                                          gt_encseq_max_desc_length(encseq));
      }

      gt_file_xprintf(arguments->outfp, "has multiple sequence support: ");
      gt_file_xprintf(arguments->outfp, "%s\n",
                                        gt_encseq_has_multiseq_support(encseq)
                                          ? "yes"
                                          : "no");
    }
    gt_encseq_delete(encseq);
    gt_encseq_loader_delete(encseq_loader);
  }

  return had_err;
}