コード例 #1
0
ファイル: encseq_lua.c プロジェクト: kowsky/genometools
/*
 * Lua binding: switches on mirroring for the encseq loader passed as the
 * first Lua argument.
 *
 * Returns 0, i.e. no values are handed back to the Lua caller.
 */
static int encseq_loader_lua_mirror(lua_State *L)
{
  GtEncseqLoader **el_ref = check_encseq_loader(L, 1);

  /* the userdata slot must hold a live loader object */
  gt_assert(*el_ref);
  gt_encseq_loader_mirror(*el_ref);
  return 0;
}
コード例 #2
0
/*
 * Loads the encoded read set <readset> (mirrored, with description support
 * dropped and autosupport disabled), hands it to
 * gt_readjoiner_assembly_pump_encseq_through_cache() and then converts the
 * contig paths to FASTA via gt_contigpaths_to_fasta().
 *
 * <timer> must be non-NULL when gt_showtime_enabled() is true; if *<timer>
 * is NULL a new timer is created and started, otherwise progress is reported
 * on the existing one.
 *
 * Returns the result of gt_contigpaths_to_fasta(), or -1 if the read set
 * could not be loaded (the loader is expected to have set <err> in that
 * case, as the other callers of gt_encseq_loader_load() assume).
 */
static int gt_readjoiner_assembly_paths2seq(const char *readset,
    GtUword lengthcutoff, bool showpaths, bool astat,
    double coverage, bool load_copynum, GtUword buffersize,
    GtLogger *default_logger, GtTimer **timer, GtError *err)
{
  int had_err;
  GtEncseqLoader *el = gt_encseq_loader_new();
  GtEncseq *reads;

  if (gt_showtime_enabled())
  {
    gt_assert(timer != NULL);
    if (*timer == NULL) /* paths2seq */
    {
      *timer = gt_timer_new_with_progress_description(
          GT_READJOINER_ASSEMBLY_MSG_PUMPENCSEQ);
      gt_timer_show_cpu_time_by_progress(*timer);
      gt_timer_start(*timer);
    }
    else
      gt_timer_show_progress(*timer, GT_READJOINER_ASSEMBLY_MSG_PUMPENCSEQ,
          stdout);
  }
  gt_logger_log(default_logger, GT_READJOINER_ASSEMBLY_MSG_PUMPENCSEQ);
  gt_encseq_loader_drop_description_support(el);
  gt_encseq_loader_disable_autosupport(el);
  gt_encseq_loader_mirror(el);
  reads = gt_encseq_loader_load(el, readset, err);
  if (reads == NULL)
  {
    /* A missing or unreadable index is a runtime error, not a programming
       error: report it to the caller instead of asserting (which would
       abort, or dereference NULL when assertions are compiled out). */
    gt_encseq_loader_delete(el);
    return -1;
  }
  gt_readjoiner_assembly_pump_encseq_through_cache(reads);
  if (gt_showtime_enabled())
    gt_timer_show_progress(*timer, GT_READJOINER_ASSEMBLY_MSG_OUTPUTCONTIGS,
        stdout);
  gt_logger_log(default_logger, GT_READJOINER_ASSEMBLY_MSG_OUTPUTCONTIGS);
  had_err = gt_contigpaths_to_fasta(readset, GT_READJOINER_SUFFIX_CONTIG_PATHS,
      GT_READJOINER_SUFFIX_CONTIGS, reads, lengthcutoff, showpaths,
      astat, coverage, load_copynum, (size_t)buffersize, default_logger, err);
  gt_encseq_delete(reads);
  gt_encseq_loader_delete(el);
  return had_err;
}
コード例 #3
0
ファイル: gt_genomediff.c プロジェクト: potter-s/genometools
/*
 * Runner of the genomediff tool.
 *
 * Collects the remaining command line arguments (argv[parsed_args..argc-1])
 * as input file names. With more than one file name the files are encoded
 * into the index named by arguments->indexname; otherwise the single name is
 * appended to arguments->indexname and, when an ESA or PCK index is used,
 * the project file is scanned for a "mirrored=1" line. The encoded sequence
 * is then loaded (mirrored if requested by the project file), the shulen
 * sums are computed either from the existing index or by running the
 * suffixerator, and gt_genomediff_kr_calc() is invoked on the result.
 *
 * Returns 0 on success and a non-zero value on failure.
 */
static int gt_genomediff_runner(int argc, const char **argv,
                                int parsed_args, void *tool_arguments,
                                GtError *err)
{
    bool mirrored = false;
    int had_err = 0,
        i;
    GtEncseq              *encseq = NULL;
    GtGenomediffArguments *arguments = tool_arguments;
    GtLogger              *logger;
    GtShuUnitFileInfo     *unit_info = NULL;
    GtTimer               *timer = NULL;

    gt_error_check(err);
    gt_assert(arguments);

    logger = gt_logger_new(arguments->verbose,
                           GT_LOGGER_DEFLT_PREFIX,
                           stdout);
    gt_assert(logger);

    for (i = parsed_args; i < argc; i++) {
        gt_str_array_add_cstr(arguments->filenames, argv[i]);
    }

    if (gt_showtime_enabled()) {
        timer = gt_timer_new_with_progress_description("start");
        /* check the timer before it is used, not afterwards */
        gt_assert(timer);
        gt_timer_start(timer);
    }

    if (arguments->with_units) {
        gt_logger_log(logger, "unitfile option set, filename is %s\n",
                      gt_str_get(arguments->unitfile));
    }

    if (timer != NULL)
        gt_timer_show_progress(timer, "start shu search", stdout);

    if (gt_str_array_size(arguments->filenames) > 1UL) {
        GtEncseqEncoder *ee = gt_encseq_encoder_new();
        gt_encseq_encoder_set_timer(ee, timer);
        gt_encseq_encoder_set_logger(ee, logger);
        /* kr only makes sense for dna, so we can check this already with ee */
        gt_encseq_encoder_set_input_dna(ee);
        had_err = gt_encseq_encoder_encode(ee, arguments->filenames,
                                           gt_str_get(arguments->indexname), err);
        gt_encseq_encoder_delete(ee);
    }
    else {
        gt_str_append_str(arguments->indexname,
                          gt_str_array_get_str(arguments->filenames, 0));
        if (arguments->with_esa || arguments->with_pck) {
            GtStr *current_line = gt_str_new();
            FILE *prj_fp;

            prj_fp = gt_fa_fopen_with_suffix(gt_str_get(arguments->indexname),
                                             GT_PROJECTFILESUFFIX,"rb",err);
            if (prj_fp == NULL)
                had_err = -1;
            /* scan the project file for a "mirrored=1" line */
            while (!had_err && gt_str_read_next_line(current_line, prj_fp) != EOF) {
                char **elements = gt_cstr_split(gt_str_get(current_line), '=');

                if (elements != NULL) {
                    char **token;

                    if (elements[0] != NULL) {
                        gt_log_log("%s", elements[0]);
                        /* a "mirrored" line without '=' has no value token;
                           never hand NULL to strcmp() */
                        if (strcmp("mirrored", elements[0]) == 0 &&
                            elements[1] != NULL) {
                            gt_log_log("%s", elements[1]);
                            if (strcmp("1", elements[1]) == 0) {
                                mirrored = true;
                                gt_log_log("sequences are treated as mirrored");
                            }
                        }
                    }
                    /* release every token, not just the first two, so that
                       lines containing several '=' do not leak.
                       NOTE(review): assumes gt_cstr_split() returns a
                       NULL-terminated array -- confirm against its
                       documentation. */
                    for (token = elements; *token != NULL; token++)
                        gt_free(*token);
                    gt_free(elements);
                }
                gt_str_reset(current_line);
            }
            gt_str_delete(current_line);
            if (prj_fp != NULL)
                gt_fa_xfclose(prj_fp);
        }
    }

    if (!had_err) {
        GtEncseqLoader *el = gt_encseq_loader_new_from_options(arguments->loadopts,
                             err);
        if (mirrored)
            gt_encseq_loader_mirror(el);
        encseq =
            gt_encseq_loader_load(el, gt_str_get(arguments->indexname), err);
        gt_encseq_loader_delete(el);
    }
    if (encseq == NULL)
        had_err = -1;
    if (!had_err) {
        unit_info = gt_shu_unit_info_new(encseq);
        if (arguments->with_units)
            had_err = gt_shu_unit_file_info_read(arguments->unitfile, unit_info,
                                                 logger, err);
    }

    if (!had_err) {
        uint64_t **shusums = NULL;
        if (arguments->with_esa || arguments->with_pck) {
            shusums = gt_genomediff_shulen_sum(arguments, unit_info,
                                               logger, timer, err);
            if (shusums == NULL)
                had_err = -1;
        }
        else {
            const bool doesa = true;
            GenomediffInfo gd_info;
            Suffixeratoroptions sopts;
            sopts.beverbose = arguments->verbose;
            sopts.indexname = arguments->indexname;
            sopts.db = NULL;
            sopts.encopts = NULL;
            sopts.genomediff = true;
            sopts.inputindex = arguments->indexname;
            sopts.loadopts = arguments->loadopts;
            sopts.showprogress = false;
            sopts.idxopts = arguments->idxopts;

            gt_assert(unit_info != NULL);
            gt_array2dim_calloc(shusums, unit_info->num_of_genomes,
                                unit_info->num_of_genomes);
            gd_info.shulensums = shusums;
            gd_info.unit_info = unit_info;
            had_err = gt_runsuffixerator(doesa, &sopts, &gd_info, logger, err);
        }
        if (!had_err && shusums != NULL) {
            had_err = gt_genomediff_kr_calc(shusums, arguments, unit_info,
                                            arguments->with_pck, logger, timer, err);
        }
        /* free the matrix on the error path as well: it was allocated above
           even if the suffixerator run failed */
        if (shusums != NULL) {
            gt_array2dim_delete(shusums);
        }
    }

    if (timer != NULL) {
        gt_timer_show_progress_final(timer, stdout);
        gt_timer_delete(timer);
    }
    gt_logger_delete(logger);
    gt_encseq_delete(encseq);
    gt_shu_unit_info_delete(unit_info);

    return had_err;
}
コード例 #4
0
ファイル: gt_encseq_info.c プロジェクト: kowsky/genometools
static int gt_encseq_info_runner(GT_UNUSED int argc, const char **argv,
                           int parsed_args, void *tool_arguments,
                           GtError *err)
{
  GtEncseqInfoArguments *arguments = tool_arguments;
  int had_err = 0;
  GtAlphabet *alpha;
  const GtUchar *chars;
  gt_error_check(err);
  gt_assert(arguments);

  if (arguments->nomap) {
    GtEncseqMetadata *emd = gt_encseq_metadata_new(argv[parsed_args], err);
    if (!emd)
      had_err = -1;

    if (!had_err) {
      if (!arguments->noindexname) {
        gt_file_xprintf(arguments->outfp, "index name: ");
        gt_file_xprintf(arguments->outfp, "%s\n", argv[parsed_args]);
      }

      gt_file_xprintf(arguments->outfp, "file format version: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU"\n",
                                          gt_encseq_metadata_version(emd));

      gt_file_xprintf(arguments->outfp, "64-bit file: ");
      gt_file_xprintf(arguments->outfp, "%s\n", gt_encseq_metadata_is64bit(emd)
                                                  ? "yes"
                                                  : "no");

      gt_file_xprintf(arguments->outfp, "total length: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU"\n",
                                        gt_encseq_metadata_total_length(emd));

      gt_file_xprintf(arguments->outfp, "number of sequences: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU"\n",
                                      gt_encseq_metadata_num_of_sequences(emd));

      gt_file_xprintf(arguments->outfp, "number of files: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU"\n",
                                        gt_encseq_metadata_num_of_files(emd));

      gt_file_xprintf(arguments->outfp, "length of shortest/longest "
                                        "sequence: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU"/"GT_WU"\n",
                                        gt_encseq_metadata_min_seq_length(emd),
                                        gt_encseq_metadata_max_seq_length(emd));

      gt_file_xprintf(arguments->outfp, "accesstype: ");
      gt_file_xprintf(arguments->outfp, "%s\n",
                 gt_encseq_access_type_str(gt_encseq_metadata_accesstype(emd)));

      alpha = gt_encseq_metadata_alphabet(emd);
      chars = gt_alphabet_characters(alpha);
      gt_file_xprintf(arguments->outfp, "alphabet size: ");
      gt_file_xprintf(arguments->outfp, "%u\n",
                                        gt_alphabet_num_of_chars(alpha));
      gt_file_xprintf(arguments->outfp, "alphabet characters: ");
      gt_file_xprintf(arguments->outfp, "%.*s", gt_alphabet_num_of_chars(alpha),
                                        (char*) chars);
      if (gt_alphabet_is_dna(alpha))
        gt_file_xprintf(arguments->outfp, " (DNA)");
      if (gt_alphabet_is_protein(alpha))
        gt_file_xprintf(arguments->outfp, " (Protein)");
      gt_file_xprintf(arguments->outfp, "\n");
      if (arguments->show_alphabet) {
        GtStr *out = gt_str_new();
        gt_alphabet_to_str(alpha, out);
        gt_file_xprintf(arguments->outfp, "alphabet definition:\n");
        gt_file_xprintf(arguments->outfp, "%s\n", gt_str_get(out));
        gt_str_delete(out);
      }

    }
    gt_encseq_metadata_delete(emd);
  } else {
    GtEncseqLoader *encseq_loader;
    GtEncseq *encseq;

    encseq_loader = gt_encseq_loader_new();
    if (arguments->mirror)
      gt_encseq_loader_mirror(encseq_loader);
    if (!(encseq = gt_encseq_loader_load(encseq_loader,
                                         argv[parsed_args], err)))
      had_err = -1;

    if (!had_err) {
      const GtStrArray *filenames;
      GtUword i;

      if (!arguments->noindexname) {
        gt_file_xprintf(arguments->outfp, "index name: ");
        gt_file_xprintf(arguments->outfp, "%s\n", argv[parsed_args]);
      }

      gt_file_xprintf(arguments->outfp, "file format version: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU"\n", gt_encseq_version(encseq));

      gt_file_xprintf(arguments->outfp, "64-bit file: ");
      gt_file_xprintf(arguments->outfp, "%s\n", gt_encseq_is_64_bit(encseq)
                                                   ? "yes"
                                                   : "no");

      gt_file_xprintf(arguments->outfp, "total length: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU"\n",
                                        gt_encseq_total_length(encseq));

      gt_file_xprintf(arguments->outfp, "compressed size: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU" bytes\n",
                                        gt_encseq_sizeofrep(encseq));

      gt_file_xprintf(arguments->outfp, "number of sequences: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU"\n",
                                        gt_encseq_num_of_sequences(encseq));

      gt_file_xprintf(arguments->outfp, "number of files: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU"\n",
                                        gt_encseq_num_of_files(encseq));

      gt_file_xprintf(arguments->outfp, "length of shortest/longest "
                                        "sequence: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU"/"GT_WU"\n",
                                      gt_encseq_min_seq_length(encseq),
                                      gt_encseq_max_seq_length(encseq));

      filenames = gt_encseq_filenames(encseq);
      gt_file_xprintf(arguments->outfp, "original filenames:\n");
      for (i = 0; i < gt_str_array_size(filenames); i++) {
        gt_file_xprintf(arguments->outfp, "\t%s ("GT_WU" characters)\n",
                                          gt_str_array_get(filenames, i),
                                          (GtUword)
                                     gt_encseq_effective_filelength(encseq, i));
      }

      alpha = gt_encseq_alphabet(encseq);
      chars = gt_alphabet_characters(alpha);
      gt_file_xprintf(arguments->outfp, "alphabet size: ");
      gt_file_xprintf(arguments->outfp, "%u\n",
                                        gt_alphabet_num_of_chars(alpha));
      gt_file_xprintf(arguments->outfp, "alphabet characters: ");
      gt_file_xprintf(arguments->outfp, "%.*s", gt_alphabet_num_of_chars(alpha),
                                        (char*) chars);
      if (gt_alphabet_is_dna(alpha))
        gt_file_xprintf(arguments->outfp, " (DNA)");
      if (gt_alphabet_is_protein(alpha))
        gt_file_xprintf(arguments->outfp, " (Protein)");
      gt_file_xprintf(arguments->outfp, "\n");
      if (arguments->show_alphabet) {
        GtStr *out = gt_str_new();
        gt_alphabet_to_str(alpha, out);
        gt_file_xprintf(arguments->outfp, "alphabet definition:\n");
        gt_file_xprintf(arguments->outfp, "%s\n", gt_str_get(out));
        gt_str_delete(out);
      }

      gt_file_xprintf(arguments->outfp, "character distribution:\n");
      for (i = 0; i < gt_alphabet_num_of_chars(alpha); i++) {
        GtUword cc;
        cc = gt_encseq_charcount(encseq, gt_alphabet_encode(alpha, chars[i]));
        gt_file_xprintf(arguments->outfp, "\t%c: "GT_WU" (%.2f%%)\n",
                                          (char) chars[i],
                                          cc,
                             (cc /(double) (gt_encseq_total_length(encseq)
                                  - gt_encseq_num_of_sequences(encseq)+1))*100);
      }

      gt_file_xprintf(arguments->outfp, "number of wildcards: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU" ("GT_WU" range(s))\n",
                                        gt_encseq_wildcards(encseq),
                                        gt_encseq_realwildcardranges(encseq));

      gt_file_xprintf(arguments->outfp, "number of special characters: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU" ("GT_WU" range(s))\n",
                                        gt_encseq_specialcharacters(encseq),
                                        gt_encseq_realspecialranges(encseq));

      gt_file_xprintf(arguments->outfp, "length of longest non-special "
                                        "character stretch: ");
      gt_file_xprintf(arguments->outfp, ""GT_WU"\n",
                                   gt_encseq_lengthoflongestnonspecial(encseq));

      gt_file_xprintf(arguments->outfp, "accesstype: ");
      gt_file_xprintf(arguments->outfp, "%s\n",
                   gt_encseq_access_type_str(gt_encseq_accesstype_get(encseq)));

      gt_file_xprintf(arguments->outfp, "bits used per character: ");
      gt_file_xprintf(arguments->outfp, "%f\n",
        (double) ((uint64_t) CHAR_BIT *
                  (uint64_t) gt_encseq_sizeofrep(encseq)) /
        (double) gt_encseq_total_length(encseq));

      gt_file_xprintf(arguments->outfp, "has special ranges: ");
      gt_file_xprintf(arguments->outfp, "%s\n",
                                        gt_encseq_has_specialranges(encseq)
                                          ? "yes"
                                          : "no");

      gt_file_xprintf(arguments->outfp, "has description support: ");
      gt_file_xprintf(arguments->outfp, "%s\n",
                                       gt_encseq_has_description_support(encseq)
                                          ? "yes"
                                          : "no");

      if (gt_encseq_has_description_support(encseq)) {
        gt_file_xprintf(arguments->outfp, "length of longest description: ");
        gt_file_xprintf(arguments->outfp, ""GT_WU"\n",
                                          gt_encseq_max_desc_length(encseq));
      }

      gt_file_xprintf(arguments->outfp, "has multiple sequence support: ");
      gt_file_xprintf(arguments->outfp, "%s\n",
                                        gt_encseq_has_multiseq_support(encseq)
                                          ? "yes"
                                          : "no");
    }
    gt_encseq_delete(encseq);
    gt_encseq_loader_delete(encseq_loader);
  }

  return had_err;
}
コード例 #5
0
/*
 * Runner of the readjoiner containment test tool. Depending on
 * arguments->test it obtains a containment bit table in one of three ways
 * and then prints it with gt_cntlist_show():
 *
 *  - SHOWLIST:        parse the existing list file of the read set
 *                     (read set name plus GT_READJOINER_SUFFIX_CNTLIST);
 *  - BRUTEFORCE / KMP: load the read set (mirrored unless
 *                     arguments->singlestrand) and run
 *                     gt_rdj_pairwise_exact() in GT_OVLFIND_CNT mode;
 *  - ESA:             open a sequential suffix array reader on the read set
 *                     and run gt_contfind_bottomup().
 *
 * Returns 0 on success and a non-zero value on failure.
 */
static int gt_readjoiner_cnttest_runner(GT_UNUSED int argc,
    GT_UNUSED const char **argv, GT_UNUSED int parsed_args,
    void *tool_arguments, GT_UNUSED GtError *err)
{
  GtReadjoinerCnttestArguments *arguments = tool_arguments;
  GtEncseqLoader *el = NULL;
  GtEncseq *reads = NULL;
  GtBitsequence *bits = NULL;
  /* initialised so that no path (e.g. the unknown-test branch with
     assertions compiled out) can pass an indeterminate value on */
  GtUword nofreads = 0;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  if (arguments->test == GT_READJOINER_CNTTEST_SHOWLIST)
  {
    GtStr *fn = NULL;
    fn = gt_str_clone(arguments->readset);
    gt_str_append_cstr(fn, GT_READJOINER_SUFFIX_CNTLIST);
    had_err = gt_cntlist_parse(gt_str_get(fn), true, &bits, &nofreads, err);
    gt_str_delete(fn);
  }
  else if (arguments->test == GT_READJOINER_CNTTEST_BRUTEFORCE ||
      arguments->test == GT_READJOINER_CNTTEST_KMP)
  {
    el = gt_encseq_loader_new();
    gt_encseq_loader_drop_description_support(el);
    gt_encseq_loader_disable_autosupport(el);
    if (!arguments->singlestrand)
      gt_encseq_loader_mirror(el);
    reads = gt_encseq_loader_load(el, gt_str_get(arguments->readset), err);
    if (reads == NULL)
      had_err = -1;
    else
    {
      gt_rdj_pairwise_exact(GT_OVLFIND_CNT, reads, !arguments->singlestrand,
          false, arguments->test == GT_READJOINER_CNTTEST_KMP, 1UL, true,
          NULL, NULL, false, NULL, &bits, &nofreads);
    }
    gt_encseq_delete(reads);
    gt_encseq_loader_delete(el);
  }
  else if (arguments->test == GT_READJOINER_CNTTEST_ESA)
  {
    Sequentialsuffixarrayreader *ssar = NULL;
    GtUword readlength = 0, firstrevcompl = 0;
    GtLogger *verbose_logger = gt_logger_new(arguments->verbose,
        GT_LOGGER_DEFLT_PREFIX, stderr);
    ssar = gt_newSequentialsuffixarrayreaderfromfile(gt_str_get(
          arguments->readset), SARR_LCPTAB | SARR_SUFTAB | SARR_SSPTAB,
        true, verbose_logger, err);
    /* also treat a NULL reader as failure so it is never dereferenced */
    if (ssar == NULL || gt_error_is_set(err))
      had_err = -1;
    else
    {
      nofreads = gt_encseq_num_of_sequences(ssar->encseq);
      if (!arguments->singlestrand)
      {
        /* only half of the sequences are counted as reads in this mode;
           firstrevcompl is set to that count */
        nofreads = GT_DIV2(nofreads);
        firstrevcompl = nofreads;
      }
      GT_INITBITTAB(bits, nofreads);
      /* NOTE(review): the original had two stacked, unbraced ifs here; the
         combined condition below keeps that exact behaviour. Confirm that
         restricting readlength to the non-singlestrand case is intended. */
      if (!arguments->singlestrand &&
          gt_encseq_accesstype_get(ssar->encseq) == GT_ACCESS_TYPE_EQUALLENGTH)
      {
        readlength = gt_encseq_seqlength(ssar->encseq, 0);
      }
      (void)gt_contfind_bottomup(ssar, false, bits, arguments->singlestrand ? 0
          : firstrevcompl, readlength);
    }
    if (ssar != NULL)
      gt_freeSequentialsuffixarrayreader(&ssar);
    gt_logger_delete(verbose_logger);
  }
  else
  {
    /* unknown test kind: programming error */
    gt_assert(false);
  }
  if (!had_err)
    had_err = gt_cntlist_show(bits, nofreads, NULL, false, err);
  gt_free(bits);
  return had_err;
}