コード例 #1
0
/* Encode the sequence files listed in <infiles> into an encoded sequence
   stored under <indexname>, using an encoder configured from <opts>.
   If <verbose> is set, log output (prefixed with "# ") goes to stderr.
   If <esq_no_header> is set, the esq header is disabled on the encoder
   before encoding.
   Returns 0 on success; -1 if the encoder could not be created from <opts>,
   otherwise the result of the encoding step. */
static int encode_sequence_files(GtStrArray *infiles, GtEncseqOptions *opts,
                                 const char *indexname, bool verbose,
                                 bool esq_no_header,
                                 GtError *err)
{
  GtLogger *verbose_log;
  GtEncseqEncoder *encoder;
  int status = -1; /* stays -1 unless the encoder was created and run */

  gt_error_check(err);
  gt_assert(infiles && gt_str_array_size(infiles) > 0 && opts);

  verbose_log = gt_logger_new(verbose, "# ", stderr);
  encoder = gt_encseq_encoder_new_from_options(opts, err);
  if (encoder != NULL) {
    gt_encseq_encoder_set_logger(encoder, verbose_log);
    if (esq_no_header)
      gt_encseq_encoder_disable_esq_header(encoder);
    status = gt_encseq_encoder_encode(encoder, infiles, indexname, err);
  }

  /* both objects are released on the success and on the failure path */
  gt_encseq_encoder_delete(encoder);
  gt_logger_delete(verbose_log);
  return status;
}
コード例 #2
0
ファイル: encseq_lua.c プロジェクト: kowsky/genometools
/* Lua C function: fetch the encoder handle from argument 1 of the Lua stack
   and delete the encoder it points to. Pushes no results (returns 0). */
static int encseq_encoder_lua_delete(lua_State *L)
{
  GtEncseqEncoder **handle = check_encseq_encoder(L, 1);

  gt_encseq_encoder_delete(*handle);
  return 0;
}
コード例 #3
0
ファイル: bioseq.c プロジェクト: ggonnella/genometools
/* Build the encoded-sequence index files for <bs> under the index name
   <bioseq_indexname>.
   When <bs> reads from stdin, the input is first copied into a temporary
   file, which is then used as the sequence file; otherwise the sequence file
   of <bs> is encoded and a signal handler removing incomplete files is
   installed for the duration of the encoding.
   Returns the result of gt_encseq_encoder_encode(). */
static int construct_bioseq_files(GtBioseq *bs, GtStr *bioseq_indexname,
                                  GtError *err)
{
  GtStr *seqfile;
  GtStrArray *inputs;
  GtEncseqEncoder *encoder;
  int status;

  gt_error_check(err);

  if (!bs->use_stdin) {
    /* register the signal handler to remove incomplete files upon
       termination */
    gt_bioseq_index_filename = gt_str_get(bs->sequence_file);
    gt_sig_register_all(remove_bioseq_files);
    seqfile = gt_str_ref(bs->sequence_file);
  }
  else {
    /* GtEncseq cannot be built from stdin directly, so spool stdin into a
       temporary file first */
    char chunk[BUFSIZ];
    int nread;
    FILE *tmp_fp;

    seqfile = gt_str_new();
    tmp_fp = gt_xtmpfp(seqfile);
    gt_assert(tmp_fp);
    for (;;) {
      nread = fread(chunk, 1, BUFSIZ, stdin);
      if (nread <= 0)
        break;
      /* NOTE(review): the fwrite() result is not checked, so a short write
         to the temporary file would go unnoticed here */
      fwrite(chunk, 1, nread, tmp_fp);
    }
    gt_fa_xfclose(tmp_fp);
  }
  gt_assert(gt_str_length(seqfile) > 0);

  /* encoder with description, MD5, multiseq and lossless support enabled */
  encoder = gt_encseq_encoder_new();
  gt_encseq_encoder_enable_description_support(encoder);
  gt_encseq_encoder_enable_md5_support(encoder);
  gt_encseq_encoder_enable_multiseq_support(encoder);
  gt_encseq_encoder_enable_lossless_support(encoder);

  /* the encoder takes its input as a string array with a single entry */
  inputs = gt_str_array_new();
  gt_str_array_add(inputs, seqfile);
  gt_str_delete(seqfile);

  status = gt_encseq_encoder_encode(encoder, inputs,
                                    gt_str_get(bioseq_indexname), err);

  /* unregister the signal handler */
  if (!bs->use_stdin)
    gt_sig_unregister_all();

  gt_str_array_delete(inputs);
  gt_encseq_encoder_delete(encoder);
  return status;
}
コード例 #4
0
ファイル: gt_genomediff.c プロジェクト: potter-s/genometools
/*
 * Runner of the genomediff tool.
 *
 * Steps, in order:
 *  1. Append the remaining command line arguments (argv[parsed_args..argc-1])
 *     to arguments->filenames.
 *  2. If more than one file name is given, encode them as DNA into the index
 *     arguments->indexname. Otherwise use the single file name as the index
 *     name and, for the ESA/PCK modes, scan the project file for a
 *     "mirrored=1" entry.
 *  3. Load the encoded sequence (mirrored if the project file says so) and
 *     build the unit information, optionally reading it from the unit file.
 *  4. Obtain the shulen sums, either via gt_genomediff_shulen_sum() (ESA/PCK
 *     modes) or by running the suffixerator, and pass them on to
 *     gt_genomediff_kr_calc().
 *
 * tool_arguments is expected to point to a GtGenomediffArguments object.
 * Returns 0 on success and a non-zero value on failure.
 */
static int gt_genomediff_runner(int argc, const char **argv,
                                int parsed_args, void *tool_arguments,
                                GtError *err)
{
    bool mirrored = false;
    int had_err = 0,
        i;
    GtEncseq              *encseq = NULL;
    GtGenomediffArguments *arguments = tool_arguments;
    GtLogger              *logger;
    GtShuUnitFileInfo     *unit_info = NULL;
    GtTimer               *timer = NULL;

    gt_error_check(err);
    gt_assert(arguments);

    logger = gt_logger_new(arguments->verbose,
                           GT_LOGGER_DEFLT_PREFIX,
                           stdout);
    gt_assert(logger);

    for (i = parsed_args; i < argc; i++) {
        gt_str_array_add_cstr(arguments->filenames, argv[i]);
    }

    if (gt_showtime_enabled()) {
        timer = gt_timer_new_with_progress_description("start");
        /* check the timer before it is used, not afterwards */
        gt_assert(timer);
        gt_timer_start(timer);
    }

    if (arguments->with_units) {
        gt_logger_log(logger, "unitfile option set, filename is %s\n",
                      gt_str_get(arguments->unitfile));
    }

    if (timer != NULL)
        gt_timer_show_progress(timer, "start shu search", stdout);

    if (gt_str_array_size(arguments->filenames) > 1UL) {
        GtEncseqEncoder *ee = gt_encseq_encoder_new();
        gt_encseq_encoder_set_timer(ee, timer);
        gt_encseq_encoder_set_logger(ee, logger);
        /* kr only makes sense for dna, so we can check this already with ee */
        gt_encseq_encoder_set_input_dna(ee);
        had_err = gt_encseq_encoder_encode(ee, arguments->filenames,
                                           gt_str_get(arguments->indexname), err);
        gt_encseq_encoder_delete(ee);
    }
    else {
        /* NOTE(review): this branch reads filenames[0]; presumably the option
           parser guarantees at least one file name -- confirm */
        gt_str_append_str(arguments->indexname,
                          gt_str_array_get_str(arguments->filenames, 0));
        if (arguments->with_esa || arguments->with_pck) {
            GtStr *current_line = gt_str_new();
            FILE *prj_fp;

            prj_fp = gt_fa_fopen_with_suffix(gt_str_get(arguments->indexname),
                                             GT_PROJECTFILESUFFIX,"rb",err);
            if (prj_fp == NULL)
                had_err = -1;
            /* scan the "key=value" lines of the project file for mirrored=1 */
            while (!had_err && gt_str_read_next_line(current_line, prj_fp) != EOF) {
                /* the array returned by gt_cstr_split() is treated as
                   NULL-terminated (TODO confirm against its documentation);
                   a line may yield fewer or more than two fields */
                char **elements = gt_cstr_split(gt_str_get(current_line), '='),
                     **field;

                if (elements != NULL) {
                    if (elements[0] != NULL) {
                        gt_log_log("%s", elements[0]);
                        /* only look at the value if the line actually has one */
                        if (elements[1] != NULL &&
                                strcmp("mirrored", elements[0]) == 0) {
                            gt_log_log("%s", elements[1]);
                            if (strcmp("1", elements[1]) == 0) {
                                mirrored = true;
                                gt_log_log("sequences are treated as mirrored");
                            }
                        }
                    }
                    /* release every field, not just the first two */
                    for (field = elements; *field != NULL; field++)
                        gt_free(*field);
                    gt_free(elements);
                }
                gt_str_reset(current_line);
            }
            gt_str_delete(current_line);
            if (prj_fp != NULL)
                gt_fa_xfclose(prj_fp);
        }
    }

    if (!had_err) {
        GtEncseqLoader *el = gt_encseq_loader_new_from_options(arguments->loadopts,
                             err);
        if (mirrored)
            gt_encseq_loader_mirror(el);
        encseq =
            gt_encseq_loader_load(el, gt_str_get(arguments->indexname), err);
        gt_encseq_loader_delete(el);
    }
    /* also true if an earlier step failed and nothing was loaded */
    if (encseq == NULL)
        had_err = -1;
    if (!had_err) {
        unit_info = gt_shu_unit_info_new(encseq);
        if (arguments->with_units)
            had_err = gt_shu_unit_file_info_read(arguments->unitfile, unit_info,
                                                 logger, err);
    }

    if (!had_err) {
        uint64_t **shusums = NULL;
        if (arguments->with_esa || arguments->with_pck) {
            shusums = gt_genomediff_shulen_sum(arguments, unit_info,
                                               logger, timer, err);
            if (shusums == NULL)
                had_err = -1;
        }
        else {
            const bool doesa = true;
            GenomediffInfo gd_info;
            Suffixeratoroptions sopts;
            sopts.beverbose = arguments->verbose;
            sopts.indexname = arguments->indexname;
            sopts.db = NULL;
            sopts.encopts = NULL;
            sopts.genomediff = true;
            sopts.inputindex = arguments->indexname;
            sopts.loadopts = arguments->loadopts;
            sopts.showprogress = false;
            sopts.idxopts = arguments->idxopts;

            gt_assert(unit_info != NULL);
            gt_array2dim_calloc(shusums, unit_info->num_of_genomes,
                                unit_info->num_of_genomes);
            gd_info.shulensums = shusums;
            gd_info.unit_info = unit_info;
            had_err = gt_runsuffixerator(doesa, &sopts, &gd_info, logger, err);
        }
        if (shusums != NULL) {
            if (!had_err) {
                had_err = gt_genomediff_kr_calc(shusums, arguments, unit_info,
                                                arguments->with_pck, logger,
                                                timer, err);
            }
            /* free the matrix even if the suffixerator run failed */
            gt_array2dim_delete(shusums);
        }
    }

    if (timer != NULL) {
        gt_timer_show_progress_final(timer, stdout);
        gt_timer_delete(timer);
    }
    gt_logger_delete(logger);
    gt_encseq_delete(encseq);
    gt_shu_unit_info_delete(unit_info);

    return had_err;
}