/* Encodes the sequence files listed in <infiles> into an encoded sequence
   stored under <indexname>, using an encoder configured from <opts>.
   A logger is created with gt_logger_new(verbose, "# ", stderr) and attached
   to the encoder. If <esq_no_header> is set, the esq header is disabled on
   the encoder before encoding.
   Returns -1 if the encoder could not be created from <opts>; otherwise
   returns whatever gt_encseq_encoder_encode() returns. <err> is passed on to
   both calls. */
static int encode_sequence_files(GtStrArray *infiles, GtEncseqOptions *opts,
                                 const char *indexname, bool verbose,
                                 bool esq_no_header, GtError *err)
{
  GtEncseqEncoder *encoder;
  GtLogger *log;
  int rval = -1; /* stays -1 unless the encoder could be created */

  gt_error_check(err);
  gt_assert(infiles && gt_str_array_size(infiles) > 0 && opts);

  log = gt_logger_new(verbose, "# ", stderr);
  encoder = gt_encseq_encoder_new_from_options(opts, err);

  if (encoder != NULL) {
    gt_encseq_encoder_set_logger(encoder, log);
    if (esq_no_header)
      gt_encseq_encoder_disable_esq_header(encoder);
    rval = gt_encseq_encoder_encode(encoder, infiles, indexname, err);
  }

  /* the encoder is handed to the delete function even if creation failed,
     exactly as the logger is always released */
  gt_encseq_encoder_delete(encoder);
  gt_logger_delete(log);
  return rval;
}
/* Lua-facing destructor: fetches the encoder handle stored at stack
   position 1 via check_encseq_encoder() and deletes the encoder it points
   to. Always returns 0.
   NOTE(review): presumably registered as the finalizer of the encoder
   userdata -- confirm at the registration site. */
static int encseq_encoder_lua_delete(lua_State *L)
{
  GtEncseqEncoder **handle = check_encseq_encoder(L, 1);

  gt_encseq_encoder_delete(*handle);
  return 0;
}
/* Builds the encoded sequence files for <bs> under the index name
   <bioseq_indexname>.
   If <bs> reads from a regular file, a signal handler is registered for the
   duration of the encoding so that incomplete files can be removed upon
   termination. If <bs> reads from stdin, the input is first copied into a
   temporary file, which is then encoded.
   The encoder is created with description, MD5, multiseq and lossless
   support enabled.
   Returns the result of gt_encseq_encoder_encode(), or -1 with <err> set if
   copying stdin to the temporary file failed. */
static int construct_bioseq_files(GtBioseq *bs, GtStr *bioseq_indexname,
                                  GtError *err)
{
  GtStr *sequence_filename;
  int had_err = 0;
  gt_error_check(err);

  if (!bs->use_stdin) {
    /* register the signal handler to remove incomplete files upon
       termination */
    gt_bioseq_index_filename = gt_str_get(bs->sequence_file);
    gt_sig_register_all(remove_bioseq_files);
    sequence_filename = gt_str_ref(bs->sequence_file);
  }
  else {
    /* if stdin is used as input, we need to create a tempfile containing the
       sequence as GtEncseq cannot be built from stdin directly */
    char buf[BUFSIZ];
    size_t nread;
    FILE *tmpfile;

    sequence_filename = gt_str_new();
    tmpfile = gt_xtmpfp(sequence_filename);
    gt_assert(tmpfile);
    while ((nread = fread(buf, 1, sizeof buf, stdin)) > 0) {
      /* a short write would silently truncate the sequence data */
      if (fwrite(buf, 1, nread, tmpfile) != nread) {
        gt_error_set(err, "cannot write sequence data read from stdin to "
                          "temporary file \"%s\"",
                     gt_str_get(sequence_filename));
        had_err = -1;
        break;
      }
    }
    /* fread() returns 0 on both EOF and error; tell them apart */
    if (!had_err && ferror(stdin)) {
      gt_error_set(err, "cannot read sequence data from stdin");
      had_err = -1;
    }
    gt_fa_xfclose(tmpfile);
  }

  if (!had_err) {
    GtEncseqEncoder *ee;
    GtStrArray *indexfn;

    gt_assert(gt_str_length(sequence_filename) > 0);
    ee = gt_encseq_encoder_new();
    gt_encseq_encoder_enable_description_support(ee);
    gt_encseq_encoder_enable_md5_support(ee);
    gt_encseq_encoder_enable_multiseq_support(ee);
    gt_encseq_encoder_enable_lossless_support(ee);
    indexfn = gt_str_array_new();
    gt_str_array_add(indexfn, sequence_filename);
    had_err = gt_encseq_encoder_encode(ee, indexfn,
                                       gt_str_get(bioseq_indexname), err);
    /* unregister the signal handler */
    if (!bs->use_stdin)
      gt_sig_unregister_all();
    gt_str_array_delete(indexfn);
    gt_encseq_encoder_delete(ee);
  }

  /* drop our reference (file case) or the temporary name (stdin case) */
  gt_str_delete(sequence_filename);
  return had_err;
}
/* Runner of the genomediff tool.
   The arguments argv[parsed_args..argc-1] are appended to
   arguments->filenames. With more than one filename, the files are encoded
   as DNA into an encoded sequence named arguments->indexname; otherwise the
   single filename is appended to arguments->indexname and, if an ESA or
   packed index is requested, the project file is scanned for a
   "mirrored=1" entry.
   The encoded sequence is then loaded, the unit information is set up
   (optionally read from arguments->unitfile), the shulen sums are computed
   either from the existing index or by running the suffixerator, and
   finally gt_genomediff_kr_calc() is called on the sums.
   Returns 0 on success and a non-zero value on failure; <err> is passed to
   every callee that can fail. */
static int gt_genomediff_runner(int argc, const char **argv, int parsed_args,
                                void *tool_arguments, GtError *err)
{
  bool mirrored = false;
  int had_err = 0, i;
  GtEncseq *encseq = NULL;
  GtGenomediffArguments *arguments = tool_arguments;
  GtLogger *logger;
  GtShuUnitFileInfo *unit_info = NULL;
  GtTimer *timer = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stdout);
  gt_assert(logger);

  for (i = parsed_args; i < argc; i++) {
    gt_str_array_add_cstr(arguments->filenames, argv[i]);
  }

  if (gt_showtime_enabled()) {
    timer = gt_timer_new_with_progress_description("start");
    /* check the timer before it is used */
    gt_assert(timer);
    gt_timer_start(timer);
  }

  if (arguments->with_units) {
    gt_logger_log(logger, "unitfile option set, filename is %s\n",
                  gt_str_get(arguments->unitfile));
  }

  if (timer != NULL)
    gt_timer_show_progress(timer, "start shu search", stdout);

  if (gt_str_array_size(arguments->filenames) > 1UL) {
    GtEncseqEncoder *ee = gt_encseq_encoder_new();
    gt_encseq_encoder_set_timer(ee, timer);
    gt_encseq_encoder_set_logger(ee, logger);
    /* kr only makes sense for dna, so we can check this already with ee */
    gt_encseq_encoder_set_input_dna(ee);
    had_err = gt_encseq_encoder_encode(ee, arguments->filenames,
                                       gt_str_get(arguments->indexname), err);
    gt_encseq_encoder_delete(ee);
  }
  else {
    gt_str_append_str(arguments->indexname,
                      gt_str_array_get_str(arguments->filenames, 0));
    if (arguments->with_esa || arguments->with_pck) {
      GtStr *current_line = gt_str_new();
      FILE *prj_fp;
      const char *buffer;
      char **elements = NULL;

      prj_fp = gt_fa_fopen_with_suffix(gt_str_get(arguments->indexname),
                                       GT_PROJECTFILESUFFIX, "rb", err);
      if (prj_fp == NULL)
        had_err = -1;
      while (!had_err &&
             gt_str_read_next_line(current_line, prj_fp) != EOF) {
        const char *key, *value;

        buffer = gt_str_get(current_line);
        /* release the fields of the previous line */
        if (elements != NULL) {
          gt_free(elements[0]);
          gt_free(elements[1]);
        }
        gt_free(elements);
        elements = gt_cstr_split(buffer, '=');
        /* NOTE(review): assumes the split result always has at least two
           slots with unused ones set to NULL, as the two-slot cleanup here
           already relies on -- confirm against gt_cstr_split(). The NULL
           checks keep lines without a key or without '=' from being passed
           to strcmp()/"%s". */
        key = elements[0];
        value = elements[1];
        if (key != NULL) {
          gt_log_log("%s", key);
          if (value != NULL && strcmp("mirrored", key) == 0) {
            gt_log_log("%s", value);
            if (strcmp("1", value) == 0) {
              mirrored = true;
              gt_log_log("sequences are treated as mirrored");
            }
          }
        }
        gt_str_reset(current_line);
      }
      gt_str_delete(current_line);
      if (elements != NULL) {
        gt_free(elements[0]);
        gt_free(elements[1]);
      }
      gt_free(elements);
      gt_fa_xfclose(prj_fp);
    }
  }

  if (!had_err) {
    GtEncseqLoader *el = gt_encseq_loader_new_from_options(arguments->loadopts,
                                                           err);
    if (mirrored)
      gt_encseq_loader_mirror(el);
    encseq = gt_encseq_loader_load(el, gt_str_get(arguments->indexname), err);
    gt_encseq_loader_delete(el);
  }
  /* covers both a failed load and a load skipped due to an earlier error */
  if (encseq == NULL)
    had_err = -1;

  if (!had_err) {
    unit_info = gt_shu_unit_info_new(encseq);
    if (arguments->with_units)
      had_err = gt_shu_unit_file_info_read(arguments->unitfile, unit_info,
                                           logger, err);
  }

  if (!had_err) {
    uint64_t **shusums = NULL;

    if (arguments->with_esa || arguments->with_pck) {
      shusums = gt_genomediff_shulen_sum(arguments, unit_info, logger, timer,
                                         err);
      if (shusums == NULL)
        had_err = -1;
    }
    else {
      const bool doesa = true;
      GenomediffInfo gd_info;
      Suffixeratoroptions sopts;

      sopts.beverbose = arguments->verbose;
      sopts.indexname = arguments->indexname;
      sopts.db = NULL;
      sopts.encopts = NULL;
      sopts.genomediff = true;
      sopts.inputindex = arguments->indexname;
      sopts.loadopts = arguments->loadopts;
      sopts.showprogress = false;
      sopts.idxopts = arguments->idxopts;

      gt_assert(unit_info != NULL);
      gt_array2dim_calloc(shusums, unit_info->num_of_genomes,
                          unit_info->num_of_genomes);
      gd_info.shulensums = shusums;
      gd_info.unit_info = unit_info;
      had_err = gt_runsuffixerator(doesa, &sopts, &gd_info, logger, err);
    }

    if (!had_err && shusums != NULL) {
      had_err = gt_genomediff_kr_calc(shusums, arguments, unit_info,
                                      arguments->with_pck, logger, timer, err);
    }
    /* free the matrix on the error path as well; it would otherwise leak
       when gt_runsuffixerator() fails after the allocation above */
    if (shusums != NULL) {
      gt_array2dim_delete(shusums);
    }
  }

  if (timer != NULL) {
    gt_timer_show_progress_final(timer, stdout);
    gt_timer_delete(timer);
  }
  gt_logger_delete(logger);
  gt_encseq_delete(encseq);
  gt_shu_unit_info_delete(unit_info);

  return had_err;
}