/* Creates a decoder for the HCR encoding stored under <name>.
   If <descs> is true the encoded descriptions are loaded with
   gt_encdesc_load(), otherwise the decoder carries no description decoder.
   If <timer> is not NULL a progress message is written to stdout.
   Returns the new decoder, or NULL if <err> was set while loading. */
GtHcrDecoder *gt_hcr_decoder_new(const char *name, GtAlphabet *alpha,
                                 bool descs, GtTimer *timer, GtError *err)
{
  GtHcrDecoder *hcr_dec;
  int had_err = 0;

  gt_error_check(err);
  if (timer != NULL)
    gt_timer_show_progress(timer, "initialize hcr decoder", stdout);

  hcr_dec = gt_malloc(sizeof (*hcr_dec));
  /* Both members get a defined value before any error path can hand the
     object to gt_hcr_decoder_delete(); previously seq_dec was still
     uninitialized when loading the descriptions failed.
     NOTE(review): assumes gt_hcr_decoder_delete() tolerates a NULL seq_dec
     -- confirm. */
  hcr_dec->encdesc = NULL;
  hcr_dec->seq_dec = NULL;

  if (descs) {
    hcr_dec->encdesc = gt_encdesc_load(name, err);
    if (gt_error_is_set(err))
      had_err = -1;
  }

  if (!had_err) {
    hcr_dec->seq_dec = hcr_seq_decoder_new(alpha, name, err);
    if (!gt_error_is_set(err))
      return hcr_dec;
  }

  gt_hcr_decoder_delete(hcr_dec);
  return NULL;
}
static int gt_readjoiner_assembly_build_graph( GtReadjoinerAssemblyArguments *arguments, GtStrgraph **strgraph, GtEncseq *reads, const char *readset, bool eqlen, GtUword rlen, GtUword nreads, GtBitsequence *contained, GtLogger *default_logger, GtLogger *verbose_logger, GtTimer *timer, GtError *err) { int had_err = 0; *strgraph = gt_strgraph_new(nreads); if (arguments->minmatchlength > 0) gt_logger_log(verbose_logger, "SPM length cutoff = %u", arguments->minmatchlength); had_err = gt_readjoiner_assembly_count_spm(readset, eqlen, arguments->minmatchlength, arguments->nspmfiles, *strgraph, contained, default_logger, err); gt_readjoiner_assembly_show_current_space("(edges counted)"); if (gt_showtime_enabled()) gt_timer_show_progress(timer, GT_READJOINER_ASSEMBLY_MSG_BUILDSG, stdout); gt_logger_log(default_logger, GT_READJOINER_ASSEMBLY_MSG_BUILDSG); if (had_err == 0) { gt_assert((eqlen && rlen > 0 && reads == NULL) || (!eqlen && rlen == 0 && reads != NULL)); gt_strgraph_allocate_graph(*strgraph, rlen, reads); gt_readjoiner_assembly_show_current_space("(graph allocated)"); had_err = gt_strgraph_load_spm_from_file(*strgraph, (GtUword)arguments->minmatchlength, arguments->redtrans, contained, readset, arguments->nspmfiles, GT_READJOINER_SUFFIX_SPMLIST, err); } return had_err; }
int gt_hcr_decoder_decode_range(GtHcrDecoder *hcr_dec, const char *name, GtUword start, GtUword end, GtTimer *timer, GtError *err) { char qual[BUFSIZ] = {0}, seq[BUFSIZ] = {0}; GtStr *desc = gt_str_new(); int had_err = 0; GtUword cur_width, cur_read; size_t i; FILE *output; GT_UNUSED GtHcrSeqDecoder *seq_dec; gt_error_check(err); gt_assert(hcr_dec && name); seq_dec = hcr_dec->seq_dec; gt_assert(start <= end); gt_assert(start < seq_dec->num_of_reads && end < seq_dec->num_of_reads); if (timer != NULL) gt_timer_show_progress(timer, "decode hcr", stdout); output = gt_fa_fopen_with_suffix(name, HCRFILEDECODEDSUFFIX, "w", err); if (output == NULL) had_err = -1; for (cur_read = start; had_err == 0 && cur_read <= end; cur_read++) { if (gt_hcr_decoder_decode(hcr_dec, cur_read, seq, qual, desc, err) != 0) had_err = -1; else { gt_xfputc(HCR_DESCSEPSEQ, output); if (hcr_dec->encdesc != NULL) gt_xfputs(gt_str_get(desc), output); else fprintf(output, ""GT_WU"", cur_read); gt_xfputc('\n', output); for (i = 0, cur_width = 0; i < strlen(seq); i++, cur_width++) { if (cur_width == HCR_LINEWIDTH) { cur_width = 0; gt_xfputc('\n', output); } gt_xfputc(seq[i], output); } gt_xfputc('\n', output); gt_xfputc(HCR_DESCSEPQUAL, output); gt_xfputc('\n', output); for (i = 0, cur_width = 0; i < strlen(qual); i++, cur_width++) { if (cur_width == HCR_LINEWIDTH) { cur_width = 0; gt_xfputc('\n', output); } gt_xfputc(qual[i], output); } gt_xfputc('\n', output); } } gt_fa_xfclose(output); gt_str_delete(desc); return had_err; }
/* Tool runner: builds an HCR encoder for arguments->files using either the
   alphabet from the symbol map file arguments->smap or the DNA alphabet,
   configures the sampling and writes the encoding to arguments->name.
   Returns 0 on success and 1 on any failure. */
static int gt_compreads_compress_runner(GT_UNUSED int argc,
                                        GT_UNUSED const char **argv,
                                        GT_UNUSED int parsed_args,
                                        void *tool_arguments, GtError *err)
{
  GtCsrHcrEncodeArguments *arguments = tool_arguments;
  GtAlphabet *alpha;
  GtHcrEncoder *hcre = NULL;
  GtTimer *timer = NULL;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  if (gt_showtime_enabled())
  {
    timer = gt_timer_new_with_progress_description("start");
    gt_timer_start(timer);
    gt_assert(timer);
  }

  /* a symbol map file takes precedence over the default DNA alphabet */
  if (gt_str_length(arguments->smap) > 0)
  {
    alpha = gt_alphabet_new_from_file_no_suffix(gt_str_get(arguments->smap),
                                                err);
  }
  else
  {
    alpha = gt_alphabet_new_dna();
  }
  if (alpha == NULL)
  {
    had_err = 1;
  }

  if (had_err == 0)
  {
    if (timer != NULL)
    {
      gt_timer_show_progress(timer, "encoding", stdout);
    }
    hcre = gt_hcr_encoder_new(arguments->files, alpha, arguments->descs,
                              arguments->qrng, timer, err);
    if (hcre == NULL)
    {
      had_err = 1;
    }
    else
    {
      if (arguments->pagewise)
      {
        gt_hcr_encoder_set_sampling_page(hcre);
      }
      else if (arguments->regular)
      {
        gt_hcr_encoder_set_sampling_regular(hcre);
      }
      gt_hcr_encoder_set_sampling_rate(hcre, arguments->srate);

      if (gt_hcr_encoder_encode(hcre, gt_str_get(arguments->name), timer,
                                err) != 0)
      {
        had_err = 1;
      }
    }
    gt_hcr_encoder_delete(hcre);
  }

  gt_alphabet_delete(alpha);
  if (timer != NULL)
  {
    gt_timer_show_progress_final(timer, stdout);
    gt_timer_delete(timer);
  }
  return had_err;
}
/* Turns the counts stored in <table> into partial sums: the indices of all
   slots with a positive count are sorted with
   compareGtUint64hashstoredvalue, table->zero_count is added to the first
   sorted slot if table->zero_occurs is set, and every count is then
   replaced by the running sum over the sorted order.
   If <timer> is not NULL progress messages are written to stdout.
   Returns the final partial sum. The table must contain at least one slot
   with a positive count (asserted). */
unsigned long gt_uint64hashtable_partialsums(GtUint64hashtable *table,
                                             GtTimer *timer)
{
  size_t idx, next = 0;
  unsigned long psum;

  table->sortedhspace = gt_malloc((size_t) table->allentries *
                                  sizeof (*table->sortedhspace));
  if (timer != NULL)
  {
    gt_timer_show_progress(timer, "sorting the hashkeys", stdout);
  }
  /* collect the indices of all occupied slots */
  for (idx = 0; idx < table->alloc; idx++)
  {
    if (table->hspace[idx].count > 0)
    {
      gt_assert(next < (size_t) table->allentries);
      table->sortedhspace[next++] = idx;
    }
  }
  gt_qsort_r(table->sortedhspace, next, sizeof (*table->sortedhspace),
             table->hspace, compareGtUint64hashstoredvalue);
  gt_assert(next > 0);
  if (table->zero_occurs)
  {
    table->hspace[table->sortedhspace[0]].count += table->zero_count;
  }
  if (timer != NULL)
  {
    gt_timer_show_progress(timer, "computing partial sums", stdout);
  }
  for (idx = (size_t) 1; idx < next; idx++)
  {
    table->hspace[table->sortedhspace[idx]].count +=
      table->hspace[table->sortedhspace[idx - 1]].count;
  }
  psum = table->hspace[table->sortedhspace[next - 1]].count;

  gt_free(table->sortedhspace);
  /* do not leave a dangling pointer behind in the table */
  table->sortedhspace = NULL;
  return psum;
}
/* Writes the sequence/quality encoding of <hcr_enc> to the file
   <name> + HCRFILESUFFIX: file info, the distribution table, a placeholder
   word, and then the encoded reads. The placeholder is finally overwritten
   with seq_encoder->startofsamplingtab. The start of the encoded data is
   rounded up to the next multiple of hcr_enc->pagesize.
   If <timer> is not NULL a progress message is written to stdout.
   Returns 0 on success, -1 if the file cannot be opened, otherwise the
   error code of the failing write step. */
static int hcr_write_seq_qual_data(const char *name, GtHcrEncoder *hcr_enc,
                                   GtTimer *timer, GtError *err)
{
  int had_err;
  FILE *fp;
  GtUword dummy = 0;
  GtWord pos;

  gt_error_check(err);

  fp = gt_fa_fopen_with_suffix(name, HCRFILESUFFIX, "wb", err);
  if (fp == NULL)
    return -1;

  if (timer != NULL)
    gt_timer_show_progress(timer, "write sequences and qualities encoding",
                           stdout);

  hcr_write_file_info(fp, hcr_enc);
  had_err = hcr_write_seqdistrtab(fp, hcr_enc);

  if (!had_err) {
    long after_header;

    /* remember where the placeholder lives so it can be patched later */
    pos = ftell(fp);
    gt_xfwrite_one(&dummy, fp);

    after_header = ftell(fp);
    if ((after_header % hcr_enc->pagesize) != 0)
      hcr_enc->seq_encoder->start_of_encoding =
        (after_header / hcr_enc->pagesize + 1) * hcr_enc->pagesize;
    else
      hcr_enc->seq_encoder->start_of_encoding = after_header;

    if (hcr_enc->page_sampling)
      hcr_enc->seq_encoder->sampling =
        gt_sampling_new_page(hcr_enc->sampling_rate,
                             (off_t) hcr_enc->seq_encoder->start_of_encoding);
    else if (hcr_enc->regular_sampling)
      hcr_enc->seq_encoder->sampling =
        gt_sampling_new_regular(hcr_enc->sampling_rate,
                                (off_t)
                                hcr_enc->seq_encoder->start_of_encoding);

    had_err = hcr_write_seqs(fp, hcr_enc, err);
  }

  if (!had_err) {
    gt_xfseek(fp, pos, SEEK_SET);
    gt_xfwrite_one(&hcr_enc->seq_encoder->startofsamplingtab, fp);
  }
  gt_fa_xfclose(fp);

  /* report failures to the caller; this used to return 0 unconditionally */
  return had_err;
}
/* Loads the mirrored encseq of <readset> (without description support and
   autosupport), pumps it through the cache and converts the contig paths
   into FASTA via gt_contigpaths_to_fasta().
   If showtime is enabled <timer> must not be NULL; a timer is created in
   <*timer> if none exists yet, otherwise the existing one is used.
   Returns -1 if the encseq cannot be loaded, otherwise the result of
   gt_contigpaths_to_fasta(). */
static int gt_readjoiner_assembly_paths2seq(const char *readset,
    GtUword lengthcutoff, bool showpaths, bool astat, double coverage,
    bool load_copynum, GtUword buffersize, GtLogger *default_logger,
    GtTimer **timer, GtError *err)
{
  int had_err = 0;
  GtEncseqLoader *el = gt_encseq_loader_new();
  GtEncseq *reads;

  if (gt_showtime_enabled())
  {
    gt_assert(timer != NULL);
    if (*timer == NULL) /* paths2seq */
    {
      *timer = gt_timer_new_with_progress_description(
          GT_READJOINER_ASSEMBLY_MSG_PUMPENCSEQ);
      gt_timer_show_cpu_time_by_progress(*timer);
      gt_timer_start(*timer);
    }
    else
      gt_timer_show_progress(*timer, GT_READJOINER_ASSEMBLY_MSG_PUMPENCSEQ,
          stdout);
  }
  gt_logger_log(default_logger, GT_READJOINER_ASSEMBLY_MSG_PUMPENCSEQ);

  gt_encseq_loader_drop_description_support(el);
  gt_encseq_loader_disable_autosupport(el);
  gt_encseq_loader_mirror(el);
  reads = gt_encseq_loader_load(el, readset, err);

  if (reads == NULL)
  {
    /* a failed load is a runtime error, not a programming error: report it
       instead of only asserting */
    had_err = -1;
  }
  else
  {
    gt_readjoiner_assembly_pump_encseq_through_cache(reads);
    if (gt_showtime_enabled())
      gt_timer_show_progress(*timer, GT_READJOINER_ASSEMBLY_MSG_OUTPUTCONTIGS,
          stdout);
    gt_logger_log(default_logger, GT_READJOINER_ASSEMBLY_MSG_OUTPUTCONTIGS);
    had_err = gt_contigpaths_to_fasta(readset,
        GT_READJOINER_SUFFIX_CONTIG_PATHS, GT_READJOINER_SUFFIX_CONTIGS,
        reads, lengthcutoff, showpaths, astat, coverage, load_copynum,
        (size_t) buffersize, default_logger, err);
  }

  gt_encseq_delete(reads);
  gt_encseq_loader_delete(el);
  return had_err;
}
/* Stores in <*strgraph> the string graph created by
   gt_strgraph_new_from_file() for <readset> with suffix
   GT_READJOINER_SUFFIX_SG, then reports the step through <timer> (only if
   showtime is enabled) and <default_logger> and shows the current space. */
static void gt_readjoiner_assembly_load_graph(GtStrgraph **strgraph,
    GtEncseq *reads, const char *readset, GtUword rlen,
    GtLogger *default_logger, GtTimer *timer)
{
  GtStrgraph *loaded_graph;

  loaded_graph = gt_strgraph_new_from_file(reads, rlen, readset,
                                           GT_READJOINER_SUFFIX_SG);
  *strgraph = loaded_graph;

  if (gt_showtime_enabled())
  {
    gt_timer_show_progress(timer, GT_READJOINER_ASSEMBLY_MSG_LOADSG, stdout);
  }
  gt_logger_log(default_logger, GT_READJOINER_ASSEMBLY_MSG_LOADSG);
  gt_readjoiner_assembly_show_current_space("(graph loaded)");
}
/* Writes the encoding held by <hcr_enc> under the base name <name>: first
   the descriptions (only if a description encoder is present), then the
   sequences and qualities. If logging is enabled an overview of the
   encoding is logged afterwards.
   If <timer> is not NULL a progress message is written to stdout.
   Returns 0 on success, otherwise the error code of the failing step. */
int gt_hcr_encoder_encode(GtHcrEncoder *hcr_enc, const char *name,
                          GtTimer *timer, GtError *err)
{
  int had_err = 0;
  GtStr *encoded_file;

  gt_error_check(err);
  if (timer != NULL)
  {
    gt_timer_show_progress(timer, "write encoding", stdout);
  }

  if (hcr_enc->encdesc_encoder != NULL)
  {
    GtCstrIterator *header_iter =
      gt_fasta_header_iterator_new(hcr_enc->files, err);

    had_err = gt_encdesc_encoder_encode(hcr_enc->encdesc_encoder,
                                        header_iter, name, err);
    gt_cstr_iterator_delete(header_iter);
  }
  if (had_err)
  {
    return had_err;
  }

  had_err = hcr_write_seq_qual_data(name, hcr_enc, timer, err);
  if (had_err || !gt_log_enabled())
  {
    return had_err;
  }

  /* everything below only produces log output */
  encoded_file = gt_str_new_cstr(name);
  gt_str_append_cstr(encoded_file, HCRFILESUFFIX);

  gt_log_log("sequences with qualities encoding overview:");
  gt_log_log("**>");
  if (hcr_enc->page_sampling)
  {
    gt_log_log("applied sampling technique: sampling every " GT_WU
               "th page", hcr_enc->sampling_rate);
  }
  else if (hcr_enc->regular_sampling)
  {
    gt_log_log("applied sampling technique: sampling every " GT_WU
               "th read", hcr_enc->sampling_rate);
  }
  else
  {
    gt_log_log("applied sampling technique: none");
  }
  gt_log_log("total number of encoded nucleotide sequences with qualities: "
             GT_WU, hcr_enc->num_of_reads);
  gt_log_log("total number of encoded nucleotides: " GT_LLU,
             hcr_enc->seq_encoder->total_num_of_symbols);
  gt_log_log("bits per nucleotide encoding: %f",
             (gt_file_estimate_size(gt_str_get(encoded_file)) * 8.0) /
             hcr_enc->seq_encoder->total_num_of_symbols);
  gt_log_log("<**");

  gt_str_delete(encoded_file);
  return had_err;
}
/* Decodes <amount> randomly chosen reads with <hcrd> and logs each read's
   description, sequence and qualities. If <timer> is NULL a private timer
   is created and released again; otherwise the caller's timer is used for
   a progress message and left untouched.
   Returns 0 on success, otherwise the error code of the first failing
   gt_hcr_decoder_decode() call (decoding stops there). */
static int gt_compreads_decompress_benchmark(GtHcrDecoder *hcrd,
                                             unsigned long amount,
                                             GtTimer *timer, GtError *err)
{
  char qual[BUFSIZ] = {0},
       seq[BUFSIZ] = {0};
  int had_err = 0;
  bool own_timer = false;
  unsigned long read_idx, /* was named rand, shadowing the stdlib function */
                count;
  const unsigned long max_rand = gt_hcr_decoder_num_of_reads(hcrd) - 1;
  GtStr *timer_comment = gt_str_new_cstr("extracting ");
  GtStr *desc = gt_str_new();

  gt_str_append_ulong(timer_comment, amount);
  gt_str_append_cstr(timer_comment, " reads of ");
  gt_str_append_ulong(timer_comment, max_rand + 1);
  gt_str_append_cstr(timer_comment, "!");

  if (timer == NULL) {
    timer = gt_timer_new_with_progress_description("extract random reads");
    gt_timer_start(timer);
    own_timer = true;
  }
  else {
    gt_timer_show_progress(timer, "extract random reads", stdout);
  }

  gt_log_log("%s", gt_str_get(timer_comment));
  for (count = 0; !had_err && count < amount; count++) {
    read_idx = gt_rand_max(max_rand);
    gt_log_log("get read: %lu", read_idx);
    had_err = gt_hcr_decoder_decode(hcrd, read_idx, seq, qual, desc, err);
    gt_log_log("%s", gt_str_get(desc));
    gt_log_log("%s", seq);
    gt_log_log("%s", qual);
  }

  gt_str_delete(timer_comment);
  gt_str_delete(desc);
  /* release only a timer created here; ownership no longer depends on the
     global showtime setting */
  if (own_timer)
    gt_timer_delete(timer);
  return had_err;
}
GtHcrEncoder *gt_hcr_encoder_new(GtStrArray *files, GtAlphabet *alpha, bool descs, GtQualRange qrange, GtTimer *timer, GtError *err) { GtBaseQualDistr *bqd; GtHcrEncoder *hcr_enc; GtSeqIterator *seqit; GtStrArray *file; int had_err = 0, status; GtUword len1, len2, i, num_of_reads = 0; const GtUchar *seq, *qual; char *desc; gt_error_check(err); gt_assert(alpha && files); if (timer != NULL) gt_timer_show_progress(timer, "get <base,qual> distr", stdout); if (qrange.start != GT_UNDEF_UINT) if (qrange.start == qrange.end) { gt_error_set(err, "qrange.start must unequal qrange.end"); return NULL; } hcr_enc = gt_malloc(sizeof (GtHcrEncoder)); hcr_enc->files = files; hcr_enc->num_of_files = gt_str_array_size(files); hcr_enc->num_of_reads = 0; hcr_enc->page_sampling = false; hcr_enc->regular_sampling = false; hcr_enc->sampling_rate = 0; hcr_enc->pagesize = gt_pagesize(); if (descs) { hcr_enc->encdesc_encoder = gt_encdesc_encoder_new(); if (timer != NULL) gt_encdesc_encoder_set_timer(hcr_enc->encdesc_encoder, timer); } else hcr_enc->encdesc_encoder = NULL; hcr_enc->seq_encoder = gt_malloc(sizeof (GtHcrSeqEncoder)); hcr_enc->seq_encoder->alpha = alpha; hcr_enc->seq_encoder->sampling = NULL; hcr_enc->seq_encoder->fileinfos = gt_calloc((size_t) hcr_enc->num_of_files, sizeof (*(hcr_enc->seq_encoder->fileinfos))); hcr_enc->seq_encoder->qrange = qrange; bqd = hcr_base_qual_distr_new(alpha, qrange); /* check if reads in the same file are of same length and get <base, quality> pair distribution */ for (i = 0; i < hcr_enc->num_of_files; i++) { file = gt_str_array_new(); gt_str_array_add(file, gt_str_array_get_str(files, i)); seqit = gt_seq_iterator_fastq_new(file, err); if (!seqit) { gt_error_set(err, "cannot initialize GtSeqIteratorFastQ object"); had_err = -1; } if (!had_err) { gt_seq_iterator_set_symbolmap(seqit, gt_alphabet_symbolmap(alpha)); gt_seq_iterator_set_quality_buffer(seqit, &qual); status = gt_seq_iterator_next(seqit, &seq, &len1, &desc, err); if (status == 1) { 
num_of_reads = 1UL; while (!had_err) { status = gt_seq_iterator_next(seqit, &seq, &len2, &desc, err); if (status == -1) had_err = -1; if (status != 1) break; if (len2 != len1) { gt_error_set(err, "reads have to be of equal length"); had_err = -1; break; } if (hcr_base_qual_distr_add(bqd, qual, seq, len1) != 0) had_err = -1; len1 = len2; num_of_reads++; } } else if (status == -1) had_err = -1; if (!had_err) { if (i == 0) hcr_enc->seq_encoder->fileinfos[i].readnum = num_of_reads; else hcr_enc->seq_encoder->fileinfos[i].readnum = hcr_enc->seq_encoder->fileinfos[i - 1].readnum + num_of_reads; hcr_enc->seq_encoder->fileinfos[i].readlength = len1; } } hcr_enc->num_of_reads += num_of_reads; gt_str_array_delete(file); gt_seq_iterator_delete(seqit); } if (!had_err) hcr_base_qual_distr_trim(bqd); if (!had_err) { if (timer != NULL) gt_timer_show_progress(timer, "build huffman tree for sequences and" " qualities", stdout); hcr_enc->seq_encoder->huffman = gt_huffman_new(bqd, hcr_base_qual_distr_func, (GtUword) bqd->ncols * bqd->nrows); } if (!had_err) { hcr_enc->seq_encoder->qual_offset = bqd->qual_offset; hcr_base_qual_distr_delete(bqd); return hcr_enc; } return NULL; }
/* Frees the result of gt_cstr_split() the way this tool uses it: the first
   two strings and the array itself. The second slot is only touched if the
   first one is in use. */
static void gt_genomediff_free_key_value(char **elements)
{
  if (elements != NULL) {
    if (elements[0] != NULL)
      gt_free(elements[1]);
    gt_free(elements[0]);
    gt_free(elements);
  }
}

/* Tool runner for genomediff. The remaining command line arguments are
   added to arguments->filenames. With more than one file an encseq is
   encoded first; with a single file it is used as the index name and, for
   the esa/pck modes, its project file is scanned for a "mirrored=1" line.
   The encseq is then loaded, the unit information is set up, the shulen
   sums are computed (from the index, or by running suffixerator) and
   passed to gt_genomediff_kr_calc().
   Returns 0 on success and a non-zero value on failure. */
static int gt_genomediff_runner(int argc, const char **argv,
                                int parsed_args, void *tool_arguments,
                                GtError *err)
{
  bool mirrored = false;
  int had_err = 0,
      i;
  GtEncseq *encseq = NULL;
  GtGenomediffArguments *arguments = tool_arguments;
  GtLogger *logger;
  GtShuUnitFileInfo *unit_info = NULL;
  GtTimer *timer = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stdout);
  gt_assert(logger);

  for (i = parsed_args; i < argc; i++) {
    gt_str_array_add_cstr(arguments->filenames, argv[i]);
  }

  if (gt_showtime_enabled()) {
    timer = gt_timer_new_with_progress_description("start");
    gt_timer_start(timer);
    gt_assert(timer);
  }

  if (arguments->with_units) {
    gt_logger_log(logger, "unitfile option set, filename is %s\n",
                  gt_str_get(arguments->unitfile));
  }

  if (timer != NULL)
    gt_timer_show_progress(timer, "start shu search", stdout);

  if (gt_str_array_size(arguments->filenames) > 1UL) {
    GtEncseqEncoder *ee = gt_encseq_encoder_new();
    gt_encseq_encoder_set_timer(ee, timer);
    gt_encseq_encoder_set_logger(ee, logger);
    /* kr only makes sense for dna, so we can check this already with ee */
    gt_encseq_encoder_set_input_dna(ee);
    had_err = gt_encseq_encoder_encode(ee, arguments->filenames,
                                       gt_str_get(arguments->indexname), err);
    gt_encseq_encoder_delete(ee);
  }
  else {
    gt_str_append_str(arguments->indexname,
                      gt_str_array_get_str(arguments->filenames, 0));
    if (arguments->with_esa || arguments->with_pck) {
      GtStr *current_line = gt_str_new();
      FILE *prj_fp;
      const char *buffer;
      char **elements = NULL;

      prj_fp = gt_fa_fopen_with_suffix(gt_str_get(arguments->indexname),
                                       GT_PROJECTFILESUFFIX, "rb", err);
      if (prj_fp == NULL)
        had_err = -1;
      while (!had_err &&
             gt_str_read_next_line(current_line, prj_fp) != EOF) {
        buffer = gt_str_get(current_line);
        gt_genomediff_free_key_value(elements);
        elements = gt_cstr_split(buffer, '=');
        /* lines without a key or without a value are skipped instead of
           being dereferenced */
        if (elements[0] != NULL) {
          gt_log_log("%s", elements[0]);
          if (strcmp("mirrored", elements[0]) == 0 && elements[1] != NULL) {
            gt_log_log("%s", elements[1]);
            if (strcmp("1", elements[1]) == 0) {
              mirrored = true;
              gt_log_log("sequences are treated as mirrored");
            }
          }
        }
        gt_str_reset(current_line);
      }
      gt_str_delete(current_line);
      gt_genomediff_free_key_value(elements);
      if (prj_fp != NULL)
        gt_fa_xfclose(prj_fp);
    }
  }

  if (!had_err) {
    GtEncseqLoader *el = gt_encseq_loader_new_from_options(arguments->loadopts,
                                                           err);
    if (mirrored)
      gt_encseq_loader_mirror(el);
    encseq = gt_encseq_loader_load(el, gt_str_get(arguments->indexname), err);
    gt_encseq_loader_delete(el);
  }
  if (encseq == NULL)
    had_err = -1;

  if (!had_err) {
    unit_info = gt_shu_unit_info_new(encseq);
    if (arguments->with_units)
      had_err = gt_shu_unit_file_info_read(arguments->unitfile, unit_info,
                                           logger, err);
  }

  if (!had_err) {
    uint64_t **shusums = NULL;

    if (arguments->with_esa || arguments->with_pck) {
      shusums = gt_genomediff_shulen_sum(arguments, unit_info, logger, timer,
                                         err);
      if (shusums == NULL)
        had_err = -1;
    }
    else {
      const bool doesa = true;
      GenomediffInfo gd_info;
      Suffixeratoroptions sopts;

      sopts.beverbose = arguments->verbose;
      sopts.indexname = arguments->indexname;
      sopts.db = NULL;
      sopts.encopts = NULL;
      sopts.genomediff = true;
      sopts.inputindex = arguments->indexname;
      sopts.loadopts = arguments->loadopts;
      sopts.showprogress = false;
      sopts.idxopts = arguments->idxopts;

      gt_assert(unit_info != NULL);
      gt_array2dim_calloc(shusums, unit_info->num_of_genomes,
                          unit_info->num_of_genomes);
      gd_info.shulensums = shusums;
      gd_info.unit_info = unit_info;
      had_err = gt_runsuffixerator(doesa, &sopts, &gd_info, logger, err);
    }
    if (shusums != NULL) {
      if (!had_err)
        had_err = gt_genomediff_kr_calc(shusums, arguments, unit_info,
                                        arguments->with_pck, logger, timer,
                                        err);
      /* released on the failure path as well, so the table does not leak
         when suffixerator fails */
      gt_array2dim_delete(shusums);
    }
  }

  if (timer != NULL) {
    gt_timer_show_progress_final(timer, stdout);
    gt_timer_delete(timer);
  }
  gt_logger_delete(logger);
  gt_encseq_delete(encseq);
  gt_shu_unit_info_delete(unit_info);

  return had_err;
}
/* Tool runner: creates an HCR decoder for arguments->file using either the
   alphabet from the symbol map file arguments->smap or the DNA alphabet.
   Depending on arguments->bench it either runs the random access benchmark
   or decodes a range of reads (arguments->rng if both ends are defined,
   otherwise all reads) into the file named after arguments->name; an empty
   name is replaced by the basename of arguments->file.
   Returns 0 on success and -1 on failure (with <err> set). */
static int gt_compreads_decompress_runner(GT_UNUSED int argc,
                                          GT_UNUSED const char **argv,
                                          GT_UNUSED int parsed_args,
                                          void *tool_arguments, GtError *err)
{
  GtCsrHcrDecodeArguments *arguments = tool_arguments;
  GtAlphabet *alpha;
  GtHcrDecoder *hcrd = NULL;
  GtTimer *timer = NULL;
  unsigned long start, end;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  if (gt_showtime_enabled())
  {
    timer = gt_timer_new_with_progress_description("start");
    gt_timer_start(timer);
    gt_assert(timer);
  }

  if (gt_str_length(arguments->smap) > 0)
  {
    alpha = gt_alphabet_new_from_file_no_suffix(gt_str_get(arguments->smap),
                                                err);
  }
  else
  {
    alpha = gt_alphabet_new_dna();
  }
  if (alpha == NULL)
  {
    had_err = -1;
  }

  if (had_err == 0)
  {
    if (timer != NULL)
    {
      gt_timer_show_progress(timer, "decoding", stdout);
    }

    if (gt_str_length(arguments->name) == 0)
    {
      char *file_basename = gt_basename(gt_str_get(arguments->file));

      gt_str_set(arguments->name, file_basename);
      gt_free(file_basename);
    }

    hcrd = gt_hcr_decoder_new(gt_str_get(arguments->file), alpha,
                              arguments->descs, timer, err);
    if (hcrd == NULL)
    {
      had_err = -1;
    }
    else if (arguments->bench != 0)
    {
      had_err = gt_compreads_decompress_benchmark(hcrd, arguments->bench,
                                                  timer, err);
    }
    else
    {
      if (arguments->rng.start == GT_UNDEF_ULONG ||
          arguments->rng.end == GT_UNDEF_ULONG)
      {
        /* no complete range given: decode every read */
        start = 0;
        end = gt_hcr_decoder_num_of_reads(hcrd) - 1;
      }
      else
      {
        if (arguments->rng.start >= gt_hcr_decoder_num_of_reads(hcrd) ||
            arguments->rng.end >= gt_hcr_decoder_num_of_reads(hcrd))
        {
          gt_error_set(err, "range %lu-%lu includes a read number exceeding "
                            "the total number of reads (%lu)",
                       arguments->rng.start, arguments->rng.end,
                       gt_hcr_decoder_num_of_reads(hcrd));
          had_err = -1;
        }
        start = arguments->rng.start;
        end = arguments->rng.end;
      }

      if (had_err == 0)
      {
        gt_log_log("filebasename: %s", gt_str_get(arguments->name));
        if (gt_hcr_decoder_decode_range(hcrd, gt_str_get(arguments->name),
                                        start, end, timer, err) != 0)
        {
          had_err = -1;
        }
      }
    }
    gt_hcr_decoder_delete(hcrd);
  }

  gt_alphabet_delete(alpha);
  if (timer != NULL)
  {
    gt_timer_show_progress_final(timer, stdout);
    gt_timer_delete(timer);
  }
  if (had_err)
  {
    gt_assert(gt_error_is_set(err));
  }
  return had_err;
}
/* Tool runner for the kmer database test/benchmark. Loads the encseq named
   by argv[parsed_args], walks over it in randomly sized intervals and feeds
   the kmers either into a hash map (arguments->use_hash) or into a
   GtKmerDatabase; unless merge_only or bench is set a second database is
   filled kmer by kmer and compared against the first. With
   arguments->bench a fixed number of random lookups is timed afterwards.
   Returns 0 on success and a non-zero value on failure (with <err> set). */
static int gt_kmer_database_runner(GT_UNUSED int argc, const char **argv,
                                   int parsed_args, void *tool_arguments,
                                   GtError *err)
{
  GtKmerDatabaseArguments *arguments = tool_arguments;
  int had_err = 0;
  GtEncseq *es = NULL; /* stays NULL if loading is skipped or fails */
  GtUword es_length,
          nu_kmer_codes = 0;
  GtKmerDatabase *compare_db = NULL,
                 *db = NULL;
  GtLogger *logger;
  FILE *fp = NULL;
  GtHashmap *kmer_hash = NULL;
  GtTimer *timer = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  if (arguments->use_hash)
    kmer_hash = gt_hashmap_new(GT_HASH_DIRECT, NULL,
                               (GtFree) gt_kmer_database_delete_hash_value);
  if (arguments->bench)
    timer = gt_timer_new_with_progress_description("loading encoded sequence");

  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stderr);

  if (arguments->verbose && gt_str_length(arguments->print_filename) > 0UL) {
    fp = gt_fa_fopen(gt_str_get(arguments->print_filename), "w", err);
    /* a log file that cannot be opened is an error; previously the failure
       was ignored and the NULL stream became the logger target */
    if (fp == NULL)
      had_err = -1;
    else
      gt_logger_set_target(logger, fp);
  }

  if (!had_err) {
    GtEncseqLoader *es_l;
    if (arguments->bench)
      gt_timer_start(timer);
    es_l = gt_encseq_loader_new();
    es = gt_encseq_loader_load(es_l, argv[parsed_args], err);
    if (arguments->bench)
      gt_timer_show_progress(timer, "saving kmers (+iterating over file)",
                             stdout);
    if (es == NULL) {
      had_err = -1;
    }
    gt_encseq_loader_delete(es_l);
  }
  if (!had_err) {
    es_length = gt_encseq_total_length(es);
    if (es_length < (GtUword) arguments->kmersize) {
      gt_error_set(err, "Input is too short for used kmersize. File length: "
                   GT_WU " kmersize: %u", es_length, arguments->kmersize);
      had_err = -1;
    }
  }
  if (!had_err) {
    GtAlphabet *alphabet;
    alphabet = gt_encseq_alphabet(es);
    if (arguments->bench)
      nu_kmer_codes = gt_power_for_small_exponents(
                                            gt_alphabet_num_of_chars(alphabet),
                                            arguments->kmersize);
    if (!arguments->merge_only && !arguments->use_hash && !arguments->bench) {
      compare_db = gt_kmer_database_new(gt_alphabet_num_of_chars(alphabet),
                                        arguments->kmersize,
                                        arguments->sb_size, es);
    }
    if (!arguments->use_hash) {
      db = gt_kmer_database_new(gt_alphabet_num_of_chars(alphabet),
                                arguments->kmersize,
                                arguments->sb_size, es);
      if (arguments->cutoff) {
        if (arguments->mean_cutoff)
          gt_kmer_database_use_mean_cutoff(db, (GtUword) 2,
                                           arguments->cutoff_value);
        else
          gt_kmer_database_set_cutoff(db, arguments->cutoff_value);
        if (!arguments->prune)
          gt_kmer_database_set_prune(db);
      }
    }
  }

  if (!had_err) {
    GtUword startpos = 0,
            endpos;
    GtKmercodeiterator *iter;
    const GtKmercode *kmercode = NULL;

    iter = gt_kmercodeiterator_encseq_new(es, GT_READMODE_FORWARD,
                                          arguments->kmersize, 0);
    while (!had_err && startpos < es_length - (arguments->kmersize - 1)) {
      GtUword startpos_add_kmer = startpos;

      /* pick a random interval end; only merge_only clamps it to the
         sequence length */
      if (arguments->merge_only) {
        endpos = startpos + (arguments->kmersize - 1) +
                 (gt_rand_max((arguments->sb_size - 1) * 2));
        if (endpos > es_length)
          endpos = es_length;
      }
      else {
        endpos = startpos + (arguments->kmersize - 1) +
                 (gt_rand_max(arguments->sb_size - 1));
      }
      gt_kmercodeiterator_reset(iter, GT_READMODE_FORWARD, startpos);
      while ((kmercode = gt_kmercodeiterator_encseq_next(iter)) != NULL &&
             startpos_add_kmer <= endpos - (arguments->kmersize - 1)) {
        if (!arguments->merge_only && !arguments->use_hash &&
            !kmercode->definedspecialposition && !arguments->bench) {
          gt_kmer_database_add_kmer(compare_db, kmercode->code,
                                    startpos_add_kmer);
        }
        if (arguments->use_hash && !kmercode->definedspecialposition) {
          gt_kmer_database_add_to_hash(kmer_hash, kmercode->code,
                                       startpos_add_kmer);
        }
        startpos_add_kmer++;
      }
      if (!arguments->use_hash) {
        gt_kmer_database_add_interval(db, startpos, endpos);
        gt_kmer_database_print_buffer(db, logger);
        if (!arguments->bench)
          had_err = gt_kmer_database_check_consistency(db, err);
      }
      startpos = endpos + 1;
    }
    if (!arguments->use_hash) {
      gt_kmer_database_flush(db);
      gt_kmer_database_print_buffer(db, logger);
      if (!had_err && !arguments->bench)
        had_err = gt_kmer_database_check_consistency(db, err);
      if (!arguments->merge_only && !had_err && !arguments->bench)
        had_err = gt_kmer_database_check_consistency(compare_db, err);
      if (!arguments->merge_only && !arguments->bench)
        gt_kmer_database_print(compare_db, logger, true);
      if (!arguments->merge_only && !had_err && !arguments->bench)
        had_err = gt_kmer_database_compare(compare_db, db, err);
      gt_kmer_database_print(db, logger, true);
    }
    gt_kmercodeiterator_delete(iter);
  }

  if (arguments->bench) {
    /* the random access benchmark needs the structures built above, so it
       is skipped after an error (db would be NULL and nu_kmer_codes 0) */
    if (!had_err) {
      GtKmerStartpos pos;
      GtArrayGtUword *pos_hash;
      GtUword rand_access = (GtUword) 50000000,
              rand_code,
              i,
              sum = 0;

      gt_timer_show_progress(timer, "random access", stdout);
      for (i = 0; i < rand_access; i++) {
        rand_code = gt_rand_max(nu_kmer_codes - 1);
        if (arguments->use_hash) {
          pos_hash = gt_hashmap_get(kmer_hash, (const void *) rand_code);
          if (pos_hash != NULL)
            sum += pos_hash->spaceGtUword[pos_hash->nextfreeGtUword - 1];
        }
        else {
          pos = gt_kmer_database_get_startpos(db, rand_code);
          if (pos.no_positions > 0)
            sum += pos.startpos[pos.no_positions - 1];
        }
      }
      printf("sum: " GT_WU "\n", sum);

      gt_timer_show_progress(timer, "", stdout);
      gt_timer_stop(timer);
    }
    gt_timer_delete(timer);
  }

  if (arguments->use_hash)
    gt_hashmap_delete(kmer_hash);
  gt_encseq_delete(es);
  if (!arguments->use_hash)
    gt_kmer_database_delete(db);
  if (!arguments->merge_only && !arguments->bench)
    gt_kmer_database_delete(compare_db);
  gt_logger_delete(logger);
  gt_fa_fclose(fp);

  return had_err;
}
/* Tool runner for gdiffcalc. Loads the encseq arguments->indexname, sets up
   the unit information and reads a ';'-separated table of floating point
   values from the first remaining command line argument into a
   num_of_genomes x num_of_genomes matrix; elements starting with '#' are
   logged as names and not stored. The matrix is then passed to
   gt_genomediff_calculate_div_from_avg().
   Returns 0 on success and a non-zero value on failure (with <err> set). */
static int gt_gdiffcalc_runner(int argc, const char **argv, int parsed_args,
                               void *tool_arguments, GT_UNUSED GtError *err)
{
  GtGenomediffArguments *arguments = tool_arguments;
  int had_err = 0,
      i;
  GtUword lcounter = 0,
          zcounter = 0;
  double **shusums = NULL;
  GtEncseq *encseq = NULL;
  GtLogger *logger;
  GtShuUnitFileInfo *unit_info = NULL;
  GtTimer *timer = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stdout);
  gt_assert(logger);

  for (i = parsed_args; i < argc; i++) {
    gt_str_array_add_cstr(arguments->filenames, argv[i]);
  }

  if (gt_showtime_enabled()) {
    timer = gt_timer_new_with_progress_description("load encseq");
    gt_timer_start(timer);
    gt_assert(timer);
  }

  if (arguments->with_units) {
    gt_logger_log(logger, "unitfile option set, filename is %s\n",
                  gt_str_get(arguments->unitfile));
  }

  if (!had_err) {
    GtEncseqLoader *el = gt_encseq_loader_new_from_options(arguments->loadopts,
                                                           err);
    encseq = gt_encseq_loader_load(el, gt_str_get(arguments->indexname), err);
    gt_encseq_loader_delete(el);
  }
  if (encseq == NULL)
    had_err = -1;

  if (timer != NULL)
    gt_timer_show_progress(timer, "load units", stdout);

  if (!had_err) {
    unit_info = gt_shu_unit_info_new(encseq);
    if (arguments->with_units)
      had_err = gt_shu_unit_file_info_read(arguments->unitfile, unit_info,
                                           logger, err);
  }

  if (timer != NULL)
    gt_timer_show_progress(timer, "read table", stdout);

  if (!had_err) {
    GtIO *table_file = NULL;
    GtTokenizer *tokenizer = NULL;
    GtStr *line = NULL;

    gt_assert(unit_info != NULL);
    gt_array2dim_calloc(shusums, unit_info->num_of_genomes,
                        unit_info->num_of_genomes);

    table_file = gt_io_new(gt_str_array_get(arguments->filenames, 0), "r");
    tokenizer = gt_tokenizer_new(table_file);
    line = gt_tokenizer_get_token(tokenizer);
    while (line != NULL && !had_err) {
      char *cline = gt_str_get(line);
      char *elem = strtok(cline, ";");
      zcounter = 0;
      while (elem != NULL && !had_err) {
        if (*elem != '#') {
          /* the table comes from a file: never write outside the matrix */
          if (lcounter >= unit_info->num_of_genomes ||
              zcounter >= unit_info->num_of_genomes) {
            had_err = 1;
            gt_error_set(err, "table has more rows or columns than genomes");
            break;
          }
          if (1 != sscanf(elem, "%lf", &shusums[lcounter][zcounter])) {
            had_err = 1;
            gt_error_set(err, "couldn't scan");
            break;
          }
          gt_logger_log(logger, "wert: %lf", shusums[lcounter][zcounter]);
          zcounter++;
        }
        else {
          gt_logger_log(logger, "name: %s", elem);
        }
        elem = strtok(NULL, ";");
      }
      gt_tokenizer_next_token(tokenizer);
      gt_str_delete(line);
      line = gt_tokenizer_get_token(tokenizer);
      lcounter++;
      /* lcounter is unsigned, so it is printed with GT_WU */
      gt_logger_log(logger, "line " GT_WU, lcounter);
    }
    /* after an error a freshly fetched token may still be pending */
    if (line != NULL)
      gt_str_delete(line);
    /* NOTE(review): assumes gt_tokenizer_delete() also releases the GtIO
       handed to gt_tokenizer_new() -- confirm. */
    gt_tokenizer_delete(tokenizer);
  }
  if (!had_err) {
    GtUword num_of_seq, file_idx, seq_idx, startpos;
    GT_UNUSED GtUword oldpos = 0;

    gt_assert(unit_info != NULL);
    gt_assert(lcounter == zcounter);
    gt_assert(lcounter == unit_info->num_of_genomes);

    num_of_seq = gt_encseq_num_of_sequences(unit_info->encseq);

    /* log, for every sequence, the file and genome it belongs to */
    for (seq_idx = 0; seq_idx < num_of_seq; seq_idx++) {
      startpos = gt_encseq_seqstartpos(unit_info->encseq, seq_idx);
      file_idx = gt_encseq_filenum(unit_info->encseq, startpos);
      gt_log_log("seq: "GT_WU" starts at: "GT_WU"\n"
                 "belonges to file: "GT_WU" which is part of genome: %s",
                 seq_idx, startpos, file_idx,
                 gt_str_array_get(unit_info->genome_names,
                                  unit_info->map_files[file_idx]));
      gt_assert(oldpos <= startpos);
      oldpos = startpos;
    }
  }
  if (shusums != NULL) {
    if (!had_err)
      had_err = gt_genomediff_calculate_div_from_avg(shusums, arguments,
                                                     unit_info, logger,
                                                     timer, err);
    /* released on the failure path as well */
    gt_array2dim_delete(shusums);
  }

  if (timer != NULL) {
    gt_timer_show_progress_final(timer, stdout);
    gt_timer_delete(timer);
  }
  gt_logger_delete(logger);
  gt_encseq_delete(encseq);
  gt_shu_unit_info_delete(unit_info);

  return had_err;
}
/* Tool runner for the readjoiner assembly step. Unless
   arguments->paths2seq is set, the readset is loaded, the string graph is
   built from the SPM files (or loaded, if arguments->load is set),
   optionally reduced, error-corrected and saved, and finally traversed to
   write the contig paths. Afterwards the contig paths are converted to
   sequences by gt_readjoiner_assembly_paths2seq().
   Returns 0 on success and a non-zero value on failure. */
static int gt_readjoiner_assembly_runner(GT_UNUSED int argc,
    GT_UNUSED const char **argv, GT_UNUSED int parsed_args,
    void *tool_arguments, GtError *err)
{
  GtReadjoinerAssemblyArguments *arguments = tool_arguments;
  GtLogger *verbose_logger, *default_logger;
  GtEncseqLoader *el = NULL;
  GtEncseq *reads = NULL;
  GtTimer *timer = NULL;
  GtStrgraph *strgraph = NULL;
  GtBitsequence *contained = NULL;
  const char *readset;
  bool eqlen = true;
  GtUword nreads = 0, tlen, rlen = 0;
  int had_err = 0;

  /* check the arguments before they are dereferenced */
  gt_assert(arguments);
  gt_error_check(err);
  readset = gt_str_get(arguments->readset);

  default_logger = gt_logger_new(!arguments->quiet, GT_LOGGER_DEFLT_PREFIX,
      stdout);
  gt_logger_log(default_logger,
      "gt readjoiner assembly (version "GT_READJOINER_VERSION")");
  verbose_logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX,
      stdout);
  gt_logger_log(verbose_logger, "verbose output activated");
  gt_logger_log(verbose_logger, "readset name = %s", readset);
  if (gt_showtime_enabled())
  {
    timer = gt_timer_new_with_progress_description(
        GT_READJOINER_ASSEMBLY_MSG_COUNTSPM);
    gt_timer_start(timer);
    gt_timer_show_cpu_time_by_progress(timer);
  }

  if (!arguments->paths2seq)
  {
    el = gt_encseq_loader_new();
    gt_encseq_loader_drop_description_support(el);
    gt_encseq_loader_disable_autosupport(el);
    reads = gt_encseq_loader_load(el, readset, err);
    if (reads == NULL)
    {
      had_err = -1;
    }
    if (had_err == 0)
    {
      eqlen = gt_encseq_accesstype_get(reads) == GT_ACCESS_TYPE_EQUALLENGTH;
      nreads = gt_encseq_num_of_sequences(reads);
      gt_logger_log(default_logger, "number of reads in filtered readset = "
          GT_WU, nreads);
      tlen = gt_encseq_total_length(reads) - nreads + 1;
      gt_logger_log(verbose_logger, "total length of filtered readset = "
          GT_WU, tlen);

      if (eqlen)
      {
        /* equal-length reads: only the length is needed from here on */
        rlen = gt_encseq_seqlength(reads, 0);
        gt_logger_log(verbose_logger, "read length = " GT_WU, rlen);
        gt_encseq_delete(reads);
        reads = NULL;
      }
      else
      {
        had_err = gt_readjoiner_assembly_build_contained_reads_list(
            arguments, &contained, err);
        rlen = 0;
        gt_logger_log(verbose_logger, "read length = variable");
        gt_assert(reads != NULL);
      }
    }

    if (had_err == 0)
    {
      if (!arguments->load)
      {
        had_err = gt_readjoiner_assembly_build_graph(arguments, &strgraph,
            reads, readset, eqlen, rlen, nreads, contained, default_logger,
            verbose_logger, timer, err);
      }
      else
      {
        gt_readjoiner_assembly_load_graph(&strgraph, reads, readset, rlen,
            default_logger, timer);
      }
    }

    /* the encseq is released early unless error correction is requested */
    if (!eqlen && reads != NULL && !arguments->errors)
    {
      gt_encseq_delete(reads);
      reads = NULL;
      if (had_err == 0)
        gt_strgraph_set_encseq(strgraph, NULL);
    }

    if (had_err == 0 && arguments->redtrans)
    {
      if (gt_showtime_enabled())
        gt_timer_show_progress(timer, GT_READJOINER_ASSEMBLY_MSG_REDTRANS,
            stdout);
      gt_strgraph_sort_edges_by_len(strgraph, false);
      (void)gt_strgraph_redtrans(strgraph, false);
      (void)gt_strgraph_redself(strgraph, false);
      (void)gt_strgraph_redwithrc(strgraph, false);
      gt_strgraph_log_stats(strgraph, verbose_logger);
    }

    if (had_err == 0 && arguments->errors)
    {
      if (gt_showtime_enabled())
        gt_timer_show_progress(timer, GT_READJOINER_ASSEMBLY_MSG_CLEANSG,
            stdout);
      gt_logger_log(default_logger, GT_READJOINER_ASSEMBLY_MSG_CLEANSG);
      had_err = gt_readjoiner_assembly_error_correction(strgraph,
          arguments->bubble, arguments->deadend, arguments->deadend_depth,
          verbose_logger);
    }

    if (had_err == 0 && arguments->save)
    {
      if (gt_showtime_enabled())
        gt_timer_show_progress(timer, GT_READJOINER_ASSEMBLY_MSG_SAVESG,
            stdout);
      gt_logger_log(default_logger, GT_READJOINER_ASSEMBLY_MSG_SAVESG);
      gt_strgraph_show(strgraph, GT_STRGRAPH_BIN,
          gt_str_get(arguments->readset), GT_READJOINER_SUFFIX_SG, false);
    }

    /* release the encseq if it was kept for error correction */
    if (!eqlen && reads != NULL)
    {
      gt_encseq_delete(reads);
      reads = NULL;
      if (had_err == 0)
        gt_strgraph_set_encseq(strgraph, NULL);
    }

    if (had_err == 0)
    {
      if (gt_showtime_enabled())
        gt_timer_show_progress(timer, GT_READJOINER_ASSEMBLY_MSG_TRAVERSESG,
            stdout);
      gt_logger_log(default_logger, GT_READJOINER_ASSEMBLY_MSG_TRAVERSESG);
      gt_readjoiner_assembly_show_current_space("(before traversal)");
      gt_strgraph_spell(strgraph, (GtUword)arguments->depthcutoff,
          (GtUword)arguments->lengthcutoff, arguments->vd, readset,
          GT_READJOINER_SUFFIX_CONTIG_PATHS, NULL, true,
          arguments->show_contigs_info, false, verbose_logger);
    }

    gt_free(contained);
    gt_strgraph_delete(strgraph);
    strgraph = NULL;
    gt_assert(reads == NULL);
    gt_encseq_loader_delete(el);
  }

  if (had_err == 0)
  {
    gt_readjoiner_assembly_show_current_space("(before paths2seq)");
    had_err = gt_readjoiner_assembly_paths2seq(readset,
        (GtUword)arguments->lengthcutoff, arguments->vd,
        arguments->astat, arguments->coverage, arguments->copynum,
        arguments->buffersize, default_logger, &timer, err);
  }

  if (gt_showtime_enabled())
  {
    gt_timer_show_progress_final(timer, stdout);
    gt_timer_delete(timer);
  }
  gt_logger_delete(default_logger);
  gt_logger_delete(verbose_logger);
  return had_err;
}
/* Tool runner for the condenser search.

   If neither <blastn> nor <blastp> is requested in the tool arguments, nothing
   is searched and 0 is returned. Otherwise the runner performs, in order:
     1. loading of the compressed database (GtNREncseq) from <dbpath>,
     2. optionally, estimation of a raw E-value from the average query length,
     3. a "coarse" BLAST run of the queries against "<dbpath>.fas",
     4. extraction of all unique entries hit in step 3 into
        "coarse_<db basename without suffix>.fas",
     5. a "fine" BLAST run against that extracted file, printing one
        tab-separated line per hit to <arguments->outfp>.

   <tool_arguments> must point to a GtCondenserSearchArguments object.
   Returns 0 on success and -1 (or the error code of a failing helper) on
   failure; in that case <err> is expected to be set by the failing call. */
static int gt_condenser_search_runner(GT_UNUSED int argc,
                                      GT_UNUSED const char **argv,
                                      GT_UNUSED int parsed_args,
                                      void *tool_arguments,
                                      GtError *err)
{
  GtCondenserSearchArguments *arguments = tool_arguments;
  int i, had_err = 0;
  char *querypath = NULL;
  GtStr *coarse_fname = NULL;
  char *db_basename = NULL;
  char *suffix_ptr = NULL;
  GtTimer *timer = NULL;
  GtLogger *logger = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  /* only touch <arguments> after it has been asserted to be non-NULL */
  querypath = gt_str_get(arguments->querypath);
  coarse_fname = gt_str_new_cstr("coarse_");
  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stderr);

  db_basename = gt_basename(gt_str_get(arguments->dbpath));
  /* if first char is '.' this might be a hidden file */
  if (strlen(db_basename) > (size_t) 1 &&
      (suffix_ptr = strrchr(db_basename + 1, '.')) != NULL) {
    /* remove suffix */
    *suffix_ptr = '\0';
  }
  gt_str_append_cstr(coarse_fname, db_basename);
  gt_str_append_cstr(coarse_fname, ".fas");
  gt_free(db_basename);
  db_basename = NULL;
  suffix_ptr = NULL;

  if (arguments->blastn || arguments->blastp) {
    GtMatch *match;
    GtMatchIterator *mp = NULL;
    GtNREncseq *nrencseq = NULL;
    GtStr *fastaname = gt_str_clone(arguments->dbpath);
    HitPosition *hits;
    double eval, raw_eval = 0.0;
    GtUword coarse_db_len = 0;
    GtMatchIteratorStatus status;
    int curr_hits = 0, max_hits = 100;

    /* every slot of <hits> owns a separately allocated range; slots are
       allocated up front (and on growth) and released in the cleanup below */
    hits = gt_malloc(sizeof (*hits) * (size_t) max_hits);
    gt_str_append_cstr(fastaname, ".fas");

    for (i = 0; i < max_hits; i++) {
      hits[i].range = gt_malloc(sizeof (*hits[i].range) * (size_t) 1);
    }

    if (gt_showtime_enabled()) {
      timer = gt_timer_new_with_progress_description("initialization");
      gt_timer_start(timer);
    }

    /*extract sequences from compressed database*/
    if (!had_err) {
      nrencseq = gt_n_r_encseq_new_from_file(gt_str_get(arguments->dbpath),
                                             logger, err);
      if (nrencseq == NULL)
        had_err = -1;
    }

    if (!had_err) {
      if (arguments->ceval == GT_UNDEF_DOUBLE ||
          arguments->feval == GT_UNDEF_DOUBLE) {
        /* from NCBI BLAST tutorial:
           E = Kmne^{-lambdaS}
           calculates E-value for score S with natural scale parameters K for
           search space size and lambda for the scoring system
           E = mn2^-S'
           m being the subject (total) length, n the length of ONE query
           calculates E-value for bit-score S' */
        GtFastaReader *reader;
        GtCondenserSearchAvg avg = {0,0};

        reader = gt_fasta_reader_rec_new(arguments->querypath);
        had_err = gt_fasta_reader_run(reader, NULL, NULL,
                                      gt_condenser_search_cum_moving_avg,
                                      &avg, err);
        if (!had_err) {
          GtUword S = arguments->bitscore;

          gt_log_log(GT_WU " queries, avg query size: " GT_WU,
                     avg.count, avg.avg);
          /* validate the average before it enters the E-value estimate */
          gt_assert(avg.avg != 0);
          raw_eval = 1/pow(2.0, (double) S) * avg.avg;
          gt_logger_log(logger, "Raw E-value set to %.4e", raw_eval);
        }
        gt_fasta_reader_delete(reader);
      }
    }

    /*create BLAST database from compressed database fasta file*/
    if (!had_err) {
      if (timer != NULL)
        gt_timer_show_progress(timer, "create coarse BLAST db", stderr);
      if (arguments->blastn)
        had_err = gt_condenser_search_create_nucl_blastdb(
                                                  gt_str_get(fastaname), err);
      else
        had_err = gt_condenser_search_create_prot_blastdb(
                                                  gt_str_get(fastaname), err);
    }

    /* coarse BLAST run: collect the unique ids (and ranges) of all hits */
    if (!had_err) {
      GtBlastProcessCall *call;

      if (timer != NULL)
        gt_timer_show_progress(timer, "coarse BLAST run", stderr);

      if (arguments->blastp)
        call = gt_blast_process_call_new_prot();
      else
        call = gt_blast_process_call_new_nucl();
      gt_blast_process_call_set_db(call, gt_str_get(fastaname));
      gt_blast_process_call_set_query(call, querypath);
      /* NOTE(review): <ceval> is passed on unchanged even if it equals
         GT_UNDEF_DOUBLE, although <raw_eval> was computed for that case above
         -- confirm that this is intended */
      gt_blast_process_call_set_evalue(call, arguments->ceval);
      gt_blast_process_call_set_num_threads(call, arguments->blthreads);

      mp = gt_match_iterator_blast_process_new(call, err);
      if (!mp)
        had_err = -1;

      gt_blast_process_call_delete(call);

      while (!had_err &&
             (status = gt_match_iterator_next(mp, &match, err)) !=
               GT_MATCHER_STATUS_END) {
        if (status == GT_MATCHER_STATUS_OK) {
          GtUword hit_seq_id;
          char string[7];
          const char *dbseqid = gt_match_get_seqid2(match);

          /* header is expected to be a word of at most 6 characters directly
             followed by the numeric id of the unique */
          if (sscanf(dbseqid, "%6s" GT_WU, string, &hit_seq_id) == 2) {
            gt_match_get_range_seq2(match, hits[curr_hits].range);
            hits[curr_hits].idx = hit_seq_id;
            curr_hits++;
            if (curr_hits == max_hits) {
              /* table is full: grow by 100 slots and give each new slot its
                 own range */
              max_hits += 100;
              hits = gt_realloc(hits, sizeof (*hits) * (size_t) max_hits);
              for (i = max_hits - 100; i < max_hits; i++) {
                hits[i].range = gt_malloc(sizeof (*hits[i].range));
              }
            }
          }
          else {
            gt_error_set(err, "could not parse unique db header %s", dbseqid);
            had_err = -1;
          }
          /* <dbseqid> is no longer needed; release the match on the success
             path and on the parse-failure path alike */
          gt_match_delete(match);
        }
        else if (status == GT_MATCHER_STATUS_ERROR) {
          had_err = -1;
        }
      }
      gt_match_iterator_delete(mp);
    }

    /*extract sequences*/
    if (!had_err) {
      GtNREncseqDecompressor *decomp;
      GtFile *coarse_hits;

      if (timer != NULL)
        gt_timer_show_progress(timer, "extract coarse search hits", stderr);

      decomp = gt_n_r_encseq_decompressor_new(nrencseq);
      coarse_hits = gt_file_new(gt_str_get(coarse_fname), "w", err);
      /* do not start the extraction if the output file could not be opened */
      if (coarse_hits == NULL)
        had_err = -1;

      if (!had_err) {
        /* TODO DW do NOT extract complete uniques! these could be complete
           chromosomes!! just extract something around it? maybe +- max query
           length*/
        for (i = 0; i < curr_hits; i++) {
          gt_n_r_encseq_decompressor_add_unique_idx_to_extract(decomp,
                                                               hits[i].idx);
        }
        had_err =
          gt_n_r_encseq_decompressor_start_unique_extraction(coarse_hits,
                                                             decomp,
                                                             &coarse_db_len,
                                                             err);
        /* the length is only meaningful if the extraction succeeded */
        gt_assert(had_err || coarse_db_len != 0);
      }
      gt_file_delete(coarse_hits);
      gt_n_r_encseq_decompressor_delete(decomp);
    }
    gt_n_r_encseq_delete(nrencseq);

    /* create BLAST database from decompressed database file */
    if (!had_err) {
      if (timer != NULL)
        gt_timer_show_progress(timer, "create fine BLAST db", stderr);
      if (arguments->blastn)
        had_err = gt_condenser_search_create_nucl_blastdb(
                                               gt_str_get(coarse_fname), err);
      else
        had_err = gt_condenser_search_create_prot_blastdb(
                                               gt_str_get(coarse_fname), err);
    }

    /* perform fine BLAST search */
    if (!had_err) {
      GtBlastProcessCall *call;

      if (timer != NULL)
        gt_timer_show_progress(timer, "fine BLAST run", stderr);

      /* without a user supplied fine E-value, scale the raw estimate by the
         length of the extracted coarse database */
      if (arguments->feval == GT_UNDEF_DOUBLE) {
        eval = raw_eval * coarse_db_len;
      }
      else {
        eval = arguments->feval;
      }

      if (arguments->blastp)
        call = gt_blast_process_call_new_prot();
      else
        call = gt_blast_process_call_new_nucl();

      gt_blast_process_call_set_db(call, gt_str_get(coarse_fname));
      gt_blast_process_call_set_query(call, querypath);
      gt_blast_process_call_set_evalue(call, eval);
      gt_blast_process_call_set_num_threads(call, arguments->blthreads);

      gt_logger_log(logger, "Fine E-value set to: %.4e (len)" GT_WU, eval,
                    coarse_db_len);

      mp = gt_match_iterator_blast_process_new(call, err);
      if (!mp)
        had_err = -1;

      gt_blast_process_call_delete(call);

      if (!had_err) {
        GtUword numofhits = 0;

        while (!had_err &&
               (status = gt_match_iterator_next(mp, &match, err)) !=
                 GT_MATCHER_STATUS_END) {
          if (status == GT_MATCHER_STATUS_OK) {
            GtMatchBlast *matchb = (GtMatchBlast*) match;
            GtRange range_seq1;
            GtRange range_seq2;

            numofhits++;
            gt_match_get_range_seq1(match, &range_seq1);
            gt_match_get_range_seq2(match, &range_seq2);
            /* columns: seqid1, seqid2, similarity, alignment length,
               start/end on seq1, start/end on seq2, E-value, bit score */
            gt_file_xprintf(
                    arguments->outfp,
                    "%s\t%s\t%.2f\t" GT_WU "\t" GT_WU "\t" GT_WU "\t" GT_WU "\t"
                    GT_WU "\t%g\t%.3f\n",
                    gt_match_get_seqid1(match),
                    gt_match_get_seqid2(match),
                    gt_match_blast_get_similarity(matchb),
                    gt_match_blast_get_align_length(matchb),
                    range_seq1.start,
                    range_seq1.end,
                    range_seq2.start,
                    range_seq2.end,
                    gt_match_blast_get_evalue(matchb),
                    (double) gt_match_blast_get_bitscore(matchb));
            gt_match_delete(match);
          }
          else if (status == GT_MATCHER_STATUS_ERROR) {
            had_err = -1;
          }
        }
        gt_log_log(GT_WU " hits found\n", numofhits);
      }
      gt_match_iterator_delete(mp);
    }

    if (!had_err && timer != NULL)
      gt_timer_show_progress_final(timer, stderr);
    gt_timer_delete(timer);

    /*cleanup*/
    for (i = 0; i < max_hits; i++) {
      gt_free(hits[i].range);
    }
    gt_free(hits);
    gt_str_delete(fastaname);
  }
  gt_str_delete(coarse_fname);
  gt_logger_delete(logger);
  return had_err;
}