/* Writes the sequence/quality encoding of <hcr_enc> to the file <name> with
   HCRFILESUFFIX appended.

   The file is written in this order:
     1. file information (hcr_write_file_info),
     2. symbol distribution table (hcr_write_seqdistrtab),
     3. one placeholder word, later overwritten with the value of
        seq_encoder->startofsamplingtab,
     4. the encoded sequences (hcr_write_seqs), starting at
        seq_encoder->start_of_encoding.

   If <timer> is not NULL, a progress message is shown on stdout.

   Returns 0 on success. Returns -1 if the output file could not be opened,
   otherwise the non-zero error code of the first helper that failed. */
static int hcr_write_seq_qual_data(const char *name, GtHcrEncoder *hcr_enc,
                                   GtTimer *timer, GtError *err)
{
  int had_err = 0;
  FILE *fp;
  GtUword dummy = 0;
  GtWord pos = 0;

  gt_error_check(err);

  fp = gt_fa_fopen_with_suffix(name, HCRFILESUFFIX, "wb", err);
  if (fp == NULL)
    had_err = -1;

  if (!had_err) {
    if (timer != NULL)
      gt_timer_show_progress(timer, "write sequences and qualities encoding",
                             stdout);

    hcr_write_file_info(fp, hcr_enc);
    had_err = hcr_write_seqdistrtab(fp, hcr_enc);

    if (!had_err) {
      bool is_not_at_pageborder;
      long data_pos;

      /* remember where the placeholder lives so that it can be patched once
         the sequences have been written */
      pos = ftell(fp);
      gt_xfwrite_one(&dummy, fp);

      /* no I/O happens between here and the computation of
         start_of_encoding, so one ftell() is sufficient */
      data_pos = ftell(fp);
      is_not_at_pageborder = (data_pos % hcr_enc->pagesize) != 0;

      /* the encoding starts at the next multiple of pagesize, unless the
         current position already is such a multiple */
      if (is_not_at_pageborder)
        hcr_enc->seq_encoder->start_of_encoding =
          (data_pos / hcr_enc->pagesize + 1) * hcr_enc->pagesize;
      else
        hcr_enc->seq_encoder->start_of_encoding = data_pos;

      if (hcr_enc->page_sampling)
        hcr_enc->seq_encoder->sampling =
          gt_sampling_new_page(hcr_enc->sampling_rate,
                               (off_t) hcr_enc->seq_encoder->start_of_encoding);
      else if (hcr_enc->regular_sampling)
        hcr_enc->seq_encoder->sampling =
          gt_sampling_new_regular(hcr_enc->sampling_rate,
                               (off_t) hcr_enc->seq_encoder->start_of_encoding);

      had_err = hcr_write_seqs(fp, hcr_enc, err);
    }

    if (!had_err) {
      gt_assert(fp);
      /* replace the placeholder with the real position of the sampling tab */
      gt_xfseek(fp, pos, SEEK_SET);
      gt_xfwrite_one(&hcr_enc->seq_encoder->startofsamplingtab, fp);
    }
    gt_fa_xfclose(fp);
  }
  /* propagate failures; returning a constant 0 here would hide every error
     detected above from the caller */
  return had_err;
}
/* Writes the per-file header to <fp>: first the number of files, then for
   each file its <readnum> followed by its <readlength>, in file order. */
static void hcr_write_file_info(FILE *fp, GtHcrEncoder *hcr_enc)
{
  GtUword file_idx = 0;

  gt_xfwrite_one(&hcr_enc->num_of_files, fp);

  while (file_idx < hcr_enc->num_of_files) {
    gt_xfwrite_one(&hcr_enc->seq_encoder->fileinfos[file_idx].readnum, fp);
    gt_xfwrite_one(&hcr_enc->seq_encoder->fileinfos[file_idx].readlength, fp);
    file_idx++;
  }
}
/* Callback writing one (base, quality, frequency) record to the output
   stream stored in the WriteNodeInfo passed via <pt>.

   <symbol> combines both values: the remainder modulo the alphabet size is
   the base code, the quotient plus the quality offset is the quality value.
   <code> and <code_length> are not used. Always returns 0. */
static int hcr_huffman_write_base_qual_freq(GtUword symbol,
                                            GtUint64 freq,
                                            GT_UNUSED GtBitsequence code,
                                            GT_UNUSED unsigned code_length,
                                            void *pt)
{
  WriteNodeInfo *node_info = (WriteNodeInfo*) pt;
  GtUchar base;
  GtUchar qual;

  /* base: take the code from the symbol, map the last alphabet code to
     WILDCARD, then store the upper-case decoded character */
  gt_safe_assign(base, (symbol % gt_alphabet_size(node_info->alpha)));
  if (base == ((GtUchar) gt_alphabet_size(node_info->alpha)) - 1) {
    base = (GtUchar) WILDCARD;
  }
  gt_safe_assign(base,
                 (toupper(gt_alphabet_decode(node_info->alpha, base))));
  gt_xfwrite_one(&base, node_info->output);

  /* quality */
  gt_safe_assign(qual, (symbol / gt_alphabet_size(node_info->alpha) +
                        node_info->qual_offset));
  gt_xfwrite_one(&qual, node_info->output);

  /* frequency */
  gt_xfwrite_one(&freq, node_info->output);

  return 0;
}
/* Writes the symbol distribution table to <fp>: the number of symbols of the
   huffman structure of <hcr_enc>, followed by one record per symbol written
   by hcr_huffman_write_base_qual_freq().
   Returns the result of gt_huffman_iterate(). */
static int hcr_write_seqdistrtab(FILE *fp, GtHcrEncoder *hcr_enc)
{
  int rval;
  GtUword leaf_count;
  WriteNodeInfo *node_info = gt_calloc((size_t) 1, sizeof *node_info);

  /* context handed to the callback for every symbol */
  node_info->alpha = hcr_enc->seq_encoder->alpha;
  node_info->qual_offset = hcr_enc->seq_encoder->qual_offset;
  node_info->output = fp;

  leaf_count = gt_huffman_numofsymbols(hcr_enc->seq_encoder->huffman);
  gt_xfwrite_one(&leaf_count, fp);

  rval = gt_huffman_iterate(hcr_enc->seq_encoder->huffman,
                            hcr_huffman_write_base_qual_freq,
                            node_info);

  gt_free(node_info);
  return rval;
}
/* Runner of the condenser extract tool.

   Loads the original encseq named by <arguments->original>, then reads the
   archive given as argv[parsed_args]. If both ends of <arguments->range> are
   GT_UNDEF_ULONG, the complete original is extracted to stdout, otherwise
   only the given range. A newline is written to stdout after the extraction
   call, whatever its result.

   Returns -1 if either input could not be loaded, otherwise the result of
   the extraction call. */
static int gt_condenser_extract_runner(GT_UNUSED int argc,
                                       const char **argv,
                                       int parsed_args,
                                       void *tool_arguments,
                                       GtError *err)
{
  GtCondenserExtractArguments *arguments = tool_arguments;
  GtEncseqLoader *loader;
  GtEncseq *orig_encseq = NULL;
  GtNREncseq *nre = NULL;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  /* load original encseq; the loader is only needed for this one call */
  loader = gt_encseq_loader_new();
  orig_encseq = gt_encseq_loader_load(loader,
                                      gt_str_get(arguments->original),
                                      err);
  gt_encseq_loader_delete(loader);
  if (orig_encseq == NULL)
    had_err = -1;

  if (!had_err) {
    nre = gt_n_r_encseq_new_from_file(argv[parsed_args], orig_encseq, err);
    if (nre == NULL)
      had_err = -1;
  }

  /* TODO get sequences by sequence ids (arguments->seqrange): not yet
     implemented in n_r_encseq */

  if (!had_err) {
    GtNREncseqDecompressor *decompressor =
      gt_n_r_encseq_decompressor_new(nre);
    bool extract_everything = arguments->range.start == GT_UNDEF_ULONG &&
                              arguments->range.end == GT_UNDEF_ULONG;

    if (extract_everything) {
      had_err =
        gt_n_r_encseq_decompressor_extract_origin_complete(stdout,
                                                           decompressor,
                                                           true,
                                                           err);
    }
    else {
      had_err =
        gt_n_r_encseq_decompressor_extract_originrange(stdout,
                                                       decompressor,
                                                       &arguments->range,
                                                       false,
                                                       err);
    }
    /* TODO should better be in n_r_encseq.c? */
    gt_xfwrite_one("\n",stdout);
    gt_n_r_encseq_decompressor_delete(decompressor);
  }

  gt_n_r_encseq_delete(nre);
  gt_encseq_delete(orig_encseq);
  return had_err;
}