/* Decode the read with number <readnum> into <seq> and <qual> and, if
   <hcr_dec> has a description decoder, decode its description into <desc>.

   If the sequence decoder is already positioned at <readnum>, the read is
   decoded directly. Otherwise the decoder either continues from its current
   position or is reset (to the sample page reported for <readnum> when a
   sampling is available, else to the start of the data), and all reads up to
   <readnum> are decoded one after another; <seq> and <qual> are used as
   scratch buffers for the skipped reads.

   <seq> and <qual> must not be NULL and <readnum> must be smaller than the
   number of reads. Returns 0 on success and -1 on failure, in which case
   <err> is set. */
int gt_hcr_decoder_decode(GtHcrDecoder *hcr_dec, GtUword readnum, char *seq,
                          char *qual, GtStr *desc, GtError *err)
{
  GtUword nearestsample = 0,
          reads_to_read = 0,
          idx,
          current_read;
  size_t startofnearestsample = 0;
  GtSampling *sampling;
  HcrHuffDataIterator *data_iter;
  GtHuffmanDecoder *huff_dec;

  gt_error_check(err);
  gt_assert(hcr_dec);
  /* Only touch the decoder state after <hcr_dec> itself has been checked;
     dereferencing it in the declaration would make the assertion useless. */
  current_read = hcr_dec->seq_dec->cur_read;
  gt_assert(readnum < hcr_dec->seq_dec->num_of_reads);
  gt_assert(seq != NULL && qual != NULL);

  if (current_read == readnum) {
    /* decoder already sits on the requested read */
    if (hcr_next_seq_qual(hcr_dec->seq_dec, seq, qual, err) == -1) {
      gt_assert(gt_error_is_set(err));
      return -1;
    }
  }
  else {
    sampling = hcr_dec->seq_dec->sampling;
    data_iter = hcr_dec->seq_dec->data_iter;
    huff_dec = hcr_dec->seq_dec->huff_dec;

    if (sampling != NULL) {
      gt_sampling_get_page(sampling, readnum, &nearestsample,
                           &startofnearestsample);
      /* nearestsample <= cur_read < readnum: current sample is the right one */
      if (nearestsample <= current_read && current_read <= readnum)
        reads_to_read = readnum - current_read;
      else { /* reset decoder to new sample */
        reset_data_iterator_to_pos(data_iter, startofnearestsample);
        (void) gt_huffman_decoder_get_new_mem_chunk(huff_dec, err);
        if (gt_error_is_set(err))
          return -1;
        reads_to_read = readnum - nearestsample;
        hcr_dec->seq_dec->cur_read = nearestsample;
      }
      gt_log_log("reads to read: "GT_WU", nearest sample: "GT_WU"",
                 reads_to_read,nearestsample);
      gt_log_log("start of nearest: "GT_WU"",
                 (GtUword) startofnearestsample);
    }
    else {
      /* no sampling: either read forward or restart from the beginning */
      if (current_read <= readnum)
        reads_to_read = readnum - current_read;
      else {
        reset_data_iterator_to_start(data_iter);
        (void) gt_huffman_decoder_get_new_mem_chunk(huff_dec, err);
        if (gt_error_is_set(err))
          return -1;
        reads_to_read = readnum;
        hcr_dec->seq_dec->cur_read = 0;
      }
    }

    /* skip over the reads preceding the requested one */
    for (idx = 0; idx < reads_to_read; idx++) {
      if (hcr_next_seq_qual(hcr_dec->seq_dec, seq,qual, err) == -1) {
        gt_assert(gt_error_is_set(err));
        return -1;
      }
      gt_log_log("seq:\n%s\nqual:\n%s", seq, qual);
    }

    /* decode the requested read itself */
    if (hcr_next_seq_qual(hcr_dec->seq_dec, seq, qual, err) == -1) {
      gt_assert(gt_error_is_set(err));
      return -1;
    }
  }

  if (hcr_dec->encdesc != NULL) {
    if (gt_encdesc_decode(hcr_dec->encdesc, readnum, desc, err) == -1) {
      gt_error_set(err, "cannot retrieve description with number "GT_WU"."
                   "(%d)", readnum, __LINE__);
      return -1;
    }
  }
  return 0;
}
/* Rewind <data_iter> by resetting it to the start position stored in the
   iterator itself (<data_iter>->start). <data_iter> must not be NULL. */
static void reset_data_iterator_to_start(HcrHuffDataIterator *data_iter)
{
  gt_assert(data_iter);
  reset_data_iterator_to_pos(data_iter, data_iter->start);
}
/* Decode the read at position <seq_dec>->cur_read and advance the position
   by one.

   The decoded symbols are collected in <seq_dec>->symbols. For every symbol
   the quality character is written to <qual> and the upper-cased base
   character to <seq>; either buffer may be NULL, in which case it is skipped.
   Buffers that are given get '\0'-terminated after the last symbol.

   Returns 1 if a read was decoded, 0 if <seq_dec>->cur_read is already
   greater than the number of reads (nothing is done then), and -1 on error.
   Note that on error the output buffers are still filled from whatever is in
   the symbol array and the read position is still advanced. */
static int hcr_next_seq_qual(GtHcrSeqDecoder *seq_dec, char *seq, char *qual,
                             GtError *err)
{
  enum state {
    HCR_ERROR = -1,
    END,
    SUCCESS
  };
  enum state status = SUCCESS;
  unsigned char code;
  GtUword pos,
          num_symbols,
          sample_elem,
          *sym;
  size_t sample_offset = 0;
  FastqFileInfo key;
  FastqFileInfo *fileinfo = NULL;
  int rval;

  /* NOTE(review): the bound is inclusive, i.e. cur_read == num_of_reads is
     still decoded -- confirm that this is intended. */
  if (seq_dec->cur_read > seq_dec->num_of_reads)
    return (int) END;

  /* start with an empty symbol buffer, creating it on first use */
  if (seq_dec->symbols != NULL)
    gt_array_reset(seq_dec->symbols);
  else
    seq_dec->symbols = gt_array_new(sizeof (GtUword));

  /* look up the file info entry that supplies the length of this read */
  key.readnum = seq_dec->cur_read;
  gt_log_log("cur_read: "GT_WU"",seq_dec->cur_read);
  fileinfo = (FastqFileInfo *)gt_rbtree_next_key(seq_dec->file_info_rbt,
                                                 &key,
                                                 hcr_cmp_FastqFileInfo,
                                                 NULL);
  gt_assert(fileinfo);

  /* reset huffman_decoder if next read is sampled */
  if (gt_sampling_get_next_elementnum(seq_dec->sampling) ==
      seq_dec->cur_read) {
    gt_log_log("reset because sampled read is next");
    (void) gt_sampling_get_next_sample(seq_dec->sampling, &sample_elem,
                                       &sample_offset);
    reset_data_iterator_to_pos(seq_dec->data_iter, sample_offset);
    (void) gt_huffman_decoder_get_new_mem_chunk(seq_dec->huff_dec, err);
    if (gt_error_is_set(err))
      status = HCR_ERROR;
  }

  if (status == SUCCESS) {
    rval = gt_huffman_decoder_next(seq_dec->huff_dec, seq_dec->symbols,
                                   fileinfo->readlength, err);
    if (rval == 0) {
      /* the decoder ran out of data before the read was complete */
      gt_error_set(err, "reached end of file");
      status = HCR_ERROR;
    }
    else if (rval != 1)
      status = HCR_ERROR;
  }

  if (seq != NULL || qual != NULL) {
    gt_log_log("set strings");
    num_symbols = gt_array_size(seq_dec->symbols);
    for (pos = 0; pos < num_symbols; pos++) {
      sym = (GtUword*) gt_array_get(seq_dec->symbols, pos);
      if (qual != NULL)
        qual[pos] = get_qual_from_symbol(seq_dec, *sym);
      if (seq != NULL) {
        code = get_base_from_symbol(seq_dec, *sym);
        seq[pos] = (char)toupper(gt_alphabet_decode(seq_dec->alpha,
                                                    (GtUchar) code));
      }
    }
    if (qual != NULL)
      qual[num_symbols] = '\0';
    if (seq != NULL)
      seq[num_symbols] = '\0';
  }

  seq_dec->cur_read++;
  return (int) status;
}
/* Decode the read with number <readnum> into <seq> and <qual> and, if
   <hcr_dec> has a description decoder, decode its description into <desc>.

   If the sequence decoder is already positioned at <readnum>, the read is
   decoded directly. Otherwise the decoder either continues from its current
   position or is reset (to the sample page reported for <readnum> when a
   sampling is available, else to the start of the data), and all reads up to
   <readnum> are decoded one after another; <seq> and <qual> are used as
   scratch buffers for the skipped reads.

   <seq> and <qual> must not be NULL and <readnum> must be smaller than the
   number of reads. Returns 0 on success and a non-zero value on failure, in
   which case <err> is expected to be set (this is asserted). */
int gt_hcr_decoder_decode(GtHcrDecoder *hcr_dec, GtUword readnum, char *seq,
                          char *qual, GtStr *desc, GtError *err)
{
  int had_err = 0;
  GtUword nearestsample = 0,
          reads_to_read = 0,
          idx,
          current_read;
  size_t startofnearestsample = 0;
  GtSampling *sampling;
  HcrHuffDataIterator *data_iter;
  GtHuffmanDecoder *huff_dec;

  gt_error_check(err);
  gt_assert(hcr_dec);
  /* Only touch the decoder state after <hcr_dec> itself has been checked;
     dereferencing it in the declaration would make the assertion useless. */
  current_read = hcr_dec->seq_dec->cur_read;
  gt_assert(readnum < hcr_dec->seq_dec->num_of_reads);
  gt_assert(seq != NULL && qual != NULL);

  if (current_read == readnum)
    /* decoder already sits on the requested read */
    had_err = hcr_next_seq_qual(hcr_dec->seq_dec, seq, qual, err) == -1 ?
              -1 : 0;
  else {
    sampling = hcr_dec->seq_dec->sampling;
    data_iter = hcr_dec->seq_dec->data_iter;
    huff_dec = hcr_dec->seq_dec->huff_dec;

    if (sampling != NULL) {
      gt_sampling_get_page(sampling, readnum, &nearestsample,
                           &startofnearestsample);
      /* nearestsample <= cur_read < readnum: current sample is the right one */
      if (nearestsample <= current_read && current_read <= readnum)
        reads_to_read = readnum - current_read;
      else { /* reset decoder to new sample */
        reset_data_iterator_to_pos(data_iter, startofnearestsample);
        had_err = gt_huffman_decoder_get_new_mem_chunk(huff_dec, err);
        reads_to_read = readnum - nearestsample;
        hcr_dec->seq_dec->cur_read = nearestsample;
      }
      gt_log_log("reads to read: "GT_WU", nearest sample: "GT_WU"",
                 reads_to_read,nearestsample);
      gt_log_log("start of nearest: "GT_WU"",
                 (GtUword) startofnearestsample);
    }
    else {
      /* no sampling: either read forward or restart from the beginning */
      if (current_read <= readnum)
        reads_to_read = readnum - current_read;
      else {
        reset_data_iterator_to_start(data_iter);
        had_err = gt_huffman_decoder_get_new_mem_chunk(huff_dec, err);
        reads_to_read = readnum;
        hcr_dec->seq_dec->cur_read = 0;
      }
    }

    /* skip over the reads preceding the requested one */
    for (idx = 0; !had_err && idx < reads_to_read; idx++)
      had_err = hcr_next_seq_qual(hcr_dec->seq_dec, seq,qual, err) == -1 ?
                -1 : 0;

    /* decode the requested read itself */
    if (!had_err)
      had_err = hcr_next_seq_qual(hcr_dec->seq_dec, seq, qual, err) == -1 ?
                -1 : 0;
  }
  if (had_err)
    gt_assert(gt_error_is_set(err));

  if (!had_err && hcr_dec->encdesc != NULL)
    had_err = gt_encdesc_decode(hcr_dec->encdesc, readnum, desc, err);
  if (had_err)
    gt_assert(gt_error_is_set(err));
  return had_err;
}