/*
 * Returns the current position of <fp> rounded up to the next multiple of
 * the page size reported by gt_pagesize(); a position that already is a
 * multiple of the page size is returned unchanged.
 *
 * NOTE(review): the result of ftell() is not checked for its error value
 * (-1) -- confirm that callers only pass streams on which ftell() succeeds.
 */
static inline GtWord decoder_calc_start_of_encoded_data(FILE *fp)
{
  const GtUword pagesize = gt_pagesize();
  const long current_pos = ftell(fp);

  /* already page-aligned: the encoded data starts right here */
  if ((current_pos % pagesize) == 0)
    return current_pos;

  /* otherwise skip ahead to the beginning of the following page */
  return (current_pos / pagesize + 1) * pagesize;
}
/*
 * Allocates a HcrHuffDataIterator covering the byte range
 * [<start_of_encoding>, <end_of_encoding>] of the file named by <filename>.
 *
 * The iterator starts out positioned at <start_of_encoding> with no data
 * attached (data == NULL). <start_of_encoding> is asserted to be a multiple
 * of the page size. The chunk size is HCR_PAGES_PER_CHUNK pages.
 *
 * Only the C string pointer obtained from gt_str_get(filename) is stored,
 * no copy is made here.
 * NOTE(review): presumably <filename> has to outlive the iterator -- confirm
 * against the callers.
 */
static HcrHuffDataIterator *decoder_init_data_iterator(
                                                   GtWord start_of_encoding,
                                                   GtWord end_of_encoding,
                                                   const GtStr *filename)
{
  HcrHuffDataIterator *data_iter = gt_malloc(sizeof (*data_iter));

  /* source file; nothing is attached yet */
  data_iter->path = gt_str_get(filename);
  data_iter->data = NULL;

  /* range to iterate over, current position at its beginning */
  data_iter->start = (size_t) start_of_encoding;
  data_iter->pos = data_iter->start;
  data_iter->end = (size_t) end_of_encoding;

  /* chunk geometry: a chunk is a fixed number of pages */
  data_iter->pages_per_chunk = HCR_PAGES_PER_CHUNK;
  data_iter->pagesize = gt_pagesize();
  gt_assert(data_iter->start % data_iter->pagesize == 0);
  data_iter->blocksize = data_iter->pages_per_chunk * data_iter->pagesize;
  gt_safe_assign(data_iter->bitseq_per_chunk,
                 (data_iter->blocksize / sizeof (GtBitsequence)));

  return data_iter;
}
/*
 * Allocates and initialises a new GtSfxmappedrange for the table called
 * <tablename>.
 *
 * <type> selects the unit size and how <numofentries> translates into a
 * number of units:
 *   - GtSfxGtBitsequence: units of sizeof (GtBitsequence), count derived via
 *                         GT_NUMOFINTSFORBITS(numofentries)
 *   - GtSfxuint32_t:      units of sizeof (uint32_t), one per entry
 *   - GtSfxunsignedlong:  units of sizeof (GtUword), one per entry
 * Any other value triggers gt_assert(false).
 *
 * <transformfunc> and <transformfunc_data> are stored as given. All pointer
 * members start as NULL, the range is not writable, and no index range is
 * defined yet.
 */
GtSfxmappedrange *gt_Sfxmappedrange_new(const char *tablename,
                                        GtUword numofentries,
                                        GtSfxmappedrangetype type,
                                        GtSfxmappedrangetransformfunc
                                          transformfunc,
                                        const void *transformfunc_data)
{
  GtSfxmappedrange *smr = gt_malloc(sizeof (*smr));

  /* nothing is allocated, mapped or backed by a file yet */
  smr->ptr = NULL;
  smr->entire = NULL;
  smr->usedptrptr = NULL;
  smr->filename = NULL;
  smr->writable = false;
  smr->pagesize = gt_pagesize();

  /* no index range has been selected so far */
  smr->indexrange_defined = false;
  smr->currentminindex = 0;
  smr->currentmaxindex = 0;

  /* values handed in by the caller */
  smr->transformfunc = transformfunc;
  smr->transformfunc_data = transformfunc_data;
  smr->type = type;
  smr->tablename = gt_str_new_cstr(tablename);

  /* unit size and number of units depend on the element type */
  if (type == GtSfxGtBitsequence)
  {
    smr->sizeofunit = sizeof (GtBitsequence);
    smr->numofunits = GT_NUMOFINTSFORBITS(numofentries);
  }
  else if (type == GtSfxuint32_t)
  {
    smr->sizeofunit = sizeof (uint32_t);
    smr->numofunits = (size_t) numofentries;
  }
  else if (type == GtSfxunsignedlong)
  {
    smr->sizeofunit = sizeof (GtUword);
    smr->numofunits = (size_t) numofentries;
  }
  else
  {
    gt_assert(false);
  }
  return smr;
}
GtHcrEncoder *gt_hcr_encoder_new(GtStrArray *files, GtAlphabet *alpha, bool descs, GtQualRange qrange, GtTimer *timer, GtError *err) { GtBaseQualDistr *bqd; GtHcrEncoder *hcr_enc; GtSeqIterator *seqit; GtStrArray *file; int had_err = 0, status; GtUword len1, len2, i, num_of_reads = 0; const GtUchar *seq, *qual; char *desc; gt_error_check(err); gt_assert(alpha && files); if (timer != NULL) gt_timer_show_progress(timer, "get <base,qual> distr", stdout); if (qrange.start != GT_UNDEF_UINT) if (qrange.start == qrange.end) { gt_error_set(err, "qrange.start must unequal qrange.end"); return NULL; } hcr_enc = gt_malloc(sizeof (GtHcrEncoder)); hcr_enc->files = files; hcr_enc->num_of_files = gt_str_array_size(files); hcr_enc->num_of_reads = 0; hcr_enc->page_sampling = false; hcr_enc->regular_sampling = false; hcr_enc->sampling_rate = 0; hcr_enc->pagesize = gt_pagesize(); if (descs) { hcr_enc->encdesc_encoder = gt_encdesc_encoder_new(); if (timer != NULL) gt_encdesc_encoder_set_timer(hcr_enc->encdesc_encoder, timer); } else hcr_enc->encdesc_encoder = NULL; hcr_enc->seq_encoder = gt_malloc(sizeof (GtHcrSeqEncoder)); hcr_enc->seq_encoder->alpha = alpha; hcr_enc->seq_encoder->sampling = NULL; hcr_enc->seq_encoder->fileinfos = gt_calloc((size_t) hcr_enc->num_of_files, sizeof (*(hcr_enc->seq_encoder->fileinfos))); hcr_enc->seq_encoder->qrange = qrange; bqd = hcr_base_qual_distr_new(alpha, qrange); /* check if reads in the same file are of same length and get <base, quality> pair distribution */ for (i = 0; i < hcr_enc->num_of_files; i++) { file = gt_str_array_new(); gt_str_array_add(file, gt_str_array_get_str(files, i)); seqit = gt_seq_iterator_fastq_new(file, err); if (!seqit) { gt_error_set(err, "cannot initialize GtSeqIteratorFastQ object"); had_err = -1; } if (!had_err) { gt_seq_iterator_set_symbolmap(seqit, gt_alphabet_symbolmap(alpha)); gt_seq_iterator_set_quality_buffer(seqit, &qual); status = gt_seq_iterator_next(seqit, &seq, &len1, &desc, err); if (status == 1) { 
num_of_reads = 1UL; while (!had_err) { status = gt_seq_iterator_next(seqit, &seq, &len2, &desc, err); if (status == -1) had_err = -1; if (status != 1) break; if (len2 != len1) { gt_error_set(err, "reads have to be of equal length"); had_err = -1; break; } if (hcr_base_qual_distr_add(bqd, qual, seq, len1) != 0) had_err = -1; len1 = len2; num_of_reads++; } } else if (status == -1) had_err = -1; if (!had_err) { if (i == 0) hcr_enc->seq_encoder->fileinfos[i].readnum = num_of_reads; else hcr_enc->seq_encoder->fileinfos[i].readnum = hcr_enc->seq_encoder->fileinfos[i - 1].readnum + num_of_reads; hcr_enc->seq_encoder->fileinfos[i].readlength = len1; } } hcr_enc->num_of_reads += num_of_reads; gt_str_array_delete(file); gt_seq_iterator_delete(seqit); } if (!had_err) hcr_base_qual_distr_trim(bqd); if (!had_err) { if (timer != NULL) gt_timer_show_progress(timer, "build huffman tree for sequences and" " qualities", stdout); hcr_enc->seq_encoder->huffman = gt_huffman_new(bqd, hcr_base_qual_distr_func, (GtUword) bqd->ncols * bqd->nrows); } if (!had_err) { hcr_enc->seq_encoder->qual_offset = bqd->qual_offset; hcr_base_qual_distr_delete(bqd); return hcr_enc; } return NULL; }