示例#1
0
文件: hcr.c 项目: mader/genometools
/*
 * Decode read number <readnum>: its bases go to <seq>, its qualities to
 * <qual> and, if <hcr_dec> carries a description decoder (encdesc), its
 * description to <desc>.
 *
 * The sequence decoder keeps a cursor (seq_dec->cur_read). If the cursor is
 * already at <readnum> the read is decoded directly. Otherwise the decoder is
 * positioned first, either by continuing from the cursor or by resetting the
 * data iterator (to a sample position if sampling is available, else to the
 * start), and the reads in between are decoded and discarded.
 *
 * <hcr_dec>, <seq> and <qual> must not be NULL and <readnum> must be smaller
 * than the number of reads (all asserted).
 *
 * Returns 0 on success and -1 on failure.
 */
int gt_hcr_decoder_decode(GtHcrDecoder *hcr_dec, GtUword readnum,
                          char *seq, char *qual, GtStr *desc, GtError *err)
{
    GtUword nearestsample = 0,
            reads_to_read = 0,
            idx,
            current_read;
    size_t startofnearestsample = 0;
    GtSampling *sampling;
    HcrHuffDataIterator *data_iter;
    GtHuffmanDecoder *huff_dec;

    gt_error_check(err);
    gt_assert(hcr_dec);
    gt_assert(readnum < hcr_dec->seq_dec->num_of_reads);
    gt_assert(seq != NULL && qual != NULL);

    /* read the cursor only after hcr_dec has been validated; initialising it
       in the declaration dereferenced hcr_dec before the assertion above */
    current_read = hcr_dec->seq_dec->cur_read;

    if (current_read == readnum) {
        /* cursor already points at the requested read */
        if (hcr_next_seq_qual(hcr_dec->seq_dec, seq, qual, err) == -1) {
            gt_assert(gt_error_is_set(err));
            return -1;
        }
    }
    else {
        sampling = hcr_dec->seq_dec->sampling;
        data_iter = hcr_dec->seq_dec->data_iter;
        huff_dec = hcr_dec->seq_dec->huff_dec;

        if (sampling != NULL) {
            /* NOTE(review): presumably yields the sample at or before readnum
               (readnum - nearestsample below relies on that) -- confirm */
            gt_sampling_get_page(sampling,
                                 readnum,
                                 &nearestsample,
                                 &startofnearestsample);
            /* nearestsample <= cur_read < readnum: the decoder is already
               inside the right sample, just keep reading forward */
            if (nearestsample <= current_read && current_read <= readnum)
                reads_to_read = readnum - current_read;
            else { /* reset decoder to new sample */
                reset_data_iterator_to_pos(data_iter, startofnearestsample);
                (void) gt_huffman_decoder_get_new_mem_chunk(huff_dec, err);
                if (gt_error_is_set(err))
                    return -1;
                reads_to_read = readnum - nearestsample;
                hcr_dec->seq_dec->cur_read = nearestsample;
            }
            gt_log_log("reads to read: "GT_WU", nearest sample: "GT_WU"",
                       reads_to_read,nearestsample);
            gt_log_log("start of nearest: "GT_WU"", (GtUword) startofnearestsample);
        }
        else {
            /* no sampling: either read forward from the cursor or restart
               from the very beginning of the data */
            if (current_read <= readnum)
                reads_to_read = readnum - current_read;
            else {
                reset_data_iterator_to_start(data_iter);
                (void) gt_huffman_decoder_get_new_mem_chunk(huff_dec, err);
                if (gt_error_is_set(err))
                    return -1;
                reads_to_read = readnum;
                hcr_dec->seq_dec->cur_read = 0;
            }
        }

        /* decode and discard the reads preceding the requested one; seq and
           qual are overwritten on every iteration */
        for (idx = 0; idx < reads_to_read; idx++) {
            if (hcr_next_seq_qual(hcr_dec->seq_dec, seq,qual, err) == -1) {
                gt_assert(gt_error_is_set(err));
                return -1;
            }
            gt_log_log("seq:\n%s\nqual:\n%s", seq, qual);
        }

        /* this call finally decodes the requested read */
        if (hcr_next_seq_qual(hcr_dec->seq_dec, seq, qual, err) == -1) {
            gt_assert(gt_error_is_set(err));
            return -1;
        }
    }

    /* descriptions are optional and only decoded if a decoder is attached */
    if (hcr_dec->encdesc != NULL) {
        if (gt_encdesc_decode(hcr_dec->encdesc, readnum, desc, err) == -1) {
            gt_error_set(err, "cannot retrieve description with number "GT_WU"."
                         "(%d)", readnum, __LINE__);
            return -1;
        }
    }
    return 0;
}
示例#2
0
文件: hcr.c 项目: mader/genometools
/*
 * Rewind <data_iter> to its recorded start position by delegating to
 * reset_data_iterator_to_pos() with data_iter->start.
 * <data_iter> must not be NULL (asserted).
 */
static void reset_data_iterator_to_start(HcrHuffDataIterator *data_iter)
{
    gt_assert(data_iter);
    reset_data_iterator_to_pos(data_iter, data_iter->start);
}
示例#3
0
文件: hcr.c 项目: mader/genometools
/*
 * Decode the read at seq_dec->cur_read and advance the cursor by one.
 *
 * The decoded symbols are split into a base and a quality value each; the
 * bases (upper-cased) are written to <seq> and the qualities to <qual>, both
 * '\0'-terminated. Either buffer may be NULL, in which case that part is
 * skipped.
 *
 * Returns 1 on success, -1 on error and 0 if the cursor is already beyond
 * the number of reads (nothing is decoded then).
 */
static int hcr_next_seq_qual(GtHcrSeqDecoder *seq_dec, char *seq, char *qual,
                             GtError *err)
{
    enum state {
        HCR_ERROR = -1,
        END,
        SUCCESS
    };
    unsigned char base;
    GtUword i,
            num_symbols,
            nearestsample,
            *symbol;
    size_t startofnearestsample = 0;
    enum state status = END;
    FastqFileInfo cur_read;
    FastqFileInfo *fileinfo = NULL;

    /* NOTE(review): '<=' also admits cur_read == num_of_reads, while the
       public decode entry point only accepts readnum < num_of_reads --
       confirm whether '<' is intended here */
    if (seq_dec->cur_read <= seq_dec->num_of_reads) {
        status = SUCCESS;
        /* symbol buffer is created on first use and recycled afterwards */
        if (seq_dec->symbols == NULL)
            seq_dec->symbols = gt_array_new(sizeof (GtUword));
        else
            gt_array_reset(seq_dec->symbols);

        /* look up the file info entry for the current read; its readlength
           tells the huffman decoder how many symbols to decode */
        cur_read.readnum = seq_dec->cur_read;
        gt_log_log("cur_read: "GT_WU"",seq_dec->cur_read);
        fileinfo = (FastqFileInfo *)gt_rbtree_next_key(seq_dec->file_info_rbt,
                   &cur_read,
                   hcr_cmp_FastqFileInfo,
                   NULL);
        gt_assert(fileinfo);

        /* reset huffman_decoder if next read is sampled */
        /* NOTE(review): seq_dec->sampling is passed on unchecked -- confirm
           that it cannot be NULL here or that the callee tolerates NULL */
        if (gt_sampling_get_next_elementnum(seq_dec->sampling) ==
                seq_dec->cur_read) {
            gt_log_log("reset because sampled read is next");
            (void) gt_sampling_get_next_sample(seq_dec->sampling,
                                               &nearestsample,
                                               &startofnearestsample);
            reset_data_iterator_to_pos(seq_dec->data_iter, startofnearestsample);
            (void) gt_huffman_decoder_get_new_mem_chunk(seq_dec->huff_dec, err);
            if (gt_error_is_set(err))
                status = HCR_ERROR;
        }
        if (status != HCR_ERROR) {
            int ret;
            ret =  gt_huffman_decoder_next(seq_dec->huff_dec, seq_dec->symbols,
                                           fileinfo->readlength, err);
            /* anything but 1 is treated as failure; for 0 the error message
               is supplied here (presumably the decoder sets it otherwise) */
            if (ret != 1)
                status = HCR_ERROR;
            if (ret == 0)
                gt_error_set(err, "reached end of file");
        }
        /* NOTE(review): the strings are written and the cursor is advanced
           even if status is HCR_ERROR -- kept as is, confirm it is intended */
        if (qual || seq) {
            gt_log_log("set strings");
            /* the symbol array is not modified below: query its size once */
            num_symbols = gt_array_size(seq_dec->symbols);
            for (i = 0; i < num_symbols; i++) {
                symbol = (GtUword*) gt_array_get(seq_dec->symbols, i);
                if (qual != NULL)
                    qual[i] = get_qual_from_symbol(seq_dec, *symbol);
                if (seq != NULL) {
                    base = get_base_from_symbol(seq_dec, *symbol);
                    /* toupper() requires an argument representable as
                       unsigned char; a negative plain char would be
                       undefined behaviour */
                    seq[i] = (char)toupper((unsigned char)
                                           gt_alphabet_decode(seq_dec->alpha,
                                                              (GtUchar) base));
                }
            }
            if (qual != NULL)
                qual[num_symbols] = '\0';
            if (seq != NULL)
                seq[num_symbols] = '\0';
        }
        seq_dec->cur_read++;
    }
    return (int) status;
}
示例#4
0
/*
 * Decode read number <readnum>: its bases go to <seq>, its qualities to
 * <qual> and, if <hcr_dec> carries a description decoder (encdesc), its
 * description to <desc>.
 *
 * The sequence decoder keeps a cursor (seq_dec->cur_read). If the cursor is
 * already at <readnum> the read is decoded directly. Otherwise the decoder is
 * positioned first, either by continuing from the cursor or by resetting the
 * data iterator (to a sample position if sampling is available, else to the
 * start), and the reads in between are decoded and discarded.
 *
 * <hcr_dec>, <seq> and <qual> must not be NULL and <readnum> must be smaller
 * than the number of reads (all asserted).
 *
 * Returns 0 on success, otherwise the non-zero error code of the failing
 * step (-1 for failures of the sequence decoding calls).
 */
int gt_hcr_decoder_decode(GtHcrDecoder *hcr_dec, GtUword readnum,
                          char *seq, char *qual, GtStr *desc, GtError *err)
{
  int had_err = 0;
  GtUword nearestsample = 0,
                reads_to_read = 0,
                idx,
                current_read;
  size_t startofnearestsample = 0;
  GtSampling *sampling;
  HcrHuffDataIterator *data_iter;
  GtHuffmanDecoder *huff_dec;

  gt_error_check(err);
  gt_assert(hcr_dec);
  gt_assert(readnum < hcr_dec->seq_dec->num_of_reads);
  gt_assert(seq != NULL && qual != NULL);

  /* read the cursor only after hcr_dec has been validated; initialising it
     in the declaration dereferenced hcr_dec before the assertion above */
  current_read = hcr_dec->seq_dec->cur_read;

  if (current_read == readnum)
    /* cursor already points at the requested read */
    had_err = hcr_next_seq_qual(hcr_dec->seq_dec, seq, qual, err) == -1 ?
      -1 : 0;
  else {
    sampling = hcr_dec->seq_dec->sampling;
    data_iter = hcr_dec->seq_dec->data_iter;
    huff_dec = hcr_dec->seq_dec->huff_dec;

    if (sampling != NULL) {
      /* NOTE(review): presumably yields the sample at or before readnum
         (readnum - nearestsample below relies on that) -- confirm */
      gt_sampling_get_page(sampling,
                           readnum,
                           &nearestsample,
                           &startofnearestsample);
      /* nearestsample <= cur_read < readnum: the decoder is already inside
         the right sample, just keep reading forward */
      if (nearestsample <= current_read && current_read <= readnum)
        reads_to_read = readnum - current_read;
      else { /* reset decoder to new sample */
        reset_data_iterator_to_pos(data_iter, startofnearestsample);
        had_err = gt_huffman_decoder_get_new_mem_chunk(huff_dec, err);
        reads_to_read = readnum - nearestsample;
        hcr_dec->seq_dec->cur_read = nearestsample;
      }
      gt_log_log("reads to read: "GT_WU", nearest sample: "GT_WU"",
                 reads_to_read,nearestsample);
      gt_log_log("start of nearest: "GT_WU"", (GtUword) startofnearestsample);
    }
    else {
      /* no sampling: either read forward from the cursor or restart from the
         very beginning of the data */
      if (current_read <= readnum)
        reads_to_read = readnum - current_read;
      else {
        reset_data_iterator_to_start(data_iter);
        had_err = gt_huffman_decoder_get_new_mem_chunk(huff_dec, err);
        reads_to_read = readnum;
        hcr_dec->seq_dec->cur_read = 0;
      }
    }

    /* decode and discard the reads preceding the requested one; seq and qual
       are overwritten on every iteration */
    for (idx = 0; !had_err && idx < reads_to_read; idx++)
      had_err = hcr_next_seq_qual(hcr_dec->seq_dec, seq,qual, err) == -1 ?
        -1 : 0;

    /* this call finally decodes the requested read */
    if (!had_err)
      had_err = hcr_next_seq_qual(hcr_dec->seq_dec, seq, qual, err) == -1 ?
        -1 : 0;
  }
  if (had_err)
    gt_assert(gt_error_is_set(err));

  /* descriptions are optional and only decoded if a decoder is attached */
  if (!had_err && hcr_dec->encdesc != NULL)
    had_err = gt_encdesc_decode(hcr_dec->encdesc, readnum, desc, err);
  if (had_err)
    gt_assert(gt_error_is_set(err));

  return had_err;
}