/* Look up <md5> in the MD5 table belonging to <bs> and return the result of
   gt_md5_tab_map() for it (judging by the function name, the index of the
   matching sequence).
   <bs> and <md5> must be non-NULL and the encoded sequence of <bs> must have
   MD5 support; all three conditions are asserted.
   The MD5 table is fetched from the encoded sequence on first use and then
   kept in <bs> for subsequent calls. */
GtUword gt_bioseq_md5_to_index(GtBioseq *bs, const char *md5)
{
  gt_assert(bs && md5 && gt_encseq_has_md5_support(bs->encseq));

  /* lazily obtain the table; no error object is passed to the getter */
  if (bs->md5_tab == NULL)
    bs->md5_tab = gt_encseq_get_md5_tab(bs->encseq, NULL);

  return gt_md5_tab_map(bs->md5_tab, md5);
}
/* Return the MD5 fingerprint stored for sequence number <idx> of <bs>.
   <bs> must be non-NULL and <idx> must be smaller than the number of
   sequences in <bs> (both asserted).
   The MD5 table is fetched from the encoded sequence on first use and then
   kept in <bs>. The table entry for <idx> is asserted to be non-NULL before
   it is returned. */
const char* gt_bioseq_get_md5_fingerprint(GtBioseq *bs, GtUword idx)
{
  gt_assert(bs && idx < gt_bioseq_number_of_sequences(bs));

  /* lazily obtain the table; no error object is passed to the getter */
  if (bs->md5_tab == NULL)
    bs->md5_tab = gt_encseq_get_md5_tab(bs->encseq, NULL);

  gt_assert(gt_md5_tab_get(bs->md5_tab, idx));
  return gt_md5_tab_get(bs->md5_tab, idx);
}
/* Create a new sequence collection backed by <encseq>.
   <encseq> must be non-NULL (asserted). The collection stores a new reference
   to <encseq> (via gt_encseq_ref()) together with its MD5 table.
   Returns NULL if <encseq> has no MD5 support (in which case <err> is set
   here) or if the MD5 table cannot be obtained (in which case <err> is
   expected to have been set by gt_encseq_get_md5_tab() -- NOTE(review):
   confirm against that function's contract). */
GtSeqCol* gt_encseq_col_new(GtEncseq *encseq, GtError *err)
{
  GtSeqCol *sc;
  GtEncseqCol *esc;
  GtMD5Tab *md5_tab;
  gt_error_check(err);
  gt_assert(encseq);

  if (!gt_encseq_has_md5_support(encseq)) {
    gt_error_set(err, "encoded sequence has no MD5 support");
    return NULL;
  }

  /* Fetch the MD5 table before allocating the collection, so that a failure
     can be reported through <err> without leaving a half-initialised object
     behind (previously this was only guarded by an assertion). */
  md5_tab = gt_encseq_get_md5_tab(encseq, err);
  if (!md5_tab)
    return NULL;

  sc = gt_seq_col_create(gt_encseq_col_class());
  esc = gt_encseq_col_cast(sc);
  esc->md5_tab = md5_tab;
  esc->encseq = gt_encseq_ref(encseq);
  return sc;
}
/* Return the MD5 fingerprint of the sequence which <seqid> refers to in the
   region mapping <rm>, or NULL if it could not be determined.

   <rm> and <seqid> must be non-NULL (asserted). Raw-sequence mappings and
   sequence IDs carrying an MD5 prefix are not implemented (asserted).

   How the sequence is located depends on the mode of <rm>:
   - usedesc:   <seqid> and <range> are resolved through the seqid-to-seqnum
                mapping, which also stores a value in <*offset>.
   - matchdesc: the sequence collection is created on demand (from the encoded
                sequence if there is one, otherwise from the sequence files)
                and its descriptions are searched for <seqid>.
   - useseqno:  <seqid> must have the form "seqX"; X is used as the sequence
                number in the encoded sequence.
   - otherwise: the fingerprint of sequence 0 in file 0 of the sequence
                collection is returned.
   In every mode except usedesc, <*offset> is set to 1. If updating the
   sequence collection fails at the very beginning, <*offset> is left
   untouched and NULL is returned.

   Errors raised in this function or propagated by callees are reported in
   <err>. */
const char* gt_region_mapping_get_md5_fingerprint(GtRegionMapping *rm,
                                                  GtStr *seqid,
                                                  const GtRange *range,
                                                  unsigned long *offset,
                                                  GtError *err)
{
  const char *md5 = NULL;
  int had_err;
  unsigned long filenum, seqnum;
  gt_error_check(err);
  gt_assert(rm && seqid);
  gt_assert(!rm->userawseq); /* not implemented */
  gt_assert(!gt_md5_seqid_has_prefix(gt_str_get(seqid))); /* not implemented */

  had_err = update_seq_col_if_necessary(rm, seqid, err);
  if (!had_err) {
    if (rm->usedesc) {
      gt_assert(rm->seqid2seqnum_mapping);
      had_err = gt_seqid2seqnum_mapping_map(rm->seqid2seqnum_mapping,
                                            gt_str_get(seqid), range, &seqnum,
                                            &filenum, offset, err);
      if (!had_err)
        md5 = gt_seq_col_get_md5_fingerprint(rm->seq_col, filenum, seqnum);
    }
    else if (rm->matchdesc) {
      /* create the sequence collection on first use */
      if (!rm->seq_col) {
        if (rm->encseq) {
          if (!(rm->seq_col = gt_encseq_col_new(rm->encseq, err)))
            had_err = -1;
        }
        else {
          if (!(rm->seq_col = gt_bioseq_col_new(rm->sequence_filenames, err)))
            had_err = -1;
        }
      }
      /* the return code is deliberately ignored; <md5> stays NULL unless the
         callee stores a result in it */
      if (!had_err)
        (void) gt_seq_col_grep_desc_md5(rm->seq_col, &md5, seqid, err);
      *offset = 1;
    }
    else if (rm->useseqno) {
      GtMD5Tab *tab = NULL;
      unsigned long seqno = GT_UNDEF_ULONG;
      gt_assert(rm->encseq);
      if (1 != sscanf(gt_str_get(seqid), "seq%lu", &seqno)) {
        gt_error_set(err, "seqid '%s' does not have the form 'seqX' "
                          "where X is a sequence number in the encoded "
                          "sequence", gt_str_get(seqid));
        had_err = -1;
      }
      gt_assert(had_err || seqno != GT_UNDEF_ULONG);
      if (!had_err && seqno >= gt_encseq_num_of_sequences(rm->encseq)) {
        /* note the space after "encoded": the adjacent literals are
           concatenated verbatim by the compiler */
        gt_error_set(err, "trying to access sequence %lu, but encoded "
                          "sequence contains only %lu sequences",
                          seqno, gt_encseq_num_of_sequences(rm->encseq));
        had_err = -1;
      }
      if (!had_err) {
        tab = gt_encseq_get_md5_tab(rm->encseq, err);
        if (!tab)
          had_err = -1;
      }
      *offset = 1;
      /* <md5> is still NULL here, so on error NULL is returned below */
      if (!had_err)
        md5 = gt_md5_tab_get(tab, seqno);
    }
    else {
      if (!had_err)
        md5 = gt_seq_col_get_md5_fingerprint(rm->seq_col, 0, 0);
      *offset = 1;
    }
  }
  return md5;
}