/*
 * Feed the description of every sequence found in either <bioseq> or <seqcol>
 * to handle_description(), together with its sequence and file number.
 *
 * Exactly one of <bioseq> and <seqcol> must be given (asserted). A bioseq is
 * treated as a single file (file number 0); for a sequence collection every
 * file and every sequence within it is visited.
 *
 * Returns 0 if all descriptions were handled, otherwise the first non-zero
 * value returned by handle_description(); processing stops at that point.
 */
static int fill_mapping(GtSeqid2SeqnumMapping *mapping, GtBioseq *bioseq,
                        GtSeqCol *seqcol, GT_UNUSED GtError *err)
{
  GtUword filenum, num_files;

  gt_error_check(err);
  gt_assert(mapping && (bioseq || seqcol) && !(bioseq && seqcol));

  if (!bioseq) {
    gt_assert(seqcol);
    num_files = gt_seq_col_num_of_files(seqcol);
  }
  else {
    num_files = 1;
  }

  for (filenum = 0; filenum < num_files; filenum++) {
    GtUword seqnum, num_seqs;

    if (!bioseq) {
      gt_assert(seqcol);
      num_seqs = gt_seq_col_num_of_seqs(seqcol, filenum);
    }
    else {
      num_seqs = gt_bioseq_number_of_sequences(bioseq);
    }

    for (seqnum = 0; seqnum < num_seqs; seqnum++) {
      char *description;
      int had_err;

      /* The bioseq description is duplicated so that both sources hand us a
         string which is released with gt_free() below. */
      if (!bioseq)
        description = gt_seq_col_get_description(seqcol, filenum, seqnum);
      else
        description = gt_cstr_dup(gt_bioseq_get_description(bioseq, seqnum));

      had_err = handle_description(mapping, description, seqnum, filenum, err);
      gt_free(description);
      if (had_err)
        return had_err; /* stop at the first failing description */
    }
  }

  return 0;
}
/*
 * Append the description of the sequence identified by <seqid> to <desc>.
 *
 * The lookup strategy depends on how <rm> is configured:
 *  - userawseq: the fixed string "<rawseq>" is appended and 0 is returned.
 *  - otherwise the sequence collection is refreshed through
 *    update_seq_col_if_necessary() and then:
 *      - a <seqid> carrying the MD5 prefix is resolved via
 *        gt_seq_col_md5_to_description();
 *      - usedesc:   <seqid> is mapped to a (file, sequence) number pair via
 *                   the seqid2seqnum mapping and that description is used;
 *      - useseqno:  <seqid> must look like "seqX"; X is used as the sequence
 *                   number in the encoded sequence;
 *      - matchdesc: the MD5 of the matching description is looked up first
 *                   and then resolved like an MD5 seqid;
 *      - default:   the description of sequence 0 in file 0 is used.
 *
 * Returns 0 on success. On failure a non-zero value is returned: -1 (with
 * <err> set here) for a malformed or out-of-range "seqX" id, otherwise the
 * error code of the failing helper is passed through.
 */
int gt_region_mapping_get_description(GtRegionMapping *rm, GtStr *desc,
                                      GtStr *seqid, GtError *err)
{
  int had_err = 0;
  gt_error_check(err);
  gt_assert(rm && desc && seqid);

  if (rm->userawseq) {
    gt_str_append_cstr(desc, "<rawseq>");
    return 0;
  }

  had_err = update_seq_col_if_necessary(rm, seqid, err);
  if (had_err)
    return had_err;

  /* Only MD5 seqids are finished here. Returning unconditionally at this
     point (as the code did before) made every branch below unreachable and
     left <desc> untouched for all non-MD5 seqids. */
  if (gt_md5_seqid_has_prefix(gt_str_get(seqid)))
    return gt_seq_col_md5_to_description(rm->seq_col, desc, seqid, err);

  if (rm->usedesc) {
    unsigned long filenum, seqnum;
    gt_assert(rm->seqid2seqnum_mapping);
    had_err = gt_seqid2seqnum_mapping_map(rm->seqid2seqnum_mapping,
                                          gt_str_get(seqid), NULL, &seqnum,
                                          &filenum, NULL, err);
    if (!had_err) {
      char *cdesc;
      cdesc = gt_seq_col_get_description(rm->seq_col, filenum, seqnum);
      gt_assert(cdesc);
      gt_str_append_cstr(desc, cdesc);
      gt_free(cdesc);
    }
  }
  else if (rm->useseqno) {
    unsigned long seqno = GT_UNDEF_ULONG;
    gt_assert(rm->encseq);
    if (1 != sscanf(gt_str_get(seqid), "seq%lu", &seqno)) {
      gt_error_set(err, "seqid '%s' does not have the form 'seqX' "
                        "where X is a sequence number in the encoded "
                        "sequence", gt_str_get(seqid));
      had_err = -1;
    }
    gt_assert(had_err || seqno != GT_UNDEF_ULONG);
    if (!had_err && seqno >= gt_encseq_num_of_sequences(rm->encseq)) {
      /* note the space after "encoded": the adjacent literals used to
         concatenate to "encodedsequence" */
      gt_error_set(err, "trying to access sequence %lu, but encoded "
                        "sequence contains only %lu sequences",
                        seqno, gt_encseq_num_of_sequences(rm->encseq));
      had_err = -1;
    }
    if (!had_err) {
      unsigned long desclen;
      const char *edesc;
      edesc = gt_encseq_description(rm->encseq, &desclen, seqno);
      /* append exactly <desclen> characters as reported by the encseq */
      gt_str_append_cstr_nt(desc, edesc, desclen);
    }
  }
  else if (rm->matchdesc) {
    const char *md5;
    /* XXX: not beautiful, but works -- this may be LOTS faster */
    had_err = gt_seq_col_grep_desc_md5(rm->seq_col, &md5, seqid, err);
    if (!had_err) {
      GtStr *md5_seqid = gt_str_new_cstr(md5);
      had_err = gt_seq_col_md5_to_description(rm->seq_col, desc, md5_seqid,
                                              err);
      gt_str_delete(md5_seqid);
    }
  }
  else {
    /* no lookup mode configured: fall back to the first sequence */
    char *cdesc;
    cdesc = gt_seq_col_get_description(rm->seq_col, 0, 0);
    gt_assert(cdesc);
    gt_str_append_cstr(desc, cdesc);
    gt_free(cdesc);
  }

  return had_err;
}