/* Passes the description of every sequence of the given source to
   handle_description() so it can be recorded in <mapping>. Exactly one of
   <bioseq> and <seqcol> must be non-NULL; a <bioseq> is treated as a single
   file with index 0. Processing stops at the first description for which
   handle_description() reports an error, and that error code is returned;
   otherwise 0 is returned. */
static int fill_mapping(GtSeqid2SeqnumMapping *mapping, GtBioseq *bioseq,
                        GtSeqCol *seqcol, GT_UNUSED GtError *err)
{
  GtUword seqnum, filenum, num_seqs, num_files;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(mapping && (bioseq || seqcol) && !(bioseq && seqcol));

  num_files = bioseq ? 1 : gt_seq_col_num_of_files(seqcol);

  for (filenum = 0; !had_err && filenum < num_files; filenum++) {
    num_seqs = bioseq ? gt_bioseq_number_of_sequences(bioseq)
                      : gt_seq_col_num_of_seqs(seqcol, filenum);

    for (seqnum = 0; !had_err && seqnum < num_seqs; seqnum++) {
      char *description;
      /* in both cases we end up owning the string and free it below */
      if (!bioseq)
        description = gt_seq_col_get_description(seqcol, filenum, seqnum);
      else
        description = gt_cstr_dup(gt_bioseq_get_description(bioseq, seqnum));
      had_err = handle_description(mapping, description, seqnum, filenum, err);
      gt_free(description);
    }
  }
  return had_err;
}
예제 #2
0
/* Appends the description of the sequence identified by <seqid> to <desc>.

   How <seqid> is resolved depends on the state of <rm>:
   - rm->userawseq: the fixed string "<rawseq>" is appended.
   - <seqid> carries the MD5 prefix: the description is looked up via
     gt_seq_col_md5_to_description().
   - rm->usedesc: <seqid> is mapped to a (file, sequence) number pair via
     rm->seqid2seqnum_mapping and that description is appended.
   - rm->useseqno: <seqid> must have the form "seqX"; X is used as sequence
     number in rm->encseq.
   - rm->matchdesc: the MD5 of the matching description is obtained with
     gt_seq_col_grep_desc_md5() and then resolved like an MD5 seqid.
   - otherwise: the description of sequence 0 in file 0 is appended.

   Returns 0 on success. On failure a non-zero value is returned; the error
   paths in this function itself set <err> and return -1, errors of callees
   are passed through unchanged. */
int gt_region_mapping_get_description(GtRegionMapping *rm, GtStr *desc,
                                      GtStr *seqid, GtError *err)
{
  int had_err = 0;
  gt_error_check(err);
  gt_assert(rm && desc && seqid);
  if (rm->userawseq) {
    gt_str_append_cstr(desc, "<rawseq>");
    return 0;
  }
  had_err = update_seq_col_if_necessary(rm, seqid, err);
  if (!had_err) {
    if (gt_md5_seqid_has_prefix(gt_str_get(seqid))) {
      /* Only MD5 seqids are fully handled here. The return must stay inside
         this branch, otherwise all other lookup modes below become
         unreachable and <desc> is left untouched for non-MD5 seqids. */
      had_err = gt_seq_col_md5_to_description(rm->seq_col, desc, seqid,
                                              err);
      return had_err;
    }
  }
  if (!had_err) {
    if (rm->usedesc) {
      unsigned long filenum, seqnum;
      gt_assert(rm->seqid2seqnum_mapping);
      had_err = gt_seqid2seqnum_mapping_map(rm->seqid2seqnum_mapping,
                                            gt_str_get(seqid), NULL, &seqnum,
                                            &filenum, NULL, err);
      if (!had_err) {
        char *cdesc;
        /* the returned string is owned by us and freed after appending */
        cdesc = gt_seq_col_get_description(rm->seq_col, filenum, seqnum);
        gt_assert(cdesc);
        gt_str_append_cstr(desc, cdesc);
        gt_free(cdesc);
      }
    }
    else if (rm->useseqno) {
      unsigned long seqno = GT_UNDEF_ULONG;
      gt_assert(rm->encseq);
      if (1 != sscanf(gt_str_get(seqid), "seq%lu", &seqno)) {
        gt_error_set(err, "seqid '%s' does not have the form 'seqX' "
                          "where X is a sequence number in the encoded "
                          "sequence", gt_str_get(seqid));
        had_err = -1;
      }
      gt_assert(had_err || seqno != GT_UNDEF_ULONG);
      if (!had_err && seqno >= gt_encseq_num_of_sequences(rm->encseq)) {
          /* note the trailing space after "encoded": the two literals are
             concatenated by the compiler */
          gt_error_set(err, "trying to access sequence %lu, but encoded "
                            "sequence contains only %lu sequences",
                            seqno, gt_encseq_num_of_sequences(rm->encseq));
          had_err = -1;
      }
      if (!had_err) {
        unsigned long desclen;
        const char *edesc;
        /* the description comes with an explicit length, hence the
           length-taking append */
        edesc = gt_encseq_description(rm->encseq, &desclen, seqno);
        gt_str_append_cstr_nt(desc, edesc, desclen);
      }
    } else if (rm->matchdesc) {
      const char *md5;
      /* XXX: not beautiful, but works -- this may be LOTS faster */
      had_err = gt_seq_col_grep_desc_md5(rm->seq_col, &md5, seqid, err);
      if (!had_err) {
        GtStr *md5_seqid = gt_str_new_cstr(md5);
        had_err = gt_seq_col_md5_to_description(rm->seq_col, desc, md5_seqid,
                                                err);
        gt_str_delete(md5_seqid);
      }
    } else {
      /* no special lookup mode: use the first sequence of the first file */
      char *cdesc;
      cdesc = gt_seq_col_get_description(rm->seq_col, 0, 0);
      gt_assert(cdesc);
      gt_str_append_cstr(desc, cdesc);
      gt_free(cdesc);
    }
  }
  return had_err;
}