/* Return the length of sequence <seqnum> in <condenseq>.
   The start position handed to condenseq_seqlength_help() is 0 for the first
   sequence; for every other sequence it is one past the value stored in
   <condenseq->ssptab> for the preceding sequence. */
GtUword gt_condenseq_seqlength(const GtCondenseq *condenseq, GtUword seqnum)
{
  const GtUword seqstart = (seqnum == 0)
    ? 0
    : gt_intset_get(condenseq->ssptab, seqnum - 1) + 1;

  return condenseq_seqlength_help(condenseq, seqnum, seqstart);
}
/* Subtract <amount> from <value>; yields 0 instead of wrapping around when
   <amount> exceeds <value>. */
static GtUword condenseq_sub_floor_zero(GtUword value, GtUword amount)
{
  return value < amount ? 0 : value - amount;
}

/* Determine the sequence containing position <pos>. Stores the first and the
   last (inclusive) position of that sequence in <first> and <last> and
   returns the sequence number. */
static GtUword condenseq_enclosing_seq(const GtCondenseq *condenseq,
                                       GtUword pos,
                                       GtUword *first,
                                       GtUword *last)
{
  const GtUword seqnum = gt_condenseq_pos2seqnum(condenseq, pos);

  *first = gt_condenseq_seqstartpos(condenseq, seqnum);
  *last = *first + condenseq_seqlength_help(condenseq, seqnum, *first) - 1;
  return seqnum;
}

/* Calls <callback> once for the range <urange> (relative to the start of
   unique <uid>) and once for every link of that unique whose span on the
   unique overlaps <urange>. Each reported range is widened by <left_extend>
   and <right_extend> and then clipped to the boundaries of the sequence it
   lies in. <callback_data> and <err> are passed through to <callback>.
   Iteration stops as soon as <callback> returns a non-zero value.
   Returns the number of ranges handed to <callback> (at least 1), or 0 if
   <callback> reported an error. */
GtUword gt_condenseq_each_redundant_range(
                                      GtCondenseq *condenseq,
                                      GtUword uid,
                                      GtRange urange,
                                      GtUword left_extend,
                                      GtUword right_extend,
                                      GtCondenseqProcessExtractedRange callback,
                                      void *callback_data,
                                      GtError *err)
{
  const GtCondenseqUnique *uq;
  GtRange hit;
  GtUword reported = (GtUword) 1,
          seqnum,
          seq_first,
          seq_last,
          i;
  int rval;

  gt_assert(condenseq != NULL);
  gt_assert(uid < condenseq->udb_nelems);

  uq = &condenseq->uniques[uid];

  /* first report the range on the unique itself */
  seqnum = condenseq_enclosing_seq(condenseq, uq->orig_startpos,
                                   &seq_first, &seq_last);
  hit.start = condenseq_sub_floor_zero(uq->orig_startpos + urange.start,
                                       left_extend);
  if (hit.start < seq_first)
    hit.start = seq_first;
  hit.end = uq->orig_startpos + urange.end + right_extend;
  if (hit.end > seq_last)
    hit.end = seq_last;

  gt_assert(hit.start <= hit.end);
  rval = callback(callback_data, seqnum, hit, err);

  /* then every link that refers to an overlapping part of the unique */
  for (i = 0; rval == 0 && i < uq->links.nextfreeuint32_t; ++i) {
    const GtCondenseqLink *lnk =
      &condenseq->links[uq->links.spaceuint32_t[i]];
    /* first and last position (in unique coordinates) covered by the link.
       The upper bound is a heuristic: the length of the link could consist
       entirely of insertions, but similar lengths are assumed for a link and
       its unique counterpart. */
    const GtUword lnk_first = lnk->unique_offset,
                  lnk_last = lnk->unique_offset + lnk->len - 1;

    if (urange.end < lnk_first || urange.start > lnk_last)
      continue; /* no overlap with the requested range */

    seqnum = condenseq_enclosing_seq(condenseq, lnk->orig_startpos,
                                     &seq_first, &seq_last);

    /* left border: start of the link, widened, then moved by the distance
       between the start of <urange> and the start of the link's span */
    hit.start = condenseq_sub_floor_zero(lnk->orig_startpos, left_extend);
    if (urange.start < lnk_first) {
      hit.start = condenseq_sub_floor_zero(hit.start,
                                           lnk_first - urange.start);
    }
    else {
      hit.start += urange.start - lnk_first;
    }
    if (hit.start < seq_first)
      hit.start = seq_first;

    /* right border, same length heuristic as above.
       NOTE(review): orig_startpos + len is one past the last position of the
       link, whereas the end computed for the unique is inclusive -- confirm
       this is an intended part of the heuristic. */
    hit.end = lnk->orig_startpos + right_extend + lnk->len;
    if (urange.end < lnk_last) {
      hit.end = condenseq_sub_floor_zero(hit.end, lnk_last - urange.end);
    }
    else {
      hit.end += urange.end - lnk_last;
    }
    if (hit.end > seq_last)
      hit.end = seq_last;

    gt_assert(hit.start <= hit.end);
    rval = callback(callback_data, seqnum, hit, err);
    reported++;
  }

  return rval != 0 ? 0 : reported;
}
int gt_condenseq_output_to_gff3(const GtCondenseq *condenseq,
                                GtError *err)
{
  int had_err = 0;
  GtUword idx,
          name_len,
          seqnum = 0, seqstart = 0, seqend = 0,
          desclen;
  GtStr *filename = NULL,
        *id = gt_str_new_cstr("U"),
        *name = gt_str_new_cstr("unique"),
        *parent_unique = gt_str_new_cstr("U"),
        *seqid = gt_str_new(),
        *source = gt_str_new_cstr("Condenseq");
  GtFile *outfile = NULL;
  GtGFF3Visitor *gffv = NULL;
  GtNodeVisitor *nodev = NULL;
  GtFeatureNode *fnode = NULL;
  GtGenomeNode *node = NULL;
  GtRange range;

  gt_assert(condenseq != NULL);

  filename = gt_str_new_cstr(gt_condenseq_basefilename(condenseq));

  name_len = gt_str_length(name);
  gt_str_append_cstr(filename, ".gff3");
  outfile = gt_file_new(gt_str_get(filename), "w", err);
  nodev = gt_gff3_visitor_new(outfile);
  gffv = (GtGFF3Visitor *) nodev;
  gt_gff3_visitor_retain_id_attributes(gffv);

  node = gt_feature_node_new(seqid, "experimental_feature", (GtUword) 1,
                             (GtUword) 1, GT_STRAND_BOTH);
  fnode = (GtFeatureNode*) node;
  gt_feature_node_set_source(fnode, source);
  for (idx = 0; !had_err && idx < condenseq->udb_nelems; ++idx) {
    GtCondenseqUnique uq = condenseq->uniques[idx];
    if (seqend <= uq.orig_startpos) {
      const char *desc;
      gt_genome_node_delete(node);
      seqnum = gt_condenseq_pos2seqnum(condenseq, uq.orig_startpos);
      seqstart = gt_condenseq_seqstartpos(condenseq, seqnum);
      seqend = seqstart + condenseq_seqlength_help(condenseq, seqnum, seqstart);
      desc = gt_condenseq_description(condenseq, &desclen, seqnum);
      gt_str_reset(seqid);
      gt_str_append_cstr_nt(seqid, desc, desclen);
      node = gt_feature_node_new(seqid, "experimental_feature", (GtUword) 1,
                                 (GtUword) 1, GT_STRAND_BOTH);
      fnode = (GtFeatureNode*) node;
      gt_feature_node_set_source(fnode, source);
    }
    gt_str_set_length(name, name_len);
    gt_str_append_uword(name, idx);
    gt_str_set_length(id, (GtUword) 1);
    gt_str_append_uword(id, idx);
    gt_feature_node_set_attribute(fnode, "Name", gt_str_get(name));
    gt_feature_node_set_attribute(fnode, "ID", gt_str_get(id));
    /* 1 Based coordinates! */
    range.start = uq.orig_startpos + 1 - seqstart;
    range.end = uq.orig_startpos + uq.len - seqstart;
    gt_genome_node_set_range(node, &range);
    had_err = gt_genome_node_accept(node, nodev, err);
  }
  gt_str_reset(name);
  gt_str_append_cstr(name, "link");
  gt_str_reset(id);
  gt_str_append_cstr(id, "L");
  name_len = gt_str_length(name);
  seqend = 0;
  for (idx = 0; !had_err && idx < condenseq->ldb_nelems; ++idx) {
    GtCondenseqLink link = condenseq->links[idx];
    if (seqend <= link.orig_startpos) {
      const char *desc;
      gt_genome_node_delete(node);
      seqnum = gt_condenseq_pos2seqnum(condenseq, link.orig_startpos);
      seqstart = gt_condenseq_seqstartpos(condenseq, seqnum);
      seqend = seqstart + condenseq_seqlength_help(condenseq, seqnum, seqstart);
      desc = gt_condenseq_description(condenseq, &desclen, seqnum);
      gt_str_reset(seqid);
      gt_str_append_cstr_nt(seqid, desc, desclen);
      node = gt_feature_node_new(seqid, "experimental_feature", (GtUword) 1,
                                 (GtUword) 1, GT_STRAND_BOTH);
      fnode = (GtFeatureNode*) node;
      gt_feature_node_set_source(fnode, source);
    }
    gt_str_set_length(name, name_len);
    gt_str_append_uword(name, idx);
    gt_str_set_length(id, (GtUword) 1);
    gt_str_append_uword(id, idx);
    gt_feature_node_set_attribute(fnode, "Name", gt_str_get(name));
    gt_feature_node_set_attribute(fnode, "ID", gt_str_get(id));
    gt_str_set_length(parent_unique, (GtUword) 1);
    gt_str_append_uword(parent_unique, link.unique_id);
    gt_feature_node_set_attribute(fnode, "Derives_from",
                                  gt_str_get(parent_unique));
    /* 1 Based coordinates! */
    range.start = link.orig_startpos + 1 - seqstart;
    range.end = link.orig_startpos + link.len - seqstart;
    gt_genome_node_set_range(node, &range);
    had_err = gt_genome_node_accept(node, nodev, err);
  }
  gt_file_delete(outfile);
  gt_genome_node_delete(node);
  gt_node_visitor_delete(nodev);
  gt_str_delete(filename);
  gt_str_delete(id);
  gt_str_delete(name);
  gt_str_delete(parent_unique);
  gt_str_delete(seqid);
  gt_str_delete(source);
  return had_err;
}
/* Example #4 */
/* 0 */
/* Return the length of sequence <seqnum> in <condenseq>. The start position
   of the sequence is looked up with gt_condenseq_seqstartpos() and handed to
   condenseq_seqlength_help() together with the sequence number. */
GtUword gt_condenseq_seqlength(const GtCondenseq *condenseq, GtUword seqnum)
{
  return condenseq_seqlength_help(condenseq, seqnum,
                                  gt_condenseq_seqstartpos(condenseq, seqnum));
}