Ejemplo n.º 1
0
// Run the bubble caller between two flanks and check the resulting alleles.
// The 5' node is looked up from the last `kmer_size` characters of `flank5p`,
// the 3' node from the start of `flank3p`. Both must be in the graph, and the
// 5' node must fork (outdegree > 1) while the 3' node must merge (indegree > 1).
// `nbuf` and `sbuf` are scratch buffers handed on to _check_alleles().
// NOTE(review): assumes strlen(flank5p) >= kmer_size -- confirm at call sites.
static void _call_bubble(BubbleCaller *caller,
                         const char *flank5p, const char *flank3p,
                         const char **alleles, size_t num_alleles,
                         dBNodeBuffer *nbuf, StrBuf *sbuf)
{
  const dBGraph *graph = caller->db_graph;
  const size_t kmer_size = graph->kmer_size;

  // Last kmer of the 5' flank
  const char *last_kmer5p = flank5p + strlen(flank5p) - kmer_size;

  dBNode node5p = db_graph_find_str(graph, last_kmer5p);
  dBNode node3p = db_graph_find_str(graph, flank3p);
  TASSERT(node5p.key != HASH_NOT_FOUND);
  TASSERT(node3p.key != HASH_NOT_FOUND);

  // Flank ends must branch for there to be a bubble between them
  Edges edges5p = db_node_get_edges_union(graph, node5p.key);
  Edges edges3p = db_node_get_edges_union(graph, node3p.key);
  TASSERT(edges_get_outdegree(edges5p, node5p.orient) > 1);
  TASSERT(edges_get_indegree(edges3p, node3p.orient) > 1);

  find_bubbles(caller, node5p);

  // Locate the unitig holding the 3' node and which way round we hit it
  GCacheUnitig *snode3p = graph_cache_find_unitig(&caller->cache, node3p);
  TASSERT(snode3p != NULL);
  Orientation orient3p = gc_unitig_get_orient(&caller->cache, snode3p, node3p);

  find_bubbles_ending_with(caller, snode3p);

  // Pick the step buffer that matches the orientation of the 3' unitig
  GCacheStepPtrBuf *steps;
  if(orient3p == FORWARD)
    steps = &caller->spp_forward;
  else
    steps = &caller->spp_reverse;

  _check_alleles(&caller->cache, steps, alleles, num_alleles, nbuf, sbuf);
}
Ejemplo n.º 2
0
// Returns true if the indegree of the first node plus the outdegree of the
// last node in `nbuf` (taken from the edge union) is at most one.
// `nbuf` must hold at least one node: nbuf.b[nbuf.len-1] is read unconditionally.
static inline bool nodes_are_tip(dBNodeBuffer nbuf, const dBGraph *db_graph)
{
  const size_t last_idx = nbuf.len - 1;

  Edges head_edges = db_node_get_edges_union(db_graph, nbuf.b[0].key);
  Edges tail_edges = db_node_get_edges_union(db_graph, nbuf.b[last_idx].key);

  int num_in = edges_get_indegree(head_edges, nbuf.b[0].orient);
  int num_out = edges_get_outdegree(tail_edges, nbuf.b[last_idx].orient);

  return !(num_in + num_out > 1);
}
Ejemplo n.º 3
0
/*
 * Get position after current segment. A segment is a stretch of kmers aligned
 * to the graph with no gaps.
 * Scanning begins at start+1 and stops at the first position whose read offset
 * (rpos) is not exactly one more than the previous one, or where the previous
 * node has no edge to the current node.
 * @param aln           alignment of read to the graph
 * @param start         offset in the alignment that starts this segment
 * @param missing_edge  set to true if the segment was ended due to a missing
 *                      edge, false otherwise
 * @param db_graph      graph used to look up edges (the union of edges if
 *                      aln->colour < 0, otherwise edges in aln->colour)
 * @return 0 if the alignment is empty; otherwise the index at which the scan
 *         stopped. The scan is bounded by aln->rpos.len, or by aln->r2strtidx
 *         when both reads were used and `start` lies before it. The result is
 *         never less than start+1 for a non-empty alignment.
 */
size_t db_alignment_next_gap(const dBAlignment *aln, size_t start,
                             bool *missing_edge, const dBGraph *db_graph)
{
  const int32_t *rpos = aln->rpos.b;
  const dBNode *nodes = aln->nodes.b;
  int colour = aln->colour;
  size_t limit = aln->rpos.len;

  *missing_edge = false;

  if(limit == 0) return 0;

  // Do not run past the end of the first read into the second
  if(aln->used_r1 && aln->used_r2 && start < aln->r2strtidx)
    limit = aln->r2strtidx;

  size_t pos = start + 1;

  while(pos < limit && rpos[pos-1]+1 == rpos[pos])
  {
    // Edges of the previous node, in one colour or across all of them
    Edges prev_edges;
    if(colour < 0)
      prev_edges = db_node_get_edges_union(db_graph, nodes[pos-1].key);
    else
      prev_edges = db_node_get_edges(db_graph, nodes[pos-1].key, colour);

    Nucleotide next_nuc = db_node_get_last_nuc(nodes[pos], db_graph);

    // Adjacent in the read but not joined in the graph: segment ends here
    if(!edges_has_edge(prev_edges, next_nuc, nodes[pos-1].orient)) {
      *missing_edge = true;
      return pos;
    }

    pos++;
  }

  return pos;
}
Ejemplo n.º 4
0
// Print one symbol per node encoding its in/outdegree - for debugging mostly.
// Degrees come from the edge union and are capped at 2 (2 means >=2).
// Symbol for each indegree/outdegree pair:
//   00: !   01: +   02: {
//   10: -   11: =   12: <
//   20: }   21: >   22: *
// Writes exactly `num` characters to `out`, with no separator or newline.
void db_nodes_print_edges(const dBNode *nodes, size_t num,
                          const dBGraph *db_graph, FILE *out)
{
  // Flattened 3x3 table: row is capped indegree, column is capped outdegree
  static const char degree_symbols[] = "!+{" "-=<" "}>*";

  for(; num > 0; num--, nodes++)
  {
    Edges edges = db_node_get_edges_union(db_graph, nodes->key);
    size_t in_capped  = MIN2(edges_get_indegree(edges,  nodes->orient), 2);
    size_t out_capped = MIN2(edges_get_outdegree(edges, nodes->orient), 2);
    fputc(degree_symbols[in_capped * 3 + out_capped], out);
  }
}
Ejemplo n.º 5
0
// Test whether the node list `nlist[0..len-1]` joins back onto itself.
// Returns false unless the first node has exactly one incoming edge and the
// last node has exactly one outgoing edge (using the edge union). Otherwise
// the last nucleotide of `bkmer0` is shifted onto `bkmer1` and the result is
// compared against `bkmer0`, directly and after reverse complementing.
// Presumably `bkmer0`/`bkmer1` are the kmers of the first/last nodes -- verify
// against the caller. `len` must be at least 1.
static bool supernode_is_closed_cycle(const dBNode *nlist, size_t len,
                                      BinaryKmer bkmer0, BinaryKmer bkmer1,
                                      const dBGraph *db_graph)
{
  const size_t kmer_size = db_graph->kmer_size;

  // Exactly one way into the first node...
  Edges first_edges = db_node_get_edges_union(db_graph, nlist[0].key);
  if(edges_get_indegree(first_edges, nlist[0].orient) != 1) return false;

  // ...and exactly one way out of the last node
  const size_t last = len - 1;
  Edges last_edges = db_node_get_edges_union(db_graph, nlist[last].key);
  if(edges_get_outdegree(last_edges, nlist[last].orient) != 1) return false;

  // Step the last kmer along by one base taken from the first kmer
  Nucleotide next_nuc = bkmer_get_last_nuc(bkmer0, nlist[0].orient, kmer_size);
  BinaryKmer stepped = bkmer_shift_add_last_nuc(bkmer1, nlist[last].orient,
                                                kmer_size, next_nuc);

  if(binary_kmers_are_equal(bkmer0, stepped)) return true;

  // Also accept a match on the opposite strand
  BinaryKmer stepped_rc = binary_kmer_reverse_complement(stepped, kmer_size);
  return binary_kmers_are_equal(bkmer0, stepped_rc);
}
Ejemplo n.º 6
0
// Extend a supernode forwards from the last node already stored in `nbuf`.
// `nbuf` must contain at least one node on entry. While the current node has
// precisely one outgoing edge, the next node is looked up and appended,
// provided it has precisely one edge in the opposite direction and is neither
// the first node of `nbuf` nor its current last node.
// @param nbuf      node buffer to extend in place
// @param limit     maximum number of nodes `nbuf` may hold; 0 means no limit
// @param db_graph  graph to walk
// @return false if `limit` is non-zero and `nbuf` already holds `limit` nodes
//         when another node is due to be added; true otherwise
bool supernode_extend(dBNodeBuffer *nbuf, size_t limit,
                      const dBGraph *db_graph)
{
  ctx_assert(nbuf->len > 0);

  const size_t kmer_size = db_graph->kmer_size;
  dBNode node0 = nbuf->data[0], node = nbuf->data[nbuf->len-1];

  BinaryKmer bkmer = db_node_oriented_bkmer(db_graph, node);
  Edges edges = db_node_get_edges_union(db_graph, node.key);
  Nucleotide nuc;

  while(edges_has_precisely_one_edge(edges, node.orient, &nuc))
  {
    bkmer = binary_kmer_left_shift_add(bkmer, kmer_size, nuc);
    node = db_graph_find(db_graph, bkmer);

    // Check the lookup succeeded before node.key is used to fetch edges
    ctx_assert(node.key != HASH_NOT_FOUND);

    edges = db_node_get_edges_union(db_graph, node.key);

    // Only join nodes that have a single edge back the way we came
    if(edges_has_precisely_one_edge(edges, rev_orient(node.orient), &nuc))
    {
      if(node.key == node0.key || node.key == nbuf->data[nbuf->len-1].key) {
        // don't create a loop A->B->A or a->b->B->A
        break;
      }

      if(limit && nbuf->len >= limit) return false;

      db_node_buf_add(nbuf, node);
    }
    else break;
  }

  return true;
}