예제 #1
0
// `node1` should be the first node of a supernode
// `node0` should be the previous node
// `next_base` is the last base of `node1`
// `jmpfunc` is called with each supernode traversed and if it returns true
//           we continue crawling, otherwise we stop
// `endfunc` is a function called at the end of traversal
void graph_crawler_fetch(GraphCrawler *crawler, dBNode node0,
                         dBNode next_nodes[4],
                         size_t take_idx, size_t num_next,
                         uint32_t *cols, size_t ncols,
                         bool (*jmpfunc)(GraphCache *_cache, GCacheStep *_step, void *_arg),
                         void (*endfunc)(GraphCache *_cache, uint32_t _pathid, void *_arg),
                         void *arg)
{
  const dBGraph *db_graph = crawler->cache.db_graph;
  GraphCache *cache = &crawler->cache;
  GraphWalker *wlk = &crawler->wlk;
  RepeatWalker *rptwlk = &crawler->rptwlk;
  GCUniColPath *unipaths = crawler->unicol_paths;

  ctx_assert(take_idx < num_next);
  ctx_assert(!db_nodes_are_equal(node0, next_nodes[take_idx]));

  // Fetch all paths in all colours
  dBNode node1 = next_nodes[take_idx];
  bool is_fork;
  size_t i, c, col, nedges_cols, num_unicol_paths = 0;
  int pathid;

  for(c = 0; c < ncols; c++)
  {
    col = (cols != NULL ? cols[c] : c);

    if(db_node_has_col(db_graph, node0.key, col) &&
       db_node_has_col(db_graph, node1.key, col))
    {
      // Determine if this fork is a fork in the current colour
      for(nedges_cols = 0, i = 0; i < num_next && nedges_cols <= 1; i++)
        nedges_cols += db_node_has_col(db_graph, next_nodes[i].key, col);

      is_fork = (nedges_cols > 1);

      graph_walker_setup(wlk, true, col, col, db_graph);
      graph_walker_start(wlk, node0);
      graph_walker_force(wlk, node1, is_fork);

      pathid = graph_crawler_load_path(cache, node1, wlk, rptwlk, jmpfunc, arg);

      if(endfunc != NULL) endfunc(cache, pathid, arg);

      graph_walker_finish(wlk);
      graph_crawler_reset_rpt_walker(rptwlk, cache, pathid);

      unipaths[num_unicol_paths++] = (GCUniColPath){.colour = col,
                                                    .pathid = pathid};
    }
    else
      pathid = -1;

    crawler->col_paths[col] = pathid;
  }
예제 #2
0
// Infer "population" edges: edges that are present in at least one colour
// but missing from at least one other colour.
//
// `node_bkey` is the kmer of the node being updated.
// `edges`     holds one Edges value per colour (db_graph->num_of_cols
//             entries) and is updated in place.
// `covgs`     holds one value per colour; an edge is only added to colours
//             where covgs[col] > 0.
//
// A candidate edge is added to colour `col` when covgs[col] > 0 and the node
// at the other end of the edge has colour `col`.
//
// Returns true if any entry of `edges` changed; false otherwise
bool infer_pop_edges(const BinaryKmer node_bkey, Edges *edges,
                     const Covg *covgs, const dBGraph *db_graph)
{
  // The intersection starts with all eight edge bits set (two orientations
  // times four nucleotides), the same mask infer_all_edges() uses. Seeding
  // only four bits would make every edge in the other four bits look like it
  // is missing from some colour, defeating the early exit below and causing
  // needless hash table lookups.
  Edges uedges = 0, iedges = 0xff, add_edges, edge;
  size_t orient, nuc, col, kmer_size = db_graph->kmer_size;
  const size_t ncols = db_graph->num_of_cols;
  BinaryKmer bkey, bkmer;
  hkey_t next;
  Edges newedges[ncols];

  for(col = 0; col < ncols; col++) {
    uedges |= edges[col]; // union of edges
    iedges &= edges[col]; // intersection of edges
    newedges[col] = edges[col];
  }

  // Edges seen in some colour but not in every colour
  add_edges = uedges & ~iedges;

  if(!add_edges) return 0;

  for(orient = 0; orient < 2; orient++)
  {
    // Shift the kmer once per orientation; only the variable base is
    // overwritten for each nucleotide below
    bkmer = (orient == FORWARD ? binary_kmer_left_shift_one_base(node_bkey, kmer_size)
                               : binary_kmer_right_shift_one_base(node_bkey));

    for(nuc = 0; nuc < 4; nuc++)
    {
      edge = nuc_orient_to_edge(nuc, orient);
      if(add_edges & edge)
      {
        // get next bkmer, look up in graph
        if(orient == FORWARD) binary_kmer_set_last_nuc(&bkmer, nuc);
        else binary_kmer_set_first_nuc(&bkmer, dna_nuc_complement(nuc), kmer_size);

        bkey = bkmer_get_key(bkmer, kmer_size);
        next = hash_table_find(&db_graph->ht, bkey);

        // The edge is set in at least one colour, so its target is expected
        // to be in the graph
        ctx_assert(next != HASH_NOT_FOUND);

        for(col = 0; col < ncols; col++) {
          if(covgs[col] > 0 && db_node_has_col(db_graph, next, col)) {
            newedges[col] |= edge;
          }
        }
      }
    }
  }

  // Compare before copying so that we can report whether anything changed
  int cmp = memcmp(edges, newedges, sizeof(Edges)*ncols);
  memcpy(edges, newedges, sizeof(Edges)*ncols);
  return (cmp != 0);
}
예제 #3
0
// Infer every edge that can be added to this node, whether or not the edge
// already exists in some colour.
//
// `node_bkey` is the kmer of the node being updated.
// `edges`     holds one Edges value per colour (db_graph->num_of_cols
//             entries) and is updated in place.
// `covgs`     holds one value per colour; an edge is only added to colours
//             where covgs[col] > 0.
//
// For each edge that is not already set in every colour, the neighbouring
// kmer is looked up in the hash table. If it is found, the edge is added to
// each colour where covgs[col] > 0 and the neighbour has that colour.
//
// Returns true if any entry of `edges` changed; false otherwise
bool infer_all_edges(const BinaryKmer node_bkey, Edges *edges,
                     const Covg *covgs, const dBGraph *db_graph)
{
  const size_t ncols = db_graph->num_of_cols;
  const size_t kmer_size = db_graph->kmer_size;
  const size_t nbytes = sizeof(Edges)*ncols;
  size_t dir, base, c;
  Edges shared = 0xff;
  Edges updated[ncols];

  // Work on a copy so the before/after comparison at the end is possible
  memcpy(updated, edges, ncols * sizeof(Edges));

  // Edges already present in every colour need no work
  for(c = 0; c < ncols; c++) {
    shared &= edges[c];
  }

  for(dir = 0; dir < 2; dir++)
  {
    BinaryKmer shifted;

    if(dir == FORWARD) {
      shifted = binary_kmer_left_shift_one_base(node_bkey, kmer_size);
    } else {
      shifted = binary_kmer_right_shift_one_base(node_bkey);
    }

    for(base = 0; base < 4; base++)
    {
      const Edges edge = nuc_orient_to_edge(base, dir);

      // Skip edges that every colour already has
      if(shared & edge) continue;

      // Build the neighbouring kmer for this nucleotide
      if(dir == FORWARD) {
        binary_kmer_set_last_nuc(&shifted, base);
      } else {
        binary_kmer_set_first_nuc(&shifted, dna_nuc_complement(base), kmer_size);
      }

      BinaryKmer nbr_key = bkmer_get_key(shifted, kmer_size);
      hkey_t nbr = hash_table_find(&db_graph->ht, nbr_key);

      // Neighbour not in the graph: no edge to infer
      if(nbr == HASH_NOT_FOUND) continue;

      for(c = 0; c < ncols; c++) {
        if(covgs[c] > 0 && db_node_has_col(db_graph, nbr, c)) {
          updated[c] |= edge;
        }
      }
    }
  }

  // Report a change only if the stored edges differ from the new ones
  const bool changed = (memcmp(edges, updated, nbytes) != 0);
  memcpy(edges, updated, nbytes);
  return changed;
}
예제 #4
0
// Run edge inference on a single node.
//
// `hkey`               node to update
// `add_all_edges`      if true use infer_all_edges(), else infer_pop_edges()
// `num_nodes_modified` incremented by the value returned from the inference
//                      function
static inline void infer_edges_node(hkey_t hkey,
                                    bool add_all_edges,
                                    const dBGraph *db_graph,
                                    size_t *num_nodes_modified)
{
  BinaryKmer bkmer = db_node_bkmer(db_graph, hkey);
  Edges *edges = &db_node_edges(db_graph, hkey, 0);

  // Per-colour flags standing in for coverage: non-zero when the node has
  // the colour
  Covg in_col[db_graph->num_of_cols];
  size_t c;

  for(c = 0; c < db_graph->num_of_cols; c++) {
    in_col[c] = db_node_has_col(db_graph, hkey, c);
  }

  if(add_all_edges) {
    (*num_nodes_modified) += infer_all_edges(bkmer, edges, in_col, db_graph);
  } else {
    (*num_nodes_modified) += infer_pop_edges(bkmer, edges, in_col, db_graph);
  }
}
예제 #5
0
// Add `new_edge` to edges[col] for every colour where covgs[col] > 0 and the
// node `next_hkey` is present in that colour.
//
// Presence of `next_hkey` in a colour is tested with db_node_covg() when the
// graph has per-colour coverage (db_graph->col_covgs != NULL), and with
// db_node_has_col() otherwise.
static inline void _add_edge_to_colours(hkey_t next_hkey,
                                        const Covg *covgs, Edges *edges,
                                        Edges new_edge,
                                        const dBGraph *db_graph)
{
  const size_t ncols = db_graph->num_of_cols;
  const bool have_covgs = (db_graph->col_covgs != NULL);
  size_t c;

  for(c = 0; c < ncols; c++)
  {
    // This node is not in colour c
    if(!(covgs[c] > 0)) continue;

    bool next_in_col;

    if(have_covgs) {
      next_in_col = (db_node_covg(db_graph, next_hkey, c) != 0);
    } else {
      next_in_col = (db_node_has_col(db_graph, next_hkey, c) != 0);
    }

    if(next_in_col) {
      edges[c] |= new_edge;
    }
  }
}
예제 #6
0
// Get the edges of `node` restricted to colour `col`, in the direction
// node.orient only.
//
// If the graph has neither node_in_cols nor col_covgs, the edges stored for
// colour `col` are returned, masked to node.orient.
//
// Otherwise the edges are taken from colour 0 (asserting that
// num_edge_cols == 1), and an edge is kept only if the node it leads to is
// in colour `col`. That is checked with db_node_has_col() when node_in_cols
// is available, or with db_node_col_covg() > 0 when it is not.
Edges db_node_edges_in_col(dBNode node, size_t col, const dBGraph *db_graph)
{
  const bool have_in_cols = (db_graph->node_in_cols != NULL);
  const bool have_covgs = (db_graph->col_covgs != NULL);

  if(!have_in_cols && !have_covgs) {
    // No per-colour membership information: use this colour's own edges
    Edges col_edges = db_node_get_edges(db_graph, node.key, col);
    return edges_mask_orientation(col_edges, node.orient);
  }

  // Edges are merged into one colour
  ctx_assert(db_graph->num_edge_cols == 1);
  ctx_assert(have_in_cols || have_covgs);

  Edges merged = db_node_get_edges(db_graph, node.key, 0);
  BinaryKmer bkmer = db_node_get_bkmer(db_graph, node.key);

  dBNode nbrs[4];
  Nucleotide nbr_nucs[4];
  size_t k, num_nbrs;

  num_nbrs = db_graph_next_nodes(db_graph, bkmer, node.orient,
                                 merged, nbrs, nbr_nucs);

  // Rebuild the edge set from the neighbours that are in the given colour
  Edges result = 0;

  for(k = 0; k < num_nbrs; k++)
  {
    bool in_col;

    if(have_in_cols) {
      in_col = (db_node_has_col(db_graph, nbrs[k].key, col) != 0);
    } else {
      // have_covgs must be true here, otherwise we returned early above
      in_col = (db_node_col_covg(db_graph, nbrs[k].key, col) > 0);
    }

    if(in_col) {
      result = edges_set_edge(result, nbr_nucs[k], node.orient);
    }
  }

  return result;
}
예제 #7
0
// Run edge inference on a single node.
//
// `hkey`               node to update
// `add_all_edges`      if true use infer_all_edges(), else infer_pop_edges()
// `tmp_covgs`          scratch array with db_graph->num_of_cols entries; only
//                      written when the graph has no per-colour coverage
// `num_nodes_modified` incremented by the value returned from the inference
//                      function
//
// Always returns 0 (=> keep iterating)
static inline int infer_edges_node(hkey_t hkey,
                                   bool add_all_edges,
                                   Covg *tmp_covgs,
                                   const dBGraph *db_graph,
                                   size_t *num_nodes_modified)
{
  const Covg *covgs;

  if(db_graph->col_covgs != NULL) {
    // Use the coverages stored in the graph directly
    covgs = &db_node_covg(db_graph, hkey, 0);
  }
  else {
    // No coverage stored: fill the scratch array with the result of the
    // colour membership test instead
    size_t c;
    for(c = 0; c < db_graph->num_of_cols; c++) {
      tmp_covgs[c] = db_node_has_col(db_graph, hkey, c);
    }
    covgs = tmp_covgs;
  }

  BinaryKmer bkmer = db_node_get_bkmer(db_graph, hkey);
  Edges *edges = &db_node_edges(db_graph, hkey, 0);

  if(add_all_edges) {
    (*num_nodes_modified) += infer_all_edges(bkmer, edges, covgs, db_graph);
  } else {
    (*num_nodes_modified) += infer_pop_edges(bkmer, edges, covgs, db_graph);
  }

  return 0; // => keep iterating
}
예제 #8
0
// Append the graph nodes matching the kmers of read `r` to `aln`.
//
// The read is split into contigs using seq_contig_start()/seq_contig_end()
// with the given `qcutoff` and `hp_cutoff`. Each kmer of each contig is
// looked up in the graph; kmers that are found are appended to aln->nodes,
// and their offset in the read is appended to aln->rpos.
//
// If `colour` is -1 any kmer in the graph is accepted, otherwise only kmers
// whose node has the given colour.
//
// aln->seq_gaps is set to true if two consecutive newly added entries are
// not at adjacent read offsets. It is never cleared here.
//
// Returns the number of bases between the end of the last kmer added and the
// end of the read, or r->seq.end if no kmers were added.
static size_t db_alignment_from_read(dBAlignment *aln, const read_t *r,
                                     uint8_t qcutoff, uint8_t hp_cutoff,
                                     const dBGraph *db_graph, int colour)
{
  const size_t kmer_size = db_graph->kmer_size;

  dBNodeBuffer *nodes = &aln->nodes;
  Int32Buffer *rpos = &aln->rpos;

  ctx_assert(nodes->len == rpos->len);

  const size_t init_len = nodes->len;
  size_t num = init_len;

  // Make room for the most entries this read could add
  db_node_buf_capacity(nodes, num + r->seq.end);
  int32_buf_capacity(rpos, num + r->seq.end);

  size_t search_from = 0;

  for(;;)
  {
    size_t ctg_start = seq_contig_start(r, search_from, kmer_size,
                                        qcutoff, hp_cutoff);

    // No further contig in this read
    if(ctg_start >= r->seq.end) break;

    // seq_contig_end() also updates where the next search starts from
    size_t ctg_end = seq_contig_end(r, ctg_start, kmer_size,
                                    qcutoff, hp_cutoff, &search_from);

    const char *ctg = r->seq.b + ctg_start;
    size_t ctg_len = ctg_end - ctg_start;

    // Load the first kmer then shift it right by one base, so that each loop
    // iteration below can shift left and append the next base
    BinaryKmer bkmer = binary_kmer_from_str(ctg, kmer_size);
    bkmer = binary_kmer_right_shift_one_base(bkmer);

    // `last` is the index within the contig of the last base of the kmer
    size_t last;

    for(last = kmer_size-1; last < ctg_len; last++)
    {
      Nucleotide nuc = dna_char_to_nuc(ctg[last]);
      bkmer = binary_kmer_left_shift_add(bkmer, kmer_size, nuc);

      BinaryKmer bkey = binary_kmer_get_key(bkmer, kmer_size);
      hkey_t hkey = hash_table_find(&db_graph->ht, bkey);

      if(hkey == HASH_NOT_FOUND) continue;
      if(colour != -1 && !db_node_has_col(db_graph, hkey, colour)) continue;

      // Offset of the first base of this kmer within the read
      size_t read_offset = ctg_start + last - (kmer_size-1);

      nodes->b[num].key = hkey;
      nodes->b[num].orient = bkmer_get_orientation(bkmer, bkey);
      rpos->b[num] = read_offset;
      num++;
    }
  }

  // Number of bases from the last kmer found until read end
  size_t bases_after;

  if(num == init_len) {
    bases_after = r->seq.end; /* No kmers found */
  } else {
    bases_after = r->seq.end - (rpos->b[num-1] + kmer_size);
  }

  nodes->len = rpos->len = num;

  // Check for sequence gaps: walk forward while neighbouring entries are at
  // adjacent offsets, and flag a gap if we stopped before the end
  size_t i = init_len;

  while(i+1 < nodes->len && !(rpos->b[i]+1 < rpos->b[i+1])) {
    i++;
  }

  if(i+1 < nodes->len) {
    aln->seq_gaps = true;
  }

  return bases_after;
}
예제 #9
0
// Load into the caller's cache the paths leaving `fork_node`, for each
// colour and each outgoing edge.
//
// `fork_node` is a node with outdegree > 1
//
// The cache is reset first. Then, for every colour that `fork_node` has, a
// path is loaded (limited by caller->prefs.max_allele_len) along every next
// node that also has that colour. Finally the first entry of the 5p flank
// buffer is set to the reverse of `fork_node`.
void find_bubbles(BubbleCaller *caller, dBNode fork_node)
{
  graph_cache_reset(&caller->cache);

  const dBGraph *db_graph = caller->db_graph;
  GraphCache *cache = &caller->cache;
  GraphWalker *wlk = &caller->wlk;
  RepeatWalker *rptwlk = &caller->rptwlk;

  dBNode next_nodes[4];
  Nucleotide next_nucs[4];
  bool in_col[4];
  size_t k, num_next, num_in_col;

  BinaryKmer fork_bkmer = db_node_get_bkmer(db_graph, fork_node.key);

  // Next nodes are found using the edges stored at index 0
  num_next = db_graph_next_nodes(db_graph, fork_bkmer, fork_node.orient,
                                 db_node_edges(db_graph, fork_node.key, 0),
                                 next_nodes, next_nucs);

  // Loop over colours, then over alleles (next nodes)
  const Colour ncols = db_graph->num_of_cols;
  Colour col;

  for(col = 0; col < ncols; col++)
  {
    if(!db_node_has_col(db_graph, fork_node.key, col)) continue;

    // Record which next nodes are in this colour, and how many
    num_in_col = 0;
    for(k = 0; k < num_next; k++) {
      in_col[k] = (db_node_has_col(db_graph, next_nodes[k].key, col) > 0);
      num_in_col += in_col[k];
    }

    // Only a fork in this colour if more than one next node has the colour
    const bool is_fork = (num_in_col > 1);

    graph_walker_setup(wlk, true, col, col, db_graph);

    for(k = 0; k < num_next; k++)
    {
      if(!in_col[k]) continue;

      graph_walker_start(wlk, fork_node);
      graph_walker_force(wlk, next_nodes[k], is_fork);

      uint32_t pathid
        = graph_crawler_load_path_limit(cache, next_nodes[k], wlk, rptwlk,
                                        caller->prefs.max_allele_len);

      graph_walker_finish(wlk);
      graph_crawler_reset_rpt_walker(rptwlk, cache, pathid);
    }
  }

  // Set up 5p flank: store the reversed fork node as its first entry.
  // NOTE(review): len is left at 0 even though b[0] is written; presumably
  // this marks the flank as not yet fetched -- verify against the code that
  // reads flank5p.
  caller->flank5p.b[0] = db_node_reverse(fork_node);
  caller->flank5p.len = 0;
}