Пример #1
0
// Crawl the graph across the edge node0 -> node1 to collect the 5' flank.
// `node1` must be one of the nodes that db_graph_next_nodes() reports as
// following `node0` (this is asserted).
// The crawl covers every colour in the graph and reports through the
// gcrawler_flank5p_* callbacks, which are handed `caller` as their argument.
static void traverse_5pflank(BreakpointCaller *caller, GraphCrawler *crawler,
                             dBNode node0, dBNode node1)
{
  const dBGraph *db_graph = crawler->cache.db_graph;
  Nucleotide next_nucs[4];
  dBNode next_nodes[4];

  // List the nodes that follow node0, using the edges fetched with index 0
  // NOTE(review): presumably index 0 selects the edges of colour 0 -- confirm
  BinaryKmer start_bkmer = db_node_get_bkmer(db_graph, node0.key);
  size_t num_next = db_graph_next_nodes(db_graph, start_bkmer, node0.orient,
                                        db_node_edges(db_graph, node0.key, 0),
                                        next_nodes, next_nucs);

  // Locate node1 among the nodes following node0
  size_t i = 0;
  while(i < num_next && !db_nodes_are_equal(next_nodes[i],node1))
    i++;

  ctx_assert(i < num_next && db_nodes_are_equal(next_nodes[i],node1));

  // Start from empty run buffers before crawling
  kmer_run_buf_reset(&caller->koruns_5p);
  kmer_run_buf_reset(&caller->koruns_5p_ended);
  kmer_run_buf_reset(&caller->flank5p_run_buf);

  // Go backwards to get 5p flank
  // Passing NULL as the colour list means loop over colours 0..(ncols-1)
  graph_crawler_fetch(crawler, node0,
                      next_nodes, next_nucs, i, num_next,
                      NULL, db_graph->num_of_cols,
                      gcrawler_flank5p_stop_at_ref_covg,
                      gcrawler_flank5p_finish_ref_covg,
                      caller);
}
Пример #2
0
// Crawl across the edge node0 -> node1, once for each requested colour, where
// node1 is `next_nodes[take_idx]`.
// `node1` should be the first node of a supernode
// `node0` should be the previous node
// `next_nodes` holds the `num_next` nodes that follow `node0`; `take_idx` must
//           be less than `num_next` and node1 must differ from node0 (asserted)
// `cols` lists the `ncols` colours to crawl; if NULL, colours 0..(ncols-1)
//           are used
// `jmpfunc` is called with each supernode traversed and if it returns true
//           we continue crawling, otherwise we stop
// `endfunc` is a function called at the end of traversal in each colour, may
//           be NULL
// `arg` is passed through to `jmpfunc` and `endfunc`
// For each colour, crawler->col_paths[col] is set to the id of the path
// crawled in that colour, or to -1 if that colour was not crawled.
void graph_crawler_fetch(GraphCrawler *crawler, dBNode node0,
                         dBNode next_nodes[4],
                         size_t take_idx, size_t num_next,
                         uint32_t *cols, size_t ncols,
                         bool (*jmpfunc)(GraphCache *_cache, GCacheStep *_step, void *_arg),
                         void (*endfunc)(GraphCache *_cache, uint32_t _pathid, void *_arg),
                         void *arg)
{
  const dBGraph *db_graph = crawler->cache.db_graph;
  GraphCache *cache = &crawler->cache;
  GraphWalker *wlk = &crawler->wlk;
  RepeatWalker *rptwlk = &crawler->rptwlk;
  GCUniColPath *unipaths = crawler->unicol_paths;

  ctx_assert(take_idx < num_next);
  ctx_assert(!db_nodes_are_equal(node0, next_nodes[take_idx]));

  // Fetch all paths in all colours
  dBNode node1 = next_nodes[take_idx];
  bool is_fork;
  size_t i, c, col, nedges_cols, num_unicol_paths = 0;
  // Signed so that -1 can mark a colour that was not crawled
  int pathid;

  for(c = 0; c < ncols; c++)
  {
    // Without an explicit colour list the loop index is the colour
    col = (cols != NULL ? cols[c] : c);

    // Only crawl in colours that contain both ends of the edge node0 -> node1
    if(db_node_has_col(db_graph, node0.key, col) &&
       db_node_has_col(db_graph, node1.key, col))
    {
      // Determine if this fork is a fork in the current colour
      // Count next nodes that are in this colour, stopping once we pass one
      for(nedges_cols = 0, i = 0; i < num_next && nedges_cols <= 1; i++)
        nedges_cols += db_node_has_col(db_graph, next_nodes[i].key, col);

      is_fork = (nedges_cols > 1);

      // Position the walker on node0 in this colour, then step onto node1
      graph_walker_setup(wlk, true, col, col, db_graph);
      graph_walker_start(wlk, node0);
      graph_walker_force(wlk, node1, is_fork);

      pathid = graph_crawler_load_path(cache, node1, wlk, rptwlk, jmpfunc, arg);

      if(endfunc != NULL) endfunc(cache, pathid, arg);

      // Release the walker and reset the repeat walker before the next colour
      graph_walker_finish(wlk);
      graph_crawler_reset_rpt_walker(rptwlk, cache, pathid);

      // Record the (colour, path) pair that was just crawled
      unipaths[num_unicol_paths++] = (GCUniColPath){.colour = col,
                                                    .pathid = pathid};
    }
    else
      pathid = -1;

    crawler->col_paths[col] = pathid;
  }
Пример #3
0
// Builds a new path in `cache`, adding one step per supernode visited
// `node` is where the path starts; `wlk` must already be positioned on it
// `rptwlk` RepeatWalker should be clear
// `jmpfunc` if not NULL, is called after each supernode has been walked to its
//           end; crawling stops as soon as it returns false
// `arg` is handed to `jmpfunc` unchanged
// Crawling also stops when the walker cannot pick a next node or the repeat
// walker refuses the traversal.
// returns the id of the new path in the GraphCache
uint32_t graph_crawler_load_path(GraphCache *cache, dBNode node,
                                 GraphWalker *wlk, RepeatWalker *rptwlk,
                                 bool (*jmpfunc)(GraphCache *_cache,
                                                 GCacheStep *_step, void *_arg),
                                 void *arg)
{
  const uint32_t pathid = graph_cache_new_path(cache);

  ctx_assert(db_nodes_are_equal(wlk->node, node));

  while(true)
  {
    // Add a step for the supernode containing the current node
    uint32_t stepid = graph_cache_new_step(cache, node);
    GCacheStep *step = graph_cache_step(cache, stepid);
    GCacheSnode *snode = graph_cache_snode(cache, step->supernode);

    // Move the walker to the far end of this supernode
    walk_supernode_end(cache, snode, step->orient, wlk);

    // Let the caller decide whether to keep going
    if(jmpfunc != NULL && !jmpfunc(cache, step, arg)) break;

    // Pick the edges leaving the supernode in the direction of travel
    uint8_t num_edges;
    const dBNode *next_nodes;
    Nucleotide next_bases[4];

    if(step->orient != FORWARD) {
      num_edges = snode->num_prev;
      next_nodes = snode->prev_nodes;
      binary_seq_unpack_byte(next_bases, snode->prev_bases);
    }
    else {
      num_edges = snode->num_next;
      next_nodes = snode->next_nodes;
      binary_seq_unpack_byte(next_bases, snode->next_bases);
    }

    // Step into the next supernode, giving up if there is no way forward...
    if(!graph_walker_next_nodes(wlk, num_edges, next_nodes, next_bases)) break;

    // ...or if the repeat walker rejects the step
    if(!rpt_walker_attempt_traverse(rptwlk, wlk)) break;

    node = wlk->node;
  }

  return pathid;
}
Пример #4
0
// Walk the graph in `colour` from nbuf->b[startidx] and check that the walker
// visits the nodes stored after it in `nbuf`, in order.
// If @allow_extend is true, keep walking past the end of the buffer and append
// each extra node to `nbuf`.
// reset(wlk,rpt,nbuf) is called before every return.
// Returns:
//   CONFIRM_REPEAT   the repeat walker refused a step
//   CONFIRM_WRONG    the walker reached a node that differs from the buffer
//   CONFIRM_OVERSHOT the walker went past the end of the buffer while
//                    @allow_extend is false (the extra node is removed again)
//   CONFIRM_SHORT    the walker stopped before covering the original buffer
//   CONFIRM_SUCCESS  otherwise
static inline int confirm_seq(size_t startidx, bool allow_extend,
                              GraphWalker *wlk, RepeatWalker *rpt,
                              dBNodeBuffer *nbuf, size_t colour,
                              const dBGraph *db_graph)
{
  ctx_assert(startidx < nbuf->len);
  const size_t init_len = nbuf->len;
  size_t pos;

  graph_walker_setup(wlk, true, colour, colour, db_graph);
  graph_walker_start(wlk, nbuf->b[startidx]);

  // `pos` is the buffer index that the walker's current node corresponds to
  for(pos = startidx+1; graph_walker_next(wlk); pos++)
  {
    if(!rpt_walker_attempt_traverse(rpt, wlk)) {
      reset(wlk,rpt,nbuf);
      return CONFIRM_REPEAT;
    }

    if(pos >= init_len)
    {
      // Beyond the nodes we were given: record the new node
      db_node_buf_add(nbuf, wlk->node);
      if(allow_extend) continue;

      // Not allowed to extend: reset first, then drop the node just added
      reset(wlk,rpt,nbuf);
      nbuf->len--;
      return CONFIRM_OVERSHOT;
    }

    // Still inside the original buffer: nodes must agree
    if(!db_nodes_are_equal(nbuf->b[pos], wlk->node)) {
      reset(wlk,rpt,nbuf);
      return CONFIRM_WRONG;
    }
  }

  reset(wlk,rpt,nbuf);

  if(pos < init_len) return CONFIRM_SHORT;
  return CONFIRM_SUCCESS;
}
Пример #5
0
// Unit test for the graph crawler.
// Loads three sequences into a three colour graph, crawls across the edge
// between the first two kmers of the first sequence in every colour, then
// checks the number of paths found and the sequence and length of each path.
void test_graph_crawler()
{
  test_status("Testing graph crawler...");

  // Construct 3 colour graph with kmer-size=11
  const size_t kmer_size = 11, ncols = 3;
  dBGraph graph;

  db_graph_alloc(&graph, kmer_size, ncols, 1, 2048,
                 DBG_ALLOC_EDGES | DBG_ALLOC_NODE_IN_COL | DBG_ALLOC_BKTLOCKS);

  // One sequence per colour
  char graphseq[3][77] =
//           <               X                 X              X...............
{"GTTCCAGAGCGGAGGTCTCCCAACAACATGGTATAAGTTGTCTAGCCCCGGTTCGCGCGGGTACTTCTTACAGCGC",
 "GTTCCAGAGCGGAGGTCTCCCAACAACTTGGTATAAGTTGTCTAGTCCCGGTTCGCGCGGCATTTCAGCATTGTTA",
 "GTTCCAGAGCGCGACAGAGTGCATATCACGCTAAGCACAGCCCTCTTCTATCTGCTTTTAAATGGATCAATAATCG"};

  size_t col;
  for(col = 0; col < ncols; col++)
    build_graph_from_str_mt(&graph, col, graphseq[col], strlen(graphseq[col]));

  // Crawl graph
  GraphCrawler crawler;
  graph_crawler_alloc(&crawler, &graph);

  // First two kmers of the first sequence
  dBNode node = db_graph_find_str(&graph, graphseq[0]);
  dBNode next_node = db_graph_find_str(&graph, graphseq[0]+1);
  TASSERT(node.key != HASH_NOT_FOUND);
  TASSERT(next_node.key != HASH_NOT_FOUND);

  BinaryKmer first_bkmer = db_node_get_bkmer(&graph, node.key);
  Edges first_edges = db_node_get_edges(&graph, node.key, 0);

  Nucleotide next_nucs[4];
  dBNode next_nodes[4];
  size_t i, pid, num_next, next_idx;

  num_next = db_graph_next_nodes(&graph, first_bkmer, node.orient, first_edges,
                                 next_nodes, next_nucs);

  // Find which of the following nodes is the second kmer
  for(next_idx = 0;
      next_idx < num_next && !db_nodes_are_equal(next_nodes[next_idx],next_node);
      next_idx++) {}

  TASSERT(next_idx < num_next && db_nodes_are_equal(next_nodes[next_idx],next_node));

  // Crawl in all colours, with no callbacks
  graph_crawler_fetch(&crawler, node, next_nodes, next_idx, num_next,
                      NULL, graph.num_of_cols, NULL, NULL, NULL);

  TASSERT2(crawler.num_paths == 2, "crawler.num_paths: %u", crawler.num_paths);

  // Fetch paths
  dBNodeBuffer nbuf;
  db_node_buf_alloc(&nbuf, 16);
  StrBuf sbuf;
  strbuf_alloc(&sbuf, 128);

  for(pid = 0; pid < crawler.num_paths; pid++)
  {
    // Turn the path into a string
    db_node_buf_reset(&nbuf);
    graph_crawler_get_path_nodes(&crawler, pid, &nbuf);
    strbuf_ensure_capacity(&sbuf, nbuf.len+graph.kmer_size);
    sbuf.end = db_nodes_to_str(nbuf.b, nbuf.len, &graph, sbuf.b);

    // It must equal one of the input sequences without its first base
    i = 0;
    while(i < 3 && strcmp(graphseq[i]+1,sbuf.b) != 0)
      i++;

    TASSERT2(i < 3, "seq: %s", sbuf.b);
    TASSERT2(sbuf.end == 75, "sbuf.end: %zu", sbuf.end);
    TASSERT2(nbuf.len == 65, "nbuf.len: %zu", nbuf.len);
  }

  // Clean up
  strbuf_dealloc(&sbuf);
  db_node_buf_dealloc(&nbuf);

  graph_crawler_dealloc(&crawler);

  db_graph_dealloc(&graph);
}
Пример #6
0
// Test the A->B->C statement for a single node B (`node`) in the worker's
// colour:
//  1. walk from B for up to wrkr->max_AB_dist nodes to find A
//  2. reverse complement what was walked so the buffer reads A..B, then
//     traverse A->B and carry on past B; the last node reached is C
//  3. walk from B on its own and check whether it reaches C
// Returns one of the RES_* codes describing the outcome.
static inline
int test_statement_node(dBNode node, ExpABCWorker *wrkr)
{
  const dBGraph *db_graph = wrkr->db_graph;
  GraphWalker *wlk = &wrkr->gwlk;
  RepeatWalker *rpt = &wrkr->rptwlk;
  dBNodeBuffer *nbuf = &wrkr->nbuf;
  const size_t col = wrkr->colour;
  size_t b_idx;

  // Buffer starts out holding only B
  db_node_buf_reset(nbuf);
  db_node_buf_add(nbuf, node);

  const size_t walk_limit = wrkr->max_AB_dist;

  // Walk from B to find A
  graph_walker_setup(wlk, true, col, col, db_graph);
  graph_walker_start(wlk, nbuf->b[0]);

  for(;;)
  {
    // The walker is advanced before the length limit is checked
    if(!graph_walker_next(wlk) || nbuf->len >= walk_limit) break;

    if(!rpt_walker_attempt_traverse(rpt, wlk)) {
      reset(wlk,rpt,nbuf);
      return RES_LOST_IN_RPT;
    }
    db_node_buf_add(nbuf, wlk->node);
  }

  reset(wlk,rpt,nbuf);

  // Nothing beyond B was found
  if(nbuf->len == 1) return RES_NO_TRAVERSAL;

  // Traverse A->B: flip the buffer so that B becomes its last node
  db_nodes_reverse_complement(nbuf->b, nbuf->len);
  b_idx = nbuf->len - 1;

  if(!wrkr->prime_AB)
  {
    // Attempt to traverse A->B then extend past B
    int ab_res = confirm_seq(0, true, wlk, rpt, nbuf, col, db_graph);

    switch(ab_res) {
      case CONFIRM_REPEAT:
        return RES_LOST_IN_RPT;
      case CONFIRM_OVERSHOT:
        ctx_assert2(0,"Can't 'overshoot' when extending");
        // If the assert returns, control falls through to CONFIRM_WRONG
      case CONFIRM_WRONG:
        return RES_AB_WRONG;
      case CONFIRM_SHORT:
        if(wrkr->print_failed_contigs)
          print_failed(node, nbuf, db_graph, true, wrkr->prime_AB);
        wrkr->ab_fail_state[wlk->last_step.status]++;
        return RES_AB_FAILED;
    }
    // Any other result (CONFIRM_SUCCESS) carries on below
  }
  else
  {
    // Prime A->B without attempting to cross
    graph_walker_prime(wlk, nbuf->b, nbuf->len, nbuf->len, true);

    // Then extend past B for as long as the walker can go
    for(;;)
    {
      if(!graph_walker_next(wlk)) break;

      if(!rpt_walker_attempt_traverse(rpt, wlk)) {
        reset(wlk,rpt,nbuf);
        return RES_LOST_IN_RPT;
      }
      db_node_buf_add(nbuf, wlk->node);
    }
  }

  reset(wlk,rpt,nbuf);

  // Couldn't get past B
  if(nbuf->len == b_idx+1) return RES_NO_TRAVERSAL;

  // Last node is now C
  // Walk from B... record whether or not we reach C
  ctx_assert(db_nodes_are_equal(nbuf->b[b_idx], db_node_reverse(node)));

  int r = confirm_seq(b_idx, false, wlk, rpt, nbuf, col, db_graph);

  if(r == CONFIRM_REPEAT) return RES_LOST_IN_RPT;
  if(r == CONFIRM_OVERSHOT) return RES_BC_OVERSHOT;
  if(r == CONFIRM_WRONG) return RES_BC_WRONG;
  if(r == CONFIRM_SUCCESS) return RES_ABC_SUCCESS;

  if(r == CONFIRM_SHORT) {
    if(wrkr->print_failed_contigs)
      print_failed(node, nbuf, db_graph, false, wrkr->prime_AB);
    wrkr->bc_fail_state[wlk->last_step.status]++;
    return RES_BC_FAILED;
  }

  die("Shouldn't reach here: r=%i", r);
  return -1;
}