// Fetch the kmer occurrence runs stored for path `pathid` and convert each
// run's qoffset from a step index into a kmer offset along the path.
//
// `ref_runs[pathid]` gives the position (`first_runid`) and number
// (`num_runs`) of this path's runs inside `runbuf->data`. The runs are sorted
// with koruns_sort_by_qoffset() and then rewritten in place: a run whose
// qoffset equals step index `s` gets the sum of `num_nodes` over the
// supernodes of steps 0..s-1. Runs whose qoffset matches no step index of the
// path are left untouched.
//
// Returns a pointer to the first run of the path inside `runbuf->data`.
static inline
KOccurRun* fetch_ref_contact(const GraphCache *cache, uint32_t pathid,
                             const PathRefRun *ref_runs,
                             KOccurRunBuffer *runbuf)
{
  // Steps making up the path
  const GCachePath *path = graph_cache_path(cache, pathid);
  const GCacheStep *steps = graph_cache_step(cache, path->first_step);
  const size_t total_steps = path->num_steps;

  // Runs belonging to this path, ordered by qoffset
  const PathRefRun path_runs = ref_runs[pathid];
  const size_t total_runs = path_runs.num_runs;
  KOccurRun *runs = runbuf->data + path_runs.first_runid;
  koruns_sort_by_qoffset(runs, total_runs);

  size_t run_idx = 0, step_idx = 0, kmer_offset = 0;

  while(step_idx < total_steps)
  {
    // All runs recorded at this step start at the current kmer offset
    while(run_idx < total_runs && runs[run_idx].qoffset == step_idx) {
      runs[run_idx].qoffset = kmer_offset;
      run_idx++;
    }

    // Nothing left to rewrite: no need to look at the remaining steps
    if(run_idx == total_runs) break;

    const GCacheSnode *snode = graph_cache_snode(cache,
                                                 steps[step_idx].supernode);
    kmer_offset += snode->num_nodes;
    step_idx++;
  }

  return runs;
}
// Extend the kmer occurrence runs in `koruns` across the supernode of `step`
// and report whether crawling should carry on.
//
// `pickup_new_runs` is passed straight to kograph_filter_extend(); if true we
// pick up runs starting at this supernode.
//
// Returns true when the smallest qoffset among the runs left in `koruns` is
// <= the smallest qoffset among the runs in `koruns_ended` (an empty buffer
// counts as SIZE_MAX, so two empty buffers give true). The original comments
// label the true case as "continue".
static inline bool gcrawler_stop_at_ref_covg(const GraphCache *cache,
                                             const GCacheStep *step,
                                             BreakpointCaller *caller,
                                             KOccurRunBuffer *koruns,
                                             KOccurRunBuffer *koruns_ended,
                                             bool pickup_new_runs)
{
  const GCacheSnode *snode = graph_cache_snode(cache, step->supernode);
  const GCachePath *path = graph_cache_path(cache, step->pathid);
  const dBNode *nodes = graph_cache_first_node(cache, snode);
  const bool forward = (step->orient == FORWARD);

  // The index of the path's last step is used as the qoffset
  const size_t qoffset = path->num_steps-1;

  // Per the original note: kmer occurrence runs are added to `koruns_ended`
  // only if they end and are longer than the minimum length in kmers
  // (caller->min_ref_nkmers)
  kograph_filter_extend(caller->kograph,
                        nodes, snode->num_nodes, forward,
                        caller->min_ref_nkmers, qoffset,
                        koruns, koruns_ended,
                        pickup_new_runs);

  size_t earliest_open = SIZE_MAX;
  size_t earliest_ended = SIZE_MAX;
  size_t k;

  // Earliest qoffset among runs that are still being extended
  for(k = 0; k < koruns->len; k++) {
    earliest_open = MIN2(earliest_open, koruns->data[k].qoffset);
  }

  // Earliest qoffset among runs that have finished
  for(k = 0; k < koruns_ended->len; k++) {
    earliest_ended = MIN2(earliest_ended, koruns_ended->data[k].qoffset);
  }

  // False only once every earliest run has finished
  return earliest_open <= earliest_ended;
}
示例#3
0
// Build a path of supernodes in `cache`, starting at `node`.
//
// `wlk`     GraphWalker, must already be positioned at `node` (asserted)
// `rptwlk`  RepeatWalker, should be clear on entry
// `jmpfunc` called with (cache, step, arg) after each supernode has been
//           walked to its end; crawling continues while it returns true.
//           If NULL, it is treated as always returning true.
// `arg`     opaque pointer handed to `jmpfunc`
//
// Crawling also stops when graph_walker_next_nodes() or
// rpt_walker_attempt_traverse() fails.
// Returns the id of the new path in the GraphCache.
uint32_t graph_crawler_load_path(GraphCache *cache, dBNode node,
                                 GraphWalker *wlk, RepeatWalker *rptwlk,
                                 bool (*jmpfunc)(GraphCache *_cache,
                                                 GCacheStep *_step, void *_arg),
                                 void *arg)
{
  const uint32_t pathid = graph_cache_new_path(cache);

  ctx_assert(db_nodes_are_equal(wlk->node, node));

  while(1)
  {
    // Add a step for the supernode containing the current node
    uint32_t stepid = graph_cache_new_step(cache, node);
    GCacheStep *step = graph_cache_step(cache, stepid);
    GCacheSnode *snode = graph_cache_snode(cache, step->supernode);

    // Move the walker to the far end of this supernode
    walk_supernode_end(cache, snode, step->orient, wlk);

    // Let the caller veto any further crawling
    if(jmpfunc != NULL && !jmpfunc(cache, step, arg)) break;

    // Pick the edges leaving the supernode in the direction of travel
    uint8_t num_edges;
    const dBNode *next_nodes;
    Nucleotide next_bases[4];

    if(step->orient != FORWARD) {
      num_edges = snode->num_prev;
      next_nodes = snode->prev_nodes;
      binary_seq_unpack_byte(next_bases, snode->prev_bases);
    }
    else {
      num_edges = snode->num_next;
      next_nodes = snode->next_nodes;
      binary_seq_unpack_byte(next_bases, snode->next_bases);
    }

    // Step into the next supernode; give up if the walker cannot choose
    // an edge or the repeat walker refuses the traversal
    if(!graph_walker_next_nodes(wlk, num_edges, next_nodes, next_bases)) break;
    if(!rpt_walker_attempt_traverse(rptwlk, wlk)) break;

    node = wlk->node;
  }

  return pathid;
}
示例#4
0
// Visit every supernode in the caller's graph cache, treating each one as a
// candidate 3p flank. After find_bubbles_ending_with() has run for a
// supernode, a bubble is printed for `spp_forward` and/or `spp_reverse`
// whenever that list holds more than one entry.
static void write_bubbles_to_file(BubbleCaller *caller)
{
  GraphCache *cache = &caller->cache;
  const size_t total_snodes = graph_cache_num_snodes(cache);
  size_t idx = 0;

  while(idx < total_snodes)
  {
    GCacheSnode *candidate = graph_cache_snode(cache, idx);
    find_bubbles_ending_with(caller, candidate);

    // A single entry is not a bubble: require at least two per orientation
    if(caller->spp_forward.len > 1) {
      print_bubble(caller, caller->spp_forward.b, caller->spp_forward.len);
    }
    if(caller->spp_reverse.len > 1) {
      print_bubble(caller, caller->spp_reverse.b, caller->spp_reverse.len);
    }

    idx++;
  }
}
示例#5
0
// Reset `rptwlk` after it was used to crawl path `pathid`.
//
// Calls rpt_walker_fast_clear() with an empty node list, then, for every
// supernode on the path, calls rpt_walker_fast_clear_single_node() on the
// supernode's first and last node.
void graph_crawler_reset_rpt_walker(RepeatWalker *rptwlk,
                                    const GraphCache *cache, uint32_t pathid)
{
  rpt_walker_fast_clear(rptwlk, NULL, 0);

  const GCachePath *path = graph_cache_path(cache, pathid);
  const GCacheStep *steps = graph_cache_step(cache, path->first_step);
  const size_t total_steps = path->num_steps;
  size_t s;

  for(s = 0; s < total_steps; s++)
  {
    // Orientation is irrelevant: both end nodes are cleared either way
    const GCacheSnode *snode = graph_cache_snode(cache, steps[s].supernode);
    const dBNode *first = graph_cache_first_node(cache, snode);
    const dBNode *last = graph_cache_last_node(cache, snode);

    rpt_walker_fast_clear_single_node(rptwlk, *first);
    rpt_walker_fast_clear_single_node(rptwlk, *last);
  }
}
示例#6
0
// Potential bubble - filter ref and duplicate alleles
static void print_bubble(BubbleCaller *caller,
                         GCacheStep **steps, size_t num_paths)
{
  const BubbleCallingPrefs prefs = caller->prefs;
  const dBGraph *db_graph = caller->db_graph;
  GCacheSnode *snode;
  size_t i;

  dBNodeBuffer *flank5p = &caller->flank5p;
  if(flank5p->len == 0)
  {
    // Haven't fetched 5p flank yet
    // flank5p[0] already contains the first node
    flank5p->len = 1;
    supernode_extend(flank5p, prefs.max_flank_len, db_graph);
    db_nodes_reverse_complement(flank5p->b, flank5p->len);
  }

  //
  // Print Bubble
  //

  // write to string buffer then flush to gzFile
  StrBuf *sbuf = &caller->output_buf;
  strbuf_reset(sbuf);

  // Temporary node buffer to use
  dBNodeBuffer *pathbuf = &caller->pathbuf;
  db_node_buf_reset(pathbuf);

  // Get bubble number (threadsafe num_bubbles_ptr++)
  size_t id = __sync_fetch_and_add((volatile size_t*)caller->num_bubbles_ptr, 1);

  // This can be set to anything without a '.' in it
  const char prefix[] = "call";

  // 5p flank
  // strbuf_sprintf(sbuf, ">bubble.%s%zu.5pflank kmers=%zu\n", prefix, id, flank5p->len);
  strbuf_append_str(sbuf, ">bubble.");
  strbuf_append_str(sbuf, prefix);
  strbuf_append_ulong(sbuf, id);
  strbuf_append_str(sbuf, ".5pflank kmers=");
  strbuf_append_ulong(sbuf, flank5p->len);
  strbuf_append_char(sbuf, '\n');
  branch_to_str(flank5p->b, flank5p->len, true, sbuf, db_graph);

  // 3p flank
  db_node_buf_reset(pathbuf);
  snode = graph_cache_snode(&caller->cache, steps[0]->supernode);
  graph_cache_snode_fetch_nodes(&caller->cache, snode, steps[0]->orient, pathbuf);

  // strbuf_sprintf(sbuf, ">bubble.%s%zu.3pflank kmers=%zu\n", prefix, id, pathbuf->len);
  strbuf_append_str(sbuf, ">bubble.");
  strbuf_append_str(sbuf, prefix);
  strbuf_append_ulong(sbuf, id);
  strbuf_append_str(sbuf, ".3pflank kmers=");
  strbuf_append_ulong(sbuf, pathbuf->len);
  strbuf_append_char(sbuf, '\n');
  branch_to_str(pathbuf->b, pathbuf->len, false, sbuf, db_graph);

  // Print alleles
  for(i = 0; i < num_paths; i++)
  {
    db_node_buf_reset(pathbuf);
    graph_cache_step_fetch_nodes(&caller->cache, steps[i], pathbuf);

    // strbuf_sprintf(sbuf, ">bubble.%s%zu.branch.%zu kmers=%zu\n",
    //                prefix, id, i, pathbuf->len);
    strbuf_append_str(sbuf, ">bubble.");
    strbuf_append_str(sbuf, prefix);
    strbuf_append_ulong(sbuf, id);
    strbuf_append_str(sbuf, ".branch.");
    strbuf_append_ulong(sbuf, i);
    strbuf_append_str(sbuf, " kmers=");
    strbuf_append_ulong(sbuf, pathbuf->len);
    strbuf_append_char(sbuf, '\n');

    branch_to_str(pathbuf->b, pathbuf->len, false, sbuf, db_graph);
  }

  strbuf_append_char(sbuf, '\n');

  ctx_assert(strlen(sbuf->b) == sbuf->end);

  // lock, print, unlock
  pthread_mutex_lock(caller->out_lock);
  gzwrite(caller->gzout, sbuf->b, sbuf->end);
  pthread_mutex_unlock(caller->out_lock);
}