/**
 * Walk the graph from node0, recording every node visited in nbuf, then check
 * the walk against an expected length and an expected sequence.
 *
 * @param gwlk      graph walker; initialised here and finished before return
 * @param rptwlk    repeat walker; the walk stops as soon as it rejects a step.
 *                  rpt_walker_fast_clear() is called on the walked nodes
 *                  before return
 * @param node0     node to start the walk from
 * @param nbuf      node buffer; reset, then filled with the nodes walked
 * @param graph     graph to walk
 * @param expnkmers expected number of nodes in the walk
 * @param ans       expected string for the walked nodes
 */
static void test_walk(GraphWalker *gwlk, RepeatWalker *rptwlk,
                      dBNode node0, dBNodeBuffer *nbuf,
                      const dBGraph *graph,
                      size_t expnkmers, const char *ans)
{
  db_node_buf_reset(nbuf);
  graph_walker_init(gwlk, graph, 0, 0, node0);

  // The start node is always recorded. After that, keep stepping until the
  // graph walker cannot advance or the repeat walker refuses the step.
  db_node_buf_add(nbuf, gwlk->node);
  while(graph_walker_next(gwlk) && rpt_walker_attempt_traverse(rptwlk, gwlk)) {
    db_node_buf_add(nbuf, gwlk->node);
  }

  TASSERT2(nbuf->len == expnkmers, "%zu / %zu", nbuf->len, expnkmers);

  // Scratch space for the walked nodes rendered as a string
  char tmp[nbuf->len+MAX_KMER_SIZE];
  db_nodes_to_str(nbuf->data, nbuf->len, graph, tmp);
  TASSERT2(strcmp(tmp,ans) == 0, "%s vs %s", tmp, ans);

  // Leave both walkers ready for the next call
  graph_walker_finish(gwlk);
  rpt_walker_fast_clear(rptwlk, nbuf->data, nbuf->len);
}
Example #2
0
/**
 * Check the paths stored at a kmer against a list of expected path sequences.
 *
 * Only paths with the same orientation as the looked-up node, and which have
 * the given colour, are considered. Each such path must match exactly one
 * entry of path_strs, and every entry of path_strs must be matched.
 *
 * @param kmer      kmer string; must be graph->kmer_size characters long
 * @param path_strs expected string (from db_nodes_to_str) of each path
 * @param npaths    number of entries in path_strs (may be zero)
 * @param colour    colour the paths must be in
 * @param graph     graph holding the kmer and its path store
 */
static void _check_node_paths(const char *kmer,
                              const char **path_strs, size_t npaths,
                              size_t colour, const dBGraph *graph)
{
  TASSERT(strlen(kmer) == graph->kmer_size);

  // paths[i] is the stored path matching path_strs[i], NULL until seen.
  // A zero-length VLA is undefined behaviour, so always keep one slot.
  const GPath *paths[npaths > 0 ? npaths : 1];
  memset(paths, 0, sizeof(paths));
  size_t i, num_paths_seen = 0;

  const GPathStore *gpstore = &graph->gpstore;
  dBNode node = db_graph_find_str(graph, kmer);

  const GPath *path = gpath_store_fetch_traverse(gpstore, node.key);
  dBNodeBuffer nbuf;
  SizeBuffer jposbuf;
  db_node_buf_alloc(&nbuf, 64);
  size_buf_alloc(&jposbuf, 64);

  #define MAX_SEQ 128
  char seq[MAX_SEQ];

  for(; path != NULL; path = path->next)
  {
    if(path->orient == node.orient &&
       gpath_has_colour(path, gpstore->gpset.ncols, colour))
    {
      TASSERT(num_paths_seen < npaths);
      db_node_buf_reset(&nbuf);
      gpath_fetch(node, path, &nbuf, &jposbuf, colour, graph);

      // The string holds kmer_size+len-1 bases plus a terminating NUL, so the
      // node count alone is not a sufficient bound on the buffer size
      if(nbuf.len + graph->kmer_size > MAX_SEQ)
        die("Too many nodes. Cannot continue. %zu", nbuf.len);

      db_nodes_to_str(nbuf.b, nbuf.len, graph, seq);
      TASSERT(strlen(seq) == graph->kmer_size + nbuf.len - 1);

      // Match against the expected strings; each may be matched only once
      for(i = 0; i < npaths; i++) {
        if(strcmp(path_strs[i],seq) == 0) {
          TASSERT2(paths[i] == NULL, "Duplicate paths: %s", seq);
          paths[i] = path;
          break;
        }
      }
      TASSERT2(i < npaths, "Path not found: %s", seq);
      num_paths_seen++;
    }
  }

  TASSERT(num_paths_seen == npaths);

  // Every expected path must have been found in the graph
  for(i = 0; i < npaths; i++) {
    TASSERT2(paths[i] != NULL, "path not in graph: %s", path_strs[i]);
  }

  db_node_buf_dealloc(&nbuf);
  size_buf_dealloc(&jposbuf);
}
Example #3
0
/**
 * Check that supernodes pulled out of the graph match the expected answers.
 *
 * Runs two checks:
 *  1. iterate over every kmer in the hash table via supernode_from_kmer()
 *  2. for every kmer of every input sequence seq[i], fetch and normalise its
 *     supernode and compare the resulting string with ans[i]
 *
 * @param seq   input sequences, n entries
 * @param ans   expected supernode strings, n entries; ans[i] is the expected
 *              supernode for every kmer in seq[i]
 * @param n     number of entries in seq and ans
 * @param graph graph to pull supernodes from
 */
static void pull_out_supernodes(const char **seq, const char **ans, size_t n,
                                const dBGraph *graph)
{
  dBNodeBuffer nbuf;
  db_node_buf_alloc(&nbuf, 1024);

  // 1. Check pulling out supernodes works for iterating over the graph
  uint64_t *visited;
  visited = ctx_calloc(roundup_bits2words64(graph->ht.capacity),
                       sizeof(*visited));
  HASH_ITERATE(&graph->ht, supernode_from_kmer,
               &nbuf, visited, graph, ans, n);
  ctx_free(visited);

  // 2. Check pulling out supernodes works when we iterate over inputs
  size_t i, j, len;
  dBNode node;
  char tmpstr[SNODEBUF];

  for(i = 0; i < n; i++) {
    len = strlen(seq[i]);
    for(j = 0; j+graph->kmer_size <= len; j++)
    {
      // Find node
      node = db_graph_find_str(graph, seq[i]+j);
      TASSERT(node.key != HASH_NOT_FOUND);

      // Fetch supernode
      db_node_buf_reset(&nbuf);
      supernode_find(node.key, &nbuf, graph);
      supernode_normalise(nbuf.b, nbuf.len, graph);

      // The supernode string needs kmer_size+len-1 bases plus a terminating
      // NUL. Skip the comparison rather than overrun tmpstr, in case a failed
      // TASSERT does not abort.
      TASSERT(nbuf.len + graph->kmer_size <= SNODEBUF);
      if(nbuf.len + graph->kmer_size > SNODEBUF) continue;

      // Compare
      db_nodes_to_str(nbuf.b, nbuf.len, graph, tmpstr);
      if(strcmp(tmpstr, ans[i]) != 0) {
        test_status("Got: %s from ans[i]:%s\n", tmpstr, ans[i]);
      }
      TASSERT(strcmp(tmpstr, ans[i]) == 0);
    }
  }

  db_node_buf_dealloc(&nbuf);
}
Example #4
0
/**
 * Check that every step in steps spells out one of the expected alleles.
 *
 * @param cache       graph cache the steps belong to
 * @param steps       steps to check; must contain num_alleles entries
 * @param alleles     expected allele strings (compared case-insensitively)
 * @param num_alleles number of entries in alleles
 * @param nbuf        temporary node buffer, overwritten
 * @param sbuf        temporary string buffer, overwritten
 */
static void _check_alleles(GraphCache *cache, GCacheStepPtrBuf *steps,
                           const char **alleles, size_t num_alleles,
                           dBNodeBuffer *nbuf, StrBuf *sbuf)
{
  TASSERT2(steps->len == num_alleles, "Number of alleles doesn't match");

  size_t i, j;

  for(i = 0; i < steps->len; i++)
  {
    // Render the nodes of this step as a string in sbuf
    db_node_buf_reset(nbuf);
    gc_step_fetch_nodes(cache, steps->b[i], nbuf);
    strbuf_ensure_capacity(sbuf, nbuf->len+MAX_KMER_SIZE+1);
    db_nodes_to_str(nbuf->b, nbuf->len, cache->db_graph, sbuf->b);

    // Linear search for a matching allele; j == num_alleles if none matches
    j = 0;
    while(j < num_alleles && strcasecmp(sbuf->b,alleles[j]) != 0) {
      j++;
    }

    TASSERT2(j < num_alleles, "Couldn't find allele: %s", sbuf->b);
  }
}
Example #5
0
/**
 * Hash table iteration callback: if hkey has not been visited yet, pull out
 * the supernode containing it, mark all of its kmers as visited and check
 * that the normalised supernode string is one of the expected answers.
 *
 * @param hkey    kmer to start from
 * @param nbuf    temporary node buffer, overwritten
 * @param visited bitset indexed by hkey; set for every kmer of the supernode
 * @param graph   graph to pull the supernode from
 * @param ans     expected supernode strings
 * @param n       number of entries in ans
 */
static void supernode_from_kmer(hkey_t hkey, dBNodeBuffer *nbuf,
                                uint64_t *visited, const dBGraph *graph,
                                const char **ans, size_t n)
{
  size_t i;
  char tmpstr[SNODEBUF];

  if(!bitset_get(visited, hkey))
  {
    db_node_buf_reset(nbuf);
    supernode_find(hkey, nbuf, graph);
    for(i = 0; i < nbuf->len; i++) bitset_set(visited, nbuf->b[i].key);

    supernode_normalise(nbuf->b, nbuf->len, graph);

    // The supernode string needs kmer_size+len-1 bases plus a terminating
    // NUL. Give up rather than overrun tmpstr, in case a failed TASSERT does
    // not abort.
    TASSERT(nbuf->len + graph->kmer_size <= SNODEBUF);
    if(nbuf->len + graph->kmer_size > SNODEBUF) return;

    db_nodes_to_str(nbuf->b, nbuf->len, graph, tmpstr);

    // Linear search of the answers; i == n if the supernode is unexpected
    for(i = 0; i < n && strcmp(tmpstr,ans[i]) != 0; i++) {}

    TASSERT2(i < n, "Got: %s", tmpstr);
  }
}
Example #6
0
// Align one read, or a pair of reads, to the graph and store the result in
// alignment. Nodes from r1 come first in alignment->nodes; nodes from r2 (if
// given) start at index alignment->r2strtidx.
// If colour is -1 aligns to all colours, otherwise aligns to given colour only
// (which requires db_graph->node_in_cols to be set).
// r2 may be NULL for single ended reads.
// Assumes both reads are in FF orientation
void db_alignment_from_reads(dBAlignment *alignment,
                             const read_t *r1, const read_t *r2,
                             uint8_t qcutoff1, uint8_t qcutoff2,
                             uint8_t hp_cutoff,
                             const dBGraph *db_graph, int colour)
{
  ctx_assert(colour == -1 || db_graph->node_in_cols != NULL);

  const bool have_r2 = (r2 != NULL);

  // Start from an empty alignment
  db_node_buf_reset(&alignment->nodes);
  int32_buf_reset(&alignment->rpos);

  alignment->colour = colour;
  alignment->seq_gaps = false;
  alignment->passed_r2 = have_r2;
  alignment->r1bases = r1->seq.end;
  alignment->r2bases = have_r2 ? r2->seq.end : 0;
  alignment->r2enderr = 0;

  // First read
  alignment->r1enderr = db_alignment_from_read(alignment, r1,
                                               qcutoff1, hp_cutoff,
                                               db_graph, colour);

  // Nodes added from here on belong to the second read
  alignment->r2strtidx = alignment->nodes.len;

  // Second read, if we were given one
  if(have_r2) {
    alignment->r2enderr = db_alignment_from_read(alignment, r2,
                                                 qcutoff2, hp_cutoff,
                                                 db_graph, colour);
  }

  // A read counts as used if its end-error value is less than its length
  alignment->used_r1 = (alignment->r1enderr < r1->seq.end);
  alignment->used_r2 = (have_r2 && alignment->r2enderr < r2->seq.end);

  #ifdef CTXVERBOSE
    db_alignment_print(alignment);
  #endif
}
Example #7
0
// Build a small three colour graph, crawl from the first kmer of the first
// sequence in all colours and check the paths returned by the crawler
void test_graph_crawler()
{
  test_status("Testing graph crawler...");

  // Construct 3 colour graph with kmer-size=11
  const size_t kmer_size = 11, ncols = 3;
  dBGraph graph;

  db_graph_alloc(&graph, kmer_size, ncols, 1, 2048,
                 DBG_ALLOC_EDGES | DBG_ALLOC_NODE_IN_COL | DBG_ALLOC_BKTLOCKS);

  char graphseq[3][77] =
//           <               X                 X              X...............
{"GTTCCAGAGCGGAGGTCTCCCAACAACATGGTATAAGTTGTCTAGCCCCGGTTCGCGCGGGTACTTCTTACAGCGC",
 "GTTCCAGAGCGGAGGTCTCCCAACAACTTGGTATAAGTTGTCTAGTCCCGGTTCGCGCGGCATTTCAGCATTGTTA",
 "GTTCCAGAGCGCGACAGAGTGCATATCACGCTAAGCACAGCCCTCTTCTATCTGCTTTTAAATGGATCAATAATCG"};

  // One sequence per colour
  size_t col;
  for(col = 0; col < ncols; col++) {
    build_graph_from_str_mt(&graph, col, graphseq[col], strlen(graphseq[col]));
  }

  // Crawl graph
  GraphCrawler crawler;
  graph_crawler_alloc(&crawler, &graph);

  // Start at the first kmer of the first sequence, heading to the second kmer
  dBNode node = db_graph_find_str(&graph, graphseq[0]);
  dBNode next_node = db_graph_find_str(&graph, graphseq[0]+1);
  TASSERT(node.key != HASH_NOT_FOUND);
  TASSERT(next_node.key != HASH_NOT_FOUND);

  BinaryKmer bkey = db_node_get_bkmer(&graph, node.key);
  Edges edges = db_node_get_edges(&graph, node.key, 0);

  dBNode next_nodes[4];
  Nucleotide next_nucs[4];
  size_t i, p, num_next, next_idx;

  num_next = db_graph_next_nodes(&graph, bkey, node.orient, edges,
                                 next_nodes, next_nucs);

  // Find which of the next nodes is the one we want to crawl towards
  for(next_idx = 0; next_idx < num_next; next_idx++) {
    if(db_nodes_are_equal(next_nodes[next_idx],next_node)) break;
  }

  TASSERT(next_idx < num_next && db_nodes_are_equal(next_nodes[next_idx],next_node));

  // Crawl in all colours
  graph_crawler_fetch(&crawler, node, next_nodes, next_idx, num_next,
                      NULL, graph.num_of_cols, NULL, NULL, NULL);

  TASSERT2(crawler.num_paths == 2, "crawler.num_paths: %u", crawler.num_paths);

  // Fetch paths
  dBNodeBuffer nbuf;
  db_node_buf_alloc(&nbuf, 16);
  StrBuf sbuf;
  strbuf_alloc(&sbuf, 128);

  for(p = 0; p < crawler.num_paths; p++) {
    db_node_buf_reset(&nbuf);
    graph_crawler_get_path_nodes(&crawler, p, &nbuf);
    strbuf_ensure_capacity(&sbuf, nbuf.len+graph.kmer_size);
    sbuf.end = db_nodes_to_str(nbuf.b, nbuf.len, &graph, sbuf.b);

    // Each path should be one of the input sequences minus its first base
    i = 0;
    while(i < 3 && strcmp(graphseq[i]+1,sbuf.b) != 0) {
      i++;
    }

    TASSERT2(i < 3, "seq: %s", sbuf.b);
    TASSERT2(sbuf.end == 75, "sbuf.end: %zu", sbuf.end);
    TASSERT2(nbuf.len == 65, "nbuf.len: %zu", nbuf.len);
  }

  // Clean up
  strbuf_dealloc(&sbuf);
  db_node_buf_dealloc(&nbuf);

  graph_crawler_dealloc(&crawler);

  db_graph_dealloc(&graph);
}
Example #8
0
/**
 * Print paths to a string buffer. Paths are sorted before being written.
 * Nothing is written if the kmer has no paths.
 *
 * Output is a "<kmer> <npaths>" line followed by one line per path:
 *   <F|R> <num_juncs> <nseen0>[,<nseen1>...] <juncs>[ seq=...[ juncpos=...]]
 *
 * @param hkey     All paths associated with hkey are written to the buffer
 * @param sbuf     paths are appended to this string buffer
 * @param subset   is a temp variable that is reused each time
 * @param nbuf     temporary buffer, if not NULL, used to add seq=... to output
 * @param jposbuf  temporary buffer, if not NULL, used to add juncpos=... to
 *                 output. Only used when nbuf is also not NULL
 * @param db_graph graph holding the kmers and paths. nbuf and jposbuf must be
 *                 NULL unless the graph has exactly one colour
 */
void gpath_save_sbuf(hkey_t hkey, StrBuf *sbuf, GPathSubset *subset,
                     dBNodeBuffer *nbuf, SizeBuffer *jposbuf,
                     const dBGraph *db_graph)
{
  ctx_assert(db_graph->num_of_cols == 1 || nbuf == NULL);
  ctx_assert(db_graph->num_of_cols == 1 || jposbuf == NULL);

  const GPathStore *gpstore = &db_graph->gpstore;
  const GPathSet *gpset = &gpstore->gpset;
  const size_t ncols = gpstore->gpset.ncols;
  GPath *first_gpath = gpath_store_fetch(gpstore, hkey);
  const GPath *gpath;
  size_t i, j, col;

  // Load and sort paths for given kmer
  gpath_subset_reset(subset);
  gpath_subset_load_llist(subset, first_gpath);
  gpath_subset_sort(subset);

  if(subset->list.len == 0) return;

  // Print "<kmer> <npaths>"
  BinaryKmer bkmer = db_graph->ht.table[hkey];
  char bkstr[MAX_KMER_SIZE+1];
  binary_kmer_to_str(bkmer, db_graph->kmer_size, bkstr);

  strbuf_append_strn(sbuf, bkstr, db_graph->kmer_size);
  strbuf_append_char(sbuf, ' ');
  strbuf_append_ulong(sbuf, subset->list.len);
  strbuf_append_char(sbuf, '\n');

  // Orientation -> character lookup
  char orchar[2] = {0};
  orchar[FORWARD] = 'F';
  orchar[REVERSE] = 'R';
  const uint8_t *nseenptr;

  for(i = 0; i < subset->list.len; i++)
  {
    gpath = subset->list.b[i];
    nseenptr = gpath_set_get_nseen(gpset, gpath);

    // "<orient> <num_juncs> <nseen0>"
    strbuf_append_char(sbuf, orchar[gpath->orient]);
    strbuf_append_char(sbuf, ' ');
    strbuf_append_ulong(sbuf, gpath->num_juncs);
    strbuf_append_char(sbuf, ' ');
    strbuf_append_ulong(sbuf, nseenptr[0]);

    // ",<nseen>" for each remaining colour
    for(col = 1; col < ncols; col++) {
      strbuf_append_char(sbuf, ',');
      strbuf_append_ulong(sbuf, nseenptr[col]);
    }

    // Junction choices are written straight into the buffer
    strbuf_append_char(sbuf, ' ');
    strbuf_ensure_capacity(sbuf, sbuf->end + gpath->num_juncs + 2);
    binary_seq_to_str(gpath->seq, gpath->num_juncs, sbuf->b+sbuf->end);
    sbuf->end += gpath->num_juncs;

    if(nbuf)
    {
      // Trace this path through the graph
      // First, find a colour this path is in
      for(col = 0; col < ncols && !gpath_has_colour(gpath, ncols, col); col++) {}
      if(col == ncols) die("path is not in any colours");

      dBNode node = {.key = hkey, .orient = gpath->orient};
      db_node_buf_reset(nbuf);
      if(jposbuf) size_buf_reset(jposbuf); // indices of junctions in nbuf
      gpath_fetch(node, gpath, nbuf, jposbuf, col, db_graph);

      strbuf_append_str(sbuf, " seq=");
      strbuf_ensure_capacity(sbuf, sbuf->end + db_graph->kmer_size + nbuf->len);
      sbuf->end += db_nodes_to_str(nbuf->b, nbuf->len, db_graph,
                                   sbuf->b+sbuf->end);

      if(jposbuf) {
        // Comma separated list. Looping from zero means an empty jposbuf
        // prints nothing instead of reading a stale first element.
        strbuf_append_str(sbuf, " juncpos=");
        for(j = 0; j < jposbuf->len; j++) {
          if(j > 0) strbuf_append_char(sbuf, ',');
          strbuf_append_ulong(sbuf, jposbuf->b[j]);
        }
      }
    }

    strbuf_append_char(sbuf, '\n');
  }
}

// Append the paths of one kmer to sbuf, then call _gpath_save_flush() if sbuf
// has grown past DEFAULT_IO_BUFSIZE.
// @subset is a temp variable that is reused each time
// @sbuf   is a temp variable that is reused each time
// Always returns 0 (=> keep iterating)
static inline int _gpath_gzsave_node(hkey_t hkey,
                                     StrBuf *sbuf, GPathSubset *subset,
                                     dBNodeBuffer *nbuf, SizeBuffer *jposbuf,
                                     gzFile gzout, pthread_mutex_t *outlock,
                                     const dBGraph *db_graph)
{
  gpath_save_sbuf(hkey, sbuf, subset, nbuf, jposbuf, db_graph);

  // Still room in the buffer: nothing to write out yet
  if(sbuf->end <= DEFAULT_IO_BUFSIZE) {
    return 0;
  }

  _gpath_save_flush(gzout, sbuf, outlock);
  return 0;
}
// Walk the graph remembering the last time we met the ref
// When traversal fails, dump sequence up to last meeting with the ref
//
// @param caller breakpoint caller; its crawlers, node buffers and run buffers
//               are used as scratch space and overwritten
// @param node   node to start from. Nothing is done unless some, but not all,
//               of its next nodes have kograph_num() == 0
//
// For each such next node we fetch the 5' flanks by crawling backwards, then
// for each flank with at least one ref run we crawl forwards to get alleles and
// pass each (flank, allele) pair to process_contig()
static void follow_break(BreakpointCaller *caller, dBNode node)
{
  size_t i, j, k, num_next;
  dBNode next_nodes[4];
  Nucleotide next_nucs[4];
  size_t nonref_idx[4], num_nonref_next = 0;
  const dBGraph *db_graph = caller->db_graph;

  BinaryKmer bkey = db_node_get_bkmer(db_graph, node.key);
  Edges edges = db_node_get_edges(db_graph, node.key, 0);

  num_next = db_graph_next_nodes(db_graph, bkey, node.orient, edges,
                                 next_nodes, next_nucs);

  // Filter out next nodes in the reference
  // nonref_idx[] holds indices into next_nodes[] of nodes with kograph_num()==0
  for(i = 0; i < num_next; i++) {
    if(kograph_num(caller->kograph, next_nodes[i].key) == 0) {
      nonref_idx[num_nonref_next] = i;
      num_nonref_next++;
    }
  }

  // Abandon if all options are in ref or none are
  if(num_nonref_next == num_next || num_nonref_next == 0) return;

  // Follow all paths not in ref, in all colours
  // Crawlers are indexed by orientation: forward crawler matches node.orient
  GraphCrawler *fw_crawler = &caller->crawlers[node.orient];
  GraphCrawler *rv_crawler = &caller->crawlers[!node.orient];
  dBNodeBuffer *allelebuf = &caller->allelebuf, *flank5pbuf = &caller->flank5pbuf;
  GCMultiColPath *flank5p_multicolpath, *allele_multicolpath;
  KOccurRun *flank5p_runs, *flank3p_runs;
  size_t flank5p_pathid, allele_pathid;
  size_t num_flank5p_runs, num_flank3p_runs;

  // We fetch 5' flanks in all colours then merge matching paths
  // we stop fetching a single path if it stops tracking the reference
  // Alternatively, we could fetch the 5' flank in everyone and stop after a
  // given distance, then check for that set of paths how much it tracks the
  // reference. This has the advantage of scaling much better with number of
  // samples, but not so well as min_ref_nkmers increases (since we fetch
  // many flanks that can't be used) - I think this is less of a worry.

  // Loop over possible next nodes at this junction
  for(i = 0; i < num_nonref_next; i++)
  {
    size_t next_idx = nonref_idx[i];

    // Go backwards to get 5p flank
    // (start at the reverse of the next node, heading to the reverse of node)
    traverse_5pflank(caller, rv_crawler, db_node_reverse(next_nodes[next_idx]),
                     db_node_reverse(node));

    // Loop over the flanks we got
    for(j = 0; j < rv_crawler->num_paths; j++)
    {
      // Get 5p flank
      db_node_buf_reset(flank5pbuf);
      graph_crawler_get_path_nodes(rv_crawler, j, flank5pbuf);
      flank5p_multicolpath = &rv_crawler->multicol_paths[j];
      flank5p_pathid = flank5p_multicolpath->pathid;

      // Fetch 5pflank ref position
      num_flank5p_runs = caller->flank5p_refs[flank5p_pathid].num_runs;
      flank5p_runs = fetch_ref_contact(&rv_crawler->cache, flank5p_pathid,
                                       caller->flank5p_refs,
                                       &caller->flank5p_run_buf);

      // The flank was crawled backwards: flip the runs and the nodes so that
      // both read in the same direction as the alleles fetched below
      koruns_reverse(flank5p_runs, num_flank5p_runs, flank5pbuf->len);
      koruns_sort_by_qoffset(flank5p_runs, num_flank5p_runs);
      db_nodes_reverse_complement(flank5pbuf->data, flank5pbuf->len);

      // Only flanks with at least one ref run are used
      if(num_flank5p_runs > 0)
      {
        // Reset caller
        kmer_run_buf_reset(&caller->koruns_3p);
        kmer_run_buf_reset(&caller->koruns_3p_ended);
        kmer_run_buf_reset(&caller->allele_run_buf);

        // functions gcrawler_path_stop_at_ref_covg(),
        //           gcrawler_path_finish_ref_covg()
        // both fill koruns_3p, koruns_3p_ended and allele_run_buf

        // Only traverse in the colours we have a flank for
        graph_crawler_fetch(fw_crawler, node,
                            next_nodes, next_nucs, next_idx, num_next,
                            flank5p_multicolpath->cols,
                            flank5p_multicolpath->num_cols,
                            gcrawler_path_stop_at_ref_covg,
                            gcrawler_path_finish_ref_covg,
                            caller);

        // Assemble contigs - fetch forwards for each path for given 5p flank
        for(k = 0; k < fw_crawler->num_paths; k++)
        {
          // Fetch nodes
          db_node_buf_reset(allelebuf);
          graph_crawler_get_path_nodes(fw_crawler, k, allelebuf);
          ctx_assert(allelebuf->len > 0);

          allele_multicolpath = &fw_crawler->multicol_paths[k];
          allele_pathid = allele_multicolpath->pathid;

          // Fetch 3pflank ref position
          num_flank3p_runs = caller->allele_refs[allele_pathid].num_runs;
          flank3p_runs = fetch_ref_contact(&fw_crawler->cache, allele_pathid,
                                           caller->allele_refs,
                                           &caller->allele_run_buf);

          // Hand over the 5p flank, the allele and both sets of ref runs,
          // using the colours of the allele path
          process_contig(caller,
                         allele_multicolpath->cols,
                         allele_multicolpath->num_cols,
                         flank5pbuf, allelebuf,
                         flank5p_runs, num_flank5p_runs,
                         flank3p_runs, num_flank3p_runs);
        }
      }
    }
  }
}
Example #10
0
// Potential bubble - filter ref and duplicate alleles
static void print_bubble(BubbleCaller *caller,
                         GCacheStep **steps, size_t num_paths)
{
  const BubbleCallingPrefs prefs = caller->prefs;
  const dBGraph *db_graph = caller->db_graph;
  GCacheSnode *snode;
  size_t i;

  dBNodeBuffer *flank5p = &caller->flank5p;
  if(flank5p->len == 0)
  {
    // Haven't fetched 5p flank yet
    // flank5p[0] already contains the first node
    flank5p->len = 1;
    supernode_extend(flank5p, prefs.max_flank_len, db_graph);
    db_nodes_reverse_complement(flank5p->b, flank5p->len);
  }

  //
  // Print Bubble
  //

  // write to string buffer then flush to gzFile
  StrBuf *sbuf = &caller->output_buf;
  strbuf_reset(sbuf);

  // Temporary node buffer to use
  dBNodeBuffer *pathbuf = &caller->pathbuf;
  db_node_buf_reset(pathbuf);

  // Get bubble number (threadsafe num_bubbles_ptr++)
  size_t id = __sync_fetch_and_add((volatile size_t*)caller->num_bubbles_ptr, 1);

  // This can be set to anything without a '.' in it
  const char prefix[] = "call";

  // 5p flank
  // strbuf_sprintf(sbuf, ">bubble.%s%zu.5pflank kmers=%zu\n", prefix, id, flank5p->len);
  strbuf_append_str(sbuf, ">bubble.");
  strbuf_append_str(sbuf, prefix);
  strbuf_append_ulong(sbuf, id);
  strbuf_append_str(sbuf, ".5pflank kmers=");
  strbuf_append_ulong(sbuf, flank5p->len);
  strbuf_append_char(sbuf, '\n');
  branch_to_str(flank5p->b, flank5p->len, true, sbuf, db_graph);

  // 3p flank
  db_node_buf_reset(pathbuf);
  snode = graph_cache_snode(&caller->cache, steps[0]->supernode);
  graph_cache_snode_fetch_nodes(&caller->cache, snode, steps[0]->orient, pathbuf);

  // strbuf_sprintf(sbuf, ">bubble.%s%zu.3pflank kmers=%zu\n", prefix, id, pathbuf->len);
  strbuf_append_str(sbuf, ">bubble.");
  strbuf_append_str(sbuf, prefix);
  strbuf_append_ulong(sbuf, id);
  strbuf_append_str(sbuf, ".3pflank kmers=");
  strbuf_append_ulong(sbuf, pathbuf->len);
  strbuf_append_char(sbuf, '\n');
  branch_to_str(pathbuf->b, pathbuf->len, false, sbuf, db_graph);

  // Print alleles
  for(i = 0; i < num_paths; i++)
  {
    db_node_buf_reset(pathbuf);
    graph_cache_step_fetch_nodes(&caller->cache, steps[i], pathbuf);

    // strbuf_sprintf(sbuf, ">bubble.%s%zu.branch.%zu kmers=%zu\n",
    //                prefix, id, i, pathbuf->len);
    strbuf_append_str(sbuf, ">bubble.");
    strbuf_append_str(sbuf, prefix);
    strbuf_append_ulong(sbuf, id);
    strbuf_append_str(sbuf, ".branch.");
    strbuf_append_ulong(sbuf, i);
    strbuf_append_str(sbuf, " kmers=");
    strbuf_append_ulong(sbuf, pathbuf->len);
    strbuf_append_char(sbuf, '\n');

    branch_to_str(pathbuf->b, pathbuf->len, false, sbuf, db_graph);
  }

  strbuf_append_char(sbuf, '\n');

  ctx_assert(strlen(sbuf->b) == sbuf->end);

  // lock, print, unlock
  pthread_mutex_lock(caller->out_lock);
  gzwrite(caller->gzout, sbuf->b, sbuf->end);
  pthread_mutex_unlock(caller->out_lock);
}
Example #11
0
// Test one A->B->C statement, where B is the given node.
//
// 1. Walk from B (up to wrkr->max_AB_dist nodes) to find A
// 2. Reverse complement the walk so it runs A->B, then either prime the walker
//    along A->B and extend (wrkr->prime_AB), or re-traverse A->B with
//    confirm_seq() and extend past B. The last node reached is C
// 3. Walk again from B and record whether or not we reach C
//
// Uses wrkr->nbuf, wrkr->gwlk and wrkr->rptwlk as scratch space and updates the
// wrkr->ab_fail_state / wrkr->bc_fail_state counters on short traversals.
//
// @return one of the RES_* codes
static inline
int test_statement_node(dBNode node, ExpABCWorker *wrkr)
{
  const dBGraph *db_graph = wrkr->db_graph;
  dBNodeBuffer *nbuf = &wrkr->nbuf;
  GraphWalker *wlk = &wrkr->gwlk;
  RepeatWalker *rpt = &wrkr->rptwlk;
  size_t b_idx, col = wrkr->colour;

  // rpt_walker_clear(rpt);

  // nbuf[0] is B
  db_node_buf_reset(nbuf);
  db_node_buf_add(nbuf, node);

  // size_t AB_limit = wrkr->prime_AB ? SIZE_MAX : wrkr->max_AB_dist;
  size_t walk_limit = wrkr->max_AB_dist;
  // status("walk_limit: %zu", walk_limit);

  // Walk from B to find A
  graph_walker_setup(wlk, true, col, col, db_graph);
  graph_walker_start(wlk, nbuf->b[0]);

  while(graph_walker_next(wlk) && nbuf->len < walk_limit) {
    if(!rpt_walker_attempt_traverse(rpt, wlk)) {
      reset(wlk,rpt,nbuf); return RES_LOST_IN_RPT;
    }
    db_node_buf_add(nbuf, wlk->node);
  }

  // NOTE(review): reset() is defined elsewhere; nbuf->len is still read after
  // it, so it presumably tidies the walkers without emptying nbuf - confirm
  reset(wlk,rpt,nbuf);

  // Only B in the buffer => we couldn't step away from B at all
  if(nbuf->len == 1) return RES_NO_TRAVERSAL;

  // Traverse A->B
  // After reversing, A is first and B is last
  db_nodes_reverse_complement(nbuf->b, nbuf->len);
  b_idx = nbuf->len - 1;

  if(wrkr->prime_AB)
  {
    // Prime A->B without attempting to cross
    graph_walker_prime(wlk, nbuf->b, nbuf->len, nbuf->len, true);

    // Extend past B, adding nodes to the end of nbuf
    while(graph_walker_next(wlk)) {
      if(!rpt_walker_attempt_traverse(rpt, wlk)) {
        reset(wlk,rpt,nbuf); return RES_LOST_IN_RPT;
      }
      db_node_buf_add(nbuf, wlk->node);
    }
  }
  else
  {
    // Attempt to traverse A->B then extend past B
    int r = confirm_seq(0, true, wlk, rpt, nbuf, col, db_graph);
    switch(r) {
      case CONFIRM_REPEAT: return RES_LOST_IN_RPT;
      // No return here: if ctx_assert2() does not stop execution, this case
      // falls through to CONFIRM_WRONG
      case CONFIRM_OVERSHOT: ctx_assert2(0,"Can't 'overshoot' when extending");
      case CONFIRM_WRONG: return RES_AB_WRONG;
      case CONFIRM_SHORT:
        if(wrkr->print_failed_contigs)
          print_failed(node, nbuf, db_graph, true, wrkr->prime_AB);
        // Count why the walker stopped
        wrkr->ab_fail_state[wlk->last_step.status]++;
        return RES_AB_FAILED;
      // Any other value (e.g. CONFIRM_SUCCESS) carries on below
    }
  }

  reset(wlk,rpt,nbuf);

  if(nbuf->len == b_idx+1) return RES_NO_TRAVERSAL; // Couldn't get past B

  // Last node is now C
  // Walk from B... record whether or not we reach C
  ctx_assert(db_nodes_are_equal(nbuf->b[b_idx], db_node_reverse(node)));

  int r = confirm_seq(b_idx, false, wlk, rpt, nbuf, col, db_graph);
  switch(r) {
    case CONFIRM_REPEAT: return RES_LOST_IN_RPT;
    case CONFIRM_OVERSHOT: return RES_BC_OVERSHOT;
    case CONFIRM_WRONG: return RES_BC_WRONG;
    case CONFIRM_SHORT:
      if(wrkr->print_failed_contigs)
        print_failed(node, nbuf, db_graph, false, wrkr->prime_AB);
      // Count why the walker stopped
      wrkr->bc_fail_state[wlk->last_step.status]++;
      return RES_BC_FAILED;
    case CONFIRM_SUCCESS: return RES_ABC_SUCCESS;
  }

  // Only reached if confirm_seq() returned a value not handled above
  die("Shouldn't reach here: r=%i", r);
  return -1;
}