Example #1
0
// Parse path and create FileFilter, calls die() with msg on error
// fltr should be zero'd before call
void file_filter_open(FileFilter *fltr, const char *path)
{
  const char *path_start, *path_end;
  size_t path_len;

  // Duplicate input string and file path
  strbuf_set(&fltr->input, path);
  file_filter_deconstruct_path(path, &path_start, &path_end);
  path_len = path_end - path_start;
  strbuf_ensure_capacity(&fltr->path, path_len);
  memcpy(fltr->path.b, path_start, path_len);
  fltr->path.b[fltr->path.end = path_len] = '\0';
}
Example #2
0
// Strip indels ('-') from allele and add to string buffer
static inline void print_vcf_allele(const char *allele, size_t len,
                                    int8_t prev_base, int8_t next_base,
                                    StrBuf *sbuf)
{
  size_t i;
  if(prev_base > 0) strbuf_append_char(sbuf, char_to_vcf_char(prev_base));
  strbuf_ensure_capacity(sbuf, sbuf->end+len);
  for(i = 0; i < len; i++) {
    if(allele[i] != '-')
      sbuf->b[sbuf->end++] = char_to_vcf_char(allele[i]);
  }
  sbuf->b[sbuf->end] = 0;
  if(next_base > 0) strbuf_append_char(sbuf, char_to_vcf_char(next_base));
}
Example #3
0
static void branch_to_str(const dBNode *nodes, size_t len, bool print_first_kmer,
                          StrBuf *sbuf, const dBGraph *db_graph)
{
  size_t i = print_first_kmer, kmer_size = db_graph->kmer_size;
  Nucleotide nuc;
  BinaryKmer bkmer;

  if(print_first_kmer) {
    strbuf_ensure_capacity(sbuf, sbuf->end + kmer_size);
    bkmer = db_node_oriented_bkmer(db_graph, nodes[0]);
    binary_kmer_to_str(bkmer, kmer_size, sbuf->b+sbuf->end);
    sbuf->end += kmer_size;
  }

  // i == 1 if print_first_kmer, otherwise 0
  strbuf_ensure_capacity(sbuf, sbuf->end + len + 1); // +1 for '\n'
  for(; i < len; i++) {
    nuc = db_node_get_last_nuc(nodes[i], db_graph);
    sbuf->b[sbuf->end++] = dna_nuc_to_char(nuc);
  }

  sbuf->b[sbuf->end++] = '\n';
  sbuf->b[sbuf->end] = '\0';
}
Example #4
0
static void _check_alleles(GraphCache *cache, GCacheStepPtrBuf *steps,
                           const char **alleles, size_t num_alleles,
                           dBNodeBuffer *nbuf, StrBuf *sbuf)
{
  TASSERT2(steps->len == num_alleles, "Number of alleles doesn't match");

  size_t i, j;
  for(i = 0; i < steps->len; i++)
  {
    db_node_buf_reset(nbuf);
    gc_step_fetch_nodes(cache, steps->b[i], nbuf);
    strbuf_ensure_capacity(sbuf, nbuf->len+MAX_KMER_SIZE+1);
    db_nodes_to_str(nbuf->b, nbuf->len, cache->db_graph, sbuf->b);

    // Find this node
    for(j = 0; j < num_alleles && strcasecmp(sbuf->b,alleles[j]); j++) {}
    TASSERT2(j < num_alleles, "Couldn't find allele: %s", sbuf->b);
  }
}
Example #5
0
void test_graph_crawler()
{
  test_status("Testing graph crawler...");

  // Construct 1 colour graph with kmer-size=11
  dBGraph graph;
  const size_t kmer_size = 11, ncols = 3;

  db_graph_alloc(&graph, kmer_size, ncols, 1, 2048,
                 DBG_ALLOC_EDGES | DBG_ALLOC_NODE_IN_COL | DBG_ALLOC_BKTLOCKS);

  char graphseq[3][77] =
//           <               X                 X              X...............
{"GTTCCAGAGCGGAGGTCTCCCAACAACATGGTATAAGTTGTCTAGCCCCGGTTCGCGCGGGTACTTCTTACAGCGC",
 "GTTCCAGAGCGGAGGTCTCCCAACAACTTGGTATAAGTTGTCTAGTCCCGGTTCGCGCGGCATTTCAGCATTGTTA",
 "GTTCCAGAGCGCGACAGAGTGCATATCACGCTAAGCACAGCCCTCTTCTATCTGCTTTTAAATGGATCAATAATCG"};

  build_graph_from_str_mt(&graph, 0, graphseq[0], strlen(graphseq[0]));
  build_graph_from_str_mt(&graph, 1, graphseq[1], strlen(graphseq[1]));
  build_graph_from_str_mt(&graph, 2, graphseq[2], strlen(graphseq[2]));

  // Crawl graph
  GraphCrawler crawler;
  graph_crawler_alloc(&crawler, &graph);

  dBNode node = db_graph_find_str(&graph, graphseq[0]);
  dBNode next_node = db_graph_find_str(&graph, graphseq[0]+1);
  TASSERT(node.key != HASH_NOT_FOUND);
  TASSERT(next_node.key != HASH_NOT_FOUND);

  BinaryKmer bkey = db_node_get_bkmer(&graph, node.key);
  Edges edges = db_node_get_edges(&graph, node.key, 0);

  dBNode next_nodes[4];
  Nucleotide next_nucs[4];
  size_t i, p, num_next, next_idx;

  num_next = db_graph_next_nodes(&graph, bkey, node.orient, edges,
                                 next_nodes, next_nucs);

  next_idx = 0;
  while(next_idx < num_next && !db_nodes_are_equal(next_nodes[next_idx],next_node))
    next_idx++;

  TASSERT(next_idx < num_next && db_nodes_are_equal(next_nodes[next_idx],next_node));

  // Crawl in all colours
  graph_crawler_fetch(&crawler, node, next_nodes, next_idx, num_next,
                      NULL, graph.num_of_cols, NULL, NULL, NULL);

  TASSERT2(crawler.num_paths == 2, "crawler.num_paths: %u", crawler.num_paths);

  // Fetch paths
  dBNodeBuffer nbuf;
  db_node_buf_alloc(&nbuf, 16);
  StrBuf sbuf;
  strbuf_alloc(&sbuf, 128);

  for(p = 0; p < crawler.num_paths; p++) {
    db_node_buf_reset(&nbuf);
    graph_crawler_get_path_nodes(&crawler, p, &nbuf);
    strbuf_ensure_capacity(&sbuf, nbuf.len+graph.kmer_size);
    sbuf.end = db_nodes_to_str(nbuf.b, nbuf.len, &graph, sbuf.b);
    for(i = 0; i < 3 && strcmp(graphseq[i]+1,sbuf.b) != 0; i++) {}
    TASSERT2(i < 3, "seq: %s", sbuf.b);
    TASSERT2(sbuf.end == 75, "sbuf.end: %zu", sbuf.end);
    TASSERT2(nbuf.len == 65, "nbuf.len: %zu", nbuf.len);
  }

  strbuf_dealloc(&sbuf);
  db_node_buf_dealloc(&nbuf);

  graph_crawler_dealloc(&crawler);

  db_graph_dealloc(&graph);
}
Example #6
0
/**
 * Print paths to a string buffer. Paths are sorted before being written.
 *
 * @param hkey    All paths associated with hkey are written to the buffer
 * @param sbuf    paths are written this string buffer
 * @param subset  is a temp variable that is reused each time
 * @param nbuf    temporary buffer, if not NULL, used to add seq=... to output
 * @param jposbuf temporary buffer, if not NULL, used to add juncpos=... to output
 */
void gpath_save_sbuf(hkey_t hkey, StrBuf *sbuf, GPathSubset *subset,
                     dBNodeBuffer *nbuf, SizeBuffer *jposbuf,
                     const dBGraph *db_graph)
{
  ctx_assert(db_graph->num_of_cols == 1 || nbuf == NULL);
  ctx_assert(db_graph->num_of_cols == 1 || jposbuf == NULL);

  const GPathStore *gpstore = &db_graph->gpstore;
  const GPathSet *gpset = &gpstore->gpset;
  const size_t ncols = gpstore->gpset.ncols;
  GPath *first_gpath = gpath_store_fetch(gpstore, hkey);
  const GPath *gpath;
  size_t i, j, col;

  // Load and sort paths for given kmer
  gpath_subset_reset(subset);
  gpath_subset_load_llist(subset, first_gpath);
  gpath_subset_sort(subset);

  if(subset->list.len == 0) return;

  // Print "<kmer> <npaths>"
  BinaryKmer bkmer = db_graph->ht.table[hkey];
  char bkstr[MAX_KMER_SIZE+1];
  binary_kmer_to_str(bkmer, db_graph->kmer_size, bkstr);

  // strbuf_sprintf(sbuf, "%s %zu\n", bkstr, subset->list.len);
  strbuf_append_strn(sbuf, bkstr, db_graph->kmer_size);
  strbuf_append_char(sbuf, ' ');
  strbuf_append_ulong(sbuf, subset->list.len);
  strbuf_append_char(sbuf, '\n');

  char orchar[2] = {0};
  orchar[FORWARD] = 'F';
  orchar[REVERSE] = 'R';
  const uint8_t *nseenptr;

  for(i = 0; i < subset->list.len; i++)
  {
    gpath = subset->list.b[i];
    nseenptr = gpath_set_get_nseen(gpset, gpath);

    // strbuf_sprintf(sbuf, "%c %zu %u %u", orchar[gpath->orient], klen,
    //                                      gpath->num_juncs, (uint32_t)nseenptr[0]);

    strbuf_append_char(sbuf, orchar[gpath->orient]);
    strbuf_append_char(sbuf, ' ');
    strbuf_append_ulong(sbuf, gpath->num_juncs);
    strbuf_append_char(sbuf, ' ');
    strbuf_append_ulong(sbuf, nseenptr[0]);

    for(col = 1; col < ncols; col++) {
      // strbuf_sprintf(sbuf, ",%u", (uint32_t)nseenptr[col]);
      strbuf_append_char(sbuf, ',');
      strbuf_append_ulong(sbuf, nseenptr[col]);
    }

    strbuf_append_char(sbuf, ' ');
    strbuf_ensure_capacity(sbuf, sbuf->end + gpath->num_juncs + 2);
    binary_seq_to_str(gpath->seq, gpath->num_juncs, sbuf->b+sbuf->end);
    sbuf->end += gpath->num_juncs;

    if(nbuf)
    {
      // Trace this path through the graph
      // First, find a colour this path is in
      for(col = 0; col < ncols && !gpath_has_colour(gpath, ncols, col); col++) {}
      if(col == ncols) die("path is not in any colours");

      dBNode node = {.key = hkey, .orient = gpath->orient};
      db_node_buf_reset(nbuf);
      if(jposbuf) size_buf_reset(jposbuf); // indices of junctions in nbuf
      gpath_fetch(node, gpath, nbuf, jposbuf, col, db_graph);

      strbuf_append_str(sbuf, " seq=");
      strbuf_ensure_capacity(sbuf, sbuf->end + db_graph->kmer_size + nbuf->len);
      sbuf->end += db_nodes_to_str(nbuf->b, nbuf->len, db_graph,
                                   sbuf->b+sbuf->end);

      if(jposbuf) {
        strbuf_append_str(sbuf, " juncpos=");
        strbuf_append_ulong(sbuf, jposbuf->b[0]);

        for(j = 1; j < jposbuf->len; j++) {
          strbuf_append_char(sbuf, ',');
          strbuf_append_ulong(sbuf, jposbuf->b[j]);
        }
      }
    }

    strbuf_append_char(sbuf, '\n');
  }
}

// @subset is a temp variable that is reused each time
// @sbuf   is a temp variable that is reused each time
static inline int _gpath_gzsave_node(hkey_t hkey,
                                     StrBuf *sbuf, GPathSubset *subset,
                                     dBNodeBuffer *nbuf, SizeBuffer *jposbuf,
                                     gzFile gzout, pthread_mutex_t *outlock,
                                     const dBGraph *db_graph)
{
  gpath_save_sbuf(hkey, sbuf, subset, nbuf, jposbuf, db_graph);

  if(sbuf->end > DEFAULT_IO_BUFSIZE)
    _gpath_save_flush(gzout, sbuf, outlock);

  return 0; // => keep iterating
}