Example #1
0
// Infer edges for one kmer using only edges that already exist in at least
// one colour: for every edge that some colours have and others lack, the
// neighbouring kmer is looked up in the graph and the edge is added to each
// colour `col` with covgs[col] > 0 for which db_node_has_col() reports the
// neighbour.
//
// node_bkey: kmer whose edges are being inferred
// edges:     per-colour edges of that kmer (db_graph->num_of_cols entries);
//            updated in place
// covgs:     per-colour coverage of that kmer (same length as `edges`)
// db_graph:  graph used to look up neighbouring kmers
//
// Return 1 if changed; 0 otherwise
bool infer_pop_edges(const BinaryKmer node_bkey, Edges *edges,
                     const Covg *covgs, const dBGraph *db_graph)
{
  // The intersection must start with all eight edge bits set (4 nucleotides
  // x 2 orientations), as in infer_all_edges(). Starting from 0xf would leave
  // the upper four bits permanently "not shared", so they would always be
  // re-examined even when every colour already has them.
  Edges uedges = 0, iedges = 0xff, add_edges, edge;
  size_t orient, nuc, col, kmer_size = db_graph->kmer_size;
  const size_t ncols = db_graph->num_of_cols;
  BinaryKmer bkey, bkmer;
  hkey_t next;
  Edges newedges[ncols]; // scratch copy; `edges` is only written at the end

  for(col = 0; col < ncols; col++) {
    uedges |= edges[col]; // union of edges
    iedges &= edges[col]; // intersection of edges
    newedges[col] = edges[col];
  }

  // Edges held by at least one colour but not by all of them
  add_edges = uedges & ~iedges;

  // All colours agree already: nothing to infer
  if(!add_edges) return 0;

  for(orient = 0; orient < 2; orient++)
  {
    // Shift once per orientation; the inner loop only overwrites the base
    // that was shifted in
    bkmer = (orient == FORWARD ? binary_kmer_left_shift_one_base(node_bkey, kmer_size)
                               : binary_kmer_right_shift_one_base(node_bkey));

    for(nuc = 0; nuc < 4; nuc++)
    {
      edge = nuc_orient_to_edge(nuc, orient);
      if(add_edges & edge)
      {
        // get next bkmer, look up in graph
        if(orient == FORWARD) binary_kmer_set_last_nuc(&bkmer, nuc);
        else binary_kmer_set_first_nuc(&bkmer, dna_nuc_complement(nuc), kmer_size);

        bkey = bkmer_get_key(bkmer, kmer_size);
        next = hash_table_find(&db_graph->ht, bkey);

        // The edge is set in some colour, so its target is expected to exist
        ctx_assert(next != HASH_NOT_FOUND);

        for(col = 0; col < ncols; col++)
          if(covgs[col] > 0 && db_node_has_col(db_graph, next, col))
            newedges[col] |= edge;
      }
    }
  }

  // Report whether anything changed, then publish the new edges
  int cmp = memcmp(edges, newedges, sizeof(Edges)*ncols);
  memcpy(edges, newedges, sizeof(Edges)*ncols);
  return (cmp != 0);
}
Example #2
0
// Infer missing edges for one kmer, colour by colour.
// For each of the eight possible edges that is not already set in every
// colour, the neighbouring kmer is looked up in the hash table. If it is
// found, the edge is added to each colour `col` with covgs[col] > 0 for which
// db_node_has_col() reports the neighbour.
// `edges` holds one entry per colour (db_graph->num_of_cols) and is updated
// in place.
// Returns true if any entry of `edges` was modified, false otherwise.
bool infer_all_edges(const BinaryKmer node_bkey, Edges *edges,
                     const Covg *covgs, const dBGraph *db_graph)
{
  const size_t num_cols = db_graph->num_of_cols;
  const size_t ksize = db_graph->kmer_size;
  Edges shared = 0xff, bit;
  Edges updated[num_cols];
  BinaryKmer shifted, neighbour_key;
  hkey_t neighbour;
  size_t c, strand, base;

  // Build a working copy and the set of edges common to all colours
  for(c = 0; c < num_cols; c++) {
    updated[c] = edges[c];
    shared &= edges[c];
  }

  for(strand = 0; strand < 2; strand++)
  {
    // Shift once per orientation; only the newly exposed base changes below
    if(strand == FORWARD) {
      shifted = binary_kmer_left_shift_one_base(node_bkey, ksize);
    } else {
      shifted = binary_kmer_right_shift_one_base(node_bkey);
    }

    for(base = 0; base < 4; base++)
    {
      bit = nuc_orient_to_edge(base, strand);

      // Every colour already has this edge: nothing to infer
      if(shared & bit) continue;

      // Build the candidate neighbour for this base
      if(strand == FORWARD) {
        binary_kmer_set_last_nuc(&shifted, base);
      } else {
        binary_kmer_set_first_nuc(&shifted, dna_nuc_complement(base), ksize);
      }

      neighbour_key = bkmer_get_key(shifted, ksize);
      neighbour = hash_table_find(&db_graph->ht, neighbour_key);

      // Neighbour is not in the graph at all: no colour can gain the edge
      if(neighbour == HASH_NOT_FOUND) continue;

      for(c = 0; c < num_cols; c++) {
        if(covgs[c] > 0 && db_node_has_col(db_graph, neighbour, c))
          updated[c] |= bit;
      }
    }
  }

  // Detect changes before overwriting the caller's array
  const bool changed = (memcmp(edges, updated, sizeof(Edges)*num_cols) != 0);
  memcpy(edges, updated, sizeof(Edges)*num_cols);
  return changed;
}
// Decode a bit-packed kmer into a NUL-terminated string.
//
// bkmer:            packed kmer, `num_of_bitfields` 64-bit words; not modified
// seq:              output buffer, must hold at least kmer_size+1 chars
// kmer_size:        number of bases to decode
// num_of_bitfields: number of words in `bkmer`
//
// Returns `seq`.
static char* binary_kmer_to_seq(uint64_t* bkmer, char * seq,
                                int kmer_size, int num_of_bitfields)
{
  // Decode from a scratch copy, since the copy is shifted as bases are read
  uint64_t scratch[num_of_bitfields];
  int word, pos;

  for(word = 0; word < num_of_bitfields; word++) {
    scratch[word] = bkmer[word];
  }

  // Fill the string from its last character to its first: each step reads the
  // lowest two bits of the final word, then shifts the copy on by one base
  for(pos = kmer_size; pos > 0; pos--)
  {
    seq[pos-1] = binary_nucleotide_to_char(scratch[num_of_bitfields-1] & 0x3);
    binary_kmer_right_shift_one_base(scratch, num_of_bitfields);
  }

  seq[kmer_size] = '\0';

  return seq;
}
Example #4
0
// Align the kmers of read `r` to the graph, appending every kmer found to
// aln->nodes and its start offset within the read to aln->rpos.
//
// If colour is -1 a kmer only needs to be in the hash table; otherwise it
// must also be reported by db_node_has_col() for the given colour.
// qcutoff and hp_cutoff are passed through to seq_contig_start() and
// seq_contig_end(), which pick the stretches of the read that are scanned.
//
// aln->seq_gaps is set to true if two consecutive kmers added by this call
// are not adjacent in the read; it is never cleared here.
//
// Returns the number of bases between the end of the last kmer found and the
// end of the read, or r->seq.end if this call found no kmers.
static size_t db_alignment_from_read(dBAlignment *aln, const read_t *r,
                                     uint8_t qcutoff, uint8_t hp_cutoff,
                                     const dBGraph *db_graph, int colour)
{
  const size_t kmer_size = db_graph->kmer_size;
  dBNodeBuffer *nodes = &aln->nodes;
  Int32Buffer *rpos = &aln->rpos;

  size_t region_start, region_end = 0, scan_from = 0;
  size_t pos, next_base, idx;
  BinaryKmer window, key;
  Nucleotide base;
  hkey_t hit;

  ctx_assert(nodes->len == rpos->len);
  const size_t first_new = nodes->len;
  size_t count = first_new;

  // A read cannot yield more kmers than it has bases
  db_node_buf_capacity(nodes, count + r->seq.end);
  int32_buf_capacity(rpos, count + r->seq.end);

  for(;;)
  {
    region_start = seq_contig_start(r, scan_from, kmer_size,
                                    qcutoff, hp_cutoff);
    if(region_start >= r->seq.end) break; // no further usable stretch

    region_end = seq_contig_end(r, region_start, kmer_size,
                                qcutoff, hp_cutoff, &scan_from);

    const char *bases = r->seq.b + region_start;
    const size_t region_len = region_end - region_start;

    // Load the first kmer, then shift it back one base so that the loop
    // below can add every base (including the first kmer's last) uniformly
    window = binary_kmer_from_str(bases, kmer_size);
    window = binary_kmer_right_shift_one_base(window);

    pos = region_start;
    next_base = kmer_size - 1;

    while(next_base < region_len)
    {
      base = dna_char_to_nuc(bases[next_base]);
      window = binary_kmer_left_shift_add(window, kmer_size, base);
      key = binary_kmer_get_key(window, kmer_size);
      hit = hash_table_find(&db_graph->ht, key);

      if(hit != HASH_NOT_FOUND &&
         (colour == -1 || db_node_has_col(db_graph, hit, colour)))
      {
        nodes->b[count].key = hit;
        nodes->b[count].orient = bkmer_get_orientation(window, key);
        rpos->b[count] = pos;
        count++;
      }

      next_base++;
      pos++;
    }
  }

  // Bases left over after the last kmer that was found
  size_t lost;
  if(count == first_new) {
    lost = r->seq.end; /* No kmers found */
  } else {
    lost = r->seq.end - (rpos->b[count-1] + kmer_size);
  }

  nodes->len = rpos->len = count;

  // Look for consecutive new entries whose read offsets are not adjacent
  for(idx = first_new + 1; idx < nodes->len; idx++) {
    if(rpos->b[idx-1]+1 < rpos->b[idx]) {
      aln->seq_gaps = true;
      break;
    }
  }

  return lost;
}