Ejemplo n.º 1
0
/*
 * Find where the current segment of an alignment ends. A segment is a run of
 * consecutive kmers aligned to the graph with no gaps between them.
 * @param aln           alignment of read to the graph
 * @param start         offset in the alignment at which this segment starts
 * @param missing_edge  set to true if the segment was ended because the graph
 *                      lacks an edge between two adjacent nodes, false otherwise
 * @param db_graph      graph queried for node edges and last nucleotides
 * @return 0 if aln->rpos.len is zero. Otherwise scanning begins at start+1 and
 *         the returned index is the first position i at which either
 *         rpos[i-1]+1 != rpos[i], the edge from node i-1 to node i is missing,
 *         or i has reached the search limit.
 */
size_t db_alignment_next_gap(const dBAlignment *aln, size_t start,
                             bool *missing_edge, const dBGraph *db_graph)
{
  const int32_t *read_pos = aln->rpos.b;
  const dBNode *node_arr = aln->nodes.b;
  const int col = aln->colour;
  size_t limit = aln->rpos.len;
  size_t pos;

  *missing_edge = false;

  if(limit == 0) return 0;

  // When both reads were used and we start inside the first one, do not scan
  // past the index at which the second read begins
  if(aln->used_r1 && aln->used_r2 && start < aln->r2strtidx)
    limit = aln->r2strtidx;

  pos = start + 1;

  // Walk forward while read positions remain consecutive
  while(pos < limit && read_pos[pos-1]+1 == read_pos[pos])
  {
    const dBNode *prev = &node_arr[pos-1];
    Edges prev_edges;
    Nucleotide next_nuc;

    // A negative colour means: use the union edges lookup instead of the
    // per-colour one
    if(col < 0)
      prev_edges = db_node_get_edges_union(db_graph, prev->key);
    else
      prev_edges = db_node_get_edges(db_graph, prev->key, col);

    next_nuc = db_node_get_last_nuc(node_arr[pos], db_graph);

    // Segment ends here if the previous node has no edge to this node
    if(!edges_has_edge(prev_edges, next_nuc, prev->orient)) {
      *missing_edge = true;
      return pos;
    }

    pos++;
  }

  return pos;
}
Ejemplo n.º 2
0
// Write only the last base of each node to a gzip stream, i.e. skip the first
// k-1 bases of the path => 3 nodes gives 3bp instead of 3+k-1
void db_nodes_gzprint_cont(const dBNode *nodes, size_t num,
                           const dBGraph *db_graph, gzFile out)
{
  size_t idx = 0;

  while(idx < num) {
    const Nucleotide last = db_node_get_last_nuc(nodes[idx], db_graph);
    gzputc(out, dna_nuc_to_char(last));
    idx++;
  }
}
Ejemplo n.º 3
0
// Write the sequence spelled by a path of nodes to a gzip stream: the full
// oriented kmer of the first node followed by the last base of each
// subsequent node. Writes nothing if num is zero.
// @param nodes     array of nodes forming the path
// @param num       number of nodes in the array
// @param db_graph  graph the nodes belong to (supplies kmer_size)
// @param out       gzip stream to write to
void db_nodes_gzprint(const dBNode *nodes, size_t num,
                      const dBGraph *db_graph, gzFile out)
{
  size_t i, kmer_size = db_graph->kmer_size;
  Nucleotide nuc;
  BinaryKmer bkmer;
  char tmp[MAX_KMER_SIZE+1];

  // An empty path has no first kmer; reading nodes[0] would be out of bounds
  if(num == 0) return;

  // First node is printed as a full kmer
  bkmer = db_node_oriented_bkmer(db_graph, nodes[0]);
  binary_kmer_to_str(bkmer, kmer_size, tmp);
  gzputs(out, tmp);

  // Each following node contributes only its last base
  for(i = 1; i < num; i++) {
    nuc = db_node_get_last_nuc(nodes[i], db_graph);
    gzputc(out, dna_nuc_to_char(nuc));
  }
}
Ejemplo n.º 4
0
// Write the sequence spelled by a path of nodes to a stdio stream: the full
// oriented kmer of the first node followed by the last base of each
// subsequent node. Writes nothing if num is zero.
// @param nodes     array of nodes forming the path
// @param num       number of nodes in the array
// @param db_graph  graph the nodes belong to (supplies kmer_size)
// @param out       stream to write to
void db_nodes_print(const dBNode *nodes, size_t num,
                    const dBGraph *db_graph, FILE *out)
{
  const size_t kmer_size = db_graph->kmer_size;
  size_t i;
  Nucleotide nuc;
  BinaryKmer bkmer;
  char tmp[MAX_KMER_SIZE+1];

  // An empty path has no first kmer; reading nodes[0] would be out of bounds
  if(num == 0) return;

  // First node is printed as a full kmer
  bkmer = db_node_oriented_bkmer(db_graph, nodes[0]);
  binary_kmer_to_str(bkmer, kmer_size, tmp);
  fputs(tmp, out);

  // Each following node contributes only its last base
  for(i = 1; i < num; i++) {
    nuc = db_node_get_last_nuc(nodes[i], db_graph);
    fputc(dna_nuc_to_char(nuc), out);
  }
}
Ejemplo n.º 5
0
// Write the sequence spelled by a path of nodes into str as a nul-terminated
// string: the first node's kmer (reverse complemented if the node is in
// REVERSE orientation) followed by the last base of each subsequent node.
// Returns number of bytes added, not counting the terminating '\0'.
// If num is zero, str is left untouched and 0 is returned.
size_t db_nodes_to_str(const dBNode *nodes, size_t num,
                       const dBGraph *db_graph, char *str)
{
  if(num == 0) return 0;

  const size_t kmer_size = db_graph->kmer_size;
  const size_t total_len = kmer_size + num - 1;
  const BinaryKmer first_bkmer = db_node_get_bkmer(db_graph, nodes[0].key);
  char *dst = str + kmer_size; // next position to write after the first kmer
  size_t idx;

  // First kmer is stored un-oriented; flip it if the node is reversed
  binary_kmer_to_str(first_bkmer, kmer_size, str);
  if(nodes[0].orient == REVERSE)
    dna_reverse_complement_str(str, kmer_size);

  // One extra base per remaining node
  for(idx = 1; idx < num; idx++) {
    const Nucleotide last = db_node_get_last_nuc(nodes[idx], db_graph);
    *dst = dna_nuc_to_char(last);
    dst++;
  }

  *dst = '\0';
  return total_len;
}
Ejemplo n.º 6
0
// Append the sequence of a branch to sbuf, followed by '\n' and a terminating
// '\0' (the '\0' is not counted in sbuf->end).
// If print_first_kmer is true, nodes[0] is written as a full oriented kmer and
// each later node adds its last base; otherwise every node, including the
// first, adds only its last base.
static void branch_to_str(const dBNode *nodes, size_t len, bool print_first_kmer,
                          StrBuf *sbuf, const dBGraph *db_graph)
{
  const size_t kmer_size = db_graph->kmer_size;
  size_t idx = 0; // index of first node to print as a single base

  if(print_first_kmer) {
    strbuf_ensure_capacity(sbuf, sbuf->end + kmer_size);
    const BinaryKmer first_bkmer = db_node_oriented_bkmer(db_graph, nodes[0]);
    binary_kmer_to_str(first_bkmer, kmer_size, sbuf->b + sbuf->end);
    sbuf->end += kmer_size;
    idx = 1; // first node already printed as a whole kmer
  }

  // +1 for '\n'
  // NOTE(review): presumably strbuf_ensure_capacity also reserves room for the
  // terminating '\0' written below -- confirm against its definition
  strbuf_ensure_capacity(sbuf, sbuf->end + len + 1);

  while(idx < len) {
    const Nucleotide last = db_node_get_last_nuc(nodes[idx], db_graph);
    sbuf->b[sbuf->end] = dna_nuc_to_char(last);
    sbuf->end++;
    idx++;
  }

  sbuf->b[sbuf->end] = '\n';
  sbuf->end++;
  sbuf->b[sbuf->end] = '\0';
}