Exemple #1
0
// Infer edges in-place on a graph file by memory mapping it.
// Using file so can call fseek and don't need to load whole graph.
//
// Starting at byte offset file->hdr_size, the file is walked as
// file->num_of_kmers fixed-size records laid out as:
//   [BinaryKmer][Covg x ncols][Edges x ncols]
// Each record is copied out, passed to infer_all_edges() (if add_all_edges)
// or infer_pop_edges() (otherwise), and copied back into the mapping if the
// call reported a change.
//
// `file` must satisfy file_filter_is_direct(), must not be a tty and must
// have the same number of colours as `db_graph`. The mapping is MAP_SHARED
// so changes are written through to the underlying file.
//
// Returns the number of kmer records that were rewritten.
static size_t inferedges_on_mmap(const dBGraph *db_graph, bool add_all_edges,
                                 GraphFileReader *file)
{
  ctx_assert(db_graph->num_of_cols == file->hdr.num_of_cols);
  ctx_assert(file_filter_is_direct(&file->fltr));
  ctx_assert2(!isatty(fileno(file->fh)), "Use inferedges_on_stream() instead");
  ctx_assert(file->num_of_kmers >= 0);
  ctx_assert(file->file_size >= 0);

  status("[inferedges] Processing mmap file: %s [hdr: %zu bytes file: %zu bytes]",
         file_filter_path(&file->fltr),
         (size_t)file->hdr_size, (size_t)file->file_size);

  if(fseek(file->fh, 0, SEEK_SET) != 0)
    die("fseek failed: %s", strerror(errno));

  const size_t ncols = file->hdr.num_of_cols;
  const size_t hdr_size = (size_t)file->hdr_size;
  const size_t file_size = (size_t)file->file_size;
  const size_t num_kmers = (size_t)file->num_of_kmers;
  const size_t filekmersize = sizeof(BinaryKmer) + (sizeof(Edges)+sizeof(Covg)) * ncols;

  // Refuse to walk past the end of the mapping (e.g. truncated file).
  // Written as a division so the check itself cannot overflow.
  if(hdr_size > file_size || num_kmers > (file_size - hdr_size) / filekmersize)
    die("Graph file too small for %zu kmers: %s", num_kmers, file->fltr.path.b);

  // Open memory mapped file
  // We both read from and write to the mapping. PROT_WRITE alone is not
  // guaranteed to grant read access on every platform, so ask for both.
  void *mmap_ptr = mmap(NULL, file_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        fileno(file->fh), 0);

  if(mmap_ptr == MAP_FAILED)
    die("Cannot memory map file: %s [%s]", file->fltr.path.b, strerror(errno));

  BinaryKmer bkmer;
  Edges edges[ncols];
  Covg covgs[ncols];

  size_t i, num_kmers_edited = 0;
  char *ptr = (char*)mmap_ptr + hdr_size;

  for(i = 0; i < num_kmers; i++, ptr += filekmersize)
  {
    // Locations of this record's coverage and edge arrays within the mapping
    char *fh_covgs = ptr      + sizeof(BinaryKmer);
    char *fh_edges = fh_covgs + sizeof(Covg)*ncols;

    // Copy out with memcpy: records in the file need not be aligned
    memcpy(bkmer.b, ptr,      sizeof(BinaryKmer));
    memcpy(covgs,   fh_covgs, ncols * sizeof(Covg));
    memcpy(edges,   fh_edges, ncols * sizeof(Edges));

    bool updated = (add_all_edges ? infer_all_edges(bkmer, edges, covgs, db_graph)
                                  : infer_pop_edges(bkmer, edges, covgs, db_graph));

    // Only touch the mapped pages when something changed
    if(updated) {
      memcpy(fh_covgs, covgs, ncols * sizeof(Covg));
      memcpy(fh_edges, edges, ncols * sizeof(Edges));
      num_kmers_edited++;
    }
  }

  if(munmap(mmap_ptr, file_size) == -1)
    die("Cannot release mmap file: %s [%s]", file->fltr.path.b, strerror(errno));

  return num_kmers_edited;
}
// Infer edges for the single node `hkey` of `db_graph`.
// Calls infer_all_edges() when add_all_edges is true, infer_pop_edges()
// otherwise, and adds the returned value to *num_nodes_modified.
static inline void infer_edges_node(hkey_t hkey,
                                    bool add_all_edges,
                                    const dBGraph *db_graph,
                                    size_t *num_nodes_modified)
{
  const size_t ncols = db_graph->num_of_cols;

  // Per-colour pseudo coverage: whatever db_node_has_col() reports for
  // this node in each colour (zero or one)
  Covg node_covgs[ncols];
  size_t c = 0;
  while(c < ncols) {
    node_covgs[c] = db_node_has_col(db_graph, hkey, c);
    c++;
  }

  BinaryKmer bkmer = db_node_bkmer(db_graph, hkey);
  Edges *node_edges = &db_node_edges(db_graph, hkey, 0);

  if(add_all_edges)
    *num_nodes_modified += infer_all_edges(bkmer, node_edges, node_covgs, db_graph);
  else
    *num_nodes_modified += infer_pop_edges(bkmer, node_edges, node_covgs, db_graph);
}
Exemple #3
0
// Infer edges by streaming kmers from a seekable graph file to `fout`.
// Using file so can call fseek and don't need to load whole graph.
//
// Writes file->hdr to `fout`, seeks the input to just after its header, then
// for every kmer read runs infer_all_edges() (if add_all_edges) or
// infer_pop_edges() (otherwise) and writes the kmer to `fout`, whether or
// not it was changed.
//
// Returns the number of kmers for which the infer call reported a change.
static size_t inferedges_on_file(const dBGraph *db_graph, bool add_all_edges,
                                 GraphFileReader *file, FILE *fout)
{
  ctx_assert(db_graph->num_of_cols == file->hdr.num_of_cols);
  ctx_assert(file_filter_is_direct(&file->fltr));
  ctx_assert2(!isatty(fileno(file->fh)), "Use inferedges_on_stream() instead");
  ctx_assert(fout != NULL);
  ctx_assert(fileno(file->fh) != fileno(fout));

  status("[inferedges] Processing file: %s", file_filter_path(&file->fltr));

  // Output starts with a copy of the input header
  graph_write_header(fout, &file->hdr);

  // Rewind input to the first kmer record
  if(fseek(file->fh, file->hdr_size, SEEK_SET) != 0)
    die("fseek failed: %s", strerror(errno));

  const size_t ncols = file->hdr.num_of_cols;
  BinaryKmer bkmer;
  Covg kmer_covgs[ncols];
  Edges kmer_edges[ncols];
  size_t num_kmers_edited = 0;

  for(;;)
  {
    if(!graph_file_read_reset(file, ncols, &bkmer, kmer_covgs, kmer_edges))
      break;

    bool changed;
    if(add_all_edges)
      changed = infer_all_edges(bkmer, kmer_edges, kmer_covgs, db_graph);
    else
      changed = infer_pop_edges(bkmer, kmer_edges, kmer_covgs, db_graph);

    // Every kmer is written out, modified or not
    graph_write_kmer(fout, file->hdr.num_of_bitfields, file->hdr.num_of_cols,
                     bkmer, kmer_covgs, kmer_edges);

    if(changed)
      num_kmers_edited++;
  }

  return num_kmers_edited;
}
Exemple #4
0
// Infer edges for the single node `hkey` of `db_graph`.
//
// If the graph has no coverage array (db_graph->col_covgs == NULL),
// `tmp_covgs` is filled with db_node_has_col() for each colour and used as
// the coverage; it is indexed up to db_graph->num_of_cols. Otherwise the
// node's own coverages are used and `tmp_covgs` is left untouched.
//
// Adds the value returned by infer_all_edges() / infer_pop_edges() to
// *num_nodes_modified. Always returns 0 (=> keep iterating).
static inline int infer_edges_node(hkey_t hkey,
                                   bool add_all_edges,
                                   Covg *tmp_covgs,
                                   const dBGraph *db_graph,
                                   size_t *num_nodes_modified)
{
  Covg *node_covgs;

  if(db_graph->col_covgs != NULL) {
    // Graph stores coverages: use them directly
    node_covgs = &db_node_covg(db_graph, hkey, 0);
  }
  else {
    // Create coverages that are zero or one depending on if node has colour
    size_t c;
    node_covgs = tmp_covgs;
    for(c = 0; c < db_graph->num_of_cols; c++)
      node_covgs[c] = db_node_has_col(db_graph, hkey, c);
  }

  BinaryKmer bkmer = db_node_get_bkmer(db_graph, hkey);
  Edges *node_edges = &db_node_edges(db_graph, hkey, 0);

  if(add_all_edges)
    *num_nodes_modified += infer_all_edges(bkmer, node_edges, node_covgs, db_graph);
  else
    *num_nodes_modified += infer_pop_edges(bkmer, node_edges, node_covgs, db_graph);

  return 0; // => keep iterating
}