// Using file so can call fseek and don't need to load whole graph static size_t inferedges_on_mmap(const dBGraph *db_graph, bool add_all_edges, GraphFileReader *file) { ctx_assert(db_graph->num_of_cols == file->hdr.num_of_cols); ctx_assert(file_filter_is_direct(&file->fltr)); ctx_assert2(!isatty(fileno(file->fh)), "Use inferedges_on_stream() instead"); ctx_assert(file->num_of_kmers >= 0); ctx_assert(file->file_size >= 0); status("[inferedges] Processing mmap file: %s [hdr: %zu bytes file: %zu bytes]", file_filter_path(&file->fltr), (size_t)file->hdr_size, (size_t)file->file_size); if(fseek(file->fh, 0, SEEK_SET) != 0) die("fseek failed: %s", strerror(errno)); // Open memory mapped file void *mmap_ptr = mmap(NULL, file->file_size, PROT_WRITE, MAP_SHARED, fileno(file->fh), 0); if(mmap_ptr == MAP_FAILED) die("Cannot memory map file: %s [%s]", file->fltr.path.b, strerror(errno)); const size_t ncols = file->hdr.num_of_cols; BinaryKmer bkmer; Edges edges[ncols]; Covg covgs[ncols]; bool updated; size_t i, num_kmers = file->num_of_kmers, num_kmers_edited = 0; size_t filekmersize = sizeof(BinaryKmer) + (sizeof(Edges)+sizeof(Covg)) * ncols; char *ptr = (char*)mmap_ptr + file->hdr_size; for(i = 0; i < num_kmers; i++, ptr += filekmersize) { char *fh_covgs = ptr + sizeof(BinaryKmer); char *fh_edges = fh_covgs + sizeof(Covg)*ncols; memcpy(bkmer.b, ptr, sizeof(BinaryKmer)); memcpy(covgs, fh_covgs, ncols * sizeof(Covg)); memcpy(edges, fh_edges, ncols * sizeof(Edges)); updated = (add_all_edges ? infer_all_edges(bkmer, edges, covgs, db_graph) : infer_pop_edges(bkmer, edges, covgs, db_graph)); if(updated) { memcpy(fh_covgs, covgs, ncols * sizeof(Covg)); memcpy(fh_edges, edges, ncols * sizeof(Edges)); num_kmers_edited++; } } if(munmap(mmap_ptr, file->file_size) == -1) die("Cannot release mmap file: %s [%s]", file->fltr.path.b, strerror(errno)); return num_kmers_edited; }
/**
 * Run edge inference on a single node of the in-memory graph.
 *
 * Builds a temporary per-colour coverage array from db_node_has_col() and
 * hands it, together with the node's kmer and edges, to infer_all_edges()
 * or infer_pop_edges().
 *
 * @param hkey                node to process
 * @param add_all_edges       true => infer_all_edges(), false => infer_pop_edges()
 * @param db_graph            graph holding the node
 * @param num_nodes_modified  counter incremented by the helper's return value
 */
static inline void infer_edges_node(hkey_t hkey, bool add_all_edges,
                                    const dBGraph *db_graph,
                                    size_t *num_nodes_modified)
{
  const size_t ncols = db_graph->num_of_cols;
  BinaryKmer bkmer = db_node_bkmer(db_graph, hkey);
  Edges *node_edges = &db_node_edges(db_graph, hkey, 0);

  // One entry per colour, filled from db_node_has_col()
  Covg covgs[ncols];
  size_t c;

  for(c = 0; c < ncols; c++) {
    covgs[c] = db_node_has_col(db_graph, hkey, c);
  }

  if(add_all_edges) {
    *num_nodes_modified += infer_all_edges(bkmer, node_edges, covgs, db_graph);
  } else {
    *num_nodes_modified += infer_pop_edges(bkmer, node_edges, covgs, db_graph);
  }
}
// Using file so can call fseek and don't need to load whole graph static size_t inferedges_on_file(const dBGraph *db_graph, bool add_all_edges, GraphFileReader *file, FILE *fout) { ctx_assert(db_graph->num_of_cols == file->hdr.num_of_cols); ctx_assert(file_filter_is_direct(&file->fltr)); ctx_assert2(!isatty(fileno(file->fh)), "Use inferedges_on_stream() instead"); ctx_assert(fout != NULL); ctx_assert(fileno(file->fh) != fileno(fout)); status("[inferedges] Processing file: %s", file_filter_path(&file->fltr)); // Print header graph_write_header(fout, &file->hdr); // Read the input file again if(fseek(file->fh, file->hdr_size, SEEK_SET) != 0) die("fseek failed: %s", strerror(errno)); const size_t ncols = file->hdr.num_of_cols; BinaryKmer bkmer; Edges edges[ncols]; Covg covgs[ncols]; size_t num_kmers_edited = 0; bool updated; while(graph_file_read_reset(file, ncols, &bkmer, covgs, edges)) { updated = (add_all_edges ? infer_all_edges(bkmer, edges, covgs, db_graph) : infer_pop_edges(bkmer, edges, covgs, db_graph)); graph_write_kmer(fout, file->hdr.num_of_bitfields, file->hdr.num_of_cols, bkmer, covgs, edges); num_kmers_edited += updated; } return num_kmers_edited; }
/**
 * Run edge inference on a single node of the in-memory graph.
 *
 * If the graph has a coverage array (db_graph->col_covgs != NULL) the node's
 * own coverages are passed to the helper directly. Otherwise `tmp_covgs` is
 * filled with one value per colour from db_node_has_col() and used instead.
 *
 * @param hkey                node to process
 * @param add_all_edges       true => infer_all_edges(), false => infer_pop_edges()
 * @param tmp_covgs           scratch buffer with room for db_graph->num_of_cols
 *                            entries; only written when the graph has no
 *                            coverage array
 * @param db_graph            graph holding the node
 * @param num_nodes_modified  counter incremented by the helper's return value
 * @return always 0 (=> keep iterating)
 */
static inline int infer_edges_node(hkey_t hkey, bool add_all_edges,
                                   Covg *tmp_covgs, const dBGraph *db_graph,
                                   size_t *num_nodes_modified)
{
  BinaryKmer bkmer = db_node_get_bkmer(db_graph, hkey);
  Edges *node_edges = &db_node_edges(db_graph, hkey, 0);
  Covg *covgs;

  if(db_graph->col_covgs != NULL) {
    // Use the coverages stored in the graph
    covgs = &db_node_covg(db_graph, hkey, 0);
  }
  else {
    // No stored coverages: build them from colour membership
    size_t c;
    for(c = 0; c < db_graph->num_of_cols; c++) {
      tmp_covgs[c] = db_node_has_col(db_graph, hkey, c);
    }
    covgs = tmp_covgs;
  }

  if(add_all_edges) {
    *num_nodes_modified += infer_all_edges(bkmer, node_edges, covgs, db_graph);
  } else {
    *num_nodes_modified += infer_pop_edges(bkmer, node_edges, covgs, db_graph);
  }

  return 0; // => keep iterating
}