virtual void open_file(std::string base_filename) { filename = filename_degree_data(base_filename); modified = false; if (!use_mmap) { iomgr->allow_preloading(filename); filedesc = iomgr->open_session(filename.c_str(), false); } else { mmap_length = get_filesize(filename); filedesc = open(filename.c_str(), O_RDWR); mmap_file = (degree *) mmap(NULL, mmap_length, PROT_READ | PROT_WRITE, MAP_SHARED, filedesc, 0); assert(mmap_file); } }
static void delete_shards(std::string base_filename, int nshards) { #ifdef DYNAMICEDATA typedef int EdgeDataType; #else typedef EdgeDataType_ EdgeDataType; #endif logstream(LOG_DEBUG) << "Deleting files for " << base_filename << " shards=" << nshards << std::endl; std::string intervalfname = filename_intervals(base_filename, nshards); if (file_exists(intervalfname)) { int err = remove(intervalfname.c_str()); if (err != 0) logstream(LOG_ERROR) << "Error removing file " << intervalfname << ", " << strerror(errno) << std::endl; } /* Note: degree file is not removed, because same graph with different number of shards share the file. This should be probably change. std::string degreefname = filename_degree_data(base_filename); if (file_exists(degreefname)) { remove(degreefname.c_str()); } */ size_t blocksize = 4096 * 1024; while (blocksize % sizeof(EdgeDataType) != 0) blocksize++; for(int p=0; p < nshards; p++) { int blockid = 0; std::string filename_edata = filename_shard_edata<EdgeDataType>(base_filename, p, nshards); std::string fsizename = filename_edata + ".size"; if (file_exists(fsizename)) { int err = remove(fsizename.c_str()); if (err != 0) logstream(LOG_ERROR) << "Error removing file " << fsizename << ", " << strerror(errno) << std::endl; } while(true) { std::string block_filename = filename_shard_edata_block(filename_edata, blockid, blocksize); if (file_exists(block_filename)) { int err = remove(block_filename.c_str()); if (err != 0) logstream(LOG_ERROR) << "Error removing file " << block_filename << ", " << strerror(errno) << std::endl; } else { break; } #ifdef DYNAMICEDATA delete_block_uncompressed_sizefile(block_filename); #endif blockid++; } std::string dirname = dirname_shard_edata_block(filename_edata, blocksize); if (file_exists(dirname)) { int err = remove(dirname.c_str()); if (err != 0) logstream(LOG_ERROR) << "Error removing directory " << dirname << ", " << strerror(errno) << std::endl; } std::string adjname = filename_shard_adj(base_filename, p, 
nshards); logstream(LOG_DEBUG) << "Deleting " << adjname << " exists: " << file_exists(adjname) << std::endl; if (file_exists(adjname)) { int err = remove(adjname.c_str()); if (err != 0) logstream(LOG_ERROR) << "Error removing file " << adjname << ", " << strerror(errno) << std::endl; } } std::string numv_filename = base_filename + ".numvertices"; if (file_exists(numv_filename)) { int err = remove(numv_filename.c_str()); if (err != 0) logstream(LOG_ERROR) << "Error removing file " << numv_filename << ", " << strerror(errno) << std::endl; } /* Degree file */ std::string deg_filename = filename_degree_data(base_filename); if (file_exists(deg_filename)) { int err = remove(deg_filename.c_str()); if (err != 0) logstream(LOG_ERROR) << "Error removing file " << deg_filename << ", " << strerror(errno) << std::endl; } }
static int find_shards(std::string base_filename, std::string shard_string="auto") { int try_shard_num; int start_num = 0; int last_shard_num = 2400; if (shard_string == "auto") { start_num = 0; } else { start_num = atoi(shard_string.c_str()); } if (start_num > 0) { last_shard_num = start_num; } size_t blocksize = 4096 * 1024; while (blocksize % sizeof(EdgeDataType) != 0) blocksize++; for(try_shard_num=start_num; try_shard_num <= last_shard_num; try_shard_num++) { std::string last_shard_name = filename_shard_edata<EdgeDataType>(base_filename, try_shard_num - 1, try_shard_num); std::string last_block_name = filename_shard_edata_block(last_shard_name, 0, blocksize); int tryf = open(last_block_name.c_str(), O_RDONLY); if (tryf >= 0) { // Found! close(tryf); int nshards_candidate = try_shard_num; bool success = true; // Validate all relevant files exists for(int p=0; p < nshards_candidate; p++) { std::string sname = filename_shard_edata_block( filename_shard_edata<EdgeDataType>(base_filename, p, nshards_candidate), 0, blocksize); if (!file_exists(sname)) { logstream(LOG_DEBUG) << "Missing directory file: " << sname << std::endl; success = false; break; } sname = filename_shard_adj(base_filename, p, nshards_candidate); if (!file_exists(sname)) { logstream(LOG_DEBUG) << "Missing shard file: " << sname << std::endl; success = false; break; } } // Check degree file std::string degreefname = filename_degree_data(base_filename); if (!file_exists(degreefname)) { logstream(LOG_ERROR) << "Missing degree file: " << degreefname << std::endl; logstream(LOG_ERROR) << "You need to preprocess (sharder) your file again!" << std::endl; return 0; } std::string intervalfname = filename_intervals(base_filename, nshards_candidate); if (!file_exists(intervalfname)) { logstream(LOG_ERROR) << "Missing intervals file: " << intervalfname << std::endl; logstream(LOG_ERROR) << "You need to preprocess (sharder) your file again!" 
<< std::endl; return 0; } if (!success) { continue; } logstream(LOG_INFO) << "Detected number of shards: " << nshards_candidate << std::endl; logstream(LOG_INFO) << "To specify a different number of shards, use command-line parameter 'nshards'" << std::endl; return nshards_candidate; } } if (last_shard_num == start_num) { logstream(LOG_WARNING) << "Could not find shards with nshards = " << start_num << std::endl; logstream(LOG_WARNING) << "Please define 'nshards 0' or 'nshards auto' to automatically detect." << std::endl; } return 0; }
/**
 * Opens the degree-data file of the given graph through the I/O manager.
 *
 * The file name is derived with filename_degree_data() and kept in
 * `filename`. The I/O manager is first told it may preload the file, then a
 * session is opened on it and its handle stored in `filedesc`.
 *
 * @param base_filename base name of the graph
 */
virtual void open_file(std::string base_filename) {
    filename = filename_degree_data(base_filename);

    // Preloading must be allowed before the session is opened.
    iomgr->allow_preloading(filename);

    const char * degree_path = filename.c_str();
    filedesc = iomgr->open_session(degree_path, false);
}