void load_vertices(vid_t window_st, vid_t window_en, std::vector<svertex_t> & prealloc, bool inedges=true, bool outedges=true) { /* Find file size */ m.start_time("memoryshard_create_edges"); assert(adjdata != NULL); // Now start creating vertices uint8_t * ptr = adjdata; uint8_t * end = ptr + adjfilesize; vid_t vid = 0; edgeptr = 0; streaming_offset = 0; streaming_offset_vid = 0; streaming_offset_edge_ptr = 0; range_start_offset = adjfilesize; range_start_edge_ptr = edatafilesize; bool setoffset = false; bool setrangeoffset = false; while (ptr < end) { check_stream_progress(6, ptr-adjdata); // read at least 6 bytes if (!setoffset && vid > range_end) { // This is where streaming should continue. Notice that because of the // non-zero counters, this might be a bit off. streaming_offset = ptr-adjdata; streaming_offset_vid = vid; streaming_offset_edge_ptr = edgeptr; setoffset = true; } if (!setrangeoffset && vid>=range_st) { range_start_offset = ptr-adjdata; range_start_edge_ptr = edgeptr; setrangeoffset = true; } uint8_t ns = *ptr; int n; ptr += sizeof(uint8_t); if (ns == 0x00) { // next value tells the number of vertices with zeros uint8_t nz = *ptr; ptr += sizeof(uint8_t); vid++; vid += nz; continue; } if (ns == 0xff) { // If 255 is not enough, then stores a 32-bit integer after. n = *((uint32_t*)ptr); ptr += sizeof(uint32_t); } else { n = ns; } svertex_t* vertex = NULL; if (vid>=window_st && vid <=window_en) { // TODO: Make more efficient vertex = &prealloc[vid-window_st]; if (!vertex->scheduled) vertex = NULL; } check_stream_progress(n*4, ptr-adjdata); while(--n>=0) { bool special_edge = false; vid_t target = (sizeof(ET)==sizeof(ETspecial) ? *((vid_t*) ptr) : translate_edge(*((vid_t*) ptr), special_edge)); ptr += sizeof(vid_t); if (vertex != NULL && outedges) { vertex->add_outedge(target, (only_adjacency ? NULL : (ET*) &((char*)edgedata)[edgeptr]), special_edge); } if (target >= window_st) { if (target <= window_en) { /* In edge */ if (inedges) { svertex_t & dstvertex = prealloc[target-window_st]; if (dstvertex.scheduled) { assert(only_adjacency || edgeptr < edatafilesize); dstvertex.add_inedge(vid, (only_adjacency ? NULL : (ET*) &((char*)edgedata)[edgeptr]), special_edge); if (vertex != NULL) { dstvertex.parallel_safe = false; vertex->parallel_safe = false; // This edge is shared with another vertex in the same window - not safe to run in parallel. } } } } else if (sizeof(ET) == sizeof(ETspecial)) { // Note, we cannot skip if there can be "special edges". FIXME so dirty. // This vertex has no edges any more for this window, bail out if (vertex == NULL) { ptr += sizeof(vid_t)*n; edgeptr += (n+1)*sizeof(ET); break; } } } edgeptr += sizeof(ET) * !special_edge + sizeof(ETspecial) * special_edge; } vid++; } m.stop_time("memoryshard_create_edges", false); }
/** * Read out-edges for vertices. */ void read_next_vertices(int nvecs, vid_t start, std::vector<svertex_t> & prealloc, bool record_index=false, bool disable_writes=false) { metrics_entry me = m.start_time(); if (!record_index) move_close_to(start); /* Release the blocks we do not need anymore */ curblock = NULL; release_prior_to_offset(false, disable_writes); assert(activeblocks.size() <= 1); /* Read next */ if (!activeblocks.empty() && !only_adjacency) { curblock = &activeblocks[0]; } vid_t lastrec = start; window_start_edataoffset = edataoffset; for(int i=((int)curvid) - ((int)start); i<nvecs; i++) { if (adjoffset >= adjfilesize) break; // TODO: skip unscheduled vertices. int n; if (record_index && (size_t)(curvid - lastrec) >= (size_t) std::max((int)100000, nvecs/16)) { save_offset(); lastrec = curvid; } uint8_t ns = read_val<uint8_t>(); if (ns == 0x00) { curvid++; uint8_t nz = read_val<uint8_t>(); curvid += nz; i += nz; continue; } if (ns == 0xff) { n = read_val<uint32_t>(); } else { n = ns; } if (i<0) { // Just skipping skip(n, sizeof(vid_t)); } else { svertex_t& vertex = prealloc[i]; assert(vertex.id() == curvid); if (vertex.scheduled) { while(--n >= 0) { bool special_edge = false; vid_t target = (sizeof(ET) == sizeof(ETspecial) ? read_val<vid_t>() : translate_edge(read_val<vid_t>(), special_edge)); ET * evalue = read_edgeptr(); vertex.add_outedge(target, evalue, special_edge); if (!((target >= range_st && target <= range_end))) { logstream(LOG_ERROR) << "Error : " << target << " not in [" << range_st << " - " << range_end << "]" << std::endl; iomgr->print_session(adjfile_session); } assert(target >= range_st && target <= range_end); } } else { // This vertex was not scheduled, so we can just skip its edges. skip(n, sizeof(vid_t)); } } curvid++; } m.stop_time(me, "read_next_vertices"); curblock = NULL; }