void sim_disk_file::serve(void* buffer, offset_type offset, size_type bytes, request::request_type type) { scoped_mutex_lock fd_lock(fd_mutex); double op_start = timestamp(); stats::scoped_read_write_timer read_write_timer(bytes, type == request::WRITE); void* mem = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, file_des, offset); if (mem == MAP_FAILED) { STXXL_THROW_ERRNO (io_error, " mmap() failed." << " Page size: " << sysconf(_SC_PAGESIZE) << " offset modulo page size " << (offset % sysconf(_SC_PAGESIZE))); } else if (mem == 0) { STXXL_THROW_ERRNO(io_error, "mmap() returned NULL"); } else { if (type == request::READ) { memcpy(buffer, mem, bytes); } else { memcpy(mem, buffer, bytes); } STXXL_THROW_ERRNO_NE_0(munmap(mem, bytes), io_error, "munmap() failed"); } double delay = get_delay(offset, bytes); delay = delay - timestamp() + op_start; assert(delay > 0.0); int seconds_to_wait = static_cast<int>(floor(delay)); if (seconds_to_wait) sleep(seconds_to_wait); usleep((useconds_t)((delay - seconds_to_wait) * 1000000.)); }
STXXL_BEGIN_NAMESPACE

// Construct a Linux AIO queue: negotiate the maximum number of
// simultaneous in-flight events with the kernel via io_setup(), seed the
// free-event semaphore, and start the posting and waiting threads.
// \param desired_queue_length requested queue depth; 0 selects the default
//        of 64 entries (usually one queue per disk)
linuxaio_queue::linuxaio_queue(int desired_queue_length)
    : num_waiting_requests(0), num_free_events(0), num_posted_requests(0),
      post_thread_state(NOT_RUNNING), wait_thread_state(NOT_RUNNING)
{
    if (desired_queue_length == 0) {
        // default value, 64 entries per queue (i.e. usually per disk) should
        // be enough
        max_events = 64;
    }
    else
        max_events = desired_queue_length;

    // negotiate maximum number of simultaneous events with the OS
    context = 0;
    long result;
    while ((result = syscall(SYS_io_setup, max_events, &context)) == -1 &&
           errno == EAGAIN && max_events > 1)
    {
        // EAGAIN means the kernel cannot provide max_events events, so
        // retry with a smaller request. (The old code doubled max_events
        // with '<<= 1', the opposite of the comment's stated intent, which
        // only made the shortage worse and never let the 'max_events > 1'
        // guard shrink toward termination.)
        max_events >>= 1;               // try with half as many events
    }
    if (result != 0) {
        STXXL_THROW_ERRNO(io_error, "linuxaio_queue::linuxaio_queue"
                          " io_setup() nr_events=" << max_events);
    }

    for (int e = 0; e < max_events; ++e)
        num_free_events++;  // cannot set semaphore to value directly

    STXXL_MSG("Set up an linuxaio queue with " << max_events << " entries.");

    start_thread(post_async, static_cast<void*>(this), post_thread, post_thread_state);
    start_thread(wait_async, static_cast<void*>(this), wait_thread, wait_thread_state);
}
STXXL_BEGIN_NAMESPACE

// Serve a read or write request by temporarily mapping the relevant file
// region into memory and copying between the mapping and the user buffer.
void mmap_file::serve(void* buffer, offset_type offset, size_type bytes,
                      request::request_type type)
{
    scoped_mutex_lock fd_lock(fd_mutex);
    //assert(offset + bytes <= _size());

    stats::scoped_read_write_timer read_write_timer(bytes, type == request::WRITE);

    // Map with only the protection the operation actually needs.
    const int protection = (type == request::READ) ? PROT_READ : PROT_WRITE;
    void* mapping = mmap(NULL, bytes, protection, MAP_SHARED, file_des, offset);

    if (mapping == MAP_FAILED) {
        STXXL_THROW_ERRNO(io_error,
                          " mmap() failed." <<
                          " path=" << filename <<
                          " bytes=" << bytes <<
                          " Page size: " << sysconf(_SC_PAGESIZE) <<
                          " offset modulo page size " << (offset % sysconf(_SC_PAGESIZE)));
    }

    if (mapping == 0) {
        STXXL_THROW_ERRNO(io_error, "mmap() returned NULL");
    }

    // Transfer the data, then tear the mapping down again.
    if (type == request::READ)
        memcpy(buffer, mapping, bytes);
    else
        memcpy(mapping, buffer, bytes);

    STXXL_THROW_ERRNO_NE_0(munmap(mapping, bytes), io_error, "munmap() failed");
}
// internal routines, run by the posting thread void linuxaio_queue::post_requests() { request_ptr req; io_event* events = new io_event[max_events]; for ( ; ; ) // as long as thread is running { // might block until next request or message comes in int num_currently_waiting_requests = num_waiting_requests--; // terminate if termination has been requested if (post_thread_state() == TERMINATING && num_currently_waiting_requests == 0) break; scoped_mutex_lock lock(waiting_mtx); if (!waiting_requests.empty()) { req = waiting_requests.front(); waiting_requests.pop_front(); lock.unlock(); num_free_events--; // might block because too many requests are posted // polymorphic_downcast while (!dynamic_cast<linuxaio_request*>(req.get())->post()) { // post failed, so first handle events to make queues (more) // empty, then try again. // wait for at least one event to complete, no time limit long num_events = syscall(SYS_io_getevents, context, 1, max_events, events, NULL); if (num_events < 0) { STXXL_THROW_ERRNO(io_error, "linuxaio_queue::post_requests" " io_getevents() nr_events=" << num_events); } handle_events(events, num_events, false); } // request is finally posted num_posted_requests++; } else { lock.unlock(); // num_waiting_requests-- was premature, compensate for that num_waiting_requests++; } } delete[] events; }
// Reserve the position of the next write block using a first-fit scan of
// the free list, splitting off any unused tail of the chosen region.
// mapping_lock has to be acquired by the caller.
wbtl_file::offset_type wbtl_file::get_next_write_block()
{
    // first fit: find the first free region large enough for one write block
    sortseq::iterator candidate =
        std::find_if(free_space.begin(), free_space.end(),
                     bind2nd(FirstFit(), write_block_size) _STXXL_FORCE_SEQUENTIAL);

    if (candidate == free_space.end())
        STXXL_THROW_ERRNO(io_error, "OutOfSpace, probably fragmented");

    const offset_type region_start = (*candidate).first;
    const offset_type region_len = (*candidate).second;
    free_space.erase(candidate);

    // return the unused tail of the region to the free list
    if (region_len > write_block_size)
        free_space[region_start + write_block_size] = region_len - write_block_size;

    free_bytes -= write_block_size;

    STXXL_VERBOSE_WBTL("wbtl:nextwb p" << FMT_A_S(region_start, write_block_size) <<
                       " F f" << FMT_A_C(free_bytes, free_space.size()));

    return region_start;
}
// internal routines, run by the waiting thread void linuxaio_queue::wait_requests() { request_ptr req; io_event* events = new io_event[max_events]; for ( ; ; ) // as long as thread is running { // might block until next request is posted or message comes in int num_currently_posted_requests = num_posted_requests--; // terminate if termination has been requested if (wait_thread_state() == TERMINATING && num_currently_posted_requests == 0) break; // wait for at least one of them to finish long num_events; while (1) { num_events = syscall(SYS_io_getevents, context, 1, max_events, events, NULL); if (num_events < 0) { if (errno == EINTR) { // io_getevents may return prematurely in case a signal is received continue; } STXXL_THROW_ERRNO(io_error, "linuxaio_queue::wait_requests" " io_getevents() nr_events=" << max_events); } break; } num_posted_requests++; // compensate for the one eaten prematurely above handle_events(events, num_events, false); } delete[] events; }
STXXL_BEGIN_NAMESPACE

// Serve a request with plain lseek()+read()/write() system calls, looping
// until all bytes have been transferred (the kernel may perform short
// reads/writes). Read requests that extend past end-of-file have the
// remainder of the buffer zero-filled instead of failing.
// \param req the request to serve; must belong to this file
// \throws io_error on any failing system call
void syscall_file::serve(const request* req) throw (io_error)
{
    scoped_mutex_lock fd_lock(fd_mutex);
    assert(req->get_file() == this);
    offset_type offset = req->get_offset();
    char* buffer = static_cast<char*>(req->get_buffer());
    size_type bytes = req->get_size();
    request::request_type type = req->get_type();

    stats::scoped_read_write_timer read_write_timer(bytes, type == request::WRITE);

    // Loop until the whole transfer is done; each iteration re-seeks
    // because a short read/write advances 'offset' below.
    while (bytes > 0)
    {
        off_t rc = ::lseek(file_des, offset, SEEK_SET);
        if (rc < 0)
        {
            STXXL_THROW_ERRNO
                (io_error,
                 " this=" << this <<
                 " call=::lseek(fd,offset,SEEK_SET)" <<
                 " path=" << filename <<
                 " fd=" << file_des <<
                 " offset=" << offset <<
                 " buffer=" << (void*)buffer <<
                 " bytes=" << bytes <<
                 " type=" << ((type == request::READ) ? "READ" : "WRITE") <<
                 " rc=" << rc);
        }

        if (type == request::READ)
        {
#if STXXL_MSVC
            // MSVC's ::read takes an unsigned int count
            assert(bytes <= std::numeric_limits<unsigned int>::max());
            if ((rc = ::read(file_des, buffer, (unsigned int)bytes)) <= 0)
#else
            if ((rc = ::read(file_des, buffer, bytes)) <= 0)
#endif
            {
                // NOTE(review): rc == 0 (plain EOF) is also treated as an
                // error here unless the EOF zero-fill branch below has
                // already consumed the request on a previous iteration.
                STXXL_THROW_ERRNO
                    (io_error,
                     " this=" << this <<
                     " call=::read(fd,buffer,bytes)" <<
                     " path=" << filename <<
                     " fd=" << file_des <<
                     " offset=" << offset <<
                     " buffer=" << (void*)buffer <<
                     " bytes=" << bytes <<
                     " type=" << "READ" <<
                     " rc=" << rc);
            }
            bytes -= rc;
            offset += rc;
            buffer += rc;

            if (bytes > 0 && offset == this->_size())
            {
                // read request extends past end-of-file
                // fill reminder with zeroes
                memset(buffer, 0, bytes);
                bytes = 0;
            }
        }
        else
        {
#if STXXL_MSVC
            // MSVC's ::write takes an unsigned int count
            assert(bytes <= std::numeric_limits<unsigned int>::max());
            if ((rc = ::write(file_des, buffer, (unsigned int)bytes)) <= 0)
#else
            if ((rc = ::write(file_des, buffer, bytes)) <= 0)
#endif
            {
                STXXL_THROW_ERRNO
                    (io_error,
                     " this=" << this <<
                     " call=::write(fd,buffer,bytes)" <<
                     " path=" << filename <<
                     " fd=" << file_des <<
                     " offset=" << offset <<
                     " buffer=" << (void*)buffer <<
                     " bytes=" << bytes <<
                     " type=" << "WRITE" <<
                     " rc=" << rc);
            }
            bytes -= rc;
            offset += rc;
            buffer += rc;
        }
    }
}