static status_t
read_from_file(file_cache_ref* ref, void* cookie, off_t offset,
	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages)
{
	TRACE(("read_from_file(offset = %Ld, pageOffset = %ld, buffer = %#lx, "
		"bufferSize = %lu\n", offset, pageOffset, buffer, bufferSize));

	if (!useBuffer)
		return B_OK;

	generic_io_vec vec;
	vec.base = buffer;
	vec.length = bufferSize;

	push_access(ref, offset, bufferSize, false);
	ref->cache->Unlock();
	vm_page_unreserve_pages(reservation);

	generic_size_t toRead = bufferSize;
	status_t status = vfs_read_pages(ref->vnode, cookie, offset + pageOffset,
		&vec, 1, 0, &toRead);

	if (status == B_OK)
		reserve_pages(ref, reservation, reservePages, false);

	ref->cache->Lock();

	return status;
}

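// For context: read_from_file() above and write_to_file() below are the
// cache-bypassing hooks that cache_io() selects under memory pressure. A
// minimal sketch of the shared hook signature both implement, assuming a
// typedef of roughly this shape exists elsewhere in this file:
typedef status_t (*cache_func)(file_cache_ref* ref, void* cookie, off_t offset,
	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages);
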
extern "C" status_t file_cache_init(void) { // allocate a clean page we can use for writing zeroes vm_page_reservation reservation; vm_page_reserve_pages(&reservation, 1, VM_PRIORITY_SYSTEM); vm_page* page = vm_page_allocate_page(&reservation, PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR); vm_page_unreserve_pages(&reservation); sZeroPage = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; for (uint32 i = 0; i < kZeroVecCount; i++) { sZeroVecs[i].base = sZeroPage; sZeroVecs[i].length = B_PAGE_SIZE; } register_generic_syscall(CACHE_SYSCALLS, file_cache_control, 1, 0); return B_OK; }
static status_t
write_to_file(file_cache_ref* ref, void* cookie, off_t offset, int32 pageOffset,
	addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages)
{
	push_access(ref, offset, bufferSize, true);
	ref->cache->Unlock();
	vm_page_unreserve_pages(reservation);

	status_t status = B_OK;

	if (!useBuffer) {
		while (bufferSize > 0) {
			generic_size_t written = min_c(bufferSize, kZeroVecSize);
			status = vfs_write_pages(ref->vnode, cookie, offset + pageOffset,
				sZeroVecs, kZeroVecCount, B_PHYSICAL_IO_REQUEST, &written);
			if (status != B_OK)
				return status;
			if (written == 0)
				return B_ERROR;

			bufferSize -= written;
			pageOffset += written;
		}
	} else {
		generic_io_vec vec;
		vec.base = buffer;
		vec.length = bufferSize;
		generic_size_t toWrite = bufferSize;
		status = vfs_write_pages(ref->vnode, cookie, offset + pageOffset,
			&vec, 1, 0, &toWrite);
	}

	if (status == B_OK)
		reserve_pages(ref, reservation, reservePages, true);

	ref->cache->Lock();

	return status;
}

/*!	Iteratively correct the reported capacity by trying to read from the device
	close to its end.
*/
static status_t
test_capacity(cd_driver_info* info)
{
	static const size_t kMaxEntries = 4;
	const uint32 blockSize = info->block_size;
	const size_t kBufferSize = blockSize * 4;

	TRACE("test_capacity: read with buffer size %" B_PRIuSIZE ", block size %"
		B_PRIu32 ", capacity %llu\n", kBufferSize, blockSize,
		info->original_capacity);

	info->capacity = info->original_capacity;

	size_t numBlocks = B_PAGE_SIZE / blockSize;
	uint64 offset = info->original_capacity;
	if (offset <= numBlocks)
		return B_OK;

	offset -= numBlocks;

	scsi_ccb* request = info->scsi->alloc_ccb(info->scsi_device);
	if (request == NULL)
		return B_NO_MEMORY;

	// Allocate buffer

	physical_entry entries[4];
	size_t numEntries = 0;

	vm_page_reservation reservation;
	vm_page_reserve_pages(&reservation,
		(kBufferSize - 1 + B_PAGE_SIZE) / B_PAGE_SIZE, VM_PRIORITY_SYSTEM);

	for (size_t left = kBufferSize; numEntries < kMaxEntries && left > 0;
			numEntries++) {
		size_t bytes = std::min(left, (size_t)B_PAGE_SIZE);

		vm_page* page = vm_page_allocate_page(&reservation,
			PAGE_STATE_WIRED | VM_PAGE_ALLOC_BUSY);

		entries[numEntries].address = page->physical_page_number * B_PAGE_SIZE;
		entries[numEntries].size = bytes;
		left -= bytes;
	}

	vm_page_unreserve_pages(&reservation);

	// Read close to the end of the device to find out its real end

	// Only try 1 second before the end (= 75 blocks)
	while (offset > info->original_capacity - 75) {
		size_t bytesTransferred;
		status_t status = sSCSIPeripheral->read_write(info->scsi_periph_device,
			request, offset, numBlocks, entries, numEntries, false,
			&bytesTransferred);

		TRACE("test_capacity: read from offset %llu: %s\n", offset,
			strerror(status));

		if (status == B_OK || (request->sense[0] & 0x7f) != 0x70)
			break;

		switch (request->sense[2]) {
			case SCSIS_KEY_MEDIUM_ERROR:
			case SCSIS_KEY_ILLEGAL_REQUEST:
			case SCSIS_KEY_VOLUME_OVERFLOW:
			{
				// find out the problematic sector
				uint32 errorBlock = (request->sense[3] << 24U)
					| (request->sense[4] << 16U) | (request->sense[5] << 8U)
					| request->sense[6];
				if (errorBlock >= offset)
					info->capacity = errorBlock;
				break;
			}

			default:
				break;
		}

		if (numBlocks > offset)
			break;

		offset -= numBlocks;
	}

	info->scsi->free_ccb(request);

	for (size_t i = 0; i < numEntries; i++) {
		vm_page_set_state(vm_lookup_page(entries[i].address / B_PAGE_SIZE),
			PAGE_STATE_FREE);
	}

	if (info->capacity != info->original_capacity) {
		dprintf("scsi_cd: adjusted capacity from %llu to %llu blocks.\n",
			info->original_capacity, info->capacity);
	}

	return B_OK;
}

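// Hedged illustration (not part of the driver): how the fixed-format sense
// data consumed above is laid out. Byte 0 carries the response code (0x70
// for current errors, checked above), byte 2 the sense key, and bytes 3-6
// the big-endian Information field, which for these sense keys holds the
// LBA of the failing block. A helper of this shape would decode it:
static inline uint32
sense_information_lba(const uint8* sense)
{
	// assemble the big-endian Information field from bytes 3-6
	return ((uint32)sense[3] << 24) | ((uint32)sense[4] << 16)
		| ((uint32)sense[5] << 8) | (uint32)sense[6];
}
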
extern "C" void cache_prefetch_vnode(struct vnode* vnode, off_t offset, size_t size) { if (size == 0) return; VMCache* cache; if (vfs_get_vnode_cache(vnode, &cache, false) != B_OK) return; file_cache_ref* ref = ((VMVnodeCache*)cache)->FileCacheRef(); off_t fileSize = cache->virtual_end; if ((off_t)(offset + size) > fileSize) size = fileSize - offset; // "offset" and "size" are always aligned to B_PAGE_SIZE, offset = ROUNDDOWN(offset, B_PAGE_SIZE); size = ROUNDUP(size, B_PAGE_SIZE); size_t reservePages = size / B_PAGE_SIZE; // Don't do anything if we don't have the resources left, or the cache // already contains more than 2/3 of its pages if (offset >= fileSize || vm_page_num_unused_pages() < 2 * reservePages || 3 * cache->page_count > 2 * fileSize / B_PAGE_SIZE) { cache->ReleaseRef(); return; } size_t bytesToRead = 0; off_t lastOffset = offset; vm_page_reservation reservation; vm_page_reserve_pages(&reservation, reservePages, VM_PRIORITY_USER); cache->Lock(); while (true) { // check if this page is already in memory if (size > 0) { vm_page* page = cache->LookupPage(offset); offset += B_PAGE_SIZE; size -= B_PAGE_SIZE; if (page == NULL) { bytesToRead += B_PAGE_SIZE; continue; } } if (bytesToRead != 0) { // read the part before the current page (or the end of the request) PrecacheIO* io = new(std::nothrow) PrecacheIO(ref, lastOffset, bytesToRead); if (io == NULL || io->Prepare(&reservation) != B_OK) { delete io; break; } // we must not have the cache locked during I/O cache->Unlock(); io->ReadAsync(); cache->Lock(); bytesToRead = 0; } if (size == 0) { // we have reached the end of the request break; } lastOffset = offset; } cache->ReleaseRefAndUnlock(); vm_page_unreserve_pages(&reservation); }
static status_t
cache_io(void* _cacheRef, void* cookie, off_t offset, addr_t buffer,
	size_t* _size, bool doWrite)
{
	if (_cacheRef == NULL)
		panic("cache_io() called with NULL ref!\n");

	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
	VMCache* cache = ref->cache;
	off_t fileSize = cache->virtual_end;
	bool useBuffer = buffer != 0;

	TRACE(("cache_io(ref = %p, offset = %Ld, buffer = %p, size = %lu, %s)\n",
		ref, offset, (void*)buffer, *_size, doWrite ? "write" : "read"));

	// out of bounds access?
	if (offset >= fileSize || offset < 0) {
		*_size = 0;
		return B_OK;
	}

	int32 pageOffset = offset & (B_PAGE_SIZE - 1);
	size_t size = *_size;
	offset -= pageOffset;

	if ((off_t)(offset + pageOffset + size) > fileSize) {
		// adapt size to be within the file's offsets
		size = fileSize - pageOffset - offset;
		*_size = size;
	}
	if (size == 0)
		return B_OK;

	// "offset" and "lastOffset" are always aligned to B_PAGE_SIZE,
	// the "last*" variables always point to the end of the last
	// satisfied request part

	const uint32 kMaxChunkSize = MAX_IO_VECS * B_PAGE_SIZE;
	size_t bytesLeft = size, lastLeft = size;
	int32 lastPageOffset = pageOffset;
	addr_t lastBuffer = buffer;
	off_t lastOffset = offset;
	size_t lastReservedPages = min_c(MAX_IO_VECS,
		(pageOffset + bytesLeft + B_PAGE_SIZE - 1) >> PAGE_SHIFT);
	size_t reservePages = 0;
	size_t pagesProcessed = 0;
	cache_func function = NULL;

	vm_page_reservation reservation;
	reserve_pages(ref, &reservation, lastReservedPages, doWrite);

	AutoLocker<VMCache> locker(cache);

	while (bytesLeft > 0) {
		// Periodically reevaluate the low memory situation and select the
		// read/write hook accordingly
		if (pagesProcessed % 32 == 0) {
			if (size >= BYPASS_IO_SIZE
				&& low_resource_state(B_KERNEL_RESOURCE_PAGES)
					!= B_NO_LOW_RESOURCE) {
				// In low memory situations we bypass the cache beyond a
				// certain I/O size.
				function = doWrite ? write_to_file : read_from_file;
			} else
				function = doWrite ? write_to_cache : read_into_cache;
		}

		// check if this page is already in memory
		vm_page* page = cache->LookupPage(offset);
		if (page != NULL) {
			// The page may be busy - since we need to unlock the cache
			// sometime in the near future, we need to satisfy the request
			// of the pages we didn't get yet (to make sure no one else
			// interferes in the meantime).
			status_t status = satisfy_cache_io(ref, cookie, function, offset,
				buffer, useBuffer, pageOffset, bytesLeft, reservePages,
				lastOffset, lastBuffer, lastPageOffset, lastLeft,
				lastReservedPages, &reservation);
			if (status != B_OK)
				return status;

			// Since satisfy_cache_io() unlocks the cache, we need to look up
			// the page again.
			page = cache->LookupPage(offset);
			if (page != NULL && page->busy) {
				cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, true);
				continue;
			}
		}

		size_t bytesInPage = min_c(size_t(B_PAGE_SIZE - pageOffset),
			bytesLeft);

		TRACE(("lookup page from offset %Ld: %p, size = %lu, pageOffset "
			"= %lu\n", offset, page, bytesLeft, pageOffset));

		if (page != NULL) {
			if (doWrite || useBuffer) {
				// Since the following user_mem{cpy,set}() might cause a page
				// fault, which in turn might cause pages to be reserved, we
				// need to unlock the cache temporarily to avoid a potential
				// deadlock. To make sure that our page doesn't go away, we
				// mark it busy for the time being.
				page->busy = true;
				locker.Unlock();

				// copy the contents of the page already in memory
				phys_addr_t pageAddress
					= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
						+ pageOffset;
				bool userBuffer = IS_USER_ADDRESS(buffer);
				if (doWrite) {
					if (useBuffer) {
						vm_memcpy_to_physical(pageAddress, (void*)buffer,
							bytesInPage, userBuffer);
					} else {
						vm_memset_physical(pageAddress, 0, bytesInPage);
					}
				} else if (useBuffer) {
					vm_memcpy_from_physical((void*)buffer, pageAddress,
						bytesInPage, userBuffer);
				}

				locker.Lock();

				if (doWrite) {
					DEBUG_PAGE_ACCESS_START(page);

					page->modified = true;

					if (page->State() != PAGE_STATE_MODIFIED)
						vm_page_set_state(page, PAGE_STATE_MODIFIED);

					DEBUG_PAGE_ACCESS_END(page);
				}

				cache->MarkPageUnbusy(page);
			}

			// If it is cached only, requeue the page, so the respective queue
			// roughly remains LRU first sorted.
			if (page->State() == PAGE_STATE_CACHED
				|| page->State() == PAGE_STATE_MODIFIED) {
				DEBUG_PAGE_ACCESS_START(page);
				vm_page_requeue(page, true);
				DEBUG_PAGE_ACCESS_END(page);
			}

			if (bytesLeft <= bytesInPage) {
				// we've read the last page, so we're done!
				locker.Unlock();
				vm_page_unreserve_pages(&reservation);
				return B_OK;
			}

			// prepare a potential gap request
			lastBuffer = buffer + bytesInPage;
			lastLeft = bytesLeft - bytesInPage;
			lastOffset = offset + B_PAGE_SIZE;
			lastPageOffset = 0;
		}

		if (bytesLeft <= bytesInPage)
			break;

		buffer += bytesInPage;
		bytesLeft -= bytesInPage;
		pageOffset = 0;
		offset += B_PAGE_SIZE;
		pagesProcessed++;

		if (buffer - lastBuffer + lastPageOffset >= kMaxChunkSize) {
			status_t status = satisfy_cache_io(ref, cookie, function, offset,
				buffer, useBuffer, pageOffset, bytesLeft, reservePages,
				lastOffset, lastBuffer, lastPageOffset, lastLeft,
				lastReservedPages, &reservation);
			if (status != B_OK)
				return status;
		}
	}

	// fill the last remaining bytes of the request (either write or read)
	return function(ref, cookie, lastOffset, lastPageOffset, lastBuffer,
		lastLeft, useBuffer, &reservation, 0);
}

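// Hedged usage sketch (simplified, assumed entry point): the public
// file_cache_read()/file_cache_write() functions exported to file systems
// reduce to cache_io() with doWrite selecting the direction; a NULL buffer
// on write means "write zeroes" (useBuffer == false above):
extern "C" status_t
file_cache_read(void* _cacheRef, void* cookie, off_t offset, void* buffer,
	size_t* _size)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
	return cache_io(ref, cookie, offset, (addr_t)buffer, _size, false);
}
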
/*!	Like read_into_cache() but writes data into the cache.
	To preserve data consistency, it might also read pages into the cache,
	though, if only a partial page gets written.
	The same restrictions apply.
*/
static status_t
write_to_cache(file_cache_ref* ref, void* cookie, off_t offset,
	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages)
{
	// TODO: We're using way too much stack! Rather allocate a sufficiently
	// large chunk on the heap.
	generic_io_vec vecs[MAX_IO_VECS];
	uint32 vecCount = 0;
	generic_size_t numBytes = PAGE_ALIGN(pageOffset + bufferSize);
	vm_page* pages[MAX_IO_VECS];
	int32 pageIndex = 0;
	status_t status = B_OK;

	// ToDo: this should be settable somewhere
	bool writeThrough = false;

	// allocate pages for the cache and mark them busy
	for (generic_size_t pos = 0; pos < numBytes; pos += B_PAGE_SIZE) {
		// TODO: if space is becoming tight, and this cache is already grown
		//	big - shouldn't we better steal the pages directly in that case?
		//	(a working set like approach for the file cache)
		// TODO: the pages we allocate here should have been reserved upfront
		//	in cache_io()
		vm_page* page = pages[pageIndex++] = vm_page_allocate_page(
			reservation,
			(writeThrough ? PAGE_STATE_CACHED : PAGE_STATE_MODIFIED)
				| VM_PAGE_ALLOC_BUSY);

		page->modified = !writeThrough;

		ref->cache->InsertPage(page, offset + pos);

		add_to_iovec(vecs, vecCount, MAX_IO_VECS,
			page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE);
	}

	push_access(ref, offset, bufferSize, true);
	ref->cache->Unlock();
	vm_page_unreserve_pages(reservation);

	// copy contents (and read in partially written pages first)

	if (pageOffset != 0) {
		// This is only a partial write, so we have to read the rest of the
		// page from the file to have consistent data in the cache
		generic_io_vec readVec = { vecs[0].base, B_PAGE_SIZE };
		generic_size_t bytesRead = B_PAGE_SIZE;

		status = vfs_read_pages(ref->vnode, cookie, offset, &readVec, 1,
			B_PHYSICAL_IO_REQUEST, &bytesRead);
		// ToDo: handle errors for real!
		if (status < B_OK)
			panic("1. vfs_read_pages() failed: %s!\n", strerror(status));
	}

	size_t lastPageOffset = (pageOffset + bufferSize) % B_PAGE_SIZE;
	if (lastPageOffset != 0) {
		// get the last page in the I/O vectors
		generic_addr_t last = vecs[vecCount - 1].base
			+ vecs[vecCount - 1].length - B_PAGE_SIZE;

		if ((off_t)(offset + pageOffset + bufferSize)
				== ref->cache->virtual_end) {
			// the space in the page after this write action needs to be
			// cleaned
			vm_memset_physical(last + lastPageOffset, 0,
				B_PAGE_SIZE - lastPageOffset);
		} else {
			// the end of this write does not happen on a page boundary, so we
			// need to fetch the last page before we can update it
			generic_io_vec readVec = { last, B_PAGE_SIZE };
			generic_size_t bytesRead = B_PAGE_SIZE;

			status = vfs_read_pages(ref->vnode, cookie,
				PAGE_ALIGN(offset + pageOffset + bufferSize) - B_PAGE_SIZE,
				&readVec, 1, B_PHYSICAL_IO_REQUEST, &bytesRead);
			// ToDo: handle errors for real!
			if (status < B_OK)
				panic("vfs_read_pages() failed: %s!\n", strerror(status));

			if (bytesRead < B_PAGE_SIZE) {
				// the space beyond the file size needs to be cleaned
				vm_memset_physical(last + bytesRead, 0,
					B_PAGE_SIZE - bytesRead);
			}
		}
	}

	for (uint32 i = 0; i < vecCount; i++) {
		generic_addr_t base = vecs[i].base;
		generic_size_t bytes = min_c((generic_size_t)bufferSize,
			generic_size_t(vecs[i].length - pageOffset));

		if (useBuffer) {
			// copy data from user buffer
			vm_memcpy_to_physical(base + pageOffset, (void*)buffer, bytes,
				IS_USER_ADDRESS(buffer));
		} else {
			// clear buffer instead
			vm_memset_physical(base + pageOffset, 0, bytes);
		}

		bufferSize -= bytes;
		if (bufferSize == 0)
			break;

		buffer += bytes;
		pageOffset = 0;
	}

	if (writeThrough) {
		// write cached pages back to the file if we were asked to do that
		status_t status = vfs_write_pages(ref->vnode, cookie, offset, vecs,
			vecCount, B_PHYSICAL_IO_REQUEST, &numBytes);
		if (status < B_OK) {
			// ToDo: remove allocated pages, ...?
			panic("file_cache: remove allocated pages! write pages failed: "
				"%s\n", strerror(status));
		}
	}

	if (status == B_OK)
		reserve_pages(ref, reservation, reservePages, true);

	ref->cache->Lock();

	// make the pages accessible in the cache
	for (int32 i = pageIndex; i-- > 0;) {
		ref->cache->MarkPageUnbusy(pages[i]);

		DEBUG_PAGE_ACCESS_END(pages[i]);
	}

	return status;
}

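// For reference: write_to_cache() above and read_into_cache() below both
// batch their pages through add_to_iovec(). A minimal sketch of that
// helper, assuming it merges physically contiguous pages into one vector
// entry and panics on overflow (see the TODO in read_into_cache()):
static void
add_to_iovec(generic_io_vec* vecs, uint32& index, uint32 max,
	generic_addr_t address, generic_size_t size)
{
	if (index > 0 && vecs[index - 1].base + vecs[index - 1].length == address) {
		// the range is contiguous with the previous iovec; simply extend it
		vecs[index - 1].length += size;
		return;
	}

	if (index == max)
		panic("no more space for iovecs!");

	// start a new iovec
	vecs[index].base = address;
	vecs[index].length = size;
	index++;
}
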
/*!	Reads the requested amount of data into the cache, and allocates
	pages needed to fulfill that request. This function is called by
	cache_io(). It can only handle a certain amount of bytes, and the caller
	must make sure that it matches that criterion.
	The cache_ref lock must be held when calling this function; during
	operation it will unlock the cache, though.
*/
static status_t
read_into_cache(file_cache_ref* ref, void* cookie, off_t offset,
	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages)
{
	TRACE(("read_into_cache(offset = %Ld, pageOffset = %ld, buffer = %#lx, "
		"bufferSize = %lu\n", offset, pageOffset, buffer, bufferSize));

	VMCache* cache = ref->cache;

	// TODO: We're using way too much stack! Rather allocate a sufficiently
	// large chunk on the heap.
	generic_io_vec vecs[MAX_IO_VECS];
	uint32 vecCount = 0;

	generic_size_t numBytes = PAGE_ALIGN(pageOffset + bufferSize);
	vm_page* pages[MAX_IO_VECS];
	int32 pageIndex = 0;

	// allocate pages for the cache and mark them busy
	for (generic_size_t pos = 0; pos < numBytes; pos += B_PAGE_SIZE) {
		vm_page* page = pages[pageIndex++] = vm_page_allocate_page(
			reservation, PAGE_STATE_CACHED | VM_PAGE_ALLOC_BUSY);

		cache->InsertPage(page, offset + pos);

		add_to_iovec(vecs, vecCount, MAX_IO_VECS,
			page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE);
		// TODO: check if the array is large enough (currently panics)!
	}

	push_access(ref, offset, bufferSize, false);
	cache->Unlock();
	vm_page_unreserve_pages(reservation);

	// read file into reserved pages
	status_t status = read_pages_and_clear_partial(ref, cookie, offset, vecs,
		vecCount, B_PHYSICAL_IO_REQUEST, &numBytes);
	if (status != B_OK) {
		// reading failed, free allocated pages

		dprintf("file_cache: read pages failed: %s\n", strerror(status));

		cache->Lock();

		for (int32 i = 0; i < pageIndex; i++) {
			cache->NotifyPageEvents(pages[i], PAGE_EVENT_NOT_BUSY);
			cache->RemovePage(pages[i]);
			vm_page_set_state(pages[i], PAGE_STATE_FREE);
		}

		return status;
	}

	// copy the pages if needed and unmap them again

	for (int32 i = 0; i < pageIndex; i++) {
		if (useBuffer && bufferSize != 0) {
			size_t bytes = min_c(bufferSize,
				(size_t)B_PAGE_SIZE - pageOffset);

			vm_memcpy_from_physical((void*)buffer,
				pages[i]->physical_page_number * B_PAGE_SIZE + pageOffset,
				bytes, IS_USER_ADDRESS(buffer));

			buffer += bytes;
			bufferSize -= bytes;
			pageOffset = 0;
		}
	}

	reserve_pages(ref, reservation, reservePages, false);
	cache->Lock();

	// make the pages accessible in the cache
	for (int32 i = pageIndex; i-- > 0;) {
		DEBUG_PAGE_ACCESS_END(pages[i]);

		cache->MarkPageUnbusy(pages[i]);
	}

	return B_OK;
}
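

// Hedged sketch of the read helper used above, under the assumption that it
// wraps vfs_read_pages() and zeroes whatever part of the reserved pages the
// read did not touch (e.g. beyond the end of file), so that individual file
// systems don't have to:
static inline status_t
read_pages_and_clear_partial(file_cache_ref* ref, void* cookie, off_t offset,
	const generic_io_vec* vecs, size_t count, uint32 flags,
	generic_size_t* _numBytes)
{
	generic_size_t bytesUntouched = *_numBytes;

	status_t status = vfs_read_pages(ref->vnode, cookie, offset, vecs, count,
		flags, _numBytes);

	generic_size_t bytesEnd = *_numBytes;

	if (status == B_OK && bytesEnd < bytesUntouched) {
		// zero out the unread tail of the vectors, back to front
		bytesUntouched -= bytesEnd;

		for (int32 i = count; i-- > 0 && bytesUntouched != 0;) {
			generic_size_t length = min_c(bytesUntouched, vecs[i].length);
			vm_memset_physical(vecs[i].base + vecs[i].length - length, 0,
				length);
			bytesUntouched -= length;
		}
	}

	return status;
}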