kern_return_t
inode_object_data_request(
	memory_object_t		mem_obj,
	memory_object_control_t	mem_obj_control,
	vm_offset_t		offset,
	vm_size_t		length,
	vm_prot_t		desired_access)
{
	struct i_mem_object	*imo;
	kern_return_t		kr;
	struct vm_area_struct	fake_vma;
	struct vm_area_struct	*vma;
	unsigned long		page;
	int			i;
	struct task_struct	*tsk;
	struct osfmach3_mach_task_struct *mach_task;
	struct mm_struct	*mm;
	unsigned long (*nopage)(struct vm_area_struct *area,
				unsigned long address,
				int write_access);
	extern unsigned long filemap_nopage(struct vm_area_struct *area,
					    unsigned long address,
					    int no_share);

#ifdef	INODE_PAGER_DEBUG
	if (inode_pager_debug) {
		printk("inode_object_data_request: obj 0x%x offset 0x%x length 0x%x\n",
		       mem_obj, offset, length);
	}
#endif	/* INODE_PAGER_DEBUG */

	if (length % PAGE_SIZE)
		panic("inode_object_data_request: bad length");

	imo = inode_pager_check_request(mem_obj, mem_obj_control);

	/*
	 * We don't know which user task caused the page fault,
	 * so just take the first vm_area that maps this inode
	 * and pretend that's the faulting one.
	 * XXX This relies on the fact that all mappings of a given
	 * inode are done with the same vm_ops...
	 * XXX of course this is a BAAAAD and WRONG assumption !
	 */
	vma = imo->imo_inode->i_mmap;
	ASSERT(vma);
	ASSERT(vma->vm_ops);
	ASSERT(vma->vm_ops->nopage);
	mm = vma->vm_mm;
	ASSERT(mm);
#if 0	/* this can fail !!!??? */
	ASSERT(mm->count > 0);
#endif
	mach_task = mm->mm_mach_task;
	ASSERT(mach_task);
	for (i = 0; i < NR_TASKS; i++) {
		if (task[i] && task[i]->osfmach3.task == mach_task) {
			break;
		}
	}
	if (i == NR_TASKS) {
		panic("inode_object_data_request: can't locate target task\n");
	}
	tsk = task[i];
	ASSERT(tsk);
	ASSERT(tsk->mm == mm || tsk->mm == &init_mm);

	fake_vma.vm_inode = imo->imo_inode;
	fake_vma.vm_ops = vma->vm_ops;
	fake_vma.vm_start = 0;
	fake_vma.vm_offset = offset;
	fake_vma.vm_end = offset + PAGE_SIZE;	/* XXX we might map too much ! */

	/* XXX take the identity of the task that did the mapping */
	current->uid = tsk->uid;
	current->euid = tsk->euid;
	current->suid = tsk->suid;
	current->fsuid = tsk->fsuid;
	current->gid = tsk->gid;
	current->egid = tsk->egid;
	current->sgid = tsk->sgid;
	current->fsgid = tsk->fsgid;
	for (i = 0; i < NGROUPS; i++)
		current->groups[i] = tsk->groups[i];

	nopage = fake_vma.vm_ops->nopage;
	page = nopage(&fake_vma, 0,
		      (nopage != filemap_nopage) /* no_share */);

	/* take back our (ghost) identity */
	current->uid = 0;
	current->euid = 0;
	current->suid = 0;
	current->fsuid = 0;
	current->gid = 0;
	current->egid = 0;
	current->sgid = 0;
	current->fsgid = 0;
	current->groups[0] = NOGROUP;

	if (!page) {
#ifdef	INODE_PAGER_DEBUG
		if (inode_pager_debug) {
			printk("mo_data_request: mo_data_error"
			       "(mo_ctl=0x%x, off=0x%x, size=0x%x)\n",
			       mem_obj_control, offset, length);
#if 0
			printk("mo_data_request: SIGBUS for P%d[%s]\n",
			       tsk->pid, tsk->comm);
#endif
		}
#endif	/* INODE_PAGER_DEBUG */
#if 0	/* "tsk" is not necessarily the task that caused the fault */
		force_sig(SIGBUS, tsk);
#endif
		kr = memory_object_data_error(mem_obj_control,
					      offset,
					      length,
					      KERN_MEMORY_ERROR);
		if (kr != KERN_SUCCESS) {
			MACH3_DEBUG(1, kr,
				    ("mo_data_request: mo_data_error"));
			panic("mo_data_request: mo_data_error");
		}
		return KERN_SUCCESS;
	}

#if 0	/* CAUTION: tsk might not exist anymore at this point... */
	++tsk->maj_flt;
	++vma->vm_mm->rss;
#endif

#ifdef	INODE_PAGER_DEBUG
	if (inode_pager_debug) {
		printk("mo_data_request: mo_data_supply"
		       "(mo_ctl=0x%x, off=0x%x, data=0x%lx, size=0x%x, "
		       "dealloc=FALSE, lock=VM_PROT_NONE, precious=FALSE, "
		       "reply_port=NULL)\n",
		       mem_obj_control, offset, page, length);
	}
#endif	/* INODE_PAGER_DEBUG */

	kr = memory_object_data_supply(mem_obj_control,
				       offset,
				       page,
				       length,
				       FALSE,
				       VM_PROT_NONE,
				       FALSE,
				       MACH_PORT_NULL);
	if (kr != KERN_SUCCESS) {
		MACH3_DEBUG(1, kr, ("mo_data_request: mo_data_supply"));
		panic("mo_data_request: mo_data_supply failed");
	}

	if (nopage == filemap_nopage) {
		/*
		 * release the extra reference due to the sharing:
		 * the microkernel takes care of the sharing for us and
		 * keeping this reference would prevent the "shrinker"
		 * from freeing this page if needed.
		 */
		ASSERT(mem_map[MAP_NR(page)].count > 1);
		atomic_dec(&(mem_map[MAP_NR(page)].count));
	} else {
		/* page was allocated just for us: free it now */
		free_page(page);
	}

#ifdef	INODE_PAGER_DEBUG
	if (inode_pager_debug) {
		printk("mo_data_request: done\n");
	}
#endif	/* INODE_PAGER_DEBUG */

	return KERN_SUCCESS;
}
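/*
 * Aside (not in the original source): the uid/gid juggling above is
 * fragile, since an early return would leave the pager thread running
 * with a user task's credentials.  A minimal sketch of one way to make
 * the swap symmetric, assuming the same task_struct fields used above;
 * set_identity()/clear_identity() are hypothetical helpers, not part of
 * the MkLinux server.
 */
static void
set_identity(struct task_struct *tsk)
{
	int i;

	current->uid = tsk->uid;
	current->euid = tsk->euid;
	current->suid = tsk->suid;
	current->fsuid = tsk->fsuid;
	current->gid = tsk->gid;
	current->egid = tsk->egid;
	current->sgid = tsk->sgid;
	current->fsgid = tsk->fsgid;
	for (i = 0; i < NGROUPS; i++)
		current->groups[i] = tsk->groups[i];
}

static void
clear_identity(void)
{
	/* return to the pager's "ghost" identity: root, no groups */
	current->uid = current->euid = current->suid = current->fsuid = 0;
	current->gid = current->egid = current->sgid = current->fsgid = 0;
	current->groups[0] = NOGROUP;
}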
/* Implement pagein callback as described in <mach/memory_object.defs>. */
kern_return_t
_pager_seqnos_memory_object_data_request (mach_port_t object,
					  mach_port_seqno_t seqno,
					  mach_port_t control,
					  vm_offset_t offset,
					  vm_size_t length,
					  vm_prot_t access)
{
  struct pager *p;
  short *pm_entry;
  int doread, doerror;
  error_t err;
  vm_address_t page;
  int write_lock;

  p = ports_lookup_port (0, object, _pager_class);
  if (!p)
    return EOPNOTSUPP;

  /* Acquire the right to meddle with the pagemap */
  mutex_lock (&p->interlock);
  _pager_wait_for_seqno (p, seqno);

  /* sanity checks -- we don't do multi-page requests yet.  */
  if (control != p->memobjcntl)
    {
      printf ("incg data request: wrong control port\n");
      goto release_out;
    }
  if (length != __vm_page_size)
    {
      printf ("incg data request: bad length size %zd\n", length);
      goto release_out;
    }
  if (offset % __vm_page_size)
    {
      printf ("incg data request: misaligned request\n");
      goto release_out;
    }

  _pager_block_termination (p);	/* prevent termination until
				   mark_object_error is done */

  if (p->pager_state != NORMAL)
    {
      printf ("pager in wrong state for read\n");
      goto allow_release_out;
    }

  err = _pager_pagemap_resize (p, offset + length);
  if (err)
    goto allow_release_out;	/* Can't do much about the actual error.  */

  /* If someone is paging this out right now, the disk contents are
     unreliable, so we have to wait.  It is too expensive (right now) to
     find the data and return it, and then interrupt the write, so we
     just mark the page and have the writing thread do m_o_data_supply
     when it gets around to it.  */
  pm_entry = &p->pagemap[offset / __vm_page_size];
  if (*pm_entry & PM_PAGINGOUT)
    {
      doread = 0;
      *pm_entry |= PM_PAGEINWAIT;
    }
  else
    doread = 1;

  if (*pm_entry & PM_INVALID)
    doerror = 1;
  else
    doerror = 0;

  *pm_entry |= PM_INCORE;

  if (PM_NEXTERROR (*pm_entry) != PAGE_NOERR && (access & VM_PROT_WRITE))
    {
      memory_object_data_error (control, offset, length,
				_pager_page_errors[PM_NEXTERROR (*pm_entry)]);
      _pager_mark_object_error (p, offset, length,
				_pager_page_errors[PM_NEXTERROR (*pm_entry)]);
      *pm_entry = SET_PM_NEXTERROR (*pm_entry, PAGE_NOERR);
      doread = 0;
    }

  /* Let someone else in.  */
  _pager_release_seqno (p, seqno);
  mutex_unlock (&p->interlock);

  if (!doread)
    goto allow_term_out;
  if (doerror)
    goto error_read;

  err = pager_read_page (p->upi, offset, &page, &write_lock);
  if (err)
    goto error_read;

  memory_object_data_supply (p->memobjcntl, offset, page, length, 1,
			     write_lock ? VM_PROT_WRITE : VM_PROT_NONE, 0,
			     MACH_PORT_NULL);
  mutex_lock (&p->interlock);
  _pager_mark_object_error (p, offset, length, 0);
  _pager_allow_termination (p);
  mutex_unlock (&p->interlock);
  ports_port_deref (p);
  return 0;

 error_read:
  memory_object_data_error (p->memobjcntl, offset, length, EIO);
  _pager_mark_object_error (p, offset, length, EIO);

 allow_term_out:
  mutex_lock (&p->interlock);
  _pager_allow_termination (p);
  mutex_unlock (&p->interlock);
  ports_port_deref (p);
  return 0;

 allow_release_out:
  _pager_allow_termination (p);
 release_out:
  _pager_release_seqno (p, seqno);
  mutex_unlock (&p->interlock);
  ports_port_deref (p);
  return 0;
}
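/*
 * For contrast, a minimal sketch (not part of the source above) of the
 * write-side counterpart implied by the PM_PAGEINWAIT comment: once a
 * pageout completes, the writing thread supplies the page to any reader
 * that found PM_PAGINGOUT set.  The names reused here (pagemap,
 * interlock, memobjcntl, PM_*) come from the function above; the
 * control flow is an illustrative assumption, not the actual libpager
 * data-return code.
 */
static void
finish_pageout_sketch (struct pager *p, vm_offset_t offset, vm_address_t page)
{
  short *pm_entry = &p->pagemap[offset / __vm_page_size];

  mutex_lock (&p->interlock);
  *pm_entry &= ~PM_PAGINGOUT;
  if (*pm_entry & PM_PAGEINWAIT)
    {
      /* A data request arrived while the pageout was in flight;
	 satisfy it now that the page contents are stable again.  */
      *pm_entry &= ~PM_PAGEINWAIT;
      memory_object_data_supply (p->memobjcntl, offset, page,
				 __vm_page_size, 0 /* dealloc */,
				 VM_PROT_NONE, 0 /* precious */,
				 MACH_PORT_NULL);
    }
  mutex_unlock (&p->interlock);
}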