/* * Run the chain and if the bottom-most object is a vnode-type lock the * underlying vnode. A locked vnode or NULL is returned. */ struct vnode * vnode_pager_lock(vm_object_t object) { struct vnode *vp = NULL; vm_object_t lobject; vm_object_t tobject; int error; if (object == NULL) return(NULL); ASSERT_LWKT_TOKEN_HELD(vm_object_token(object)); lobject = object; while (lobject->type != OBJT_VNODE) { if (lobject->flags & OBJ_DEAD) break; tobject = lobject->backing_object; if (tobject == NULL) break; vm_object_hold_shared(tobject); if (tobject == lobject->backing_object) { if (lobject != object) { vm_object_lock_swap(); vm_object_drop(lobject); } lobject = tobject; } else { vm_object_drop(tobject); } } while (lobject->type == OBJT_VNODE && (lobject->flags & OBJ_DEAD) == 0) { /* * Extract the vp */ vp = lobject->handle; error = vget(vp, LK_SHARED | LK_RETRY | LK_CANRECURSE); if (error == 0) { if (lobject->handle == vp) break; vput(vp); } else { kprintf("vnode_pager_lock: vp %p error %d " "lockstatus %d, retrying\n", vp, error, lockstatus(&vp->v_lock, curthread)); tsleep(object->handle, 0, "vnpgrl", hz); } vp = NULL; } if (lobject != object) vm_object_drop(lobject); return (vp); }
/* * Add a ref to a vnode's existing VM object, return the object or * NULL if the vnode did not have one. This does not create the * object (we can't since we don't know what the proper blocksize/boff * is to match the VFS's use of the buffer cache). */ vm_object_t vnode_pager_reference(struct vnode *vp) { vm_object_t object; /* * Prevent race condition when allocating the object. This * can happen with NFS vnodes since the nfsnode isn't locked. * * Serialize potential vnode/object teardowns and interlocks */ lwkt_gettoken(&vp->v_token); while (vp->v_flag & VOLOCK) { vsetflags(vp, VOWANT); tsleep(vp, 0, "vnpobj", 0); } vsetflags(vp, VOLOCK); lwkt_reltoken(&vp->v_token); /* * Prevent race conditions against deallocation of the VM * object. */ while ((object = vp->v_object) != NULL) { vm_object_hold(object); if ((object->flags & OBJ_DEAD) == 0) break; vm_object_dead_sleep(object, "vadead"); vm_object_drop(object); } /* * The object is expected to exist, the caller will handle * NULL returns if it does not. */ if (object) { object->ref_count++; vref(vp); } lwkt_gettoken(&vp->v_token); vclrflags(vp, VOLOCK); if (vp->v_flag & VOWANT) { vclrflags(vp, VOWANT); wakeup(vp); } lwkt_reltoken(&vp->v_token); if (object) vm_object_drop(object); return (object); }
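The VOLOCK/VOWANT sequence above is a hand-rolled sleep interlock: a lock flag, a want flag, and a wakeup. The userland sketch below reproduces the same pattern with a pthread mutex and condition variable purely as an illustration; the flag names are borrowed from the kernel code, but obj_lock()/obj_unlock() are hypothetical and this is not a kernel API.

/* Userland analogue of the VOLOCK/VOWANT interlock (illustration only;
 * the kernel version uses vsetflags()/tsleep()/wakeup() under vp->v_token).
 * Compile with -lpthread. */
#include <pthread.h>
#include <stdio.h>

#define VOLOCK	0x1
#define VOWANT	0x2

static pthread_mutex_t token = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  wchan = PTHREAD_COND_INITIALIZER;
static int flags;

static void
obj_lock(void)
{
	pthread_mutex_lock(&token);
	while (flags & VOLOCK) {
		flags |= VOWANT;		/* record that someone is waiting */
		pthread_cond_wait(&wchan, &token);
	}
	flags |= VOLOCK;
	pthread_mutex_unlock(&token);
}

static void
obj_unlock(void)
{
	pthread_mutex_lock(&token);
	flags &= ~VOLOCK;
	if (flags & VOWANT) {			/* wake anyone who recorded interest */
		flags &= ~VOWANT;
		pthread_cond_broadcast(&wchan);
	}
	pthread_mutex_unlock(&token);
}

int
main(void)
{
	obj_lock();
	printf("critical section\n");
	obj_unlock();
	return 0;
}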
static int old_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot, vm_page_t *mres) { vm_paddr_t paddr; vm_page_t page; vm_offset_t pidx = OFF_TO_IDX(offset); cdev_t dev; page = *mres; dev = object->handle; paddr = pmap_phys_address( dev_dmmap(dev, offset, prot, NULL)); KASSERT(paddr != -1,("dev_pager_getpage: map function returns error")); KKASSERT(object->type == OBJT_DEVICE); if (page->flags & PG_FICTITIOUS) { /* * If the passed in reqpage page is already a fake page, * update it with the new physical address. */ page->phys_addr = paddr; page->valid = VM_PAGE_BITS_ALL; } else { /* * Replace the passed in reqpage page with our own fake page * and free up all the original pages. */ page = dev_pager_getfake(paddr, object->memattr); TAILQ_INSERT_TAIL(&object->un_pager.devp.devp_pglist, page, pageq); vm_object_hold(object); vm_page_free(*mres); if (vm_page_insert(page, object, pidx) == FALSE) { panic("dev_pager_getpage: page (%p,%016jx) exists", object, (uintmax_t)pidx); } vm_object_drop(object); } return (VM_PAGER_OK); }
static int link_elf_obj_load_file(const char *filename, linker_file_t * result) { struct nlookupdata nd; struct thread *td = curthread; /* XXX */ struct proc *p = td->td_proc; char *pathname; struct vnode *vp; Elf_Ehdr *hdr; Elf_Shdr *shdr; Elf_Sym *es; int nbytes, i, j; vm_offset_t mapbase; size_t mapsize; int error = 0; int resid; elf_file_t ef; linker_file_t lf; int symtabindex; int symstrindex; int shstrindex; int nsym; int pb, rl, ra; int alignmask; /* XXX Hack for firmware loading where p == NULL */ if (p == NULL) { p = &proc0; } KKASSERT(p != NULL); if (p->p_ucred == NULL) { kprintf("link_elf_obj_load_file: cannot load '%s' from filesystem" " this early\n", filename); return ENOENT; } shdr = NULL; lf = NULL; mapsize = 0; hdr = NULL; pathname = linker_search_path(filename); if (pathname == NULL) return ENOENT; error = nlookup_init(&nd, pathname, UIO_SYSSPACE, NLC_FOLLOW | NLC_LOCKVP); if (error == 0) error = vn_open(&nd, NULL, FREAD, 0); kfree(pathname, M_LINKER); if (error) { nlookup_done(&nd); return error; } vp = nd.nl_open_vp; nd.nl_open_vp = NULL; nlookup_done(&nd); /* * Read the elf header from the file. */ hdr = kmalloc(sizeof(*hdr), M_LINKER, M_WAITOK); error = vn_rdwr(UIO_READ, vp, (void *)hdr, sizeof(*hdr), 0, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, &resid); if (error) goto out; if (resid != 0) { error = ENOEXEC; goto out; } if (!IS_ELF(*hdr)) { error = ENOEXEC; goto out; } if (hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || hdr->e_ident[EI_DATA] != ELF_TARG_DATA) { link_elf_obj_error(filename, "Unsupported file layout"); error = ENOEXEC; goto out; } if (hdr->e_ident[EI_VERSION] != EV_CURRENT || hdr->e_version != EV_CURRENT) { link_elf_obj_error(filename, "Unsupported file version"); error = ENOEXEC; goto out; } if (hdr->e_type != ET_REL) { error = ENOSYS; goto out; } if (hdr->e_machine != ELF_TARG_MACH) { link_elf_obj_error(filename, "Unsupported machine"); error = ENOEXEC; goto out; } ef = kmalloc(sizeof(struct elf_file), M_LINKER, M_WAITOK | M_ZERO); lf = linker_make_file(filename, ef, &link_elf_obj_file_ops); if (lf == NULL) { kfree(ef, M_LINKER); error = ENOMEM; goto out; } ef->nprogtab = 0; ef->e_shdr = NULL; ef->nreltab = 0; ef->nrelatab = 0; /* Allocate and read in the section header */ nbytes = hdr->e_shnum * hdr->e_shentsize; if (nbytes == 0 || hdr->e_shoff == 0 || hdr->e_shentsize != sizeof(Elf_Shdr)) { error = ENOEXEC; goto out; } shdr = kmalloc(nbytes, M_LINKER, M_WAITOK); ef->e_shdr = shdr; error = vn_rdwr(UIO_READ, vp, (caddr_t) shdr, nbytes, hdr->e_shoff, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, &resid); if (error) goto out; if (resid) { error = ENOEXEC; goto out; } /* Scan the section header for information and table sizing. 
*/ nsym = 0; symtabindex = -1; symstrindex = -1; for (i = 0; i < hdr->e_shnum; i++) { if (shdr[i].sh_size == 0) continue; switch (shdr[i].sh_type) { case SHT_PROGBITS: case SHT_NOBITS: ef->nprogtab++; break; case SHT_SYMTAB: nsym++; symtabindex = i; symstrindex = shdr[i].sh_link; break; case SHT_REL: ef->nreltab++; break; case SHT_RELA: ef->nrelatab++; break; case SHT_STRTAB: break; } } if (ef->nprogtab == 0) { link_elf_obj_error(filename, "file has no contents"); error = ENOEXEC; goto out; } if (nsym != 1) { /* Only allow one symbol table for now */ link_elf_obj_error(filename, "file has no valid symbol table"); error = ENOEXEC; goto out; } if (symstrindex < 0 || symstrindex > hdr->e_shnum || shdr[symstrindex].sh_type != SHT_STRTAB) { link_elf_obj_error(filename, "file has invalid symbol strings"); error = ENOEXEC; goto out; } /* Allocate space for tracking the load chunks */ if (ef->nprogtab != 0) ef->progtab = kmalloc(ef->nprogtab * sizeof(*ef->progtab), M_LINKER, M_WAITOK | M_ZERO); if (ef->nreltab != 0) ef->reltab = kmalloc(ef->nreltab * sizeof(*ef->reltab), M_LINKER, M_WAITOK | M_ZERO); if (ef->nrelatab != 0) ef->relatab = kmalloc(ef->nrelatab * sizeof(*ef->relatab), M_LINKER, M_WAITOK | M_ZERO); if ((ef->nprogtab != 0 && ef->progtab == NULL) || (ef->nreltab != 0 && ef->reltab == NULL) || (ef->nrelatab != 0 && ef->relatab == NULL)) { error = ENOMEM; goto out; } if (symtabindex == -1) panic("lost symbol table index"); /* Allocate space for and load the symbol table */ ef->ddbsymcnt = shdr[symtabindex].sh_size / sizeof(Elf_Sym); ef->ddbsymtab = kmalloc(shdr[symtabindex].sh_size, M_LINKER, M_WAITOK); error = vn_rdwr(UIO_READ, vp, (void *)ef->ddbsymtab, shdr[symtabindex].sh_size, shdr[symtabindex].sh_offset, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, &resid); if (error) goto out; if (resid != 0) { error = EINVAL; goto out; } if (symstrindex == -1) panic("lost symbol string index"); /* Allocate space for and load the symbol strings */ ef->ddbstrcnt = shdr[symstrindex].sh_size; ef->ddbstrtab = kmalloc(shdr[symstrindex].sh_size, M_LINKER, M_WAITOK); error = vn_rdwr(UIO_READ, vp, ef->ddbstrtab, shdr[symstrindex].sh_size, shdr[symstrindex].sh_offset, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, &resid); if (error) goto out; if (resid != 0) { error = EINVAL; goto out; } /* Do we have a string table for the section names? */ shstrindex = -1; if (hdr->e_shstrndx != 0 && shdr[hdr->e_shstrndx].sh_type == SHT_STRTAB) { shstrindex = hdr->e_shstrndx; ef->shstrcnt = shdr[shstrindex].sh_size; ef->shstrtab = kmalloc(shdr[shstrindex].sh_size, M_LINKER, M_WAITOK); error = vn_rdwr(UIO_READ, vp, ef->shstrtab, shdr[shstrindex].sh_size, shdr[shstrindex].sh_offset, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, &resid); if (error) goto out; if (resid != 0) { error = EINVAL; goto out; } } /* Size up code/data(progbits) and bss(nobits). */ alignmask = 0; for (i = 0; i < hdr->e_shnum; i++) { if (shdr[i].sh_size == 0) continue; switch (shdr[i].sh_type) { case SHT_PROGBITS: case SHT_NOBITS: alignmask = shdr[i].sh_addralign - 1; mapsize += alignmask; mapsize &= ~alignmask; mapsize += shdr[i].sh_size; break; } } /* * We know how much space we need for the text/data/bss/etc. 
This * stuff needs to be in a single chunk so that profiling etc can get * the bounds and gdb can associate offsets with modules */ ef->object = vm_object_allocate(OBJT_DEFAULT, round_page(mapsize) >> PAGE_SHIFT); if (ef->object == NULL) { error = ENOMEM; goto out; } vm_object_hold(ef->object); vm_object_reference_locked(ef->object); ef->address = (caddr_t) vm_map_min(&kernel_map); ef->bytes = 0; /* * In order to satisfy x86_64's architectural requirements on the * location of code and data in the kernel's address space, request a * mapping that is above the kernel. * * vkernel64's text+data is outside the managed VM space entirely. */ #if defined(__x86_64__) && defined(_KERNEL_VIRTUAL) error = vkernel_module_memory_alloc(&mapbase, round_page(mapsize)); vm_object_drop(ef->object); #else mapbase = KERNBASE; error = vm_map_find(&kernel_map, ef->object, NULL, 0, &mapbase, round_page(mapsize), PAGE_SIZE, TRUE, VM_MAPTYPE_NORMAL, VM_PROT_ALL, VM_PROT_ALL, FALSE); vm_object_drop(ef->object); if (error) { vm_object_deallocate(ef->object); ef->object = NULL; goto out; } /* Wire the pages */ error = vm_map_wire(&kernel_map, mapbase, mapbase + round_page(mapsize), 0); #endif if (error != KERN_SUCCESS) { error = ENOMEM; goto out; } /* Inform the kld system about the situation */ lf->address = ef->address = (caddr_t) mapbase; lf->size = round_page(mapsize); ef->bytes = mapsize; /* * Now load code/data(progbits), zero bss(nobits), allocate space for * and load relocs */ pb = 0; rl = 0; ra = 0; alignmask = 0; for (i = 0; i < hdr->e_shnum; i++) { if (shdr[i].sh_size == 0) continue; switch (shdr[i].sh_type) { case SHT_PROGBITS: case SHT_NOBITS: alignmask = shdr[i].sh_addralign - 1; mapbase += alignmask; mapbase &= ~alignmask; if (ef->shstrtab && shdr[i].sh_name != 0) ef->progtab[pb].name = ef->shstrtab + shdr[i].sh_name; else if (shdr[i].sh_type == SHT_PROGBITS) ef->progtab[pb].name = "<<PROGBITS>>"; else ef->progtab[pb].name = "<<NOBITS>>"; #if 0 if (ef->progtab[pb].name != NULL && !strcmp(ef->progtab[pb].name, "set_pcpu")) ef->progtab[pb].addr = dpcpu_alloc(shdr[i].sh_size); #ifdef VIMAGE else if (ef->progtab[pb].name != NULL && !strcmp(ef->progtab[pb].name, VNET_SETNAME)) ef->progtab[pb].addr = vnet_data_alloc(shdr[i].sh_size); #endif else #endif ef->progtab[pb].addr = (void *)(uintptr_t) mapbase; if (ef->progtab[pb].addr == NULL) { error = ENOSPC; goto out; } ef->progtab[pb].size = shdr[i].sh_size; ef->progtab[pb].sec = i; if (shdr[i].sh_type == SHT_PROGBITS) { error = vn_rdwr(UIO_READ, vp, ef->progtab[pb].addr, shdr[i].sh_size, shdr[i].sh_offset, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, &resid); if (error) goto out; if (resid != 0) { error = EINVAL; goto out; } #if 0 /* Initialize the per-cpu or vnet area. */ if (ef->progtab[pb].addr != (void *)mapbase && !strcmp(ef->progtab[pb].name, "set_pcpu")) dpcpu_copy(ef->progtab[pb].addr, shdr[i].sh_size); #ifdef VIMAGE else if (ef->progtab[pb].addr != (void *)mapbase && !strcmp(ef->progtab[pb].name, VNET_SETNAME)) vnet_data_copy(ef->progtab[pb].addr, shdr[i].sh_size); #endif #endif } else bzero(ef->progtab[pb].addr, shdr[i].sh_size); /* Update all symbol values with the offset. 
*/ for (j = 0; j < ef->ddbsymcnt; j++) { es = &ef->ddbsymtab[j]; if (es->st_shndx != i) continue; es->st_value += (Elf_Addr) ef->progtab[pb].addr; } mapbase += shdr[i].sh_size; pb++; break; case SHT_REL: ef->reltab[rl].rel = kmalloc(shdr[i].sh_size, M_LINKER, M_WAITOK); ef->reltab[rl].nrel = shdr[i].sh_size / sizeof(Elf_Rel); ef->reltab[rl].sec = shdr[i].sh_info; error = vn_rdwr(UIO_READ, vp, (void *)ef->reltab[rl].rel, shdr[i].sh_size, shdr[i].sh_offset, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, &resid); if (error) goto out; if (resid != 0) { error = EINVAL; goto out; } rl++; break; case SHT_RELA: ef->relatab[ra].rela = kmalloc(shdr[i].sh_size, M_LINKER, M_WAITOK); ef->relatab[ra].nrela = shdr[i].sh_size / sizeof(Elf_Rela); ef->relatab[ra].sec = shdr[i].sh_info; error = vn_rdwr(UIO_READ, vp, (void *)ef->relatab[ra].rela, shdr[i].sh_size, shdr[i].sh_offset, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, &resid); if (error) goto out; if (resid != 0) { error = EINVAL; goto out; } ra++; break; } } if (pb != ef->nprogtab) panic("lost progbits"); if (rl != ef->nreltab) panic("lost reltab"); if (ra != ef->nrelatab) panic("lost relatab"); if (mapbase != (vm_offset_t) ef->address + mapsize) panic("mapbase 0x%lx != address %p + mapsize 0x%lx (0x%lx)", mapbase, ef->address, mapsize, (vm_offset_t) ef->address + mapsize); /* Local intra-module relocations */ link_elf_obj_reloc_local(lf); /* Pull in dependencies */ error = linker_load_dependencies(lf); if (error) goto out; /* External relocations */ error = relocate_file(lf); if (error) goto out; *result = lf; out: if (error && lf) linker_file_unload(lf /*, LINKER_UNLOAD_FORCE */); if (hdr) kfree(hdr, M_LINKER); vn_unlock(vp); vn_close(vp, FREAD, NULL); return error; }
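For reference, the sizing pass that link_elf_obj_load_file performs over SHT_PROGBITS/SHT_NOBITS sections can be reproduced in userland. The stand-alone sketch below assumes a 64-bit ELF relocatable object and the generic Elf64_* definitions from <elf.h>; it is not loader code, only an illustration of the alignment arithmetic.

/* shdr_size.c: hypothetical helper, not part of the module loader. */
#include <elf.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(int argc, char **argv)
{
	Elf64_Ehdr hdr;
	Elf64_Shdr *shdr;
	Elf64_Xword mapsize = 0;
	Elf64_Xword alignmask;
	size_t nbytes;
	FILE *fp;
	int i;

	if (argc != 2) {
		fprintf(stderr, "usage: %s file.o\n", argv[0]);
		return 1;
	}
	if ((fp = fopen(argv[1], "rb")) == NULL) {
		perror("fopen");
		return 1;
	}
	if (fread(&hdr, sizeof(hdr), 1, fp) != 1 ||
	    memcmp(hdr.e_ident, ELFMAG, SELFMAG) != 0 ||
	    hdr.e_shentsize != sizeof(Elf64_Shdr)) {
		fprintf(stderr, "not a 64-bit ELF object\n");
		return 1;
	}
	nbytes = (size_t)hdr.e_shnum * hdr.e_shentsize;
	shdr = malloc(nbytes);
	if (shdr == NULL || fseek(fp, (long)hdr.e_shoff, SEEK_SET) != 0 ||
	    fread(shdr, 1, nbytes, fp) != nbytes) {
		fprintf(stderr, "cannot read section headers\n");
		return 1;
	}

	/*
	 * Same sizing pass as the loader: pad to each section's
	 * alignment, then add its size.  SHT_NOBITS (bss) contributes
	 * size but carries no file data.
	 */
	for (i = 0; i < hdr.e_shnum; i++) {
		if (shdr[i].sh_size == 0)
			continue;
		if (shdr[i].sh_type != SHT_PROGBITS &&
		    shdr[i].sh_type != SHT_NOBITS)
			continue;
		alignmask = shdr[i].sh_addralign ?
			    shdr[i].sh_addralign - 1 : 0;
		mapsize += alignmask;
		mapsize &= ~alignmask;
		mapsize += shdr[i].sh_size;
	}
	printf("mapsize before page rounding: %llu bytes\n",
	       (unsigned long long)mapsize);
	free(shdr);
	fclose(fp);
	return 0;
}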
vm_object_t cdev_pager_allocate(void *handle, enum obj_type tp, struct cdev_pager_ops *ops, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred) { cdev_t dev; vm_object_t object; u_short color; /* * Offset should be page aligned. */ if (foff & PAGE_MASK) return (NULL); size = round_page64(size); if (ops->cdev_pg_ctor(handle, size, prot, foff, cred, &color) != 0) return (NULL); /* * Look up pager, creating as necessary. */ mtx_lock(&dev_pager_mtx); object = vm_pager_object_lookup(&dev_pager_object_list, handle); if (object == NULL) { /* * Allocate object and associate it with the pager. */ object = vm_object_allocate_hold(tp, OFF_TO_IDX(foff + size)); object->handle = handle; object->un_pager.devp.ops = ops; object->un_pager.devp.dev = handle; TAILQ_INIT(&object->un_pager.devp.devp_pglist); /* * handle is only a device for old_dev_pager_ctor. */ if (ops->cdev_pg_ctor == old_dev_pager_ctor) { dev = handle; dev->si_object = object; } TAILQ_INSERT_TAIL(&dev_pager_object_list, object, pager_object_list); vm_object_drop(object); } else { /* * Gain a reference to the object. */ vm_object_hold(object); vm_object_reference_locked(object); if (OFF_TO_IDX(foff + size) > object->size) object->size = OFF_TO_IDX(foff + size); vm_object_drop(object); } mtx_unlock(&dev_pager_mtx); return (object); }
/* * mincore system call handler * * mincore_args(const void *addr, size_t len, char *vec) * * No requirements */ int sys_mincore(struct mincore_args *uap) { struct proc *p = curproc; vm_offset_t addr, first_addr; vm_offset_t end, cend; pmap_t pmap; vm_map_t map; char *vec; int error; int vecindex, lastvecindex; vm_map_entry_t current; vm_map_entry_t entry; int mincoreinfo; unsigned int timestamp; /* * Make sure that the addresses presented are valid for user * mode. */ first_addr = addr = trunc_page((vm_offset_t) uap->addr); end = addr + (vm_size_t)round_page(uap->len); if (end < addr) return (EINVAL); if (VM_MAX_USER_ADDRESS > 0 && end > VM_MAX_USER_ADDRESS) return (EINVAL); /* * Address of byte vector */ vec = uap->vec; map = &p->p_vmspace->vm_map; pmap = vmspace_pmap(p->p_vmspace); lwkt_gettoken(&map->token); vm_map_lock_read(map); RestartScan: timestamp = map->timestamp; if (!vm_map_lookup_entry(map, addr, &entry)) entry = entry->next; /* * Do this on a map entry basis so that if the pages are not * in the current processes address space, we can easily look * up the pages elsewhere. */ lastvecindex = -1; for(current = entry; (current != &map->header) && (current->start < end); current = current->next) { /* * ignore submaps (for now) or null objects */ if (current->maptype != VM_MAPTYPE_NORMAL && current->maptype != VM_MAPTYPE_VPAGETABLE) { continue; } if (current->object.vm_object == NULL) continue; /* * limit this scan to the current map entry and the * limits for the mincore call */ if (addr < current->start) addr = current->start; cend = current->end; if (cend > end) cend = end; /* * scan this entry one page at a time */ while (addr < cend) { /* * Check pmap first, it is likely faster, also * it can provide info as to whether we are the * one referencing or modifying the page. * * If we have to check the VM object, only mess * around with normal maps. Do not mess around * with virtual page tables (XXX). */ mincoreinfo = pmap_mincore(pmap, addr); if (mincoreinfo == 0 && current->maptype == VM_MAPTYPE_NORMAL) { vm_pindex_t pindex; vm_ooffset_t offset; vm_page_t m; /* * calculate the page index into the object */ offset = current->offset + (addr - current->start); pindex = OFF_TO_IDX(offset); /* * if the page is resident, then gather * information about it. spl protection is * required to maintain the object * association. And XXX what if the page is * busy? What's the deal with that? * * XXX vm_token - legacy for pmap_ts_referenced * in i386 and vkernel pmap code. */ lwkt_gettoken(&vm_token); vm_object_hold(current->object.vm_object); m = vm_page_lookup(current->object.vm_object, pindex); if (m && m->valid) { mincoreinfo = MINCORE_INCORE; if (m->dirty || pmap_is_modified(m)) mincoreinfo |= MINCORE_MODIFIED_OTHER; if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { vm_page_flag_set(m, PG_REFERENCED); mincoreinfo |= MINCORE_REFERENCED_OTHER; } } vm_object_drop(current->object.vm_object); lwkt_reltoken(&vm_token); } /* * subyte may page fault. In case it needs to modify * the map, we release the lock. */ vm_map_unlock_read(map); /* * calculate index into user supplied byte vector */ vecindex = OFF_TO_IDX(addr - first_addr); /* * If we have skipped map entries, we need to make sure that * the byte vector is zeroed for those skipped entries. 
*/ while((lastvecindex + 1) < vecindex) { error = subyte( vec + lastvecindex, 0); if (error) { error = EFAULT; goto done; } ++lastvecindex; } /* * Pass the page information to the user */ error = subyte( vec + vecindex, mincoreinfo); if (error) { error = EFAULT; goto done; } /* * If the map has changed, due to the subyte, the previous * output may be invalid. */ vm_map_lock_read(map); if (timestamp != map->timestamp) goto RestartScan; lastvecindex = vecindex; addr += PAGE_SIZE; } } /* * subyte may page fault. In case it needs to modify * the map, we release the lock. */ vm_map_unlock_read(map); /* * Zero the last entries in the byte vector. */ vecindex = OFF_TO_IDX(end - first_addr); while((lastvecindex + 1) < vecindex) { error = subyte( vec + lastvecindex, 0); if (error) { error = EFAULT; goto done; } ++lastvecindex; } /* * If the map has changed, due to the subyte, the previous * output may be invalid. */ vm_map_lock_read(map); if (timestamp != map->timestamp) goto RestartScan; vm_map_unlock_read(map); error = 0; done: lwkt_reltoken(&map->token); return (error); }
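From userland the handler above is reached through the mincore(2) system call. A minimal usage sketch follows; the page count and the anonymous mapping are arbitrary choices for the demo.

/* mincore_demo.c: hypothetical userland demo of mincore(2). */
#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	size_t pagesz = (size_t)sysconf(_SC_PAGESIZE);
	size_t npages = 8;			/* arbitrary for the demo */
	size_t len = npages * pagesz;
	char *base, *vec;
	size_t i;

	base = mmap(NULL, len, PROT_READ | PROT_WRITE,
		    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (base == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Touch every second page so only those become resident. */
	for (i = 0; i < npages; i += 2)
		base[i * pagesz] = 1;

	vec = malloc(npages);
	if (vec == NULL || mincore(base, len, vec) != 0) {
		perror("mincore");
		return 1;
	}
	for (i = 0; i < npages; i++) {
		printf("page %zu: %s\n", i,
		       (vec[i] & MINCORE_INCORE) ? "resident" : "not resident");
	}
	free(vec);
	munmap(base, len);
	return 0;
}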
/* * The map entries can *almost* be read with programs like cat. However, * large maps need special programs to read. It is not easy to implement * a program that can sense the required size of the buffer, and then * subsequently do a read with the appropriate size. This operation cannot * be atomic. The best that we can do is to allow the program to do a read * with an arbitrarily large buffer, and return as much as we can. We can * return an error code if the buffer is too small (EFBIG), then the program * can try a bigger buffer. */ int procfs_domap(struct proc *curp, struct lwp *lp, struct pfsnode *pfs, struct uio *uio) { struct proc *p = lp->lwp_proc; int len; struct vnode *vp; char *fullpath, *freepath; int error; vm_map_t map = &p->p_vmspace->vm_map; pmap_t pmap = vmspace_pmap(p->p_vmspace); vm_map_entry_t entry; char mebuffer[MEBUFFERSIZE]; if (uio->uio_rw != UIO_READ) return (EOPNOTSUPP); if (uio->uio_offset != 0) return (0); error = 0; vm_map_lock_read(map); for (entry = map->header.next; ((uio->uio_resid > 0) && (entry != &map->header)); entry = entry->next) { vm_object_t obj, tobj, lobj; int ref_count, shadow_count, flags; vm_offset_t addr; vm_offset_t ostart; int resident, privateresident; char *type; if (entry->maptype != VM_MAPTYPE_NORMAL && entry->maptype != VM_MAPTYPE_VPAGETABLE) { continue; } obj = entry->object.vm_object; if (obj) vm_object_hold(obj); if (obj && (obj->shadow_count == 1)) privateresident = obj->resident_page_count; else privateresident = 0; /* * Use map->hint as a poor man's ripout detector. */ map->hint = entry; ostart = entry->start; /* * Count resident pages (XXX can be horrible on 64-bit) */ resident = 0; addr = entry->start; while (addr < entry->end) { if (pmap_extract(pmap, addr)) resident++; addr += PAGE_SIZE; } if (obj) { lobj = obj; while ((tobj = lobj->backing_object) != NULL) { KKASSERT(tobj != obj); vm_object_hold(tobj); if (tobj == lobj->backing_object) { if (lobj != obj) { vm_object_lock_swap(); vm_object_drop(lobj); } lobj = tobj; } else { vm_object_drop(tobj); } } } else { lobj = NULL; } freepath = NULL; fullpath = "-"; if (lobj) { switch(lobj->type) { default: case OBJT_DEFAULT: type = "default"; vp = NULL; break; case OBJT_VNODE: type = "vnode"; vp = lobj->handle; vref(vp); break; case OBJT_SWAP: type = "swap"; vp = NULL; break; case OBJT_DEVICE: type = "device"; vp = NULL; break; } flags = obj->flags; ref_count = obj->ref_count; shadow_count = obj->shadow_count; if (vp != NULL) { vn_fullpath(p, vp, &fullpath, &freepath, 1); vrele(vp); } if (lobj != obj) vm_object_drop(lobj); } else { type = "none"; flags = 0; ref_count = 0; shadow_count = 0; } /* * format: * start, end, res, priv res, cow, access, type, (fullpath). */ ksnprintf(mebuffer, sizeof(mebuffer), #if LONG_BIT == 64 "0x%016lx 0x%016lx %d %d %p %s%s%s %d %d " #else "0x%08lx 0x%08lx %d %d %p %s%s%s %d %d " #endif "0x%04x %s %s %s %s\n", (u_long)entry->start, (u_long)entry->end, resident, privateresident, obj, (entry->protection & VM_PROT_READ)?"r":"-", (entry->protection & VM_PROT_WRITE)?"w":"-", (entry->protection & VM_PROT_EXECUTE)?"x":"-", ref_count, shadow_count, flags, (entry->eflags & MAP_ENTRY_COW)?"COW":"NCOW", (entry->eflags & MAP_ENTRY_NEEDS_COPY)?"NC":"NNC", type, fullpath); if (obj) vm_object_drop(obj); if (freepath != NULL) { kfree(freepath, M_TEMP); freepath = NULL; } len = strlen(mebuffer); if (len > uio->uio_resid) { error = EFBIG; break; } /* * We cannot safely hold the map locked while accessing * userspace as a VM fault might recurse the locked map. 
	 */
		vm_map_unlock_read(map);
		error = uiomove(mebuffer, len, uio);
		vm_map_lock_read(map);
		if (error)
			break;

		/*
		 * We use map->hint as a poor man's ripout detector.  If it
		 * does not match the entry we set it to prior to unlocking
		 * the map, the entry MIGHT now be stale.  In this case we
		 * do an expensive lookup to find our place in the iteration
		 * again.
		 */
		if (map->hint != entry) {
			vm_map_entry_t reentry;

			vm_map_lookup_entry(map, ostart, &reentry);
			entry = reentry;
		}
	}
	vm_map_unlock_read(map);
	return error;
}
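As the comment at the top of procfs_domap explains, a read at a non-zero offset returns nothing and EFBIG signals that the caller's buffer was too small, so the whole map must come back in a single read. A small userland sketch of that retry strategy, assuming procfs is mounted on /proc (the path and buffer sizes are illustrative):

/* procmap_read.c: hypothetical reader for /proc/curproc/map. */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	size_t bufsize = 64 * 1024;		/* arbitrary starting size */
	char *buf = NULL;
	ssize_t n;
	int fd;

	fd = open("/proc/curproc/map", O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	for (;;) {
		buf = realloc(buf, bufsize);
		if (buf == NULL)
			return 1;
		/* The whole map must come back in one read, see above. */
		n = read(fd, buf, bufsize);
		if (n >= 0)
			break;
		if (errno != EFBIG) {
			perror("read");
			return 1;
		}
		/* Buffer too small for the map; retry with a larger one. */
		bufsize *= 2;
		lseek(fd, 0, SEEK_SET);
	}
	fwrite(buf, 1, (size_t)n, stdout);
	free(buf);
	close(fd);
	return 0;
}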
/* * Lets the VM system know about a change in size for a file. * We adjust our own internal size and flush any cached pages in * the associated object that are affected by the size change. * * NOTE: This routine may be invoked as a result of a pager put * operation (possibly at object termination time), so we must be careful. * * NOTE: vp->v_filesize is initialized to NOOFFSET (-1), be sure that * we do not blow up on the case. nsize will always be >= 0, however. */ void vnode_pager_setsize(struct vnode *vp, vm_ooffset_t nsize) { vm_pindex_t nobjsize; vm_pindex_t oobjsize; vm_object_t object; object = vp->v_object; if (object == NULL) return; vm_object_hold(object); KKASSERT(vp->v_object == object); /* * Hasn't changed size */ if (nsize == vp->v_filesize) { vm_object_drop(object); return; } /* * Has changed size. Adjust the VM object's size and v_filesize * before we start scanning pages to prevent new pages from being * allocated during the scan. */ nobjsize = OFF_TO_IDX(nsize + PAGE_MASK); oobjsize = object->size; object->size = nobjsize; /* * File has shrunk. Toss any cached pages beyond the new EOF. */ if (nsize < vp->v_filesize) { vp->v_filesize = nsize; if (nobjsize < oobjsize) { vm_object_page_remove(object, nobjsize, oobjsize, FALSE); } /* * This gets rid of garbage at the end of a page that is now * only partially backed by the vnode. Since we are setting * the entire page valid & clean after we are done we have * to be sure that the portion of the page within the file * bounds is already valid. If it isn't then making it * valid would create a corrupt block. */ if (nsize & PAGE_MASK) { vm_offset_t kva; vm_page_t m; m = vm_page_lookup_busy_wait(object, OFF_TO_IDX(nsize), TRUE, "vsetsz"); if (m && m->valid) { int base = (int)nsize & PAGE_MASK; int size = PAGE_SIZE - base; struct lwbuf *lwb; struct lwbuf lwb_cache; /* * Clear out partial-page garbage in case * the page has been mapped. * * This is byte aligned. */ lwb = lwbuf_alloc(m, &lwb_cache); kva = lwbuf_kva(lwb); bzero((caddr_t)kva + base, size); lwbuf_free(lwb); /* * XXX work around SMP data integrity race * by unmapping the page from user processes. * The garbage we just cleared may be mapped * to a user process running on another cpu * and this code is not running through normal * I/O channels which handle SMP issues for * us, so unmap page to synchronize all cpus. * * XXX should vm_pager_unmap_page() have * dealt with this? */ vm_page_protect(m, VM_PROT_NONE); /* * Clear out partial-page dirty bits. This * has the side effect of setting the valid * bits, but that is ok. There are a bunch * of places in the VM system where we expected * m->dirty == VM_PAGE_BITS_ALL. The file EOF * case is one of them. If the page is still * partially dirty, make it fully dirty. * * NOTE: We do not clear out the valid * bits. This would prevent bogus_page * replacement from working properly. * * NOTE: We do not want to clear the dirty * bit for a partial DEV_BSIZE'd truncation! * This is DEV_BSIZE aligned! */ vm_page_clear_dirty_beg_nonincl(m, base, size); if (m->dirty != 0) m->dirty = VM_PAGE_BITS_ALL; vm_page_wakeup(m); } else if (m) { vm_page_wakeup(m); } } } else { vp->v_filesize = nsize; } vm_object_drop(object); }
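The base/size arithmetic used for the partial-page bzero above is easy to check in isolation. A tiny worked example, assuming 4 KiB pages and a hypothetical 10000-byte truncation point:

/* Worked example of the partial-page arithmetic (PAGE_SIZE assumed 4096). */
#include <stdio.h>

#define PAGE_SIZE 4096
#define PAGE_MASK (PAGE_SIZE - 1)

int
main(void)
{
	long long nsize = 10000;		/* hypothetical new file size */
	int base = (int)(nsize & PAGE_MASK);	/* offset of EOF within its page */
	int size = PAGE_SIZE - base;		/* bytes past EOF to zero */

	/* 10000 & 4095 = 1808, so bytes 1808..4095 of the last page are cleared. */
	printf("EOF lands %d bytes into its page; zero the trailing %d bytes\n",
	       base, size);
	return 0;
}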
/* * Allocate a VM object for a vnode, typically a regular file vnode. * * Some additional information is required to generate a properly sized * object which covers the entire buffer cache buffer straddling the file * EOF. Userland does not see the extra pages as the VM fault code tests * against v_filesize. */ vm_object_t vnode_pager_alloc(void *handle, off_t length, vm_prot_t prot, off_t offset, int blksize, int boff) { vm_object_t object; struct vnode *vp; off_t loffset; vm_pindex_t lsize; /* * Pageout to vnode, no can do yet. */ if (handle == NULL) return (NULL); /* * XXX hack - This initialization should be put somewhere else. */ if (vnode_pbuf_freecnt < 0) { vnode_pbuf_freecnt = nswbuf / 2 + 1; } /* * Serialize potential vnode/object teardowns and interlocks */ vp = (struct vnode *)handle; lwkt_gettoken(&vp->v_token); /* * If the object is being terminated, wait for it to * go away. */ object = vp->v_object; if (object) { vm_object_hold(object); KKASSERT((object->flags & OBJ_DEAD) == 0); } if (VREFCNT(vp) <= 0) panic("vnode_pager_alloc: no vnode reference"); /* * Round up to the *next* block, then destroy the buffers in question. * Since we are only removing some of the buffers we must rely on the * scan count to determine whether a loop is necessary. * * Destroy any pages beyond the last buffer. */ if (boff < 0) boff = (int)(length % blksize); if (boff) loffset = length + (blksize - boff); else loffset = length; lsize = OFF_TO_IDX(round_page64(loffset)); if (object == NULL) { /* * And an object of the appropriate size */ object = vm_object_allocate_hold(OBJT_VNODE, lsize); object->handle = handle; vp->v_object = object; vp->v_filesize = length; if (vp->v_mount && (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC)) vm_object_set_flag(object, OBJ_NOMSYNC); vref(vp); } else { vm_object_reference_quick(object); /* also vref's */ if (object->size != lsize) { kprintf("vnode_pager_alloc: Warning, objsize " "mismatch %jd/%jd vp=%p obj=%p\n", (intmax_t)object->size, (intmax_t)lsize, vp, object); } if (vp->v_filesize != length) { kprintf("vnode_pager_alloc: Warning, filesize " "mismatch %jd/%jd vp=%p obj=%p\n", (intmax_t)vp->v_filesize, (intmax_t)length, vp, object); } } vm_object_drop(object); lwkt_reltoken(&vp->v_token); return (object); }
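A worked example of the object sizing above, using a hypothetical 10000-byte file on a filesystem with 16 KiB buffer cache blocks and 4 KiB pages. OFF_TO_IDX and round_page64 are re-implemented locally just for the demo.

/* Sketch of the vnode object sizing arithmetic (values hypothetical). */
#include <stdio.h>

#define PAGE_SIZE	4096
#define PAGE_MASK	(PAGE_SIZE - 1)
#define OFF_TO_IDX(off)   ((unsigned long)((off) >> 12))	/* PAGE_SHIFT == 12 assumed */
#define round_page64(off) (((off) + PAGE_MASK) & ~(long long)PAGE_MASK)

int
main(void)
{
	long long length = 10000;	/* file EOF */
	int blksize = 16384;		/* buffer cache block size */
	int boff = (int)(length % blksize);
	long long loffset = boff ? length + (blksize - boff) : length;

	/*
	 * The object covers the whole buffer straddling EOF: 16384 bytes
	 * here, i.e. 4 pages, even though userland only ever faults the
	 * first 3 because v_filesize bounds the fault path.
	 */
	printf("loffset=%lld lsize=%lu pages\n",
	       loffset, OFF_TO_IDX(round_page64(loffset)));
	return 0;
}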
/*
 * vm_contig_pg_kmap:
 *
 * Map a previously allocated (vm_contig_pg_alloc) range of pages from
 * vm_page_array[] into the KVA.  Once mapped, the pages are part of
 * the kernel and are to be freed with kmem_free(&kernel_map, addr, size).
 *
 * No requirements.
 */
vm_offset_t
vm_contig_pg_kmap(int start, u_long size, vm_map_t map, int flags)
{
	vm_offset_t addr, tmp_addr;
	vm_page_t pga = vm_page_array;
	int i, count;

	size = round_page(size);
	if (size == 0)
		panic("vm_contig_pg_kmap: size must not be 0");
	crit_enter();
	lwkt_gettoken(&vm_token);

	/*
	 * We've found a contiguous chunk that meets our requirements.
	 * Allocate KVM, and assign phys pages and return a kernel VM
	 * pointer.
	 */
	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
	vm_map_lock(map);
	if (vm_map_findspace(map, vm_map_min(map), size, PAGE_SIZE, 0, &addr) !=
	    KERN_SUCCESS) {
		/*
		 * XXX We almost never run out of kernel virtual
		 * space, so we don't make the allocated memory
		 * above available.
		 */
		vm_map_unlock(map);
		vm_map_entry_release(count);
		lwkt_reltoken(&vm_token);
		crit_exit();
		return (0);
	}

	/*
	 * kernel_object maps 1:1 to kernel_map.
	 */
	vm_object_hold(&kernel_object);
	vm_object_reference(&kernel_object);
	vm_map_insert(map, &count,
		      &kernel_object, addr, addr, addr + size,
		      VM_MAPTYPE_NORMAL,
		      VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);
	vm_map_entry_release(count);

	tmp_addr = addr;
	for (i = start; i < (start + size / PAGE_SIZE); i++) {
		vm_page_t m = &pga[i];
		vm_page_insert(m, &kernel_object, OFF_TO_IDX(tmp_addr));
		if ((flags & M_ZERO) && !(m->flags & PG_ZERO))
			pmap_zero_page(VM_PAGE_TO_PHYS(m));
		m->flags = 0;
		tmp_addr += PAGE_SIZE;
	}
	vm_map_wire(map, addr, addr + size, 0);

	vm_object_drop(&kernel_object);
	lwkt_reltoken(&vm_token);
	crit_exit();
	return (addr);
}
/*
 * vm_contig_pg_clean:
 *
 * Do a thorough cleanup of the specified 'queue', which can be either
 * PQ_ACTIVE or PQ_INACTIVE, by doing a walkthrough.  If the page is not
 * marked dirty it is shoved into the page cache, provided no one has
 * currently acquired it; otherwise localized action per object type
 * is taken for cleanup:
 *
 *	In the OBJT_VNODE case, the whole page range is cleaned up
 *	using the vm_object_page_clean() routine, by specifying a
 *	start and end of '0'.
 *
 *	Otherwise if the object is of any other type, the generic
 *	pageout (daemon) flush routine is invoked.
 */
static void
vm_contig_pg_clean(int queue, int count)
{
	vm_object_t object;
	vm_page_t m, m_tmp;
	struct vm_page marker;
	struct vpgqueues *pq = &vm_page_queues[queue];

	/*
	 * Setup a local marker
	 */
	bzero(&marker, sizeof(marker));
	marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER;
	marker.queue = queue;
	marker.wire_count = 1;

	vm_page_queues_spin_lock(queue);
	TAILQ_INSERT_HEAD(&pq->pl, &marker, pageq);
	vm_page_queues_spin_unlock(queue);

	/*
	 * Iterate the queue.  Note that the vm_page spinlock must be
	 * acquired before the pageq spinlock so it's easiest to simply
	 * not hold it in the loop iteration.
	 */
	while (count-- > 0 && (m = TAILQ_NEXT(&marker, pageq)) != NULL) {
		vm_page_and_queue_spin_lock(m);
		if (m != TAILQ_NEXT(&marker, pageq)) {
			vm_page_and_queue_spin_unlock(m);
			++count;
			continue;
		}
		KKASSERT(m->queue == queue);

		TAILQ_REMOVE(&pq->pl, &marker, pageq);
		TAILQ_INSERT_AFTER(&pq->pl, m, &marker, pageq);

		if (m->flags & PG_MARKER) {
			vm_page_and_queue_spin_unlock(m);
			continue;
		}
		if (vm_page_busy_try(m, TRUE)) {
			vm_page_and_queue_spin_unlock(m);
			continue;
		}
		vm_page_and_queue_spin_unlock(m);

		/*
		 * We've successfully busied the page
		 */
		if (m->queue - m->pc != queue) {
			vm_page_wakeup(m);
			continue;
		}
		if (m->wire_count || m->hold_count) {
			vm_page_wakeup(m);
			continue;
		}
		if ((object = m->object) == NULL) {
			vm_page_wakeup(m);
			continue;
		}
		vm_page_test_dirty(m);
		if (m->dirty || (m->flags & PG_NEED_COMMIT)) {
			vm_object_hold(object);
			KKASSERT(m->object == object);

			if (object->type == OBJT_VNODE) {
				vm_page_wakeup(m);
				vn_lock(object->handle, LK_EXCLUSIVE|LK_RETRY);
				vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
				vn_unlock(((struct vnode *)object->handle));
			} else if (object->type == OBJT_SWAP ||
				   object->type == OBJT_DEFAULT) {
				m_tmp = m;
				vm_pageout_flush(&m_tmp, 1, 0);
			} else {
				vm_page_wakeup(m);
			}
			vm_object_drop(object);
		} else if (m->hold_count == 0) {
			vm_page_cache(m);
		} else {
			vm_page_wakeup(m);
		}
	}

	/*
	 * Scrap our local marker
	 */
	vm_page_queues_spin_lock(queue);
	TAILQ_REMOVE(&pq->pl, &marker, pageq);
	vm_page_queues_spin_unlock(queue);
}
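The marker technique used above (park a dummy queue entry and always resume the scan from it) can be illustrated outside the kernel with <sys/queue.h>. The sketch below is single-threaded, so the spinlocks are unnecessary and the PG_MARKER check (which guards against other scans' markers) is reduced to a comment; the node layout is hypothetical.

/* marker_scan.c: stand-alone sketch of the marker-based queue scan. */
#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	TAILQ_ENTRY(node) link;
	int value;
	int is_marker;
};

TAILQ_HEAD(nodelist, node);

int
main(void)
{
	struct nodelist list = TAILQ_HEAD_INITIALIZER(list);
	struct node marker = { .is_marker = 1 };
	struct node *n;
	int i;

	for (i = 0; i < 5; i++) {
		n = calloc(1, sizeof(*n));
		n->value = i;
		TAILQ_INSERT_TAIL(&list, n, link);
	}

	/* Scan from the head, always resuming from the marker. */
	TAILQ_INSERT_HEAD(&list, &marker, link);
	while ((n = TAILQ_NEXT(&marker, link)) != NULL) {
		/*
		 * Move the marker past the element we are about to process
		 * so the next iteration resumes there even if 'n' is removed
		 * in the meantime (the kernel also skips other scans'
		 * markers here via PG_MARKER).
		 */
		TAILQ_REMOVE(&list, &marker, link);
		TAILQ_INSERT_AFTER(&list, n, &marker, link);
		if (n->is_marker)
			continue;
		printf("visiting %d\n", n->value);
	}
	TAILQ_REMOVE(&list, &marker, link);
	return 0;
}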
/* * A VFS can call this function to try to dispose of a read request * directly from the VM system, pretty much bypassing almost all VFS * overhead except for atime updates. * * If 0 is returned some or all of the uio was handled. The caller must * check the uio and handle the remainder. * * The caller must fail on a non-zero error. */ int vop_helper_read_shortcut(struct vop_read_args *ap) { struct vnode *vp; struct uio *uio; struct lwbuf *lwb; struct lwbuf lwb_cache; vm_object_t obj; vm_page_t m; int offset; int n; int error; vp = ap->a_vp; uio = ap->a_uio; /* * We can't short-cut if there is no VM object or this is a special * UIO_NOCOPY read (typically from VOP_STRATEGY()). We also can't * do this if we cannot extract the filesize from the vnode. */ if (vm_read_shortcut_enable == 0) return(0); if (vp->v_object == NULL || uio->uio_segflg == UIO_NOCOPY) return(0); if (vp->v_filesize == NOOFFSET) return(0); if (uio->uio_resid == 0) return(0); /* * Iterate the uio on a page-by-page basis * * XXX can we leave the object held shared during the uiomove()? */ ++vm_read_shortcut_count; obj = vp->v_object; vm_object_hold_shared(obj); error = 0; while (uio->uio_resid && error == 0) { offset = (int)uio->uio_offset & PAGE_MASK; n = PAGE_SIZE - offset; if (n > uio->uio_resid) n = uio->uio_resid; if (vp->v_filesize < uio->uio_offset) break; if (uio->uio_offset + n > vp->v_filesize) n = vp->v_filesize - uio->uio_offset; if (n == 0) break; /* hit EOF */ m = vm_page_lookup_busy_try(obj, OFF_TO_IDX(uio->uio_offset), FALSE, &error); if (error || m == NULL) { ++vm_read_shortcut_failed; error = 0; break; } if ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) { ++vm_read_shortcut_failed; vm_page_wakeup(m); break; } lwb = lwbuf_alloc(m, &lwb_cache); /* * Use a no-fault uiomove() to avoid deadlocking against * our VM object (which could livelock on the same object * due to shared-vs-exclusive), or deadlocking against * our busied page. Returns EFAULT on any fault which * winds up diving a vnode. */ error = uiomove_nofault((char *)lwbuf_kva(lwb) + offset, n, uio); vm_page_flag_set(m, PG_REFERENCED); lwbuf_free(lwb); vm_page_wakeup(m); } vm_object_drop(obj); /* * Ignore EFAULT since we used uiomove_nofault(), causes caller * to fall-back to normal code for this case. */ if (error == EFAULT) error = 0; return (error); }
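The per-page loop above clips each copy to the page boundary, the remaining resid, and the file size. A stand-alone sketch of just that arithmetic, with hypothetical offsets and a 4 KiB page size:

/* Sketch of the page-by-page chunking done by the read shortcut. */
#include <stdio.h>

#define PAGE_SIZE 4096
#define PAGE_MASK (PAGE_SIZE - 1)

int
main(void)
{
	long long uio_offset = 4000;	/* starting file offset */
	long long uio_resid  = 9000;	/* bytes requested */
	long long filesize   = 10000;	/* vp->v_filesize analogue */

	while (uio_resid > 0) {
		int offset = (int)(uio_offset & PAGE_MASK);
		long long n = PAGE_SIZE - offset;

		if (n > uio_resid)
			n = uio_resid;
		if (uio_offset + n > filesize)
			n = filesize - uio_offset;
		if (n <= 0)
			break;			/* hit EOF */
		printf("page %lld: copy %lld bytes starting at in-page offset %d\n",
		       uio_offset / PAGE_SIZE, n, offset);
		uio_offset += n;
		uio_resid -= n;
	}
	return 0;
}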
static int link_elf_load_file(const char* filename, linker_file_t* result) { struct nlookupdata nd; struct thread *td = curthread; /* XXX */ struct proc *p = td->td_proc; struct vnode *vp; Elf_Ehdr *hdr; caddr_t firstpage; int nbytes, i; Elf_Phdr *phdr; Elf_Phdr *phlimit; Elf_Phdr *segs[2]; int nsegs; Elf_Phdr *phdyn; Elf_Phdr *phphdr; caddr_t mapbase; size_t mapsize; Elf_Off base_offset; Elf_Addr base_vaddr; Elf_Addr base_vlimit; int error = 0; int resid; elf_file_t ef; linker_file_t lf; char *pathname; Elf_Shdr *shdr; int symtabindex; int symstrindex; int symcnt; int strcnt; /* XXX Hack for firmware loading where p == NULL */ if (p == NULL) { p = &proc0; } KKASSERT(p != NULL); if (p->p_ucred == NULL) { kprintf("link_elf_load_file: cannot load '%s' from filesystem" " this early\n", filename); return ENOENT; } shdr = NULL; lf = NULL; pathname = linker_search_path(filename); if (pathname == NULL) return ENOENT; error = nlookup_init(&nd, pathname, UIO_SYSSPACE, NLC_FOLLOW|NLC_LOCKVP); if (error == 0) error = vn_open(&nd, NULL, FREAD, 0); kfree(pathname, M_LINKER); if (error) { nlookup_done(&nd); return error; } vp = nd.nl_open_vp; nd.nl_open_vp = NULL; nlookup_done(&nd); /* * Read the elf header from the file. */ firstpage = kmalloc(PAGE_SIZE, M_LINKER, M_WAITOK); hdr = (Elf_Ehdr *)firstpage; error = vn_rdwr(UIO_READ, vp, firstpage, PAGE_SIZE, 0, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, &resid); nbytes = PAGE_SIZE - resid; if (error) goto out; if (!IS_ELF(*hdr)) { error = ENOEXEC; goto out; } if (hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || hdr->e_ident[EI_DATA] != ELF_TARG_DATA) { link_elf_error("Unsupported file layout"); error = ENOEXEC; goto out; } if (hdr->e_ident[EI_VERSION] != EV_CURRENT || hdr->e_version != EV_CURRENT) { link_elf_error("Unsupported file version"); error = ENOEXEC; goto out; } if (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN) { error = ENOSYS; goto out; } if (hdr->e_machine != ELF_TARG_MACH) { link_elf_error("Unsupported machine"); error = ENOEXEC; goto out; } /* * We rely on the program header being in the first page. This is * not strictly required by the ABI specification, but it seems to * always true in practice. And, it simplifies things considerably. */ if (!((hdr->e_phentsize == sizeof(Elf_Phdr)) && (hdr->e_phoff + hdr->e_phnum*sizeof(Elf_Phdr) <= PAGE_SIZE) && (hdr->e_phoff + hdr->e_phnum*sizeof(Elf_Phdr) <= nbytes))) link_elf_error("Unreadable program headers"); /* * Scan the program header entries, and save key information. * * We rely on there being exactly two load segments, text and data, * in that order. */ phdr = (Elf_Phdr *) (firstpage + hdr->e_phoff); phlimit = phdr + hdr->e_phnum; nsegs = 0; phdyn = NULL; phphdr = NULL; while (phdr < phlimit) { switch (phdr->p_type) { case PT_LOAD: if (nsegs == 2) { link_elf_error("Too many sections"); error = ENOEXEC; goto out; } segs[nsegs] = phdr; ++nsegs; break; case PT_PHDR: phphdr = phdr; break; case PT_DYNAMIC: phdyn = phdr; break; case PT_INTERP: error = ENOSYS; goto out; } ++phdr; } if (phdyn == NULL) { link_elf_error("Object is not dynamically-linked"); error = ENOEXEC; goto out; } /* * Allocate the entire address space of the object, to stake out our * contiguous region, and to establish the base address for relocation. 
*/ base_offset = trunc_page(segs[0]->p_offset); base_vaddr = trunc_page(segs[0]->p_vaddr); base_vlimit = round_page(segs[1]->p_vaddr + segs[1]->p_memsz); mapsize = base_vlimit - base_vaddr; ef = kmalloc(sizeof(struct elf_file), M_LINKER, M_WAITOK | M_ZERO); #ifdef SPARSE_MAPPING ef->object = vm_object_allocate(OBJT_DEFAULT, mapsize >> PAGE_SHIFT); if (ef->object == NULL) { kfree(ef, M_LINKER); error = ENOMEM; goto out; } vm_object_hold(ef->object); vm_object_reference_locked(ef->object); ef->address = (caddr_t)vm_map_min(&kernel_map); error = vm_map_find(&kernel_map, ef->object, 0, (vm_offset_t *)&ef->address, mapsize, PAGE_SIZE, 1, VM_MAPTYPE_NORMAL, VM_PROT_ALL, VM_PROT_ALL, 0); vm_object_drop(ef->object); if (error) { vm_object_deallocate(ef->object); kfree(ef, M_LINKER); goto out; } #else ef->address = kmalloc(mapsize, M_LINKER, M_WAITOK); #endif mapbase = ef->address; /* * Read the text and data sections and zero the bss. */ for (i = 0; i < 2; i++) { caddr_t segbase = mapbase + segs[i]->p_vaddr - base_vaddr; error = vn_rdwr(UIO_READ, vp, segbase, segs[i]->p_filesz, segs[i]->p_offset, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, &resid); if (error) { #ifdef SPARSE_MAPPING vm_map_remove(&kernel_map, (vm_offset_t) ef->address, (vm_offset_t) ef->address + (ef->object->size << PAGE_SHIFT)); vm_object_deallocate(ef->object); #else kfree(ef->address, M_LINKER); #endif kfree(ef, M_LINKER); goto out; } bzero(segbase + segs[i]->p_filesz, segs[i]->p_memsz - segs[i]->p_filesz); #ifdef SPARSE_MAPPING /* * Wire down the pages */ vm_map_wire(&kernel_map, (vm_offset_t) segbase, (vm_offset_t) segbase + segs[i]->p_memsz, 0); #endif } ef->dynamic = (const Elf_Dyn *) (mapbase + phdyn->p_vaddr - base_vaddr); lf = linker_make_file(filename, ef, &link_elf_file_ops); if (lf == NULL) { #ifdef SPARSE_MAPPING vm_map_remove(&kernel_map, (vm_offset_t) ef->address, (vm_offset_t) ef->address + (ef->object->size << PAGE_SHIFT)); vm_object_deallocate(ef->object); #else kfree(ef->address, M_LINKER); #endif kfree(ef, M_LINKER); error = ENOMEM; goto out; } lf->address = ef->address; lf->size = mapsize; error = parse_dynamic(lf); if (error) goto out; link_elf_reloc_local(lf); error = linker_load_dependencies(lf); if (error) goto out; error = relocate_file(lf); if (error) goto out; /* Try and load the symbol table if it's present. (you can strip it!) 
*/ nbytes = hdr->e_shnum * hdr->e_shentsize; if (nbytes == 0 || hdr->e_shoff == 0) goto nosyms; shdr = kmalloc(nbytes, M_LINKER, M_WAITOK | M_ZERO); error = vn_rdwr(UIO_READ, vp, (caddr_t)shdr, nbytes, hdr->e_shoff, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, &resid); if (error) goto out; symtabindex = -1; symstrindex = -1; for (i = 0; i < hdr->e_shnum; i++) { if (shdr[i].sh_type == SHT_SYMTAB) { symtabindex = i; symstrindex = shdr[i].sh_link; } } if (symtabindex < 0 || symstrindex < 0) goto nosyms; symcnt = shdr[symtabindex].sh_size; ef->symbase = kmalloc(symcnt, M_LINKER, M_WAITOK); strcnt = shdr[symstrindex].sh_size; ef->strbase = kmalloc(strcnt, M_LINKER, M_WAITOK); error = vn_rdwr(UIO_READ, vp, ef->symbase, symcnt, shdr[symtabindex].sh_offset, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, &resid); if (error) goto out; error = vn_rdwr(UIO_READ, vp, ef->strbase, strcnt, shdr[symstrindex].sh_offset, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, &resid); if (error) goto out; ef->ddbsymcnt = symcnt / sizeof(Elf_Sym); ef->ddbsymtab = (const Elf_Sym *)ef->symbase; ef->ddbstrcnt = strcnt; ef->ddbstrtab = ef->strbase; nosyms: *result = lf; out: if (error && lf) linker_file_unload(lf); if (shdr) kfree(shdr, M_LINKER); if (firstpage) kfree(firstpage, M_LINKER); vn_unlock(vp); vn_close(vp, FREAD); return error; }
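link_elf_load_file insists on exactly two PT_LOAD segments (text, then data) and derives the mapping bounds from them. The userland sketch below performs the same program-header scan on an ET_EXEC/ET_DYN file, simply taking the first and last PT_LOAD; it uses the generic <elf.h> types, assumes 4 KiB pages, and is not loader code.

/* phdr_bounds.c: hypothetical helper, not part of the linker. */
#include <elf.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE	4096	/* assumed */
#define trunc_page(x)	((x) & ~(Elf64_Addr)(PAGE_SIZE - 1))
#define round_page(x)	(((x) + PAGE_SIZE - 1) & ~(Elf64_Addr)(PAGE_SIZE - 1))

int
main(int argc, char **argv)
{
	Elf64_Ehdr hdr;
	Elf64_Phdr *phdr;
	Elf64_Addr base_vaddr, base_vlimit;
	int i, nsegs = 0, first = -1, last = -1;
	FILE *fp;

	if (argc != 2) {
		fprintf(stderr, "usage: %s file\n", argv[0]);
		return 1;
	}
	if ((fp = fopen(argv[1], "rb")) == NULL) {
		perror("fopen");
		return 1;
	}
	if (fread(&hdr, sizeof(hdr), 1, fp) != 1 ||
	    hdr.e_phentsize != sizeof(Elf64_Phdr)) {
		fprintf(stderr, "unexpected ELF header\n");
		return 1;
	}
	phdr = calloc(hdr.e_phnum, sizeof(*phdr));
	if (phdr == NULL || fseek(fp, (long)hdr.e_phoff, SEEK_SET) != 0 ||
	    fread(phdr, sizeof(*phdr), hdr.e_phnum, fp) != hdr.e_phnum) {
		fprintf(stderr, "cannot read program headers\n");
		return 1;
	}
	for (i = 0; i < hdr.e_phnum; i++) {
		if (phdr[i].p_type != PT_LOAD)
			continue;
		if (first < 0)
			first = i;
		last = i;
		nsegs++;
	}
	if (nsegs == 0) {
		fprintf(stderr, "no PT_LOAD segments\n");
		return 1;
	}
	/* The loader uses segs[0] (text) and segs[1] (data) here. */
	base_vaddr = trunc_page(phdr[first].p_vaddr);
	base_vlimit = round_page(phdr[last].p_vaddr + phdr[last].p_memsz);
	printf("%d load segments, mapsize %llu bytes\n", nsegs,
	       (unsigned long long)(base_vlimit - base_vaddr));
	free(phdr);
	fclose(fp);
	return 0;
}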