/*
 * hammer2_ioctl_recluster() - Reconnect this mount's cluster to the
 * device open on the caller-supplied descriptor (recl->fd).
 *
 * Only single-chain (local device) mounts can be reconnected.
 *
 * Returns 0 on success, EINVAL on a bad descriptor or a non-local mount.
 */
static int
hammer2_ioctl_recluster(hammer2_inode_t *ip, void *data)
{
	hammer2_ioc_recluster_t *recl = data;
	struct file *fp;
	hammer2_cluster_t *cluster;
	int error;

	fp = holdfp(curproc->p_fd, recl->fd, -1);
	if (fp) {
		kprintf("reconnect to cluster: XXX ");
		cluster = &ip->pmp->iroot->cluster;
		if (cluster->nchains != 1 || cluster->focus == NULL) {
			kprintf("not a local device mount\n");
			/*
			 * BUGFIX: release the file reference acquired by
			 * holdfp() above; this error path previously leaked
			 * it.  On the success path the reference is handed
			 * off to hammer2_cluster_reconnect(), so no fdrop()
			 * is needed there.
			 */
			fdrop(fp);
			error = EINVAL;
		} else {
			hammer2_cluster_reconnect(cluster->focus->hmp, fp);
			kprintf("ok\n");
			error = 0;
		}
	} else {
		/* holdfp() failed; no reference was taken */
		error = EINVAL;
	}
	return error;
}
/*
 * mmap_args(void *addr, size_t len, int prot, int flags, int fd,
 *	     long pad, off_t pos)
 *
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag it set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 *
 * Block devices can be mmap'd no matter what they represent.  Cache coherency
 * is maintained as long as you do not write directly to the underlying
 * character device.
 *
 * No requirements
 */
int
kern_mmap(struct vmspace *vms, caddr_t uaddr, size_t ulen,
	  int uprot, int uflags, int fd, off_t upos, void **res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp = NULL;		/* held descriptor, dropped at 'done' */
	struct vnode *vp;
	vm_offset_t addr;
	vm_offset_t tmpaddr;
	vm_size_t size, pageoff;
	vm_prot_t prot, maxprot;	/* requested prot / max allowed prot */
	void *handle;			/* NULL for anon, else the vnode */
	int flags, error;
	off_t pos;
	vm_object_t obj;

	KKASSERT(p);

	/* Copy user arguments into locals we are free to adjust below */
	addr = (vm_offset_t) uaddr;
	size = ulen;
	prot = uprot & VM_PROT_ALL;
	flags = uflags;
	pos = upos;

	/*
	 * Make sure mapping fits into numeric range etc.
	 *
	 * NOTE: We support the full unsigned range for size now.
	 */
	if (((flags & MAP_ANON) && (fd != -1 || pos != 0)))
		return (EINVAL);

	if (size == 0)
		return (EINVAL);

	if (flags & MAP_STACK) {
		/* Stack mappings must be anonymous and R+W */
		if ((fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}

	/*
	 * Virtual page tables cannot be used with MAP_STACK.  Apart from
	 * it not making any sense, the aux union is used by both
	 * types.
	 *
	 * Because the virtual page table is stored in the backing object
	 * and might be updated by the kernel, the mapping must be R+W.
	 */
	if (flags & MAP_VPAGETABLE) {
		if (vkernel_enable == 0)
			return (EOPNOTSUPP);
		if (flags & MAP_STACK)
			return (EINVAL);
		if ((prot & (PROT_READ|PROT_WRITE)) != (PROT_READ|PROT_WRITE))
			return (EINVAL);
	}

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */
	if (size < ulen)			/* wrap */
		return(EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & (MAP_FIXED | MAP_TRYFIXED)) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/*
		 * Address range must be all in user VM space and not wrap.
		 */
		tmpaddr = addr + size;
		if (tmpaddr < addr)
			return (EINVAL);
		if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS)
			return (EINVAL);
		if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS)
			return (EINVAL);
	} else {
		/*
		 * Get a hint of where to map. It also provides mmap offset
		 * randomization if enabled.
		 */
		addr = vm_map_hint(p, addr, prot);
	}

	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
	} else {
		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		fp = holdfp(p->p_fd, fd, -1);
		if (fp == NULL)
			return (EBADF);
		if (fp->f_type != DTYPE_VNODE) {
			error = EINVAL;
			goto done;
		}
		/*
		 * POSIX shared-memory objects are defined to have
		 * kernel persistence, and are not defined to support
		 * read(2)/write(2) -- or even open(2).  Thus, we can
		 * use MAP_ASYNC to trade on-disk coherence for speed.
		 * The shm_open(3) library routine turns on the FPOSIXSHM
		 * flag to request this behavior.
		 */
		if (fp->f_flag & FPOSIXSHM)
			flags |= MAP_NOSYNC;
		vp = (struct vnode *) fp->f_data;

		/*
		 * Validate the vnode for the operation.
		 */
		switch(vp->v_type) {
		case VREG:
			/*
			 * Get the proper underlying object
			 */
			if ((obj = vp->v_object) == NULL) {
				error = EINVAL;
				goto done;
			}
			KKASSERT((struct vnode *)obj->handle == vp);
			break;
		case VCHR:
			/*
			 * Make sure a device has not been revoked.
			 * Mappability is handled by the device layer.
			 */
			if (vp->v_rdev == NULL) {
				error = EBADF;
				goto done;
			}
			break;
		default:
			/*
			 * Nothing else is mappable.
			 */
			error = EINVAL;
			goto done;
		}

		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			handle = NULL;
			maxprot = VM_PROT_ALL;
			flags |= MAP_ANON;
			pos = 0;
		} else {
			/*
			 * cdevs does not provide private mappings of any kind.
			 */
			if (vp->v_type == VCHR &&
			    (flags & (MAP_PRIVATE|MAP_COPY))) {
				error = EINVAL;
				goto done;
			}
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_flag & FREAD) {
				maxprot |= VM_PROT_READ;
			} else if (prot & PROT_READ) {
				error = EACCES;
				goto done;
			}
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out.  Check for superuser, only if
			 * we're at securelevel < 1, to allow the XIG X server
			 * to continue to work.
			 */
			if ((flags & MAP_SHARED) != 0 || vp->v_type == VCHR) {
				if ((fp->f_flag & FWRITE) != 0) {
					struct vattr va;
					if ((error = VOP_GETATTR(vp, &va))) {
						goto done;
					}
					if ((va.va_flags &
					    (IMMUTABLE|APPEND)) == 0) {
						maxprot |= VM_PROT_WRITE;
					} else if (prot & PROT_WRITE) {
						/* immutable/append-only file */
						error = EPERM;
						goto done;
					}
				} else if ((prot & PROT_WRITE) != 0) {
					error = EACCES;
					goto done;
				}
			} else {
				/* private mapping: writes never hit the file */
				maxprot |= VM_PROT_WRITE;
			}
			handle = (void *)vp;
		}
	}

	lwkt_gettoken(&vms->vm_map.token);

	/*
	 * Do not allow more then a certain number of vm_map_entry structures
	 * per process.  Scale with the number of rforks sharing the map
	 * to make the limit reasonable for threads.
	 */
	if (max_proc_mmap &&
	    vms->vm_map.nentries >= max_proc_mmap * vmspace_getrefs(vms)) {
		error = ENOMEM;
		lwkt_reltoken(&vms->vm_map.token);
		goto done;
	}

	error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot,
			flags, handle, pos);
	if (error == 0)
		*res = (void *)(addr + pageoff);	/* re-add sub-page offset */

	lwkt_reltoken(&vms->vm_map.token);

done:
	if (fp)
		fdrop(fp);

	return (error);
}
/*
 * fdesc_getattr(struct vnode *a_vp, struct vattr *a_vap, struct ucred *a_cred)
 *
 * Return attributes for an fdescfs vnode.  The root directory gets
 * synthesized attributes; a descriptor node (Fdesc) reflects fo_stat()
 * of the underlying open file of the current process.
 *
 * NOTE(review): this chunk ends at the switch's closing brace; the
 * function's epilogue (final return) is outside the visible region.
 */
static int
fdesc_getattr(struct vop_getattr_args *ap)
{
	struct proc *p = curproc;
	struct vnode *vp = ap->a_vp;
	struct vattr *vap = ap->a_vap;
	struct file *fp;
	struct stat stb;
	u_int fd;
	int error = 0;

	KKASSERT(p);

	switch (VTOFDESC(vp)->fd_type) {
	case Froot:
		/*
		 * Synthetic attributes for the fdescfs root directory:
		 * world-searchable, timestamps pinned to boot time.
		 */
		VATTR_NULL(vap);

		vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
		vap->va_type = VDIR;
		vap->va_nlink = 2;
		vap->va_size = DEV_BSIZE;
		vap->va_fileid = VTOFDESC(vp)->fd_ix;
		vap->va_uid = 0;
		vap->va_gid = 0;
		vap->va_blocksize = DEV_BSIZE;
		vap->va_atime.tv_sec = boottime.tv_sec;
		vap->va_atime.tv_nsec = 0;
		vap->va_mtime = vap->va_atime;
		vap->va_ctime = vap->va_mtime;
		vap->va_gen = 0;
		vap->va_flags = 0;
		vap->va_rmajor = VNOVAL;
		vap->va_rminor = VNOVAL;
		vap->va_bytes = 0;
		break;

	case Fdesc:
		/*
		 * Stat the file the descriptor refers to and translate the
		 * result into vattr form.  The reference from holdfp() is
		 * released immediately after fo_stat().
		 */
		fd = VTOFDESC(vp)->fd_fd;
		fp = holdfp(p->p_fd, fd, -1);
		if (fp == NULL)
			return (EBADF);
		bzero(&stb, sizeof(stb));
		error = fo_stat(fp, &stb, curproc->p_ucred);
		fdrop(fp);
		if (error == 0) {
			VATTR_NULL(vap);
			vap->va_type = IFTOVT(stb.st_mode);
			vap->va_mode = stb.st_mode;
#define FDRX (VREAD|VEXEC)
			/*
			 * Strip read/execute from directories so they cannot
			 * be traversed through fdescfs (shifted for user,
			 * group, and other permission triplets).
			 */
			if (vap->va_type == VDIR)
				vap->va_mode &= ~((FDRX)|(FDRX>>3)|(FDRX>>6));
#undef FDRX
			vap->va_nlink = 1;
			vap->va_flags = 0;
			vap->va_bytes = stb.st_blocks * stb.st_blksize;
			vap->va_fileid = VTOFDESC(vp)->fd_ix;
			vap->va_size = stb.st_size;
			vap->va_blocksize = stb.st_blksize;
			vap->va_rmajor = umajor(stb.st_rdev);
			vap->va_rminor = uminor(stb.st_rdev);

			/*
			 * If no time data is provided, use the current time.
			 */
			if (stb.st_atimespec.tv_sec == 0 &&
			    stb.st_atimespec.tv_nsec == 0)
				nanotime(&stb.st_atimespec);

			if (stb.st_ctimespec.tv_sec == 0 &&
			    stb.st_ctimespec.tv_nsec == 0)
				nanotime(&stb.st_ctimespec);

			if (stb.st_mtimespec.tv_sec == 0 &&
			    stb.st_mtimespec.tv_nsec == 0)
				nanotime(&stb.st_mtimespec);

			vap->va_atime = stb.st_atimespec;
			vap->va_mtime = stb.st_mtimespec;
			vap->va_ctime = stb.st_ctimespec;
			vap->va_uid = stb.st_uid;
			vap->va_gid = stb.st_gid;
		}
		break;

	default:
		panic("fdesc_getattr");
		break;
	}