Example #1
/*
 * HAMMER2 ioctl: reconnect a single-device ("local") hammer2 mount to a
 * new cluster connection.  Userland passes in a file descriptor for the
 * new connection; the kernel pins it with holdfp() and hands it off to
 * hammer2_cluster_reconnect().
 */
static int
hammer2_ioctl_recluster(hammer2_inode_t *ip, void *data)
{
	hammer2_ioc_recluster_t *recl = data;
	struct file *fp;
	hammer2_cluster_t *cluster;
	int error;

	fp = holdfp(curproc->p_fd, recl->fd, -1);
	if (fp) {
		kprintf("reconnect to cluster: XXX ");
		cluster = &ip->pmp->iroot->cluster;
		if (cluster->nchains != 1 || cluster->focus == NULL) {
			kprintf("not a local device mount\n");
			error = EINVAL;
		} else {
			hammer2_cluster_reconnect(cluster->focus->hmp, fp);
			kprintf("ok\n");
			error = 0;
		}
	} else {
		error = EINVAL;
	}
	return error;
}
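
A minimal userland sketch of how this handler might be reached. The HAMMER2IOC_RECLUSTER ioctl number, the hammer2_ioc_recluster_t layout (only the fd member is visible above), and the header path are assumptions taken from the hammer2 sources, not confirmed by this listing:

#include <sys/types.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <vfs/hammer2/hammer2_ioctl.h>	/* assumed header for the ioctl */

/*
 * Hypothetical helper: ask the hammer2 mount at mntpt to reattach its
 * cluster to the connection on newfd (e.g. a freshly connected socket).
 */
static int
recluster(const char *mntpt, int newfd)
{
	hammer2_ioc_recluster_t recl;
	int mfd;
	int rc;

	mfd = open(mntpt, O_RDONLY);
	if (mfd < 0)
		return (-1);
	memset(&recl, 0, sizeof(recl));
	recl.fd = newfd;	/* descriptor the kernel holdfp()s above */
	rc = ioctl(mfd, HAMMER2IOC_RECLUSTER, &recl);
	close(mfd);
	return (rc);
}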
Example #2
/* 
 * mmap_args(void *addr, size_t len, int prot, int flags, int fd,
 *		long pad, off_t pos)
 *
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 *
 * Block devices can be mmap'd no matter what they represent.  Cache coherency
 * is maintained as long as you do not write directly to the underlying
 * character device.
 *
 * No requirements
 */
int
kern_mmap(struct vmspace *vms, caddr_t uaddr, size_t ulen,
	  int uprot, int uflags, int fd, off_t upos, void **res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp = NULL;
	struct vnode *vp;
	vm_offset_t addr;
	vm_offset_t tmpaddr;
	vm_size_t size, pageoff;
	vm_prot_t prot, maxprot;
	void *handle;
	int flags, error;
	off_t pos;
	vm_object_t obj;

	KKASSERT(p);

	addr = (vm_offset_t) uaddr;
	size = ulen;
	prot = uprot & VM_PROT_ALL;
	flags = uflags;
	pos = upos;

	/*
	 * Make sure mapping fits into numeric range etc.
	 *
	 * NOTE: We support the full unsigned range for size now.
	 */
	if ((flags & MAP_ANON) && (fd != -1 || pos != 0))
		return (EINVAL);

	if (size == 0)
		return (EINVAL);

	/*
	 * MAP_STACK mappings must be anonymous (no descriptor) and both
	 * readable and writable; force MAP_ANON and a zero offset.
	 */
	if (flags & MAP_STACK) {
		if ((fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}

	/*
	 * Virtual page tables cannot be used with MAP_STACK.  Apart from
	 * it not making any sense, the aux union is used by both
	 * types.
	 *
	 * Because the virtual page table is stored in the backing object
	 * and might be updated by the kernel, the mapping must be R+W.
	 */
	if (flags & MAP_VPAGETABLE) {
		if (vkernel_enable == 0)
			return (EOPNOTSUPP);
		if (flags & MAP_STACK)
			return (EINVAL);
		if ((prot & (PROT_READ|PROT_WRITE)) != (PROT_READ|PROT_WRITE))
			return (EINVAL);
	}

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */
	if (size < ulen)			/* wrap */
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & (MAP_FIXED | MAP_TRYFIXED)) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/*
		 * Address range must be all in user VM space and not wrap.
		 */
		tmpaddr = addr + size;
		if (tmpaddr < addr)
			return (EINVAL);
		if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS)
			return (EINVAL);
		if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS)
			return (EINVAL);
	} else {
		/*
		 * Get a hint of where to map. It also provides mmap offset
		 * randomization if enabled.
		 */
		addr = vm_map_hint(p, addr, prot);
	}

	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
	} else {
		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		fp = holdfp(p->p_fd, fd, -1);
		if (fp == NULL)
			return (EBADF);
		if (fp->f_type != DTYPE_VNODE) {
			error = EINVAL;
			goto done;
		}
		/*
		 * POSIX shared-memory objects are defined to have
		 * kernel persistence, and are not defined to support
		 * read(2)/write(2) -- or even open(2).  Thus, we can
		 * use MAP_NOSYNC to trade on-disk coherence for speed.
		 * The shm_open(3) library routine turns on the FPOSIXSHM
		 * flag to request this behavior.
		 */
		if (fp->f_flag & FPOSIXSHM)
			flags |= MAP_NOSYNC;
		vp = (struct vnode *) fp->f_data;

		/*
		 * Validate the vnode for the operation.
		 */
		switch(vp->v_type) {
		case VREG:
			/*
			 * Get the proper underlying object
			 */
			if ((obj = vp->v_object) == NULL) {
				error = EINVAL;
				goto done;
			}
			KKASSERT((struct vnode *)obj->handle == vp);
			break;
		case VCHR:
			/*
			 * Make sure a device has not been revoked.  
			 * Mappability is handled by the device layer.
			 */
			if (vp->v_rdev == NULL) {
				error = EBADF;
				goto done;
			}
			break;
		default:
			/*
			 * Nothing else is mappable.
			 */
			error = EINVAL;
			goto done;
		}

		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			handle = NULL;
			maxprot = VM_PROT_ALL;
			flags |= MAP_ANON;
			pos = 0;
		} else {
			/*
			 * cdevs do not provide private mappings of any kind.
			 */
			if (vp->v_type == VCHR &&
			    (flags & (MAP_PRIVATE|MAP_COPY))) {
				error = EINVAL;
				goto done;
			}
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_flag & FREAD) {
				maxprot |= VM_PROT_READ;
			} else if (prot & PROT_READ) {
				error = EACCES;
				goto done;
			}
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out.  Check for superuser, only if
			 * we're at securelevel < 1, to allow the XIG X server
			 * to continue to work.
			 */
			if ((flags & MAP_SHARED) != 0 || vp->v_type == VCHR) {
				if ((fp->f_flag & FWRITE) != 0) {
					struct vattr va;
					if ((error = VOP_GETATTR(vp, &va))) {
						goto done;
					}
					if ((va.va_flags &
					    (IMMUTABLE|APPEND)) == 0) {
						maxprot |= VM_PROT_WRITE;
					} else if (prot & PROT_WRITE) {
						error = EPERM;
						goto done;
					}
				} else if ((prot & PROT_WRITE) != 0) {
					error = EACCES;
					goto done;
				}
			} else {
				maxprot |= VM_PROT_WRITE;
			}
			handle = (void *)vp;
		}
	}

	lwkt_gettoken(&vms->vm_map.token);

	/*
	 * Do not allow more than a certain number of vm_map_entry structures
	 * per process.  Scale with the number of rforks sharing the map
	 * to make the limit reasonable for threads.
	 */
	if (max_proc_mmap && 
	    vms->vm_map.nentries >= max_proc_mmap * vmspace_getrefs(vms)) {
		error = ENOMEM;
		lwkt_reltoken(&vms->vm_map.token);
		goto done;
	}

	error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot,
			flags, handle, pos);
	if (error == 0)
		*res = (void *)(addr + pageoff);

	lwkt_reltoken(&vms->vm_map.token);
done:
	if (fp)
		fdrop(fp);

	return (error);
}
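
The offset and length bookkeeping above is easy to get wrong when reading it cold. Here is a small standalone sketch of the same arithmetic, with a 4096-byte PAGE_SIZE assumed purely for illustration:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(PAGE_SIZE - 1)
#define round_page(x)	(((x) + PAGE_MASK) & ~PAGE_MASK)

int
main(void)
{
	unsigned long pos = 0x1234;	/* user-supplied file offset */
	unsigned long len = 0x0fff;	/* user-supplied length */
	unsigned long pageoff, size;

	/* Split the sub-page offset off the file position... */
	pageoff = pos & PAGE_MASK;		/* 0x234 */
	pos -= pageoff;				/* 0x1000, page aligned */

	/* ...and round the size up to whole pages on both ends. */
	size = round_page(len + pageoff);	/* 0x1233 -> 0x2000 */

	/*
	 * With MAP_FIXED the user address must carry the same sub-page
	 * remainder, so (addr - pageoff) must come out page aligned.
	 * On success the caller gets back the mapped base plus pageoff,
	 * matching "*res = (void *)(addr + pageoff)" above.
	 */
	printf("map file offset %#lx, size %#lx, user sees base+%#lx\n",
	    pos, size, pageoff);
	return (0);
}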
Example #3
/*
 * fdesc_getattr(struct vnode *a_vp, struct vattr *a_vap, struct ucred *a_cred)
 */
static int
fdesc_getattr(struct vop_getattr_args *ap)
{
	struct proc *p = curproc;
	struct vnode *vp = ap->a_vp;
	struct vattr *vap = ap->a_vap;
	struct file *fp;
	struct stat stb;
	u_int fd;
	int error = 0;

	KKASSERT(p);

	switch (VTOFDESC(vp)->fd_type) {
	case Froot:
		VATTR_NULL(vap);

		vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
		vap->va_type = VDIR;
		vap->va_nlink = 2;
		vap->va_size = DEV_BSIZE;
		vap->va_fileid = VTOFDESC(vp)->fd_ix;
		vap->va_uid = 0;
		vap->va_gid = 0;
		vap->va_blocksize = DEV_BSIZE;
		vap->va_atime.tv_sec = boottime.tv_sec;
		vap->va_atime.tv_nsec = 0;
		vap->va_mtime = vap->va_atime;
		vap->va_ctime = vap->va_mtime;
		vap->va_gen = 0;
		vap->va_flags = 0;
		vap->va_rmajor = VNOVAL;
		vap->va_rminor = VNOVAL;
		vap->va_bytes = 0;
		break;

	case Fdesc:
		fd = VTOFDESC(vp)->fd_fd;

		fp = holdfp(p->p_fd, fd, -1);
		if (fp == NULL)
			return (EBADF);

		bzero(&stb, sizeof(stb));
		error = fo_stat(fp, &stb, p->p_ucred);
		fdrop(fp);

		if (error == 0) {
			VATTR_NULL(vap);
			vap->va_type = IFTOVT(stb.st_mode);
			vap->va_mode = stb.st_mode;
			/*
			 * If the descriptor refers to a directory, strip
			 * the read/execute bits at all three permission
			 * levels (user, group, other).
			 */
#define FDRX (VREAD|VEXEC)
			if (vap->va_type == VDIR)
				vap->va_mode &= ~((FDRX)|(FDRX>>3)|(FDRX>>6));
#undef FDRX
			vap->va_nlink = 1;
			vap->va_flags = 0;
			vap->va_bytes = stb.st_blocks * stb.st_blksize;
			vap->va_fileid = VTOFDESC(vp)->fd_ix;
			vap->va_size = stb.st_size;
			vap->va_blocksize = stb.st_blksize;
			vap->va_rmajor = umajor(stb.st_rdev);
			vap->va_rminor = uminor(stb.st_rdev);

			/*
			 * If no time data is provided, use the current time.
			 */
			if (stb.st_atimespec.tv_sec == 0 &&
			    stb.st_atimespec.tv_nsec == 0)
				nanotime(&stb.st_atimespec);

			if (stb.st_ctimespec.tv_sec == 0 &&
			    stb.st_ctimespec.tv_nsec == 0)
				nanotime(&stb.st_ctimespec);

			if (stb.st_mtimespec.tv_sec == 0 &&
			    stb.st_mtimespec.tv_nsec == 0)
				nanotime(&stb.st_mtimespec);

			vap->va_atime = stb.st_atimespec;
			vap->va_mtime = stb.st_mtimespec;
			vap->va_ctime = stb.st_ctimespec;
			vap->va_uid = stb.st_uid;
			vap->va_gid = stb.st_gid;
		}
		break;

	default:
		panic("fdesc_getattr");
		break;
	}

	return (error);
}
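
A short userland sketch of what this getattr path serves. It assumes an fdescfs mount at /dev/fd (descriptors beyond 0-2 are only visible there when fdescfs is actually mounted): the attributes reported for /dev/fd/N come from fo_stat() on descriptor N, as in the Fdesc case above.

#include <sys/stat.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct stat via_fd, via_fdesc;
	char path[32];
	int fd;

	fd = open("/etc/passwd", O_RDONLY);
	if (fd < 0)
		return (1);

	/* Direct fstat() of the descriptor... */
	if (fstat(fd, &via_fd) != 0)
		return (1);

	/*
	 * ...versus stat() through fdescfs, which runs fdesc_getattr()
	 * above and copies the fo_stat() results into the vattr.
	 */
	snprintf(path, sizeof(path), "/dev/fd/%d", fd);
	if (stat(path, &via_fdesc) == 0) {
		printf("size via fstat: %jd, via %s: %jd\n",
		    (intmax_t)via_fd.st_size, path,
		    (intmax_t)via_fdesc.st_size);
	}
	close(fd);
	return (0);
}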