/*
 * The close() system call uses its own audit call to capture the path/vnode
 * information because those pieces are not easily obtained within the system
 * call itself.
 */
void
audit_sysclose(struct thread *td, int fd)
{
	struct kaudit_record *ar;
	struct vnode *vp;
	struct file *fp;
	int vfslocked;

	KASSERT(td != NULL, ("audit_sysclose: td == NULL"));

	ar = currecord();
	if (ar == NULL)
		return;

	audit_arg_fd(fd);

	if (getvnode(td->td_proc->p_fd, fd, &fp) != 0)
		return;

	vp = fp->f_vnode;
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vn_lock(vp, LK_SHARED | LK_RETRY);
	audit_arg_vnode1(vp);
	VOP_UNLOCK(vp, 0);
	VFS_UNLOCK_GIANT(vfslocked);
	fdrop(fp, td);
}
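
A minimal caller sketch, for illustration only: in the close(2) path the hook above runs through the AUDIT_SYSCLOSE() wrapper before the descriptor is torn down.  The simplified handler below is an assumption, not the actual kern_descrip.c code.

/*
 * Hypothetical sketch of a close(2) handler invoking audit_sysclose()
 * via the AUDIT_SYSCLOSE() wrapper; the real handler does more work.
 */
static int
close_sketch(struct thread *td, int fd)
{

	AUDIT_SYSCLOSE(td, fd);
	return (kern_close(td, fd));
}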
Example #2
int
kobj_read_file_vnode(struct _buf *file, char *buf, unsigned size, unsigned off)
{
	struct vnode *vp = file->ptr;
	struct thread *td = curthread;
	struct uio auio;
	struct iovec aiov;
	int error, vfslocked;

	bzero(&aiov, sizeof(aiov));
	bzero(&auio, sizeof(auio));

	aiov.iov_base = buf;
	aiov.iov_len = size;

	auio.uio_iov = &aiov;
	auio.uio_offset = (off_t)off;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_iovcnt = 1;
	auio.uio_resid = size;
	auio.uio_td = td;

	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vn_lock(vp, LK_SHARED | LK_RETRY);
	error = VOP_READ(vp, &auio, IO_UNIT | IO_SYNC, td->td_ucred);
	VOP_UNLOCK(vp, 0);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error != 0 ? -1 : size - auio.uio_resid);
}
Example #3
static void *
kobj_open_file_vnode(const char *file)
{
	struct thread *td = curthread;
	struct filedesc *fd;
	struct nameidata nd;
	int error, flags, vfslocked;

	fd = td->td_proc->p_fd;
	FILEDESC_XLOCK(fd);
	if (fd->fd_rdir == NULL) {
		fd->fd_rdir = rootvnode;
		vref(fd->fd_rdir);
	}
	if (fd->fd_cdir == NULL) {
		fd->fd_cdir = rootvnode;
		vref(fd->fd_cdir);
	}
	FILEDESC_XUNLOCK(fd);

	flags = FREAD | O_NOFOLLOW;
	NDINIT(&nd, LOOKUP, MPSAFE, UIO_SYSSPACE, file, td);
	error = vn_open_cred(&nd, &flags, 0, 0, curthread->td_ucred, NULL);
	if (error != 0)
		return (NULL);
	vfslocked = NDHASGIANT(&nd);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	/* Only unlock here; we still hold a reference to the vnode. */
	VOP_UNLOCK(nd.ni_vp, 0);
	VFS_UNLOCK_GIANT(vfslocked);
	return (nd.ni_vp);
}
/*
 * Audit information about a file, either the file's vnode info, or its
 * socket address info.
 */
void
audit_arg_file(struct proc *p, struct file *fp)
{
	struct kaudit_record *ar;
	struct socket *so;
	struct inpcb *pcb;
	struct vnode *vp;
	int vfslocked;

	ar = currecord();
	if (ar == NULL)
		return;

	switch (fp->f_type) {
	case DTYPE_VNODE:
	case DTYPE_FIFO:
		/*
		 * XXXAUDIT: Only possible to record as first vnode?
		 */
		vp = fp->f_vnode;
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		vn_lock(vp, LK_SHARED | LK_RETRY);
		audit_arg_vnode1(vp);
		VOP_UNLOCK(vp, 0);
		VFS_UNLOCK_GIANT(vfslocked);
		break;

	case DTYPE_SOCKET:
		so = (struct socket *)fp->f_data;
		if (INP_CHECK_SOCKAF(so, PF_INET)) {
			SOCK_LOCK(so);
			ar->k_ar.ar_arg_sockinfo.so_type =
			    so->so_type;
			ar->k_ar.ar_arg_sockinfo.so_domain =
			    INP_SOCKAF(so);
			ar->k_ar.ar_arg_sockinfo.so_protocol =
			    so->so_proto->pr_protocol;
			SOCK_UNLOCK(so);
			pcb = (struct inpcb *)so->so_pcb;
			INP_RLOCK(pcb);
			ar->k_ar.ar_arg_sockinfo.so_raddr =
			    pcb->inp_faddr.s_addr;
			ar->k_ar.ar_arg_sockinfo.so_laddr =
			    pcb->inp_laddr.s_addr;
			ar->k_ar.ar_arg_sockinfo.so_rport =
			    pcb->inp_fport;
			ar->k_ar.ar_arg_sockinfo.so_lport =
			    pcb->inp_lport;
			INP_RUNLOCK(pcb);
			ARG_SET_VALID(ar, ARG_SOCKINFO);
		}
		break;

	default:
		/* XXXAUDIT: else? */
		break;
	}
}
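
A hedged caller sketch (not from the original sources): a system call that wants its file argument audited can resolve the descriptor with fget(), hand the struct file to audit_arg_file(), and drop the reference.  The pre-capability fget() signature used here matches the era of the code above.

static int
audit_fd_arg_sketch(struct thread *td, int fd)
{
	struct file *fp;
	int error;

	/* Resolve the descriptor to a struct file (takes a reference). */
	error = fget(td, fd, &fp);
	if (error != 0)
		return (error);
	audit_arg_file(td->td_proc, fp);
	fdrop(fp, td);
	return (0);
}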
Example #5
int
alq_open_flags(struct alq **alqp, const char *file, struct ucred *cred, int cmode,
               int size, int flags)
{
    struct thread *td;
    struct nameidata nd;
    struct alq *alq;
    int oflags;
    int error;
    int vfslocked;

    KASSERT((size > 0), ("%s: size <= 0", __func__));

    *alqp = NULL;
    td = curthread;

    NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, file, td);
    oflags = FWRITE | O_NOFOLLOW | O_CREAT;

    error = vn_open_cred(&nd, &oflags, cmode, 0, cred, NULL);
    if (error)
        return (error);

    vfslocked = NDHASGIANT(&nd);
    NDFREE(&nd, NDF_ONLY_PNBUF);
    /* Only unlock here; we still hold a reference to the vnode. */
    VOP_UNLOCK(nd.ni_vp, 0);
    VFS_UNLOCK_GIANT(vfslocked);

    alq = bsd_malloc(sizeof(*alq), M_ALD, M_WAITOK|M_ZERO);
    alq->aq_vp = nd.ni_vp;
    alq->aq_cred = crhold(cred);

    mtx_init(&alq->aq_mtx, "ALD Queue", NULL, MTX_SPIN|MTX_QUIET);

    alq->aq_buflen = size;
    alq->aq_entmax = 0;
    alq->aq_entlen = 0;

    alq->aq_freebytes = alq->aq_buflen;
    alq->aq_entbuf = bsd_malloc(alq->aq_buflen, M_ALD, M_WAITOK|M_ZERO);
    alq->aq_writehead = alq->aq_writetail = 0;
    if (flags & ALQ_ORDERED)
        alq->aq_flags |= AQ_ORDERED;

    if ((error = ald_add(alq)) != 0) {
        alq_destroy(alq);
        return (error);
    }

    *alqp = alq;

    return (0);
}
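
A hedged usage sketch, not part of the original sources: open a variable-length ALQ backed by an illustrative log file, append one record with alq_writen(), and close it.  The path, permission bits, and buffer size are made up for the example.

static int
alq_usage_sketch(struct ucred *cred)
{
	struct alq *alq;
	char msg[] = "example alq record\n";
	int error;

	/* 16 KB buffer, not ALQ_ORDERED; the path is illustrative only. */
	error = alq_open_flags(&alq, "/var/log/example.alq", cred, 0600,
	    16 * 1024, 0);
	if (error != 0)
		return (error);
	error = alq_writen(alq, msg, sizeof(msg) - 1, ALQ_WAITOK);
	alq_close(alq);
	return (error);
}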
Example #6
void
kobj_close_file(struct _buf *file)
{

	if (file->mounted) {
		struct vnode *vp = file->ptr;
		struct thread *td = curthread;
		int vfslocked;

		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		vn_close(vp, FREAD, td->td_ucred, td);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	kmem_free(file, sizeof(*file));
}
Example #7
static int
kobj_get_filesize_vnode(struct _buf *file, uint64_t *size)
{
	struct vnode *vp = file->ptr;
	struct vattr va;
	int error, vfslocked;

	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vn_lock(vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vp, &va, curthread->td_ucred);
	VOP_UNLOCK(vp, 0);
	if (error == 0)
		*size = (uint64_t)va.va_size;
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
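
A hedged sketch of how the vnode-backed kobj helpers compose: query the file size, then read the whole file into a temporary buffer.  It assumes a struct _buf already set up by the (unshown) kobj_open_file() wrapper, with ->ptr holding the vnode as the helpers above expect.

static char *
kobj_slurp_sketch(struct _buf *file, uint64_t *lenp)
{
	uint64_t size;
	char *buf;

	if (kobj_get_filesize_vnode(file, &size) != 0 || size == 0)
		return (NULL);
	buf = malloc(size, M_TEMP, M_WAITOK);
	/* kobj_read_file_vnode() returns bytes read, or -1 on error. */
	if (kobj_read_file_vnode(file, buf, size, 0) != (int)size) {
		free(buf, M_TEMP);
		return (NULL);
	}
	*lenp = size;
	return (buf);
}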
Example #8
static int
vfs_mountroot_readconf(struct thread *td, struct sbuf *sb)
{
	static char buf[128];
	struct nameidata nd;
	off_t ofs;
	ssize_t resid;
	int error, flags, len, vfslocked;

	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE,
	    "/.mount.conf", td);
	flags = FREAD;
	error = vn_open(&nd, &flags, 0, NULL);
	if (error)
		return (error);

	vfslocked = NDHASGIANT(&nd);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	ofs = 0;
	len = sizeof(buf) - 1;
	while (1) {
		error = vn_rdwr(UIO_READ, nd.ni_vp, buf, len, ofs,
		    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
		    NOCRED, &resid, td);
		if (error)
			break;
		if (resid == len)
			break;
		buf[len - resid] = 0;
		sbuf_printf(sb, "%s", buf);
		ofs += len - resid;
	}

	VOP_UNLOCK(nd.ni_vp, 0);
	vn_close(nd.ni_vp, FREAD, td->td_ucred, td);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
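
A hedged usage sketch (not in the original file): collect /.mount.conf into an auto-extending sbuf via the routine above and print the result.  Error handling is deliberately minimal.

static void
mountroot_conf_dump_sketch(struct thread *td)
{
	struct sbuf *sb;

	sb = sbuf_new(NULL, NULL, 128, SBUF_AUTOEXTEND);
	if (vfs_mountroot_readconf(td, sb) == 0) {
		sbuf_finish(sb);
		printf("%s", sbuf_data(sb));
	}
	sbuf_delete(sb);
}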
Example #9
static void
unlock_and_deallocate(struct faultstate *fs)
{

	vm_object_pip_wakeup(fs->object);
	VM_OBJECT_UNLOCK(fs->object);
	if (fs->object != fs->first_object) {
		VM_OBJECT_LOCK(fs->first_object);
		vm_page_lock(fs->first_m);
		vm_page_free(fs->first_m);
		vm_page_unlock(fs->first_m);
		vm_object_pip_wakeup(fs->first_object);
		VM_OBJECT_UNLOCK(fs->first_object);
		fs->first_m = NULL;
	}
	vm_object_deallocate(fs->first_object);
	unlock_map(fs);	
	if (fs->vp != NULL) { 
		vput(fs->vp);
		fs->vp = NULL;
	}
	VFS_UNLOCK_GIANT(fs->vfslocked);
	fs->vfslocked = 0;
}
Example #10
static void
mac_proc_vm_revoke_recurse(struct thread *td, struct ucred *cred,
    struct vm_map *map)
{
	vm_map_entry_t vme;
	int vfslocked, result;
	vm_prot_t revokeperms;
	vm_object_t backing_object, object;
	vm_ooffset_t offset;
	struct vnode *vp;
	struct mount *mp;

	if (!mac_mmap_revocation)
		return;

	vm_map_lock(map);
	for (vme = map->header.next; vme != &map->header; vme = vme->next) {
		if (vme->eflags & MAP_ENTRY_IS_SUB_MAP) {
			mac_proc_vm_revoke_recurse(td, cred,
			    vme->object.sub_map);
			continue;
		}
		/*
		 * Skip over entries that obviously are not shared.
		 */
		if (vme->eflags & (MAP_ENTRY_COW | MAP_ENTRY_NOSYNC) ||
		    !vme->max_protection)
			continue;
		/*
		 * Drill down to the deepest backing object.
		 */
		offset = vme->offset;
		object = vme->object.vm_object;
		if (object == NULL)
			continue;
		VM_OBJECT_LOCK(object);
		while ((backing_object = object->backing_object) != NULL) {
			VM_OBJECT_LOCK(backing_object);
			offset += object->backing_object_offset;
			VM_OBJECT_UNLOCK(object);
			object = backing_object;
		}
		VM_OBJECT_UNLOCK(object);
		/*
		 * At the moment, vm_maps and objects aren't considered by
		 * the MAC system, so only things with backing by a normal
		 * object (read: vnodes) are checked.
		 */
		if (object->type != OBJT_VNODE)
			continue;
		vp = (struct vnode *)object->handle;
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		result = vme->max_protection;
		mac_vnode_check_mmap_downgrade(cred, vp, &result);
		VOP_UNLOCK(vp, 0);
		/*
		 * Find out what maximum protection we have been allowing
		 * that a policy now requires to be removed.
		 */
		revokeperms = vme->max_protection & ~result;
		if (!revokeperms) {
			VFS_UNLOCK_GIANT(vfslocked);
			continue;
		}
		printf("pid %ld: revoking %s perms from %#lx:%ld "
		    "(max %s/cur %s)\n", (long)td->td_proc->p_pid,
		    prot2str(revokeperms), (u_long)vme->start,
		    (long)(vme->end - vme->start),
		    prot2str(vme->max_protection), prot2str(vme->protection));
		/*
		 * This is the really simple case: if a map has more
		 * max_protection than is allowed, but it's not being
		 * actually used (that is, the current protection is still
		 * allowed), we can just wipe it out and do nothing more.
		 */
		if ((vme->protection & revokeperms) == 0) {
			vme->max_protection -= revokeperms;
		} else {
			if (revokeperms & VM_PROT_WRITE) {
				/*
				 * In the more complicated case, flush out all
				 * pending changes to the object then turn it
				 * copy-on-write.
				 */
				vm_object_reference(object);
				(void) vn_start_write(vp, &mp, V_WAIT);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VM_OBJECT_LOCK(object);
				vm_object_page_clean(object, offset, offset +
				    vme->end - vme->start, OBJPC_SYNC);
				VM_OBJECT_UNLOCK(object);
				VOP_UNLOCK(vp, 0);
				vn_finished_write(mp);
				vm_object_deallocate(object);
				/*
				 * Why bother if there are no read permissions
				 * anymore?  For the rest, we need to leave
				 * the write permissions on for COW, or
				 * remove them entirely if configured to.
				 */
				if (!mac_mmap_revocation_via_cow) {
					vme->max_protection &= ~VM_PROT_WRITE;
					vme->protection &= ~VM_PROT_WRITE;
				}
				if ((revokeperms & VM_PROT_READ) == 0)
					vme->eflags |= MAP_ENTRY_COW |
					    MAP_ENTRY_NEEDS_COPY;
			}
			if (revokeperms & VM_PROT_EXECUTE) {
				vme->max_protection &= ~VM_PROT_EXECUTE;
				vme->protection &= ~VM_PROT_EXECUTE;
			}
			if (revokeperms & VM_PROT_READ) {
				vme->max_protection = 0;
				vme->protection = 0;
			}
			pmap_protect(map->pmap, vme->start, vme->end,
			    vme->protection & ~revokeperms);
			vm_map_simplify_entry(map, vme);
		}
		VFS_UNLOCK_GIANT(vfslocked);
	}
	vm_map_unlock(map);
}
/*
 * Exit: deallocate address space and other resources, change proc state to
 * zombie, and unlink proc from allproc and parent's lists.  Save exit status
 * and rusage for wait().  Check for child processes and orphan them.
 */
void
exit1(struct thread *td, int rv)
{
	struct proc *p, *nq, *q;
	struct vnode *vtmp;
	struct vnode *ttyvp = NULL;
#ifdef KTRACE
	struct vnode *tracevp;
	struct ucred *tracecred;
#endif
	struct plimit *plim;
	int locked;

	mtx_assert(&Giant, MA_NOTOWNED);

	p = td->td_proc;
	/*
	 * XXX in case we're rebooting we just let init die in order to
	 * work around an unsolved stack overflow seen very late during
	 * shutdown on sparc64 when the gmirror worker process exits.
	 */ 
	if (p == initproc && rebooting == 0) {
		printf("init died (signal %d, exit %d)\n",
		    WTERMSIG(rv), WEXITSTATUS(rv));
		panic("Going nowhere without my init!");
	}

	/*
	 * MUST abort all other threads before proceeding past here.
	 */
	PROC_LOCK(p);
	while (p->p_flag & P_HADTHREADS) {
		/*
		 * First check if some other thread got here before us;
		 * if so, act appropriately (exit or suspend).
		 */
		thread_suspend_check(0);

		/*
		 * Kill off the other threads. This requires
		 * some co-operation from other parts of the kernel
		 * so it may not be instantaneous.  With this state set
		 * any thread entering the kernel from userspace will
		 * thread_exit() in trap().  Any thread attempting to
		 * sleep will return immediately with EINTR or EWOULDBLOCK
		 * which will hopefully force them to back out to userland
		 * freeing resources as they go.  Any thread attempting
		 * to return to userland will thread_exit() from userret().
		 * thread_exit() will unsuspend us when the last of the
		 * other threads exits.
		 * If another thread is already single-threading after
		 * resumption, calling thread_single() will fail; in that
		 * case, we just re-check all suspension requests, and the
		 * thread will either be suspended there or exit.
		 */
		if (! thread_single(SINGLE_EXIT))
			break;

		/*
		 * All other activity in this process is now stopped.
		 * Threading support has been turned off.
		 */
	}
	KASSERT(p->p_numthreads == 1,
	    ("exit1: proc %p exiting with %d threads", p, p->p_numthreads));
	/*
	 * Wakeup anyone in procfs' PIOCWAIT.  They should have a hold
	 * on our vmspace, so we should block below until they have
	 * released their reference to us.  Note that if they have
	 * requested S_EXIT stops we will block here until they ack
	 * via PIOCCONT.
	 */
	_STOPEVENT(p, S_EXIT, rv);

	/*
	 * Note that we are exiting and do another wakeup of anyone in
	 * PIOCWAIT in case they aren't listening for S_EXIT stops or
	 * decided to wait again after we told them we are exiting.
	 */
	p->p_flag |= P_WEXIT;
	wakeup(&p->p_stype);

	/*
	 * Wait for any processes that have a hold on our vmspace to
	 * release their reference.
	 */
	while (p->p_lock > 0)
		msleep(&p->p_lock, &p->p_mtx, PWAIT, "exithold", 0);

	PROC_UNLOCK(p);
	/* Drain the limit callout while we don't have the proc locked */
	callout_drain(&p->p_limco);

#ifdef AUDIT
	/*
	 * The Sun BSM exit token contains two components: an exit status as
	 * passed to exit(), and a return value to indicate what sort of exit
	 * it was.  The exit status is WEXITSTATUS(rv), but it's not clear
	 * what the return value is.
	 */
	AUDIT_ARG_EXIT(WEXITSTATUS(rv), 0);
	AUDIT_SYSCALL_EXIT(0, td);
#endif

	/* Are we a task leader? */
	if (p == p->p_leader) {
		mtx_lock(&ppeers_lock);
		q = p->p_peers;
		while (q != NULL) {
			PROC_LOCK(q);
			psignal(q, SIGKILL);
			PROC_UNLOCK(q);
			q = q->p_peers;
		}
		while (p->p_peers != NULL)
			msleep(p, &ppeers_lock, PWAIT, "exit1", 0);
		mtx_unlock(&ppeers_lock);
	}

	/*
	 * Check if any loadable modules need anything done at process exit.
	 * E.g. SYSV IPC stuff
	 * XXX what if one of these generates an error?
	 */
	EVENTHANDLER_INVOKE(process_exit, p);

	/*
	 * If parent is waiting for us to exit or exec,
	 * P_PPWAIT is set; we will wakeup the parent below.
	 */
	PROC_LOCK(p);
	stopprofclock(p);
	p->p_flag &= ~(P_TRACED | P_PPWAIT);

	/*
	 * Stop the real interval timer.  If the handler is currently
	 * executing, prevent it from rearming itself and let it finish.
	 */
	if (timevalisset(&p->p_realtimer.it_value) &&
	    callout_stop(&p->p_itcallout) == 0) {
		timevalclear(&p->p_realtimer.it_interval);
		msleep(&p->p_itcallout, &p->p_mtx, PWAIT, "ritwait", 0);
		KASSERT(!timevalisset(&p->p_realtimer.it_value),
		    ("realtime timer is still armed"));
	}
	PROC_UNLOCK(p);

	/*
	 * Reset any sigio structures pointing to us as a result of
	 * F_SETOWN with our pid.
	 */
	funsetownlst(&p->p_sigiolst);

	/*
	 * If this process has an nlminfo data area (for lockd), release it
	 */
	if (nlminfo_release_p != NULL && p->p_nlminfo != NULL)
		(*nlminfo_release_p)(p);

	/*
	 * Close open files and release open-file table.
	 * This may block!
	 */
	fdfree(td);

	/*
	 * If this thread tickled GEOM, we need to wait for the giggling to
	 * stop before we return to userland
	 */
	if (td->td_pflags & TDP_GEOM)
		g_waitidle();

	/*
	 * Remove ourself from our leader's peer list and wake our leader.
	 */
	mtx_lock(&ppeers_lock);
	if (p->p_leader->p_peers) {
		q = p->p_leader;
		while (q->p_peers != p)
			q = q->p_peers;
		q->p_peers = p->p_peers;
		wakeup(p->p_leader);
	}
	mtx_unlock(&ppeers_lock);

	vmspace_exit(td);

	sx_xlock(&proctree_lock);
	if (SESS_LEADER(p)) {
		struct session *sp = p->p_session;
		struct tty *tp;

		/*
		 * s_ttyp is not zero'd; we use this to indicate that
		 * the session once had a controlling terminal. (for
		 * logging and informational purposes)
		 */
		SESS_LOCK(sp);
		ttyvp = sp->s_ttyvp;
		tp = sp->s_ttyp;
		sp->s_ttyvp = NULL;
		sp->s_leader = NULL;
		SESS_UNLOCK(sp);

		/*
		 * Signal foreground pgrp and revoke access to
		 * controlling terminal if it has not been revoked
		 * already.
		 *
		 * Because the TTY may have been revoked in the mean
		 * time and could already have a new session associated
		 * with it, make sure we don't send a SIGHUP to a
		 * foreground process group that does not belong to this
		 * session.
		 */

		if (tp != NULL) {
			tty_lock(tp);
			if (tp->t_session == sp)
				tty_signal_pgrp(tp, SIGHUP);
			tty_unlock(tp);
		}

		if (ttyvp != NULL) {
			sx_xunlock(&proctree_lock);
			if (vn_lock(ttyvp, LK_EXCLUSIVE) == 0) {
				VOP_REVOKE(ttyvp, REVOKEALL);
				VOP_UNLOCK(ttyvp, 0);
			}
			sx_xlock(&proctree_lock);
		}
	}
	fixjobc(p, p->p_pgrp, 0);
	sx_xunlock(&proctree_lock);
	(void)acct_process(td);

	/* Release the TTY now we've unlocked everything. */
	if (ttyvp != NULL)
		vrele(ttyvp);
#ifdef KTRACE
	/*
	 * Disable tracing, then drain any pending records and release
	 * the trace file.
	 */
	if (p->p_traceflag != 0) {
		PROC_LOCK(p);
		mtx_lock(&ktrace_mtx);
		p->p_traceflag = 0;
		mtx_unlock(&ktrace_mtx);
		PROC_UNLOCK(p);
		ktrprocexit(td);
		PROC_LOCK(p);
		mtx_lock(&ktrace_mtx);
		tracevp = p->p_tracevp;
		p->p_tracevp = NULL;
		tracecred = p->p_tracecred;
		p->p_tracecred = NULL;
		mtx_unlock(&ktrace_mtx);
		PROC_UNLOCK(p);
		if (tracevp != NULL) {
			locked = VFS_LOCK_GIANT(tracevp->v_mount);
			vrele(tracevp);
			VFS_UNLOCK_GIANT(locked);
		}
		if (tracecred != NULL)
			crfree(tracecred);
	}
#endif
	/*
	 * Release reference to text vnode
	 */
	if ((vtmp = p->p_textvp) != NULL) {
		p->p_textvp = NULL;
		locked = VFS_LOCK_GIANT(vtmp->v_mount);
		vrele(vtmp);
		VFS_UNLOCK_GIANT(locked);
	}

	/*
	 * Release our limits structure.
	 */
	PROC_LOCK(p);
	plim = p->p_limit;
	p->p_limit = NULL;
	PROC_UNLOCK(p);
	lim_free(plim);

	/*
	 * Remove proc from allproc queue and pidhash chain.
	 * Place onto zombproc.  Unlink from parent's child list.
	 */
	sx_xlock(&allproc_lock);
	LIST_REMOVE(p, p_list);
	LIST_INSERT_HEAD(&zombproc, p, p_list);
	LIST_REMOVE(p, p_hash);
	sx_xunlock(&allproc_lock);

	/*
	 * Call machine-dependent code to release any
	 * machine-dependent resources other than the address space.
	 * The address space is released by "vmspace_exitfree(p)" in
	 * vm_waitproc().
	 */
	cpu_exit(td);

	WITNESS_WARN(WARN_PANIC, NULL, "process (pid %d) exiting", p->p_pid);

	/*
	 * Reparent all of our children to init.
	 */
	sx_xlock(&proctree_lock);
	q = LIST_FIRST(&p->p_children);
	if (q != NULL)		/* only need this if any child is S_ZOMB */
		wakeup(initproc);
	for (; q != NULL; q = nq) {
		nq = LIST_NEXT(q, p_sibling);
		PROC_LOCK(q);
		proc_reparent(q, initproc);
		q->p_sigparent = SIGCHLD;
		/*
		 * Traced processes are killed
		 * since their existence means someone is screwing up.
		 */
		if (q->p_flag & P_TRACED) {
			struct thread *temp;

			q->p_flag &= ~(P_TRACED | P_STOPPED_TRACE);
			FOREACH_THREAD_IN_PROC(q, temp)
				temp->td_dbgflags &= ~TDB_SUSPEND;
			psignal(q, SIGKILL);
		}
		PROC_UNLOCK(q);
	}

	/* Save exit status. */
	PROC_LOCK(p);
	p->p_xstat = rv;
	p->p_xthread = td;

	/* Tell the prison that we are gone. */
	prison_proc_free(p->p_ucred->cr_prison);

#ifdef KDTRACE_HOOKS
	/*
	 * Tell the DTrace fasttrap provider about the exit if it
	 * has declared an interest.
	 */
	if (dtrace_fasttrap_exit)
		dtrace_fasttrap_exit(p);
#endif

	/*
	 * Notify interested parties of our demise.
	 */
	KNOTE_LOCKED(&p->p_klist, NOTE_EXIT);

#ifdef KDTRACE_HOOKS
	int reason = CLD_EXITED;
	if (WCOREDUMP(rv))
		reason = CLD_DUMPED;
	else if (WIFSIGNALED(rv))
		reason = CLD_KILLED;
	SDT_PROBE(proc, kernel, , exit, reason, 0, 0, 0, 0);
#endif

	/*
	 * Just delete all entries in the p_klist. At this point we won't
	 * report any more events, and there are nasty race conditions that
	 * can beat us if we don't.
	 */
	knlist_clear(&p->p_klist, 1);

	/*
	 * Notify parent that we're gone.  If parent has the PS_NOCLDWAIT
	 * flag set, or if the handler is set to SIG_IGN, notify process
	 * 1 instead (and hope it will handle this situation).
	 */
	PROC_LOCK(p->p_pptr);
	mtx_lock(&p->p_pptr->p_sigacts->ps_mtx);
	if (p->p_pptr->p_sigacts->ps_flag & (PS_NOCLDWAIT | PS_CLDSIGIGN)) {
		struct proc *pp;

		mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);
		pp = p->p_pptr;
		PROC_UNLOCK(pp);
		proc_reparent(p, initproc);
		p->p_sigparent = SIGCHLD;
		PROC_LOCK(p->p_pptr);

		/*
		 * Notify parent, so in case he was wait(2)ing or
		 * executing waitpid(2) with our pid, he will
		 * continue.
		 */
		wakeup(pp);
	} else
		mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);

	if (p->p_pptr == initproc)
		psignal(p->p_pptr, SIGCHLD);
	else if (p->p_sigparent != 0) {
		if (p->p_sigparent == SIGCHLD)
			childproc_exited(p);
		else	/* LINUX thread */
			psignal(p->p_pptr, p->p_sigparent);
	}
	sx_xunlock(&proctree_lock);

	/*
	 * The state PRS_ZOMBIE prevents other processes from sending
	 * signals to the process.  To avoid a memory leak, we free the
	 * memory for the signal queue at the time the state is set.
	 */
	sigqueue_flush(&p->p_sigqueue);
	sigqueue_flush(&td->td_sigqueue);

	/*
	 * We have to wait until after acquiring all locks before
	 * changing p_state.  We need to avoid all possible context
	 * switches (including ones from blocking on a mutex) while
	 * marked as a zombie.  We also have to set the zombie state
	 * before we release the parent process' proc lock to avoid
	 * a lost wakeup.  So, we first call wakeup, then we grab the
	 * sched lock, update the state, and release the parent process'
	 * proc lock.
	 */
	wakeup(p->p_pptr);
	cv_broadcast(&p->p_pwait);
	sched_exit(p->p_pptr, td);
	PROC_SLOCK(p);
	p->p_state = PRS_ZOMBIE;
	PROC_UNLOCK(p->p_pptr);

	/*
	 * Hopefully no one will try to deliver a signal to the process this
	 * late in the game.
	 */
	knlist_destroy(&p->p_klist);

	/*
	 * Save our children's rusage information in our exit rusage.
	 */
	ruadd(&p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux);

	/*
	 * Make sure the scheduler takes this thread out of its tables etc.
	 * This will also release this thread's reference to the ucred.
	 * Other thread parts to release include pcb bits and such.
	 */
	thread_exit();
}
Example #12
/* ARGSUSED */
int
auditctl(struct thread *td, struct auditctl_args *uap)
{
	struct nameidata nd;
	struct ucred *cred;
	struct vnode *vp;
	int error = 0;
	int flags, vfslocked;

	if (jailed(td->td_ucred))
		return (ENOSYS);
	error = priv_check(td, PRIV_AUDIT_CONTROL);
	if (error)
		return (error);

	vp = NULL;
	cred = NULL;

	/*
	 * If a path is specified, open the replacement vnode, perform
	 * validity checks, and grab another reference to the current
	 * credential.
	 *
	 * On Darwin, a NULL path argument is also used to disable audit.
	 */
	if (uap->path == NULL)
		return (EINVAL);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
	    UIO_USERSPACE, uap->path, td);
	flags = AUDIT_OPEN_FLAGS;
	error = vn_open(&nd, &flags, 0, NULL);
	if (error)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	vp = nd.ni_vp;
#ifdef MAC
	error = mac_system_check_auditctl(td->td_ucred, vp);
	VOP_UNLOCK(vp, 0);
	if (error) {
		vn_close(vp, AUDIT_CLOSE_FLAGS, td->td_ucred, td);
		VFS_UNLOCK_GIANT(vfslocked);
		return (error);
	}
#else
	VOP_UNLOCK(vp, 0);
#endif
	NDFREE(&nd, NDF_ONLY_PNBUF);
	if (vp->v_type != VREG) {
		vn_close(vp, AUDIT_CLOSE_FLAGS, td->td_ucred, td);
		VFS_UNLOCK_GIANT(vfslocked);
		return (EINVAL);
	}
	VFS_UNLOCK_GIANT(vfslocked);
	cred = td->td_ucred;
	crhold(cred);

	/*
	 * XXXAUDIT: Should audit_suspended actually be cleared by
	 * audit_worker?
	 */
	audit_suspended = 0;

	audit_rotate_vnode(cred, vp);

	return (error);
}
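
For context, a hedged userspace sketch of exercising this system call: auditctl(2) takes the path of the replacement audit trail file and requires audit control privilege.  The path shown is illustrative; the trail is normally managed by auditd(8).

#include <bsm/audit.h>
#include <err.h>

int
main(void)
{
	if (auditctl("/var/audit/example.trail") != 0)
		err(1, "auditctl");
	return (0);
}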
Example #13
static int
link_elf_ctf_get(linker_file_t lf, linker_ctf_t *lc)
{
#ifdef DDB_CTF
	Elf_Ehdr *hdr = NULL;
	Elf_Shdr *shdr = NULL;
	caddr_t ctftab = NULL;
	caddr_t raw = NULL;
	caddr_t shstrtab = NULL;
	elf_file_t ef = (elf_file_t) lf;
	int flags;
	int i;
	int nbytes;
	ssize_t resid;
	int vfslocked;
	size_t sz;
	struct nameidata nd;
	struct thread *td = curthread;
	uint8_t ctf_hdr[CTF_HDR_SIZE];
#endif
	int error = 0;

	if (lf == NULL || lc == NULL)
		return (EINVAL);

	/* Set the defaults for no CTF present. That's not a crime! */
	bzero(lc, sizeof(*lc));

#ifdef DDB_CTF
	/*
	 * First check if we've tried to load CTF data previously and the
	 * CTF ELF section wasn't found. We flag that condition by setting
	 * ctfcnt to -1. See below.
	 */
	if (ef->ctfcnt < 0)
		return (EFTYPE);

	/* Now check if we've already loaded the CTF data.. */
	if (ef->ctfcnt > 0) {
		/* We only need to load once. */
		lc->ctftab = ef->ctftab;
		lc->ctfcnt = ef->ctfcnt;
		lc->symtab = ef->ddbsymtab;
		lc->strtab = ef->ddbstrtab;
		lc->strcnt = ef->ddbstrcnt;
		lc->nsym   = ef->ddbsymcnt;
		lc->ctfoffp = (uint32_t **) &ef->ctfoff;
		lc->typoffp = (uint32_t **) &ef->typoff;
		lc->typlenp = &ef->typlen;
		return (0);
	}

	/*
	 * We need to try reading the CTF data. Flag no CTF data present
	 * by default and if we actually succeed in reading it, we'll
	 * update ctfcnt to the number of bytes read.
	 */
	ef->ctfcnt = -1;

	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, lf->pathname, td);
	flags = FREAD;
	error = vn_open(&nd, &flags, 0, NULL);
	if (error)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	NDFREE(&nd, NDF_ONLY_PNBUF);

	/* Allocate memory for the ELF header. */
	if ((hdr = malloc(sizeof(*hdr), M_LINKER, M_WAITOK)) == NULL) {
		error = ENOMEM;
		goto out;
	}

	/* Read the ELF header. */
	if ((error = vn_rdwr(UIO_READ, nd.ni_vp, hdr, sizeof(*hdr),
	    0, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid,
	    td)) != 0)
		goto out;

	/* Sanity check. */
	if (!IS_ELF(*hdr)) {
		error = ENOEXEC;
		goto out;
	}

	nbytes = hdr->e_shnum * hdr->e_shentsize;
	if (nbytes == 0 || hdr->e_shoff == 0 ||
	    hdr->e_shentsize != sizeof(Elf_Shdr)) {
		error = ENOEXEC;
		goto out;
	}

	/* Allocate memory for all the section headers */
	if ((shdr = malloc(nbytes, M_LINKER, M_WAITOK)) == NULL) {
		error = ENOMEM;
		goto out;
	}

	/* Read all the section headers */
	if ((error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)shdr, nbytes,
	    hdr->e_shoff, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
	    &resid, td)) != 0)
		goto out;

	/*
	 * We need to search for the CTF section by name, so if the
	 * section names aren't present, then we can't locate the
	 * .SUNW_ctf section containing the CTF data.
	 */
	if (hdr->e_shstrndx == 0 || shdr[hdr->e_shstrndx].sh_type != SHT_STRTAB) {
		printf("%s(%d): module %s e_shstrndx is %d, sh_type is %d\n",
		    __func__, __LINE__, lf->pathname, hdr->e_shstrndx,
		    shdr[hdr->e_shstrndx].sh_type);
		error = EFTYPE;
		goto out;
	}

	/* Allocate memory to buffer the section header strings. */
	if ((shstrtab = malloc(shdr[hdr->e_shstrndx].sh_size, M_LINKER,
	    M_WAITOK)) == NULL) {
		error = ENOMEM;
		goto out;
	}

	/* Read the section header strings. */
	if ((error = vn_rdwr(UIO_READ, nd.ni_vp, shstrtab,
	    shdr[hdr->e_shstrndx].sh_size, shdr[hdr->e_shstrndx].sh_offset,
	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid,
	    td)) != 0)
		goto out;

	/* Search for the section containing the CTF data. */
	for (i = 0; i < hdr->e_shnum; i++)
		if (strcmp(".SUNW_ctf", shstrtab + shdr[i].sh_name) == 0)
			break;

	/* Check if the CTF section wasn't found. */
	if (i >= hdr->e_shnum) {
		printf("%s(%d): module %s has no .SUNW_ctf section\n",
		    __func__, __LINE__, lf->pathname);
		error = EFTYPE;
		goto out;
	}

	/* Read the CTF header. */
	if ((error = vn_rdwr(UIO_READ, nd.ni_vp, ctf_hdr, sizeof(ctf_hdr),
	    shdr[i].sh_offset, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
	    NOCRED, &resid, td)) != 0)
		goto out;

	/* Check the CTF magic number. (XXX check for big endian!) */
	if (ctf_hdr[0] != 0xf1 || ctf_hdr[1] != 0xcf) {
		printf("%s(%d): module %s has invalid format\n",
		    __func__, __LINE__, lf->pathname);
		error = EFTYPE;
		goto out;
	}

	/* Check if version 2. */
	if (ctf_hdr[2] != 2) {
		printf("%s(%d): module %s CTF format version is %d "
		    "(2 expected)\n",
		    __func__, __LINE__, lf->pathname, ctf_hdr[2]);
		error = EFTYPE;
		goto out;
	}

	/* Check if the data is compressed. */
	if ((ctf_hdr[3] & 0x1) != 0) {
		uint32_t *u32 = (uint32_t *) ctf_hdr;

		/*
		 * The last two fields in the CTF header are the offset
		 * from the end of the header to the start of the string
		 * data and the length of that string data.  Use this
		 * information to determine the size of the decompressed
		 * CTF data buffer required.
		 */
		sz = u32[CTF_HDR_STRTAB_U32] + u32[CTF_HDR_STRLEN_U32] +
		    sizeof(ctf_hdr);

		/*
		 * Allocate memory for the compressed CTF data, including
		 * the header (which isn't compressed).
		 */
		if ((raw = malloc(shdr[i].sh_size, M_LINKER, M_WAITOK)) == NULL) {
			error = ENOMEM;
			goto out;
		}
	} else {
		/*
		 * The CTF data is not compressed, so the ELF section
		 * size is the same as the buffer size required.
		 */
		sz = shdr[i].sh_size;
	}

	/*
	 * Allocate memory to buffer the CTF data in its decompressed
	 * form.
	 */
	if ((ctftab = malloc(sz, M_LINKER, M_WAITOK)) == NULL) {
		error = ENOMEM;
		goto out;
	}

	/*
	 * Read the CTF data into the raw buffer if compressed, or
	 * directly into the CTF buffer otherwise.
	 */
	if ((error = vn_rdwr(UIO_READ, nd.ni_vp, raw == NULL ? ctftab : raw,
	    shdr[i].sh_size, shdr[i].sh_offset, UIO_SYSSPACE, IO_NODELOCKED,
	    td->td_ucred, NOCRED, &resid, td)) != 0)
		goto out;

	/* Check if decompression is required. */
	if (raw != NULL) {
		z_stream zs;
		int ret;

		/*
		 * The header isn't compressed, so copy that into the
		 * CTF buffer first.
		 */
		bcopy(ctf_hdr, ctftab, sizeof(ctf_hdr));

		/* Initialise the zlib structure. */
		bzero(&zs, sizeof(zs));
		zs.zalloc = z_alloc;
		zs.zfree = z_free;

		if (inflateInit(&zs) != Z_OK) {
			error = EIO;
			goto out;
		}

		zs.avail_in = shdr[i].sh_size - sizeof(ctf_hdr);
		zs.next_in = ((uint8_t *) raw) + sizeof(ctf_hdr);
		zs.avail_out = sz - sizeof(ctf_hdr);
		zs.next_out = ((uint8_t *) ctftab) + sizeof(ctf_hdr);
		if ((ret = inflate(&zs, Z_FINISH)) != Z_STREAM_END) {
			printf("%s(%d): zlib inflate returned %d\n", __func__, __LINE__, ret);
			error = EIO;
			goto out;
		}
	}

	/* Got the CTF data! */
	ef->ctftab = ctftab;
	ef->ctfcnt = shdr[i].sh_size;

	/* We'll retain the memory allocated for the CTF data. */
	ctftab = NULL;

	/* Let the caller use the CTF data read. */
	lc->ctftab = ef->ctftab;
	lc->ctfcnt = ef->ctfcnt;
	lc->symtab = ef->ddbsymtab;
	lc->strtab = ef->ddbstrtab;
	lc->strcnt = ef->ddbstrcnt;
	lc->nsym   = ef->ddbsymcnt;
	lc->ctfoffp = (uint32_t **) &ef->ctfoff;
	lc->typoffp = (uint32_t **) &ef->typoff;
	lc->typlenp = &ef->typlen;

out:
	VOP_UNLOCK(nd.ni_vp, 0);
	vn_close(nd.ni_vp, FREAD, td->td_ucred, td);
	VFS_UNLOCK_GIANT(vfslocked);

	if (hdr != NULL)
		free(hdr, M_LINKER);
	if (shdr != NULL)
		free(shdr, M_LINKER);
	if (shstrtab != NULL)
		free(shstrtab, M_LINKER);
	if (ctftab != NULL)
		free(ctftab, M_LINKER);
	if (raw != NULL)
		free(raw, M_LINKER);
#else
	error = EOPNOTSUPP;
#endif

	return (error);
}
Example #14
static int
link_elf_load_file(linker_class_t cls, const char *filename,
    linker_file_t *result)
{
	struct nameidata nd;
	struct thread *td = curthread;	/* XXX */
	Elf_Ehdr *hdr;
	Elf_Shdr *shdr;
	Elf_Sym *es;
	int nbytes, i, j;
	vm_offset_t mapbase;
	size_t mapsize;
	int error = 0;
	int resid, flags;
	elf_file_t ef;
	linker_file_t lf;
	int symtabindex;
	int symstrindex;
	int shstrindex;
	int nsym;
	int pb, rl, ra;
	int alignmask;
	int vfslocked;

	shdr = NULL;
	lf = NULL;
	mapsize = 0;
	hdr = NULL;

	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, filename, td);
	flags = FREAD;
	error = vn_open(&nd, &flags, 0, NULL);
	if (error)
		return error;
	vfslocked = NDHASGIANT(&nd);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	if (nd.ni_vp->v_type != VREG) {
		error = ENOEXEC;
		goto out;
	}
#ifdef MAC
	error = mac_kld_check_load(td->td_ucred, nd.ni_vp);
	if (error) {
		goto out;
	}
#endif

	/* Read the elf header from the file. */
	hdr = malloc(sizeof(*hdr), M_LINKER, M_WAITOK);
	error = vn_rdwr(UIO_READ, nd.ni_vp, (void *)hdr, sizeof(*hdr), 0,
	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
	    &resid, td);
	if (error)
		goto out;
	if (resid != 0){
		error = ENOEXEC;
		goto out;
	}

	if (!IS_ELF(*hdr)) {
		error = ENOEXEC;
		goto out;
	}

	if (hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS
	    || hdr->e_ident[EI_DATA] != ELF_TARG_DATA) {
		link_elf_error(filename, "Unsupported file layout");
		error = ENOEXEC;
		goto out;
	}
	if (hdr->e_ident[EI_VERSION] != EV_CURRENT
	    || hdr->e_version != EV_CURRENT) {
		link_elf_error(filename, "Unsupported file version");
		error = ENOEXEC;
		goto out;
	}
	if (hdr->e_type != ET_REL) {
		error = ENOSYS;
		goto out;
	}
	if (hdr->e_machine != ELF_TARG_MACH) {
		link_elf_error(filename, "Unsupported machine");
		error = ENOEXEC;
		goto out;
	}

	lf = linker_make_file(filename, &link_elf_class);
	if (!lf) {
		error = ENOMEM;
		goto out;
	}
	ef = (elf_file_t) lf;
	ef->nprogtab = 0;
	ef->e_shdr = 0;
	ef->nreltab = 0;
	ef->nrelatab = 0;

	/* Allocate and read in the section header */
	nbytes = hdr->e_shnum * hdr->e_shentsize;
	if (nbytes == 0 || hdr->e_shoff == 0 ||
	    hdr->e_shentsize != sizeof(Elf_Shdr)) {
		error = ENOEXEC;
		goto out;
	}
	shdr = malloc(nbytes, M_LINKER, M_WAITOK);
	ef->e_shdr = shdr;
	error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)shdr, nbytes, hdr->e_shoff,
	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid, td);
	if (error)
		goto out;
	if (resid) {
		error = ENOEXEC;
		goto out;
	}

	/* Scan the section header for information and table sizing. */
	nsym = 0;
	symtabindex = -1;
	symstrindex = -1;
	for (i = 0; i < hdr->e_shnum; i++) {
		if (shdr[i].sh_size == 0)
			continue;
		switch (shdr[i].sh_type) {
		case SHT_PROGBITS:
		case SHT_NOBITS:
			ef->nprogtab++;
			break;
		case SHT_SYMTAB:
			nsym++;
			symtabindex = i;
			symstrindex = shdr[i].sh_link;
			break;
		case SHT_REL:
			ef->nreltab++;
			break;
		case SHT_RELA:
			ef->nrelatab++;
			break;
		case SHT_STRTAB:
			break;
		}
	}
	if (ef->nprogtab == 0) {
		link_elf_error(filename, "file has no contents");
		error = ENOEXEC;
		goto out;
	}
	if (nsym != 1) {
		/* Only allow one symbol table for now */
		link_elf_error(filename, "file has no valid symbol table");
		error = ENOEXEC;
		goto out;
	}
	if (symstrindex < 0 || symstrindex > hdr->e_shnum ||
	    shdr[symstrindex].sh_type != SHT_STRTAB) {
		link_elf_error(filename, "file has invalid symbol strings");
		error = ENOEXEC;
		goto out;
	}

	/* Allocate space for tracking the load chunks */
	if (ef->nprogtab != 0)
		ef->progtab = malloc(ef->nprogtab * sizeof(*ef->progtab),
		    M_LINKER, M_WAITOK | M_ZERO);
	if (ef->nreltab != 0)
		ef->reltab = malloc(ef->nreltab * sizeof(*ef->reltab),
		    M_LINKER, M_WAITOK | M_ZERO);
	if (ef->nrelatab != 0)
		ef->relatab = malloc(ef->nrelatab * sizeof(*ef->relatab),
		    M_LINKER, M_WAITOK | M_ZERO);

	if (symtabindex == -1)
		panic("lost symbol table index");
	/* Allocate space for and load the symbol table */
	ef->ddbsymcnt = shdr[symtabindex].sh_size / sizeof(Elf_Sym);
	ef->ddbsymtab = malloc(shdr[symtabindex].sh_size, M_LINKER, M_WAITOK);
	error = vn_rdwr(UIO_READ, nd.ni_vp, (void *)ef->ddbsymtab,
	    shdr[symtabindex].sh_size, shdr[symtabindex].sh_offset,
	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
	    &resid, td);
	if (error)
		goto out;
	if (resid != 0){
		error = EINVAL;
		goto out;
	}

	if (symstrindex == -1)
		panic("lost symbol string index");
	/* Allocate space for and load the symbol strings */
	ef->ddbstrcnt = shdr[symstrindex].sh_size;
	ef->ddbstrtab = malloc(shdr[symstrindex].sh_size, M_LINKER, M_WAITOK);
	error = vn_rdwr(UIO_READ, nd.ni_vp, ef->ddbstrtab,
	    shdr[symstrindex].sh_size, shdr[symstrindex].sh_offset,
	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
	    &resid, td);
	if (error)
		goto out;
	if (resid != 0){
		error = EINVAL;
		goto out;
	}

	/* Do we have a string table for the section names?  */
	shstrindex = -1;
	if (hdr->e_shstrndx != 0 &&
	    shdr[hdr->e_shstrndx].sh_type == SHT_STRTAB) {
		shstrindex = hdr->e_shstrndx;
		ef->shstrcnt = shdr[shstrindex].sh_size;
		ef->shstrtab = malloc(shdr[shstrindex].sh_size, M_LINKER,
		    M_WAITOK);
		error = vn_rdwr(UIO_READ, nd.ni_vp, ef->shstrtab,
		    shdr[shstrindex].sh_size, shdr[shstrindex].sh_offset,
		    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
		    &resid, td);
		if (error)
			goto out;
		if (resid != 0){
			error = EINVAL;
			goto out;
		}
	}

	/* Size up code/data(progbits) and bss(nobits). */
	alignmask = 0;
	for (i = 0; i < hdr->e_shnum; i++) {
		if (shdr[i].sh_size == 0)
			continue;
		switch (shdr[i].sh_type) {
		case SHT_PROGBITS:
		case SHT_NOBITS:
			alignmask = shdr[i].sh_addralign - 1;
			mapsize += alignmask;
			mapsize &= ~alignmask;
			mapsize += shdr[i].sh_size;
			break;
		}
	}

	/*
	 * We know how much space we need for the text/data/bss/etc.
	 * This stuff needs to be in a single chunk so that profiling etc
	 * can get the bounds and gdb can associate offsets with modules
	 */
	ef->object = vm_object_allocate(OBJT_DEFAULT,
	    round_page(mapsize) >> PAGE_SHIFT);
	if (ef->object == NULL) {
		error = ENOMEM;
		goto out;
	}
	ef->address = (caddr_t) vm_map_min(kernel_map);

	/*
	 * In order to satisfy amd64's architectural requirements on the
	 * location of code and data in the kernel's address space, request a
	 * mapping that is above the kernel.  
	 */
	mapbase = KERNBASE;
	error = vm_map_find(kernel_map, ef->object, 0, &mapbase,
	    round_page(mapsize), TRUE, VM_PROT_ALL, VM_PROT_ALL, FALSE);
	if (error) {
		vm_object_deallocate(ef->object);
		ef->object = 0;
		goto out;
	}

	/* Wire the pages */
	error = vm_map_wire(kernel_map, mapbase,
	    mapbase + round_page(mapsize),
	    VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);
	if (error != KERN_SUCCESS) {
		error = ENOMEM;
		goto out;
	}

	/* Inform the kld system about the situation */
	lf->address = ef->address = (caddr_t)mapbase;
	lf->size = mapsize;

	/*
	 * Now load code/data(progbits), zero bss(nobits), allocate space for
	 * and load relocs
	 */
	pb = 0;
	rl = 0;
	ra = 0;
	alignmask = 0;
	for (i = 0; i < hdr->e_shnum; i++) {
		if (shdr[i].sh_size == 0)
			continue;
		switch (shdr[i].sh_type) {
		case SHT_PROGBITS:
		case SHT_NOBITS:
			alignmask = shdr[i].sh_addralign - 1;
			mapbase += alignmask;
			mapbase &= ~alignmask;
			if (ef->shstrtab && shdr[i].sh_name != 0)
				ef->progtab[pb].name =
				    ef->shstrtab + shdr[i].sh_name;
			else if (shdr[i].sh_type == SHT_PROGBITS)
				ef->progtab[pb].name = "<<PROGBITS>>";
			else
				ef->progtab[pb].name = "<<NOBITS>>";
			if (ef->progtab[pb].name != NULL && 
			    !strcmp(ef->progtab[pb].name, DPCPU_SETNAME))
				ef->progtab[pb].addr =
				    dpcpu_alloc(shdr[i].sh_size);
#ifdef VIMAGE
			else if (ef->progtab[pb].name != NULL &&
			    !strcmp(ef->progtab[pb].name, VNET_SETNAME))
				ef->progtab[pb].addr =
				    vnet_data_alloc(shdr[i].sh_size);
#endif
			else
				ef->progtab[pb].addr =
				    (void *)(uintptr_t)mapbase;
			if (ef->progtab[pb].addr == NULL) {
				error = ENOSPC;
				goto out;
			}
			ef->progtab[pb].size = shdr[i].sh_size;
			ef->progtab[pb].sec = i;
			if (shdr[i].sh_type == SHT_PROGBITS) {
				error = vn_rdwr(UIO_READ, nd.ni_vp,
				    ef->progtab[pb].addr,
				    shdr[i].sh_size, shdr[i].sh_offset,
				    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
				    NOCRED, &resid, td);
				if (error)
					goto out;
				if (resid != 0){
					error = EINVAL;
					goto out;
				}
				/* Initialize the per-cpu or vnet area. */
				if (ef->progtab[pb].addr != (void *)mapbase &&
				    !strcmp(ef->progtab[pb].name, DPCPU_SETNAME))
					dpcpu_copy(ef->progtab[pb].addr,
					    shdr[i].sh_size);
#ifdef VIMAGE
				else if (ef->progtab[pb].addr !=
				    (void *)mapbase &&
				    !strcmp(ef->progtab[pb].name, VNET_SETNAME))
					vnet_data_copy(ef->progtab[pb].addr,
					    shdr[i].sh_size);
#endif
			} else
				bzero(ef->progtab[pb].addr, shdr[i].sh_size);

			/* Update all symbol values with the offset. */
			for (j = 0; j < ef->ddbsymcnt; j++) {
				es = &ef->ddbsymtab[j];
				if (es->st_shndx != i)
					continue;
				es->st_value += (Elf_Addr)ef->progtab[pb].addr;
			}
			mapbase += shdr[i].sh_size;
			pb++;
			break;
		case SHT_REL:
			ef->reltab[rl].rel = malloc(shdr[i].sh_size, M_LINKER,
			    M_WAITOK);
			ef->reltab[rl].nrel = shdr[i].sh_size / sizeof(Elf_Rel);
			ef->reltab[rl].sec = shdr[i].sh_info;
			error = vn_rdwr(UIO_READ, nd.ni_vp,
			    (void *)ef->reltab[rl].rel,
			    shdr[i].sh_size, shdr[i].sh_offset,
			    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
			    &resid, td);
			if (error)
				goto out;
			if (resid != 0){
				error = EINVAL;
				goto out;
			}
			rl++;
			break;
		case SHT_RELA:
			ef->relatab[ra].rela = malloc(shdr[i].sh_size, M_LINKER,
			    M_WAITOK);
			ef->relatab[ra].nrela =
			    shdr[i].sh_size / sizeof(Elf_Rela);
			ef->relatab[ra].sec = shdr[i].sh_info;
			error = vn_rdwr(UIO_READ, nd.ni_vp,
			    (void *)ef->relatab[ra].rela,
			    shdr[i].sh_size, shdr[i].sh_offset,
			    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
			    &resid, td);
			if (error)
				goto out;
			if (resid != 0){
				error = EINVAL;
				goto out;
			}
			ra++;
			break;
		}
	}
	if (pb != ef->nprogtab)
		panic("lost progbits");
	if (rl != ef->nreltab)
		panic("lost reltab");
	if (ra != ef->nrelatab)
		panic("lost relatab");
	if (mapbase != (vm_offset_t)ef->address + mapsize)
		panic("mapbase 0x%lx != address %p + mapsize 0x%lx (0x%lx)\n",
		    (u_long)mapbase, ef->address, (u_long)mapsize,
		    (u_long)(vm_offset_t)ef->address + mapsize);

	/* Local intra-module relocations */
	link_elf_reloc_local(lf);

	/* Pull in dependencies */
	VOP_UNLOCK(nd.ni_vp, 0);
	error = linker_load_dependencies(lf);
	vn_lock(nd.ni_vp, LK_EXCLUSIVE | LK_RETRY);
	if (error)
		goto out;

	/* External relocations */
	error = relocate_file(ef);
	if (error)
		goto out;

	/* Notify MD code that a module is being loaded. */
	error = elf_cpu_load_file(lf);
	if (error)
		goto out;

	*result = lf;

out:
	VOP_UNLOCK(nd.ni_vp, 0);
	vn_close(nd.ni_vp, FREAD, td->td_ucred, td);
	VFS_UNLOCK_GIANT(vfslocked);
	if (error && lf)
		linker_file_unload(lf, LINKER_UNLOAD_FORCE);
	if (hdr)
		free(hdr, M_LINKER);

	return error;
}
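
For context, a hedged userspace sketch: kldload(2) is the entry point that ends up in loaders such as link_elf_load_file() for relocatable-object modules.  The module path is illustrative.

#include <sys/param.h>
#include <sys/linker.h>
#include <err.h>

int
main(void)
{
	/* kldload(2) returns the kld file ID on success, -1 on failure. */
	if (kldload("/boot/kernel/example.ko") == -1)
		err(1, "kldload");
	return (0);
}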
void
zfs_rmnode(znode_t *zp)
{
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	objset_t	*os = zfsvfs->z_os;
	znode_t		*xzp = NULL;
	dmu_tx_t	*tx;
	uint64_t	acl_obj;
	int		error;
	int		vfslocked;

	vfslocked = VFS_LOCK_GIANT(zfsvfs->z_vfs);

	ASSERT(zp->z_phys->zp_links == 0);

	/*
	 * If this is a ZIL replay then leave the object in the unlinked set.
	 * Otherwise we can get a deadlock, because the delete can be
	 * quite large and span multiple tx's and txgs, but each replay
	 * creates a tx to atomically run the replay function and mark the
	 * replay record as complete. We deadlock trying to start a tx in
	 * a new txg to further the deletion but can't because the replay
	 * tx hasn't finished.
	 *
	 * We actually delete the object if we get a failure to create an
	 * object in zil_replay_log_record(), or after calling zil_replay().
	 */
	if (zfsvfs->z_assign >= TXG_INITIAL) {
		zfs_znode_dmu_fini(zp);
		zfs_znode_free(zp);
		return;
	}

	/*
	 * If this is an attribute directory, purge its contents.
	 */
	if (ZTOV(zp) != NULL && ZTOV(zp)->v_type == VDIR &&
	    (zp->z_phys->zp_flags & ZFS_XATTR)) {
		if (zfs_purgedir(zp) != 0) {
			/*
			 * Not enough space to delete some xattrs.
			 * Leave it in the unlinked set.
			 */
			zfs_znode_dmu_fini(zp);
			zfs_znode_free(zp);
			VFS_UNLOCK_GIANT(vfslocked);
			return;
		}
	}

	/*
	 * Free up all the data in the file.
	 */
	error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
	if (error) {
		/*
		 * Not enough space.  Leave the file in the unlinked set.
		 */
		zfs_znode_dmu_fini(zp);
		zfs_znode_free(zp);
		return;
	}

	/*
	 * If the file has extended attributes, we're going to unlink
	 * the xattr dir.
	 */
	if (zp->z_phys->zp_xattr) {
		error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp);
		ASSERT(error == 0);
	}

	acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj;

	/*
	 * Set up the final transaction.
	 */
	tx = dmu_tx_create(os);
	dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	if (xzp) {
		dmu_tx_hold_bonus(tx, xzp->z_id);
		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
	}
	if (acl_obj)
		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		/*
		 * Not enough space to delete the file.  Leave it in the
		 * unlinked set, leaking it until the fs is remounted (at
		 * which point we'll call zfs_unlinked_drain() to process it).
		 */
		dmu_tx_abort(tx);
		zfs_znode_dmu_fini(zp);
		zfs_znode_free(zp);
		goto out;
	}

	if (xzp) {
		dmu_buf_will_dirty(xzp->z_dbuf, tx);
		mutex_enter(&xzp->z_lock);
		xzp->z_unlinked = B_TRUE;	/* mark xzp for deletion */
		xzp->z_phys->zp_links = 0;	/* no more links to it */
		mutex_exit(&xzp->z_lock);
		zfs_unlinked_add(xzp, tx);
	}

	/* Remove this znode from the unlinked set */
	VERIFY3U(0, ==,
	    zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));

	zfs_znode_delete(zp, tx);

	dmu_tx_commit(tx);
out:
	if (xzp)
		VN_RELE(ZTOV(xzp));
	VFS_UNLOCK_GIANT(vfslocked);
}
Example #16
/*
 * Flush all pending data to disk.  This operation will block.
 */
static int
alq_doio(struct alq *alq)
{
    struct thread *td;
    struct mount *mp;
    struct vnode *vp;
    struct uio auio;
    struct iovec aiov[2];
    int totlen;
    int iov;
    int vfslocked;
    int wrapearly;

    KASSERT((HAS_PENDING_DATA(alq)), ("%s: queue empty!", __func__));

    vp = alq->aq_vp;
    td = curthread;
    totlen = 0;
    iov = 1;
    wrapearly = alq->aq_wrapearly;

    bzero(&aiov, sizeof(aiov));
    bzero(&auio, sizeof(auio));

    /* Start the write from the location of our buffer tail pointer. */
    aiov[0].iov_base = alq->aq_entbuf + alq->aq_writetail;

    if (alq->aq_writetail < alq->aq_writehead) {
        /* Buffer not wrapped. */
        totlen = aiov[0].iov_len = alq->aq_writehead - alq->aq_writetail;
    } else if (alq->aq_writehead == 0) {
        /* Buffer not wrapped (special case to avoid an empty iov). */
        totlen = aiov[0].iov_len = alq->aq_buflen - alq->aq_writetail -
                                   wrapearly;
    } else {
        /*
         * Buffer wrapped, requires 2 aiov entries:
         * - first is from writetail to end of buffer
         * - second is from start of buffer to writehead
         */
        aiov[0].iov_len = alq->aq_buflen - alq->aq_writetail -
                          wrapearly;
        iov++;
        aiov[1].iov_base = alq->aq_entbuf;
        aiov[1].iov_len =  alq->aq_writehead;
        totlen = aiov[0].iov_len + aiov[1].iov_len;
    }

    alq->aq_flags |= AQ_FLUSHING;
    ALQ_UNLOCK(alq);

    auio.uio_iov = &aiov[0];
    auio.uio_offset = 0;
    auio.uio_segflg = UIO_SYSSPACE;
    auio.uio_rw = UIO_WRITE;
    auio.uio_iovcnt = iov;
    auio.uio_resid = totlen;
    auio.uio_td = td;

    /*
     * Do all of the junk required to write now.
     */
    vfslocked = VFS_LOCK_GIANT(vp->v_mount);
    vn_start_write(vp, &mp, V_WAIT);
    vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
    /*
     * XXX: VOP_WRITE error checks are ignored.
     */
#ifdef MAC
    if (mac_vnode_check_write(alq->aq_cred, NOCRED, vp) == 0)
#endif
        VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, alq->aq_cred);
    VOP_UNLOCK(vp, 0);
    vn_finished_write(mp);
    VFS_UNLOCK_GIANT(vfslocked);

    ALQ_LOCK(alq);
    alq->aq_flags &= ~AQ_FLUSHING;

    /* Adjust writetail as required, taking into account wrapping. */
    alq->aq_writetail = (alq->aq_writetail + totlen + wrapearly) %
                        alq->aq_buflen;
    alq->aq_freebytes += totlen + wrapearly;

    /*
     * If we just flushed part of the buffer which wrapped, reset the
     * wrapearly indicator.
     */
    if (wrapearly)
        alq->aq_wrapearly = 0;

    /*
     * If we just flushed the buffer completely, reset indexes to 0 to
     * minimise buffer wraps.
     * This is also required to ensure alq_getn() can't wedge itself.
     */
    if (!HAS_PENDING_DATA(alq))
        alq->aq_writehead = alq->aq_writetail = 0;

    KASSERT((alq->aq_writetail >= 0 && alq->aq_writetail < alq->aq_buflen),
            ("%s: aq_writetail < 0 || aq_writetail >= aq_buflen", __func__));

    if (alq->aq_flags & AQ_WANTED) {
        alq->aq_flags &= ~AQ_WANTED;
        return (1);
    }

    return(0);
}
Example #17
/*
 * Clean up the unionfs node.
 */
void
unionfs_noderem(struct vnode *vp, struct thread *td)
{
	int		vfslocked;
	int		count;
	struct unionfs_node *unp, *unp_t1, *unp_t2;
	struct unionfs_node_hashhead *hd;
	struct unionfs_node_status *unsp, *unsp_tmp;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vnode   *dvp;

	/*
	 * Use the interlock to protect the clearing of v_data to
	 * prevent faults in unionfs_lock().
	 */
	VI_LOCK(vp);
	unp = VTOUNIONFS(vp);
	lvp = unp->un_lowervp;
	uvp = unp->un_uppervp;
	dvp = unp->un_dvp;
	unp->un_lowervp = unp->un_uppervp = NULLVP;

	vp->v_vnlock = &(vp->v_lock);
	vp->v_data = NULL;
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_INTERLOCK, VI_MTX(vp), td);
	if (lvp != NULLVP)
		VOP_UNLOCK(lvp, 0, td);
	if (uvp != NULLVP)
		VOP_UNLOCK(uvp, 0, td);
	vp->v_object = NULL;

	if (dvp != NULLVP && unp->un_hash.le_prev != NULL)
		unionfs_rem_cached_vnode(unp, dvp);

	if (lvp != NULLVP) {
		vfslocked = VFS_LOCK_GIANT(lvp->v_mount);
		vrele(lvp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (uvp != NULLVP) {
		vfslocked = VFS_LOCK_GIANT(uvp->v_mount);
		vrele(uvp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (dvp != NULLVP) {
		vfslocked = VFS_LOCK_GIANT(dvp->v_mount);
		vrele(dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		unp->un_dvp = NULLVP;
	}
	if (unp->un_path != NULL) {
		free(unp->un_path, M_UNIONFSPATH);
		unp->un_path = NULL;
	}

	if (unp->un_hashtbl != NULL) {
		for (count = 0; count <= unp->un_hashmask; count++) {
			hd = unp->un_hashtbl + count;
			LIST_FOREACH_SAFE(unp_t1, hd, un_hash, unp_t2) {
				LIST_REMOVE(unp_t1, un_hash);
				unp_t1->un_hash.le_next = NULL;
				unp_t1->un_hash.le_prev = NULL;
			}
		}
		hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, unp->un_hashmask);
	}
Example #18
/*
 * Q_QUOTAON - set up a quota file for a particular filesystem.
 */
int
quotaon(struct thread *td, struct mount *mp, int type, void *fname)
{
	struct ufsmount *ump;
	struct vnode *vp, **vpp;
	struct vnode *mvp;
	struct dquot *dq;
	int error, flags, vfslocked;
	struct nameidata nd;

	error = priv_check(td, PRIV_UFS_QUOTAON);
	if (error)
		return (error);

	if (mp->mnt_flag & MNT_RDONLY)
		return (EROFS);

	ump = VFSTOUFS(mp);
	dq = NODQUOT;

	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_USERSPACE, fname, td);
	flags = FREAD | FWRITE;
	vfs_ref(mp);
	vfs_unbusy(mp);
	error = vn_open(&nd, &flags, 0, NULL);
	if (error != 0) {
		vfs_rel(mp);
		return (error);
	}
	vfslocked = NDHASGIANT(&nd);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;
	error = vfs_busy(mp, MBF_NOWAIT);
	vfs_rel(mp);
	if (error == 0) {
		if (vp->v_type != VREG) {
			error = EACCES;
			vfs_unbusy(mp);
		}
	}
	if (error != 0) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
		VFS_UNLOCK_GIANT(vfslocked);
		return (error);
	}

	UFS_LOCK(ump);
	if ((ump->um_qflags[type] & (QTF_OPENING|QTF_CLOSING)) != 0) {
		UFS_UNLOCK(ump);
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
		VFS_UNLOCK_GIANT(vfslocked);
		vfs_unbusy(mp);
		return (EALREADY);
	}
	ump->um_qflags[type] |= QTF_OPENING|QTF_CLOSING;
	UFS_UNLOCK(ump);
	if ((error = dqopen(vp, ump, type)) != 0) {
		VOP_UNLOCK(vp, 0);
		UFS_LOCK(ump);
		ump->um_qflags[type] &= ~(QTF_OPENING|QTF_CLOSING);
		UFS_UNLOCK(ump);
		(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
		VFS_UNLOCK_GIANT(vfslocked);
		vfs_unbusy(mp);
		return (error);
	}
	VOP_UNLOCK(vp, 0);
	MNT_ILOCK(mp);
	mp->mnt_flag |= MNT_QUOTA;
	MNT_IUNLOCK(mp);

	vpp = &ump->um_quotas[type];
	if (*vpp != vp)
		quotaoff1(td, mp, type);

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	vp->v_vflag |= VV_SYSTEM;
	VOP_UNLOCK(vp, 0);
	*vpp = vp;
	VFS_UNLOCK_GIANT(vfslocked);
	/*
	 * Save the credential of the process that turned on quotas.
	 * Set up the time limits for this quota.
	 */
	ump->um_cred[type] = crhold(td->td_ucred);
	ump->um_btime[type] = MAX_DQ_TIME;
	ump->um_itime[type] = MAX_IQ_TIME;
	if (dqget(NULLVP, 0, ump, type, &dq) == 0) {
		if (dq->dq_btime > 0)
			ump->um_btime[type] = dq->dq_btime;
		if (dq->dq_itime > 0)
			ump->um_itime[type] = dq->dq_itime;
		dqrele(NULLVP, dq);
	}
	/*
	 * Allow the getdq from getinoquota below to read the quota
	 * from file.
	 */
	UFS_LOCK(ump);
	ump->um_qflags[type] &= ~QTF_CLOSING;
	UFS_UNLOCK(ump);
	/*
	 * Search vnodes associated with this mount point,
	 * adding references to quota file being opened.
	 * NB: only need to add dquot's for inodes being modified.
	 */
again:
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
			goto again;
		}
		if (vp->v_type == VNON || vp->v_writecount == 0) {
			VOP_UNLOCK(vp, 0);
			vrele(vp);
			continue;
		}
		error = getinoquota(VTOI(vp));
		VOP_UNLOCK(vp, 0);
		vrele(vp);
		if (error) {
			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
			break;
		}
	}

	if (error)
		quotaoff_inchange(td, mp, type);
	UFS_LOCK(ump);
	ump->um_qflags[type] &= ~QTF_OPENING;
	KASSERT((ump->um_qflags[type] & QTF_CLOSING) == 0,
		("quotaon: leaking flags"));
	UFS_UNLOCK(ump);

	vfs_unbusy(mp);
	return (error);
}
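
For context, a hedged userspace sketch: quotaon() above is reached through quotactl(2) with the Q_QUOTAON command.  The mount point and quota file path are illustrative.

#include <sys/types.h>
#include <ufs/ufs/quota.h>
#include <err.h>
#include <unistd.h>

int
main(void)
{
	char qfile[] = "/home/quota.user";

	/* Enable user quotas on /home, backed by the illustrative file. */
	if (quotactl("/home", QCMD(Q_QUOTAON, USRQUOTA), 0, qfile) != 0)
		err(1, "quotactl");
	return (0);
}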
Example #19
/*
 * Main code to turn off disk quotas for a filesystem. Does not change
 * flags.
 */
static int
quotaoff1(struct thread *td, struct mount *mp, int type)
{
	struct vnode *vp;
	struct vnode *qvp, *mvp;
	struct ufsmount *ump;
	struct dquot *dq;
	struct inode *ip;
	struct ucred *cr;
	int vfslocked;
	int error;

	ump = VFSTOUFS(mp);

	UFS_LOCK(ump);
	KASSERT((ump->um_qflags[type] & QTF_CLOSING) != 0,
		("quotaoff1: flags are invalid"));
	if ((qvp = ump->um_quotas[type]) == NULLVP) {
		UFS_UNLOCK(ump);
		return (0);
	}
	cr = ump->um_cred[type];
	UFS_UNLOCK(ump);

	/*
	 * Search vnodes associated with this mount point,
	 * deleting any references to quota file being closed.
	 */
again:
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		if (vp->v_type == VNON) {
			VI_UNLOCK(vp);
			continue;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
			goto again;
		}
		ip = VTOI(vp);
		dq = ip->i_dquot[type];
		ip->i_dquot[type] = NODQUOT;
		dqrele(vp, dq);
		VOP_UNLOCK(vp, 0);
		vrele(vp);
	}

	dqflush(qvp);
	/*
	 * Clear um_quotas before closing the quota vnode to prevent
	 * access to the closed vnode from dqget/dqsync.
	 */
	UFS_LOCK(ump);
	ump->um_quotas[type] = NULLVP;
	ump->um_cred[type] = NOCRED;
	UFS_UNLOCK(ump);

	vfslocked = VFS_LOCK_GIANT(qvp->v_mount);
	vn_lock(qvp, LK_EXCLUSIVE | LK_RETRY);
	qvp->v_vflag &= ~VV_SYSTEM;
	VOP_UNLOCK(qvp, 0);
	error = vn_close(qvp, FREAD|FWRITE, td->td_ucred, td);
	VFS_UNLOCK_GIANT(vfslocked);
	crfree(cr);

	return (error);
}
/*
 * Flush all pending data to disk.  This operation will block.
 */
static int
alq_doio(struct alq *alq)
{
	struct thread *td;
	struct mount *mp;
	struct vnode *vp;
	struct uio auio;
	struct iovec aiov[2];
	struct ale *ale;
	struct ale *alstart;
	int totlen;
	int iov;
	int vfslocked;

	vp = alq->aq_vp;
	td = curthread;
	totlen = 0;
	iov = 0;

	alstart = ale = alq->aq_entvalid;
	alq->aq_entvalid = NULL;

	bzero(&aiov, sizeof(aiov));
	bzero(&auio, sizeof(auio));

	do {
		if (aiov[iov].iov_base == NULL)
			aiov[iov].iov_base = ale->ae_data;
		aiov[iov].iov_len += alq->aq_entlen;
		totlen += alq->aq_entlen;
		/* Check to see if we're wrapping the buffer */
		if (ale->ae_data + alq->aq_entlen != ale->ae_next->ae_data)
			iov++;
		ale->ae_flags &= ~AE_VALID;
		ale = ale->ae_next;
	} while (ale->ae_flags & AE_VALID);

	alq->aq_flags |= AQ_FLUSHING;
	ALQ_UNLOCK(alq);

	if (iov == 2 || aiov[iov].iov_base == NULL)
		iov--;

	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	auio.uio_iovcnt = iov + 1;
	auio.uio_resid = totlen;
	auio.uio_td = td;

	/*
	 * Do all of the junk required to write now.
	 */
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/*
	 * XXX: VOP_WRITE error checks are ignored.
	 */
#ifdef MAC
	if (mac_vnode_check_write(alq->aq_cred, NOCRED, vp) == 0)
#endif
		VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, alq->aq_cred);
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);

	ALQ_LOCK(alq);
	alq->aq_flags &= ~AQ_FLUSHING;

	if (alq->aq_entfree == NULL)
		alq->aq_entfree = alstart;

	if (alq->aq_flags & AQ_WANTED) {
		alq->aq_flags &= ~AQ_WANTED;
		return (1);
	}

	return(0);
}
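/*
 * A reduced sketch of the write bracket used by alq_doio() above: take Giant
 * if the filesystem needs it, protect the mount against suspension, lock the
 * vnode, issue the VOP_WRITE(), then unwind in the opposite order.  The
 * helper name is illustrative and error handling is kept minimal; IO_APPEND
 * makes the filesystem position the write at end of file.
 */
static int
append_buf(struct vnode *vp, void *buf, size_t len, struct ucred *cred)
{
	struct thread *td = curthread;
	struct mount *mp;
	struct uio auio;
	struct iovec aiov;
	int error, vfslocked;

	bzero(&auio, sizeof(auio));
	bzero(&aiov, sizeof(aiov));
	aiov.iov_base = buf;
	aiov.iov_len = len;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_resid = len;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	auio.uio_td = td;

	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}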
Example #21
/*
 * Obtain a dquot structure for the specified identifier and quota file
 * reading the information from the file if necessary.
 */
static int
dqget(struct vnode *vp, u_long id, struct ufsmount *ump, int type,
    struct dquot **dqp)
{
	uint8_t buf[sizeof(struct dqblk64)];
	off_t base, recsize;
	struct dquot *dq, *dq1;
	struct dqhash *dqh;
	struct vnode *dqvp;
	struct iovec aiov;
	struct uio auio;
	int vfslocked, dqvplocked, error;

#ifdef DEBUG_VFS_LOCKS
	if (vp != NULLVP)
		ASSERT_VOP_ELOCKED(vp, "dqget");
#endif

	if (vp != NULLVP && *dqp != NODQUOT) {
		return (0);
	}

	/*
	 * XXX: Disallow negative id values to prevent the
	 * creation of 100GB+ quota data files.
	 */
	if ((int)id < 0)
		return (EINVAL);

	UFS_LOCK(ump);
	dqvp = ump->um_quotas[type];
	if (dqvp == NULLVP || (ump->um_qflags[type] & QTF_CLOSING)) {
		*dqp = NODQUOT;
		UFS_UNLOCK(ump);
		return (EINVAL);
	}
	vref(dqvp);
	UFS_UNLOCK(ump);
	error = 0;
	dqvplocked = 0;

	/*
	 * Check the cache first.
	 */
	dqh = DQHASH(dqvp, id);
	DQH_LOCK();
	dq = dqhashfind(dqh, id, dqvp);
	if (dq != NULL) {
		DQH_UNLOCK();
hfound:		DQI_LOCK(dq);
		DQI_WAIT(dq, PINOD+1, "dqget");
		DQI_UNLOCK(dq);
		if (dq->dq_ump == NULL) {
			dqrele(vp, dq);
			dq = NODQUOT;
			error = EIO;
		}
		*dqp = dq;
		vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
		if (dqvplocked)
			vput(dqvp);
		else
			vrele(dqvp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (error);
	}

	/*
	 * Quota vnode lock is before DQ_LOCK. Acquire dqvp lock there
	 * since new dq will appear on the hash chain DQ_LOCKed.
	 */
	if (vp != dqvp) {
		DQH_UNLOCK();
		vn_lock(dqvp, LK_SHARED | LK_RETRY);
		dqvplocked = 1;
		DQH_LOCK();
		/*
		 * Recheck the cache after sleep for quota vnode lock.
		 */
		dq = dqhashfind(dqh, id, dqvp);
		if (dq != NULL) {
			DQH_UNLOCK();
			goto hfound;
		}
	}

	/*
	 * Not in cache, allocate a new one or take it from the
	 * free list.
	 */
	if (TAILQ_FIRST(&dqfreelist) == NODQUOT &&
	    numdquot < MAXQUOTAS * desiredvnodes)
		desireddquot += DQUOTINC;
	if (numdquot < desireddquot) {
		numdquot++;
		DQH_UNLOCK();
		dq1 = malloc(sizeof *dq1, M_DQUOT, M_WAITOK | M_ZERO);
		mtx_init(&dq1->dq_lock, "dqlock", NULL, MTX_DEF);
		DQH_LOCK();
		/*
		 * Recheck the cache after sleep for memory.
		 */
		dq = dqhashfind(dqh, id, dqvp);
		if (dq != NULL) {
			numdquot--;
			DQH_UNLOCK();
			mtx_destroy(&dq1->dq_lock);
			free(dq1, M_DQUOT);
			goto hfound;
		}
		dq = dq1;
	} else {
		if ((dq = TAILQ_FIRST(&dqfreelist)) == NULL) {
			DQH_UNLOCK();
			tablefull("dquot");
			*dqp = NODQUOT;
			vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
			if (dqvplocked)
				vput(dqvp);
			else
				vrele(dqvp);
			VFS_UNLOCK_GIANT(vfslocked);
			return (EUSERS);
		}
		if (dq->dq_cnt || (dq->dq_flags & DQ_MOD))
			panic("dqget: free dquot isn't %p", dq);
		TAILQ_REMOVE(&dqfreelist, dq, dq_freelist);
		if (dq->dq_ump != NULL)
			LIST_REMOVE(dq, dq_hash);
	}

	/*
	 * Dq is put into hash already locked to prevent parallel
	 * usage while it is being read from file.
	 */
	dq->dq_flags = DQ_LOCK;
	dq->dq_id = id;
	dq->dq_type = type;
	dq->dq_ump = ump;
	LIST_INSERT_HEAD(dqh, dq, dq_hash);
	DQREF(dq);
	DQH_UNLOCK();

	/*
	 * Read the requested quota record from the quota file, performing
	 * any necessary conversions.
	 */
	if (ump->um_qflags[type] & QTF_64BIT) {
		recsize = sizeof(struct dqblk64);
		base = sizeof(struct dqhdr64);
	} else {
		recsize = sizeof(struct dqblk32);
		base = 0;
	}
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	aiov.iov_base = buf;
	aiov.iov_len = recsize;
	auio.uio_resid = recsize;
	auio.uio_offset = base + id * recsize;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_td = (struct thread *)0;

	vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
	error = VOP_READ(dqvp, &auio, 0, ump->um_cred[type]);
	if (auio.uio_resid == recsize && error == 0) {
		bzero(&dq->dq_dqb, sizeof(dq->dq_dqb));
	} else {
		if (ump->um_qflags[type] & QTF_64BIT)
			dqb64_dq((struct dqblk64 *)buf, dq);
		else
			dqb32_dq((struct dqblk32 *)buf, dq);
	}
	if (dqvplocked)
		vput(dqvp);
	else
		vrele(dqvp);
	VFS_UNLOCK_GIANT(vfslocked);
	/*
	 * I/O error in reading quota file, release
	 * quota structure and reflect problem to caller.
	 */
	if (error) {
		DQH_LOCK();
		dq->dq_ump = NULL;
		LIST_REMOVE(dq, dq_hash);
		DQH_UNLOCK();
		DQI_LOCK(dq);
		if (dq->dq_flags & DQ_WANT)
			wakeup(dq);
		dq->dq_flags = 0;
		DQI_UNLOCK(dq);
		dqrele(vp, dq);
		*dqp = NODQUOT;
		return (error);
	}
	DQI_LOCK(dq);
	/*
	 * Check for no limit to enforce.
	 * Initialize time values if necessary.
	 */
	if (dq->dq_isoftlimit == 0 && dq->dq_bsoftlimit == 0 &&
	    dq->dq_ihardlimit == 0 && dq->dq_bhardlimit == 0)
		dq->dq_flags |= DQ_FAKE;
	if (dq->dq_id != 0) {
		if (dq->dq_btime == 0) {
			dq->dq_btime = time_second + ump->um_btime[type];
			if (dq->dq_bsoftlimit &&
			    dq->dq_curblocks >= dq->dq_bsoftlimit)
				dq->dq_flags |= DQ_MOD;
		}
		if (dq->dq_itime == 0) {
			dq->dq_itime = time_second + ump->um_itime[type];
			if (dq->dq_isoftlimit &&
			    dq->dq_curinodes >= dq->dq_isoftlimit)
				dq->dq_flags |= DQ_MOD;
		}
	}
	DQI_WAKEUP(dq);
	DQI_UNLOCK(dq);
	*dqp = dq;
	return (0);
}
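/*
 * The record offset computed by dqget() (and by dqsync() below) is plain
 * array indexing: an optional fixed header followed by one fixed-size record
 * per id.  A sketch of that arithmetic, assuming the dqhdr64/dqblk64/dqblk32
 * layouts referenced above; for example, with the 64-bit format id 1000 lives
 * at sizeof(struct dqhdr64) + 1000 * sizeof(struct dqblk64).
 */
static off_t
quota_record_offset(u_long id, int is64bit)
{
	off_t base, recsize;

	if (is64bit) {
		base = sizeof(struct dqhdr64);
		recsize = sizeof(struct dqblk64);
	} else {
		base = 0;
		recsize = sizeof(struct dqblk32);
	}
	return (base + (off_t)id * recsize);
}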
Example #22
/*
 * The map entries can *almost* be read with programs like cat.  However,
 * large maps need special programs to read.  It is not easy to implement
 * a program that can sense the required size of the buffer, and then
 * subsequently do a read with the appropriate size.  This operation cannot
 * be atomic.  The best that we can do is to allow the program to do a read
 * with an arbitrarily large buffer, and return as much as we can.  We can
 * return an error code if the buffer is too small (EFBIG), then the program
 * can try a bigger buffer.
 */
int
procfs_doprocmap(PFS_FILL_ARGS)
{
	struct vmspace *vm;
	vm_map_t map;
	vm_map_entry_t entry, tmp_entry;
	struct vnode *vp;
	char *fullpath, *freepath;
	struct uidinfo *uip;
	int error, vfslocked;
	unsigned int last_timestamp;
#ifdef COMPAT_FREEBSD32
	int wrap32 = 0;
#endif

	PROC_LOCK(p);
	error = p_candebug(td, p);
	PROC_UNLOCK(p);
	if (error)
		return (error);

	if (uio->uio_rw != UIO_READ)
		return (EOPNOTSUPP);

#ifdef COMPAT_FREEBSD32
	if (curproc->p_sysent->sv_flags & SV_ILP32) {
		if (!(p->p_sysent->sv_flags & SV_ILP32))
			return (EOPNOTSUPP);
		wrap32 = 1;
	}
#endif

	vm = vmspace_acquire_ref(p);
	if (vm == NULL)
		return (ESRCH);
	map = &vm->vm_map;
	vm_map_lock_read(map);
	for (entry = map->header.next; entry != &map->header;
	     entry = entry->next) {
		vm_object_t obj, tobj, lobj;
		int ref_count, shadow_count, flags;
		vm_offset_t e_start, e_end, addr;
		int resident, privateresident;
		char *type;
		vm_eflags_t e_eflags;
		vm_prot_t e_prot;

		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
			continue;

		e_eflags = entry->eflags;
		e_prot = entry->protection;
		e_start = entry->start;
		e_end = entry->end;
		privateresident = 0;
		obj = entry->object.vm_object;
		if (obj != NULL) {
			VM_OBJECT_LOCK(obj);
			if (obj->shadow_count == 1)
				privateresident = obj->resident_page_count;
		}
		uip = (entry->uip) ? entry->uip : (obj ? obj->uip : NULL);

		resident = 0;
		addr = entry->start;
		while (addr < entry->end) {
			if (pmap_extract(map->pmap, addr))
				resident++;
			addr += PAGE_SIZE;
		}

		for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) {
			if (tobj != obj)
				VM_OBJECT_LOCK(tobj);
			if (lobj != obj)
				VM_OBJECT_UNLOCK(lobj);
			lobj = tobj;
		}
		last_timestamp = map->timestamp;
		vm_map_unlock_read(map);

		freepath = NULL;
		fullpath = "-";
		if (lobj) {
			switch (lobj->type) {
			default:
			case OBJT_DEFAULT:
				type = "default";
				vp = NULL;
				break;
			case OBJT_VNODE:
				type = "vnode";
				vp = lobj->handle;
				vref(vp);
				break;
			case OBJT_SWAP:
				type = "swap";
				vp = NULL;
				break;
			case OBJT_SG:
			case OBJT_DEVICE:
				type = "device";
				vp = NULL;
				break;
			}
			if (lobj != obj)
				VM_OBJECT_UNLOCK(lobj);

			flags = obj->flags;
			ref_count = obj->ref_count;
			shadow_count = obj->shadow_count;
			VM_OBJECT_UNLOCK(obj);
			if (vp != NULL) {
				vn_fullpath(td, vp, &fullpath, &freepath);
				vfslocked = VFS_LOCK_GIANT(vp->v_mount);
				vrele(vp);
				VFS_UNLOCK_GIANT(vfslocked);
			}
		} else {
			type = "none";
			flags = 0;
			ref_count = 0;
			shadow_count = 0;
		}

		/*
		 * format:
		 *  start, end, resident, private resident, cow, access, type,
		 *         charged, charged uid.
		 */
		error = sbuf_printf(sb,
		    "0x%lx 0x%lx %d %d %p %s%s%s %d %d 0x%x %s %s %s %s %s %d\n",
			(u_long)e_start, (u_long)e_end,
			resident, privateresident,
#ifdef COMPAT_FREEBSD32
			wrap32 ? NULL : obj,	/* Hide 64 bit value */
#else
			obj,
#endif
			(e_prot & VM_PROT_READ)?"r":"-",
			(e_prot & VM_PROT_WRITE)?"w":"-",
			(e_prot & VM_PROT_EXECUTE)?"x":"-",
			ref_count, shadow_count, flags,
			(e_eflags & MAP_ENTRY_COW)?"COW":"NCOW",
			(e_eflags & MAP_ENTRY_NEEDS_COPY)?"NC":"NNC",
			type, fullpath,
			uip ? "CH":"NCH", uip ? uip->ui_uid : -1);

		if (freepath != NULL)
			free(freepath, M_TEMP);
		vm_map_lock_read(map);
		if (error == -1) {
			error = 0;
			break;
		}
		if (last_timestamp != map->timestamp) {
			/*
			 * Look again for the entry because the map was
			 * modified while it was unlocked.  Specifically,
			 * the entry may have been clipped, merged, or deleted.
			 */
			vm_map_lookup_entry(map, e_end - 1, &tmp_entry);
			entry = tmp_entry;
		}
	}
	vm_map_unlock_read(map);
	vmspace_free(vm);
	return (error);
}
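/*
 * A userland sketch of the retry protocol described in the comment above
 * procfs_doprocmap(): read the whole map in one go and, if the kernel reports
 * that the buffer was too small (EFBIG), double the buffer and try again.
 * The path assumes procfs is mounted on /proc; the starting buffer size is
 * arbitrary.
 */
#include <sys/types.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "/proc/curproc/map";
	size_t size = 64 * 1024;
	char *buf = NULL;
	ssize_t n;
	int fd;

	fd = open(path, O_RDONLY);
	if (fd == -1)
		err(1, "open %s", path);
	for (;;) {
		if ((buf = realloc(buf, size)) == NULL)
			err(1, "realloc");
		n = pread(fd, buf, size, 0);
		if (n >= 0)
			break;
		if (errno != EFBIG)
			err(1, "read %s", path);
		size *= 2;	/* Buffer too small; retry with a larger one. */
	}
	fwrite(buf, 1, (size_t)n, stdout);
	free(buf);
	close(fd);
	return (0);
}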
/*
 * Create the queue data structure, allocate the buffer, and open the file.
 */
int
alq_open(struct alq **alqp, const char *file, struct ucred *cred, int cmode,
    int size, int count)
{
	struct thread *td;
	struct nameidata nd;
	struct ale *ale;
	struct ale *alp;
	struct alq *alq;
	char *bufp;
	int flags;
	int error;
	int i, vfslocked;

	*alqp = NULL;
	td = curthread;

	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, file, td);
	flags = FWRITE | O_NOFOLLOW | O_CREAT;

	error = vn_open_cred(&nd, &flags, cmode, 0, cred, NULL);
	if (error)
		return (error);

	vfslocked = NDHASGIANT(&nd);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	/* We just unlock so we hold a reference */
	VOP_UNLOCK(nd.ni_vp, 0);
	VFS_UNLOCK_GIANT(vfslocked);

	alq = malloc(sizeof(*alq), M_ALD, M_WAITOK|M_ZERO);
	alq->aq_entbuf = malloc(count * size, M_ALD, M_WAITOK|M_ZERO);
	alq->aq_first = malloc(sizeof(*ale) * count, M_ALD, M_WAITOK|M_ZERO);
	alq->aq_vp = nd.ni_vp;
	alq->aq_cred = crhold(cred);
	alq->aq_entmax = count;
	alq->aq_entlen = size;
	alq->aq_entfree = alq->aq_first;

	mtx_init(&alq->aq_mtx, "ALD Queue", NULL, MTX_SPIN|MTX_QUIET);

	bufp = alq->aq_entbuf;
	ale = alq->aq_first;
	alp = NULL;

	/* Match up entries with buffers */
	for (i = 0; i < count; i++) {
		if (alp)
			alp->ae_next = ale;
		ale->ae_data = bufp;
		alp = ale;
		ale++;
		bufp += size;
	}

	alp->ae_next = alq->aq_first;

	if ((error = ald_add(alq)) != 0)
		return (error);
	*alqp = alq;

	return (0);
}
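/*
 * A sketch of how a caller might use the queue created above, assuming the
 * alq_write()/alq_close() interfaces documented in ALQ(9).  The log path,
 * record size and entry count are illustrative; every record handed to
 * alq_write() must be exactly the fixed entry size given to alq_open().
 */
static void
alq_usage_example(void)
{
	struct alq *alq;
	char rec[80];
	int error;

	error = alq_open(&alq, "/var/log/example.alq", curthread->td_ucred,
	    0600, sizeof(rec), 128);
	if (error != 0)
		return;
	snprintf(rec, sizeof(rec), "hello from alq");
	(void)alq_write(alq, rec, ALQ_WAITOK);	/* May sleep for a free entry. */
	alq_close(alq);				/* Flushes and closes the file. */
}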
Example #24
/*
 * Set up nameidata for a lookup() call and do it.
 *
 * If pubflag is set, this call is done for a lookup operation on the
 * public filehandle. In that case we allow crossing mountpoints and
 * absolute pathnames. However, the caller is expected to check that
 * the lookup result is within the public fs, and deny access if
 * it is not.
 *
 * nfs_namei() clears out fields that namei() might leave filled with garbage.
 * This is mainly ni_vp and ni_dvp when an error occurs, and ni_dvp when no
 * error occurs but the parent was not requested.
 *
 * dirp may be set whether an error is returned or not, and must be
 * released by the caller.
 */
int
nfs_namei(struct nameidata *ndp, struct nfsrv_descript *nfsd,
    fhandle_t *fhp, int len, struct nfssvc_sock *slp,
    struct sockaddr *nam, struct mbuf **mdp,
    caddr_t *dposp, struct vnode **retdirp, int v3, struct vattr *retdirattrp,
    int *retdirattr_retp, int pubflag)
{
	int i, rem;
	struct mbuf *md;
	char *fromcp, *tocp, *cp;
	struct iovec aiov;
	struct uio auio;
	struct vnode *dp;
	int error, rdonly, linklen;
	struct componentname *cnp = &ndp->ni_cnd;
	int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0;
	int dvfslocked;
	int vfslocked;

	vfslocked = 0;
	dvfslocked = 0;
	*retdirp = NULL;
	cnp->cn_flags |= NOMACCHECK;
	cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);

	/*
	 * Copy the name from the mbuf list to ndp->ni_pnbuf
	 * and set the various ndp fields appropriately.
	 */
	fromcp = *dposp;
	tocp = cnp->cn_pnbuf;
	md = *mdp;
	rem = mtod(md, caddr_t) + md->m_len - fromcp;
	for (i = 0; i < len; i++) {
		while (rem == 0) {
			md = md->m_next;
			if (md == NULL) {
				error = EBADRPC;
				goto out;
			}
			fromcp = mtod(md, caddr_t);
			rem = md->m_len;
		}
		if (*fromcp == '\0' || (!pubflag && *fromcp == '/')) {
			error = EACCES;
			goto out;
		}
		*tocp++ = *fromcp++;
		rem--;
	}
	*tocp = '\0';
	*mdp = md;
	*dposp = fromcp;
	len = nfsm_rndup(len)-len;
	if (len > 0) {
		if (rem >= len)
			*dposp += len;
		else if ((error = nfs_adv(mdp, dposp, len, rem)) != 0)
			goto out;
	}

	/*
	 * Extract and set starting directory.
	 */
	error = nfsrv_fhtovp(fhp, FALSE, &dp, &dvfslocked,
	    nfsd, slp, nam, &rdonly, pubflag);
	if (error)
		goto out;
	vfslocked = VFS_LOCK_GIANT(dp->v_mount);
	if (dp->v_type != VDIR) {
		vrele(dp);
		error = ENOTDIR;
		goto out;
	}

	if (rdonly)
		cnp->cn_flags |= RDONLY;

	/*
	 * Set return directory.  Reference to dp is implicitly transfered
	 * to the returned pointer
	 */
	*retdirp = dp;
	if (v3) {
		vn_lock(dp, LK_EXCLUSIVE | LK_RETRY);
		*retdirattr_retp = VOP_GETATTR(dp, retdirattrp,
			ndp->ni_cnd.cn_cred);
		VOP_UNLOCK(dp, 0);
	}

	if (pubflag) {
		/*
		 * Oh joy. For WebNFS, handle those pesky '%' escapes,
		 * and the 'native path' indicator.
		 */
		cp = uma_zalloc(namei_zone, M_WAITOK);
		fromcp = cnp->cn_pnbuf;
		tocp = cp;
		if ((unsigned char)*fromcp >= WEBNFS_SPECCHAR_START) {
			switch ((unsigned char)*fromcp) {
			case WEBNFS_NATIVE_CHAR:
				/*
				 * 'Native' path for us is the same
				 * as a path according to the NFS spec,
				 * just skip the escape char.
				 */
				fromcp++;
				break;
			/*
			 * More may be added in the future, range 0x80-0xff
			 */
			default:
				error = EIO;
				uma_zfree(namei_zone, cp);
				goto out;
			}
		}
		/*
		 * Translate the '%' escapes, URL-style.
		 */
		while (*fromcp != '\0') {
			if (*fromcp == WEBNFS_ESC_CHAR) {
				if (fromcp[1] != '\0' && fromcp[2] != '\0') {
					fromcp++;
					*tocp++ = HEXSTRTOI(fromcp);
					fromcp += 2;
					continue;
				} else {
					error = ENOENT;
					uma_zfree(namei_zone, cp);
					goto out;
				}
			} else
				*tocp++ = *fromcp++;
		}
		*tocp = '\0';
		uma_zfree(namei_zone, cnp->cn_pnbuf);
		cnp->cn_pnbuf = cp;
	}

	ndp->ni_pathlen = (tocp - cnp->cn_pnbuf) + 1;
	ndp->ni_segflg = UIO_SYSSPACE;

	if (pubflag) {
		ndp->ni_rootdir = rootvnode;
		ndp->ni_loopcnt = 0;
		if (cnp->cn_pnbuf[0] == '/') {
			int tvfslocked;

			tvfslocked = VFS_LOCK_GIANT(rootvnode->v_mount);
			VFS_UNLOCK_GIANT(vfslocked);
			dp = rootvnode;
			vfslocked = tvfslocked;
		}
	} else {
		cnp->cn_flags |= NOCROSSMOUNT;
	}

	/*
	 * Initialize for scan, set ni_startdir and bump ref on dp again
	 * because lookup() will dereference ni_startdir.
	 */

	cnp->cn_thread = curthread;
	VREF(dp);
	ndp->ni_startdir = dp;

	if (!lockleaf)
		cnp->cn_flags |= LOCKLEAF;
	for (;;) {
		cnp->cn_nameptr = cnp->cn_pnbuf;
		/*
		 * Call lookup() to do the real work.  If an error occurs,
		 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
		 * we do not have to dereference anything before returning.
		 * In either case ni_startdir will be dereferenced and NULLed
		 * out.
		 */
		if (vfslocked)
			ndp->ni_cnd.cn_flags |= GIANTHELD;
		error = lookup(ndp);
		vfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0;
		ndp->ni_cnd.cn_flags &= ~GIANTHELD;
		if (error)
			break;

		/*
		 * Check for encountering a symbolic link.  Trivial
		 * termination occurs if no symlink encountered.
		 * Note: zfree is safe because error is 0, so we will
		 * not zfree it again when we break.
		 */
		if ((cnp->cn_flags & ISSYMLINK) == 0) {
			if (cnp->cn_flags & (SAVENAME | SAVESTART))
				cnp->cn_flags |= HASBUF;
			else
				uma_zfree(namei_zone, cnp->cn_pnbuf);
			if (ndp->ni_vp && !lockleaf)
				VOP_UNLOCK(ndp->ni_vp, 0);
			break;
		}

		/*
		 * Validate symlink
		 */
		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
			VOP_UNLOCK(ndp->ni_dvp, 0);
		if (!pubflag) {
			error = EINVAL;
			goto badlink2;
		}

		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
			error = ELOOP;
			goto badlink2;
		}
		if (ndp->ni_pathlen > 1)
			cp = uma_zalloc(namei_zone, M_WAITOK);
		else
			cp = cnp->cn_pnbuf;
		aiov.iov_base = cp;
		aiov.iov_len = MAXPATHLEN;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = 0;
		auio.uio_rw = UIO_READ;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_td = NULL;
		auio.uio_resid = MAXPATHLEN;
		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
		if (error) {
		badlink1:
			if (ndp->ni_pathlen > 1)
				uma_zfree(namei_zone, cp);
		badlink2:
			vput(ndp->ni_vp);
			vrele(ndp->ni_dvp);
			break;
		}
		linklen = MAXPATHLEN - auio.uio_resid;
		if (linklen == 0) {
			error = ENOENT;
			goto badlink1;
		}
		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
			error = ENAMETOOLONG;
			goto badlink1;
		}

		/*
		 * Adjust or replace path
		 */
		if (ndp->ni_pathlen > 1) {
			bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
			uma_zfree(namei_zone, cnp->cn_pnbuf);
			cnp->cn_pnbuf = cp;
		} else
			cnp->cn_pnbuf[linklen] = '\0';
		ndp->ni_pathlen += linklen;

		/*
		 * Cleanup refs for next loop and check if root directory
		 * should replace current directory.  Normally ni_dvp
		 * becomes the new base directory and is cleaned up when
		 * we loop.  Explicitly null pointers after invalidation
		 * to clarify operation.
		 */
		vput(ndp->ni_vp);
		ndp->ni_vp = NULL;

		if (cnp->cn_pnbuf[0] == '/') {
			vrele(ndp->ni_dvp);
			ndp->ni_dvp = ndp->ni_rootdir;
			VREF(ndp->ni_dvp);
		}
		ndp->ni_startdir = ndp->ni_dvp;
		ndp->ni_dvp = NULL;
	}
	if (!lockleaf)
		cnp->cn_flags &= ~LOCKLEAF;
	if (cnp->cn_flags & GIANTHELD) {
		mtx_unlock(&Giant);
		cnp->cn_flags &= ~GIANTHELD;
	}

	/*
	 * nfs_namei() guarantees that fields will not contain garbage
	 * whether an error occurs or not.  This allows the caller to track
	 * cleanup state trivially.
	 */
out:
	if (error) {
		uma_zfree(namei_zone, cnp->cn_pnbuf);
		ndp->ni_vp = NULL;
		ndp->ni_dvp = NULL;
		ndp->ni_startdir = NULL;
		cnp->cn_flags &= ~HASBUF;
		VFS_UNLOCK_GIANT(vfslocked);
		vfslocked = 0;
	} else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
		ndp->ni_dvp = NULL;
	}
	/*
	 * This differs from normal namei() in that even on failure we may
	 * return with Giant held due to the dirp return.  Make sure we have
	 * not recursed, however.  The calling code only expects to drop one
	 * acquire.
	 */
	if (vfslocked || dvfslocked)
		ndp->ni_cnd.cn_flags |= GIANTHELD;
	if (vfslocked && dvfslocked)
		VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
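/*
 * A standalone sketch of the WebNFS '%' escape translation performed above.
 * The kernel loop relies on HEXSTRTOI() to convert two hex digits; the helper
 * names here are illustrative and the digits are validated explicitly.
 * Returns 0 on success and -1 on a truncated or malformed escape, mirroring
 * the ENOENT case in the code above.
 */
static int
hexval(int c)
{
	if (c >= '0' && c <= '9')
		return (c - '0');
	if (c >= 'a' && c <= 'f')
		return (c - 'a' + 10);
	if (c >= 'A' && c <= 'F')
		return (c - 'A' + 10);
	return (-1);
}

static int
webnfs_unescape(const char *from, char *to)
{
	int hi, lo;

	while (*from != '\0') {
		if (*from == '%') {
			hi = hexval(from[1]);
			lo = hexval(from[2]);
			if (hi < 0 || lo < 0)
				return (-1);
			*to++ = (char)((hi << 4) | lo);
			from += 3;
		} else
			*to++ = *from++;
	}
	*to = '\0';
	return (0);
}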
Example #25
/*
 * Update the disk quota in the quota file.
 */
static int
dqsync(struct vnode *vp, struct dquot *dq)
{
	uint8_t buf[sizeof(struct dqblk64)];
	off_t base, recsize;
	struct vnode *dqvp;
	struct iovec aiov;
	struct uio auio;
	int vfslocked, error;
	struct mount *mp;
	struct ufsmount *ump;

#ifdef DEBUG_VFS_LOCKS
	if (vp != NULL)
		ASSERT_VOP_ELOCKED(vp, "dqsync");
#endif

	mp = NULL;
	error = 0;
	if (dq == NODQUOT)
		panic("dqsync: dquot");
	if ((ump = dq->dq_ump) == NULL)
		return (0);
	UFS_LOCK(ump);
	if ((dqvp = ump->um_quotas[dq->dq_type]) == NULLVP)
		panic("dqsync: file");
	vref(dqvp);
	UFS_UNLOCK(ump);

	vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
	DQI_LOCK(dq);
	if ((dq->dq_flags & DQ_MOD) == 0) {
		DQI_UNLOCK(dq);
		vrele(dqvp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (0);
	}
	DQI_UNLOCK(dq);

	(void) vn_start_secondary_write(dqvp, &mp, V_WAIT);
	if (vp != dqvp)
		vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY);

	VFS_UNLOCK_GIANT(vfslocked);
	DQI_LOCK(dq);
	DQI_WAIT(dq, PINOD+2, "dqsync");
	if ((dq->dq_flags & DQ_MOD) == 0)
		goto out;
	dq->dq_flags |= DQ_LOCK;
	DQI_UNLOCK(dq);

	/*
	 * Write the quota record to the quota file, performing any
	 * necessary conversions.  See dqget() for additional details.
	 */
	if (ump->um_qflags[dq->dq_type] & QTF_64BIT) {
		dq_dqb64(dq, (struct dqblk64 *)buf);
		recsize = sizeof(struct dqblk64);
		base = sizeof(struct dqhdr64);
	} else {
		dq_dqb32(dq, (struct dqblk32 *)buf);
		recsize = sizeof(struct dqblk32);
		base = 0;
	}

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	aiov.iov_base = buf;
	aiov.iov_len = recsize;
	auio.uio_resid = recsize;
	auio.uio_offset = base + dq->dq_id * recsize;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	auio.uio_td = (struct thread *)0;
	vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
	error = VOP_WRITE(dqvp, &auio, 0, dq->dq_ump->um_cred[dq->dq_type]);
	VFS_UNLOCK_GIANT(vfslocked);
	if (auio.uio_resid && error == 0)
		error = EIO;

	DQI_LOCK(dq);
	DQI_WAKEUP(dq);
	dq->dq_flags &= ~DQ_MOD;
out:
	DQI_UNLOCK(dq);
	vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
	if (vp != dqvp)
		vput(dqvp);
	else
		vrele(dqvp);
	vn_finished_secondary_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
Example #26
/* this call, unlike osi_FlushText, is supposed to discard caches that may
   contain invalid information if a file is written remotely, but that may
   contain valid information that needs to be written back if the file is
   being written locally.  It doesn't subsume osi_FlushText, since the latter
   function may be needed to flush caches that are invalidated by local writes.

   avc->pvnLock is already held, avc->lock is guaranteed not to be held (by
   us, of course).
*/
void
osi_FlushPages(struct vcache *avc, afs_ucred_t *credp)
{
#ifdef AFS_FBSD70_ENV
    int vfslocked;
#endif
    afs_hyper_t origDV;
#if defined(AFS_CACHE_BYPASS)
    /* The optimization to check DV under read lock below is identical to a
     * change in CITI cache bypass work.  The problem CITI found in 1999
     * was that this code and background daemon doing prefetching competed
     * for the vcache entry shared lock.  It's not clear to me from the
     * tech report, but it looks like CITI fixed the general prefetch code
     * path as a bonus when experimenting on prefetch for cache bypass, see
     * citi-tr-01-3.
     */
#endif
    ObtainReadLock(&avc->lock);
    /* If we've already purged this version, or if we're the ones
     * writing this version, don't flush it (could lose the
     * data we're writing). */
    if ((hcmp((avc->f.m.DataVersion), (avc->mapDV)) <= 0)
	|| ((avc->execsOrWriters > 0) && afs_DirtyPages(avc))) {
	ReleaseReadLock(&avc->lock);
	return;
    }
    ReleaseReadLock(&avc->lock);
    ObtainWriteLock(&avc->lock, 10);
    /* Check again */
    if ((hcmp((avc->f.m.DataVersion), (avc->mapDV)) <= 0)
	|| ((avc->execsOrWriters > 0) && afs_DirtyPages(avc))) {
	ReleaseWriteLock(&avc->lock);
	return;
    }
    if (hiszero(avc->mapDV)) {
	hset(avc->mapDV, avc->f.m.DataVersion);
	ReleaseWriteLock(&avc->lock);
	return;
    }

    AFS_STATCNT(osi_FlushPages);
    hset(origDV, avc->f.m.DataVersion);
    afs_Trace3(afs_iclSetp, CM_TRACE_FLUSHPAGES, ICL_TYPE_POINTER, avc,
	       ICL_TYPE_INT32, origDV.low, ICL_TYPE_INT32, avc->f.m.Length);

    ReleaseWriteLock(&avc->lock);
#ifdef AFS_FBSD70_ENV
    vfslocked = VFS_LOCK_GIANT(AFSTOV(avc)->v_mount);
#endif
#ifndef AFS_FBSD70_ENV
    AFS_GUNLOCK();
#endif
    osi_VM_FlushPages(avc, credp);
#ifndef AFS_FBSD70_ENV
    AFS_GLOCK();
#endif
#ifdef AFS_FBSD70_ENV
    VFS_UNLOCK_GIANT(vfslocked);
#endif
    ObtainWriteLock(&avc->lock, 88);

    /* do this last, and to the original version, since stores may occur
     * while executing the above PUTPAGE call */
    hset(avc->mapDV, origDV);
    ReleaseWriteLock(&avc->lock);
}