Example #1
int
imageboot_setup()
{
	dev_t       dev;
	int         error = 0;
	char *root_path = NULL;

	DBG_TRACE("%s: entry\n", __FUNCTION__);

	MALLOC_ZONE(root_path, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
	if (root_path == NULL)
		return (ENOMEM);

	if(PE_parse_boot_argn("rp", root_path, MAXPATHLEN) == FALSE) {
		error = ENOENT;
		goto done;
	}

	printf("%s: root image url is %s\n", __FUNCTION__, root_path);
	error = di_root_image(root_path, rootdevice, &dev);
	if(error) {
		printf("%s: di_root_image failed: %d\n", __FUNCTION__, error);
		goto done;
	}

	rootdev = dev;
	mountroot = NULL;
	printf("%s: root device 0x%x\n", __FUNCTION__, rootdev);
	error = vfs_mountroot();

	if (error == 0 && rootvnode != NULL) {
		struct vnode *tvp;
		struct vnode *newdp;

		/*
		 * Get the vnode for '/'.
		 * Set fdp->fd_fd.fd_cdir to reference it.
		 */
		if (VFS_ROOT(TAILQ_LAST(&mountlist,mntlist), &newdp, vfs_context_kernel()))
			panic("%s: cannot find root vnode", __FUNCTION__);

		vnode_ref(newdp);
		vnode_put(newdp);
		tvp = rootvnode;
		vnode_rele(tvp);
		filedesc0.fd_cdir = newdp;
		rootvnode = newdp;
		mount_list_lock();
		TAILQ_REMOVE(&mountlist, TAILQ_FIRST(&mountlist), mnt_list);
		mount_list_unlock();
		mountlist.tqh_first->mnt_flag |= MNT_ROOTFS;
		DBG_TRACE("%s: root switched\n", __FUNCTION__);
	}
done:
	FREE_ZONE(root_path, MAXPATHLEN, M_NAMEI);

	DBG_TRACE("%s: exit\n", __FUNCTION__);

	return (error);
}
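The block above converts the short-term iocount returned by VFS_ROOT() into a long-term usecount before retiring the old root. A minimal sketch of that reference swap, assuming a hypothetical helper name (the vnode_ref/vnode_put/vnode_rele KPIs are real; the helper and its use are not from this source):

#include <sys/vnode.h>

/* Hypothetical helper: pin newdp long-term, drop its iocount, release the old root. */
static void
swap_root_reference(struct vnode *newdp, struct vnode **rootvpp)
{
	struct vnode *oldvp = *rootvpp;

	(void)vnode_ref(newdp);	/* usecount: keeps newdp from being reclaimed */
	vnode_put(newdp);	/* drop the iocount VFS_ROOT() returned */
	*rootvpp = newdp;
	vnode_rele(oldvp);	/* release the usecount held on the old root */
}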
Example #2
/*
 * Make a new nullfs node or get an existing one.
 * Vp is the alias vnode; lowervp is the lower vnode.
 *
 * lowervp is assumed to have an iocount on it from the caller.
 */
int
null_nodeget(
    struct mount * mp, struct vnode * lowervp, struct vnode * dvp, struct vnode ** vpp, struct componentname * cnp, int root)
{
	struct vnode * vp;
	int error;

	/* Look up the hash first. */
	error = null_hashget(mp, lowervp, vpp);
	/* ENOENT means it wasn't found, EIO is a failure we should bail from, and 0
	 * means it was found */
	if (error != ENOENT) {
		/* null_hashget checked the vid, so if we got something here it's
		 * legitimate to the best of our knowledge */
		/* if we found something then there is an iocount on vpp;
		   if we didn't find something then vpp shouldn't be used by the caller */
		return error;
	}

	/*
	 * We do not serialize vnode creation, instead we will check for
	 * duplicates later, when adding new vnode to hash.
	 */
	/* Take a reference on lowervp so the system knows we care about it. */
	error = vnode_ref(lowervp);
	if (error) {
		/* Failed to get a reference on the lower vp, so bail. lowervp may be gone already. */
		return error;
	}

	error = null_getnewvnode(mp, lowervp, dvp, &vp, cnp, root);

	if (error) {
		vnode_rele(lowervp);
		return (error);
	}

	/*
	 * Atomically insert our new node into the hash or vget existing
	 * if someone else has beaten us to it.
	 */
	error = null_hashins(mp, VTONULL(vp), vpp);
	if (error || *vpp != NULL) {
		/* recycle will call reclaim which will get rid of the internals */
		vnode_recycle(vp);
		vnode_put(vp);
		/* if we found vpp, then null_hashins put an iocount on it */
		return error;
	}

	/* vp has an iocount from null_getnewvnode */
	*vpp = vp;

	return (0);
}
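Under the contract in the header comment, a caller enters with an iocount on lowervp and, on success, leaves with an iocount on *vpp. A hedged sketch of such a caller (the wrapper name and its handling of lowervp are assumptions, not part of the nullfs sources):

/* Hypothetical caller: resolve an alias for lowervp, then return the caller-supplied iocount. */
static int
null_lookup_alias(struct mount *mp, struct vnode *lowervp, struct vnode *dvp,
    struct componentname *cnp, struct vnode **vpp)
{
	int error;

	error = null_nodeget(mp, lowervp, dvp, vpp, cnp, 0 /* not root */);
	/* On success *vpp carries an iocount; release it later with vnode_put(). */
	vnode_put(lowervp);	/* done with the iocount we were given */
	return error;
}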
Example #3
int
osi_lookupname(char *aname, enum uio_seg seg, int followlink,
	       struct vnode **vpp) {
    vfs_context_t ctx;
    int code, flags;

    flags = 0;
    if (!followlink)
	flags |= VNODE_LOOKUP_NOFOLLOW;
    ctx = vfs_context_create(NULL);
    code = vnode_lookup(aname, flags, vpp, ctx);
    if (!code) { /* get a usecount */
	vnode_ref(*vpp);
	vnode_put(*vpp);
    }
    vfs_context_rele(ctx);
    return code;
}
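Because osi_lookupname() trades the iocount from vnode_lookup() for a usecount (vnode_ref() followed by vnode_put()), the matching release is vnode_rele(), not vnode_put(). A short sketch under that assumption (the path and wrapper name are illustrative only):

/* Illustrative only: release a vnode obtained via osi_lookupname(). */
static void
example_lookup_release(void)
{
	char path[] = "/some/path";	/* hypothetical path */
	struct vnode *vp;

	if (osi_lookupname(path, UIO_SYSSPACE, 1 /* follow links */, &vp) == 0) {
		/* vp holds a usecount only; take an iocount with vnode_get() before VNOPs */
		vnode_rele(vp);
	}
}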
Example #4
/*
 *	Routine:	macx_swapon
 *	Function:
 *		Syscall interface to add a file to backing store
 */
int
macx_swapon(
	struct macx_swapon_args *args)
{
	int			size = args->size;
	vnode_t			vp = (vnode_t)NULL; 
	struct nameidata 	nd, *ndp;
	register int		error;
	kern_return_t		kr;
	mach_port_t		backing_store;
	memory_object_default_t	default_pager;
	int			i;
	boolean_t		funnel_state;
	off_t			file_size;
	vfs_context_t		ctx = vfs_context_current();
	struct proc		*p =  current_proc();
	int			dp_cluster_size;


	AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPON);
	AUDIT_ARG(value32, args->priority);

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	ndp = &nd;

	if ((error = suser(kauth_cred_get(), 0)))
		goto swapon_bailout;

	/*
	 * Get a vnode for the paging area.
	 */
	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
	       ((IS_64BIT_PROCESS(p)) ? UIO_USERSPACE64 : UIO_USERSPACE32),
	       (user_addr_t) args->filename, ctx);

	if ((error = namei(ndp)))
		goto swapon_bailout;
	nameidone(ndp);
	vp = ndp->ni_vp;

	if (vp->v_type != VREG) {
		error = EINVAL;
		goto swapon_bailout;
	}

	/* get file size */
	if ((error = vnode_size(vp, &file_size, ctx)) != 0)
		goto swapon_bailout;
#if CONFIG_MACF
	vnode_lock(vp);
	error = mac_system_check_swapon(vfs_context_ucred(ctx), vp);
	vnode_unlock(vp);
	if (error)
		goto swapon_bailout;
#endif

	/* resize to desired size if it's too small */
	if ((file_size < (off_t)size) && ((error = vnode_setsize(vp, (off_t)size, 0, ctx)) != 0))
		goto swapon_bailout;

	if (default_pager_init_flag == 0) {
		start_def_pager(NULL);
		default_pager_init_flag = 1;
	}

	/* add new backing store to list */
	i = 0;
	while (i < MAX_BACKING_STORE && bs_port_table[i].vp != 0)
		i++;
	if(i == MAX_BACKING_STORE) {
	   	error = ENOMEM;
		goto swapon_bailout;
	}

	/* remember the vnode. This vnode has namei() reference */
	bs_port_table[i].vp = vp;
	
	/*
	 * Look to see if we are already paging to this file.
	 */
	/* make certain the copy send of kernel call will work */
	default_pager = MEMORY_OBJECT_DEFAULT_NULL;
	kr = host_default_memory_manager(host_priv_self(), &default_pager, 0);
	if(kr != KERN_SUCCESS) {
	   error = EAGAIN;
	   bs_port_table[i].vp = 0;
	   goto swapon_bailout;
	}

	if (vp->v_mount->mnt_kern_flag & MNTK_SSD) {
		/*
		 * keep the cluster size small since the
		 * seek cost is effectively 0 which means
		 * we don't care much about fragmentation
		 */
		dp_isssd = TRUE;
		dp_cluster_size = 2 * PAGE_SIZE;
	} else {
		/*
		 * use the default cluster size
		 */
		dp_isssd = FALSE;
		dp_cluster_size = 0;
	}
	kr = default_pager_backing_store_create(default_pager, 
					-1, /* default priority */
					dp_cluster_size,
					&backing_store);
	memory_object_default_deallocate(default_pager);

	if(kr != KERN_SUCCESS) {
	   error = ENOMEM;
	   bs_port_table[i].vp = 0;
	   goto swapon_bailout;
	}

	/* Mark this vnode as being used for swapfile */
	vnode_lock_spin(vp);
	SET(vp->v_flag, VSWAP);
	vnode_unlock(vp);

	/*
	 * NOTE: we are able to supply PAGE_SIZE here instead of
	 *	an actual record size or block number because:
	 *	a: we do not support offsets from the beginning of the
	 *		file (allowing for non page size/record modulo offsets).
	 *	b: paging will be done modulo page size
	 */

	kr = default_pager_add_file(backing_store, (vnode_ptr_t) vp,
				PAGE_SIZE, (int)(file_size/PAGE_SIZE));
	if(kr != KERN_SUCCESS) {
	   bs_port_table[i].vp = 0;
	   if(kr == KERN_INVALID_ARGUMENT)
		error = EINVAL;
	   else 
		error = ENOMEM;

	   /* This vnode is not to be used for swapfile */
	   vnode_lock_spin(vp);
	   CLR(vp->v_flag, VSWAP);
	   vnode_unlock(vp);

	   goto swapon_bailout;
	}
	bs_port_table[i].bs = (void *)backing_store;
	error = 0;

	ubc_setthreadcred(vp, p, current_thread());

	/*
	 * take a long term reference on the vnode to keep
	 * vnreclaim() away from this vnode.
	 */
	vnode_ref(vp);

swapon_bailout:
	if (vp) {
		vnode_put(vp);
	}
	(void) thread_funnel_set(kernel_flock, FALSE);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
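The vnode_ref() near the end gives the swap vnode a long-term usecount so reclaim leaves it alone, while the namei() iocount is dropped in the bailout path. A hedged sketch of the matching teardown (the real macx_swapoff() does considerably more; the helper name is assumed):

/* Hypothetical teardown: undo the VSWAP marking and the long-term reference. */
static void
swap_release_vnode(vnode_t vp)
{
	vnode_lock_spin(vp);
	CLR(vp->v_flag, VSWAP);	/* no longer used as a swap file */
	vnode_unlock(vp);
	vnode_rele(vp);		/* drop the usecount taken at swapon time */
}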
Example #5
static int
zfs_vfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context)
{
	char	*osname = NULL;
	size_t  osnamelen = 0;
	int		error = 0;
	int		canwrite;
	/*
	 * Get the objset name (the "special" mount argument).
	 * The filesystem that we mount as root is defined in the
	 * "zfs-bootfs" property. 
	 */
	if (data) {
		user_addr_t fspec = USER_ADDR_NULL;
#ifndef __APPLE__
		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
		    DDI_PROP_DONTPASS, "zfs-bootfs", &zfs_bootpath) !=
		    DDI_SUCCESS)
			return (EIO);

		error = parse_bootpath(zfs_bootpath, rootfs.bo_name);
		ddi_prop_free(zfs_bootpath);
#endif
		osname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

		if (vfs_context_is64bit(context)) {
			if ( (error = copyin(data, (caddr_t)&fspec, sizeof(fspec))) )
				goto out;	
		} else {
#ifdef ZFS_LEOPARD_ONLY
			char *tmp;
#else
			user32_addr_t tmp;
#endif
			if ( (error = copyin(data, (caddr_t)&tmp, sizeof(tmp))) )
				goto out;	
			/* munge into LP64 addr */
			fspec = CAST_USER_ADDR_T(tmp);
		}
		if ( (error = copyinstr(fspec, osname, MAXPATHLEN, &osnamelen)) )
			goto out;
	}

#if 0
	if (mvp->v_type != VDIR)
		return (ENOTDIR);

	mutex_enter(&mvp->v_lock);
	if ((uap->flags & MS_REMOUNT) == 0 &&
	    (uap->flags & MS_OVERLAY) == 0 &&
	    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
		mutex_exit(&mvp->v_lock);
		return (EBUSY);
	}
	mutex_exit(&mvp->v_lock);

	/*
	 * ZFS does not support passing unparsed data in via MS_DATA.
	 * Users should use the MS_OPTIONSTR interface; this means
	 * that all option parsing is already done and the options struct
	 * can be interrogated.
	 */
	if ((uap->flags & MS_DATA) && uap->datalen > 0)
		return (EINVAL);

	/*
	 * Get the objset name (the "special" mount argument).
	 */
	if (error = pn_get(uap->spec, fromspace, &spn))
		return (error);

	osname = spn.pn_path;
#endif
	/*
	 * Check for mount privilege?
	 *
	 * If we don't have privilege then see if
	 * we have local permission to allow it
	 */
#ifndef __APPLE__
	error = secpolicy_fs_mount(cr, mvp, vfsp);
	if (error) {
		error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr);
		if (error == 0) {
			vattr_t		vattr;

			/*
			 * Make sure user is the owner of the mount point
			 * or has sufficient privileges.
			 */

			vattr.va_mask = AT_UID;

			if (error = VOP_GETATTR(mvp, &vattr, 0, cr)) {
				goto out;
			}

			if (error = secpolicy_vnode_owner(cr, vattr.va_uid)) {
				goto out;
			}

			if (error = VOP_ACCESS(mvp, VWRITE, 0, cr)) {
				goto out;
			}

			secpolicy_fs_mount_clearopts(cr, vfsp);
		} else {
			goto out;
		}
	}
#endif

	error = zfs_domount(mp, 0, osname, context);
	if (error)
		printf("zfs_vfs_mount: error %d\n", error);
	if (error == 0) {
		zfsvfs_t *zfsvfs = NULL;

		/* Make the Finder treat sub file systems just like a folder */
		if (strpbrk(osname, "/"))
			vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_DONTBROWSE));

		/* Indicate to VFS that we support ACLs. */
		vfs_setextendedsecurity(mp);

		/* Advisory locking should be handled at the VFS layer */
		vfs_setlocklocal(mp);

		/*
		 * Mac OS X needs a file system modify time
		 *
		 * We use the mtime of the "com.apple.system.mtime" 
		 * extended attribute, which is associated with the
		 * file system root directory.
		 *
		 * Here we need to take a ref on z_mtime_vp to keep it around.
		 * If the attribute isn't there, attempt to create it.
		 */
		zfsvfs = vfs_fsprivate(mp);
		if (zfsvfs->z_mtime_vp == NULL) {
			struct vnode * rvp;
			struct vnode *xdvp = NULLVP;
			struct vnode *xvp = NULLVP;
			znode_t *rootzp;
			timestruc_t modify_time;
			cred_t  *cr;
			timestruc_t  now;
			int flag;
			int result;

			if (zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp) != 0) {
				goto out;
			}
			rvp = ZTOV(rootzp);
			cr = (cred_t *)vfs_context_ucred(context);

			/* Grab the hidden attribute directory vnode. */
			result = zfs_get_xattrdir(rootzp, &xdvp, cr, CREATE_XATTR_DIR);
			vnode_put(rvp);	/* all done with root vnode */
			rvp = NULL;
			if (result) {
				goto out;
			}

			/*
			 * HACK - workaround missing vnode_setnoflush() KPI...
			 *
			 * We tag zfsvfs so that zfs_attach_vnode() can then set
			 * vnfs_marksystem when the vnode gets created.
			 */
			zfsvfs->z_last_unmount_time = 0xBADC0DE;
			zfsvfs->z_last_mtime_synced = VTOZ(xdvp)->z_id;
			flag = vfs_isrdonly(mp) ? 0 : ZEXISTS;
			/* Lookup or create the named attribute. */
			if (zfs_obtain_xattr(VTOZ(xdvp), ZFS_MTIME_XATTR,
			                     S_IRUSR | S_IWUSR, cr, &xvp, flag)) {
				zfsvfs->z_last_unmount_time = 0;
				zfsvfs->z_last_mtime_synced = 0;
				vnode_put(xdvp);
				goto out;
			}
			gethrestime(&now);
			ZFS_TIME_ENCODE(&now, VTOZ(xvp)->z_phys->zp_mtime);
			vnode_put(xdvp);
			vnode_ref(xvp);

			zfsvfs->z_mtime_vp = xvp;
			ZFS_TIME_DECODE(&modify_time, VTOZ(xvp)->z_phys->zp_mtime);
			zfsvfs->z_last_unmount_time = modify_time.tv_sec;
			zfsvfs->z_last_mtime_synced = modify_time.tv_sec;

			/*
			 * Keep this referenced vnode from impeding an unmount.
			 *
			 * XXX vnode_setnoflush() is MIA from KPI (see workaround above).
			 */
#if 0
			vnode_setnoflush(xvp);
#endif
			vnode_put(xvp);
		}
	}
out:
	if (osname) {
		kmem_free(osname, MAXPATHLEN);
	}
	return (error);
}
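Note the idiom on xvp: vnode_ref() pins it for the lifetime of the mount (as zfsvfs->z_mtime_vp) while vnode_put() drops the iocount so the vnode isn't held busy. A sketch of the counterpart release at unmount time (assumed; the real zfs_vfs_unmount is more involved):

/* Hypothetical unmount-side release of the mtime attribute vnode. */
static void
zfs_release_mtime_vnode(zfsvfs_t *zfsvfs)
{
	if (zfsvfs->z_mtime_vp != NULL) {
		vnode_rele(zfsvfs->z_mtime_vp);	/* undo the vnode_ref taken at mount */
		zfsvfs->z_mtime_vp = NULL;
	}
}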
Example #6
/*
 * Function: devfs_kernel_mount
 * Purpose:
 *   Mount devfs at the given mount point from within the kernel.
 */
int
devfs_kernel_mount(char * mntname)
{
	struct mount *mp;
	int error;
	struct nameidata nd;
	struct vnode  * vp;
	vfs_context_t ctx = vfs_context_kernel();
	struct vfstable *vfsp;

	/* Find our vfstable entry */
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strncmp(vfsp->vfc_name, "devfs", sizeof(vfsp->vfc_name)))
			break;
	
	if (!vfsp) {
		panic("Could not find entry in vfsconf for devfs.\n");
	} 

	/*
	 * Get vnode to be covered
	 */
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(mntname), ctx);
	if ((error = namei(&nd))) {
	    printf("devfs_kernel_mount: failed to find directory '%s', %d", 
		   mntname, error);
	    return (error);
	}
	nameidone(&nd);
	vp = nd.ni_vp;

	if ((error = VNOP_FSYNC(vp, MNT_WAIT, ctx))) {
	    printf("devfs_kernel_mount: vnop_fsync failed: %d\n", error);
	    vnode_put(vp);
	    return (error);
	}
	if ((error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0))) {
	    printf("devfs_kernel_mount: buf_invalidateblks failed: %d\n", error);
	    vnode_put(vp);
	    return (error);
	}
	if (vnode_isdir(vp) == 0) {
	    printf("devfs_kernel_mount: '%s' is not a directory\n", mntname);
	    vnode_put(vp);
	    return (ENOTDIR);
	}
	if ((vnode_mountedhere(vp))) {
	    vnode_put(vp);
	    return (EBUSY);
	}

	/*
	 * Allocate and initialize the filesystem.
	 */
	MALLOC_ZONE(mp, struct mount *, sizeof(struct mount),
		M_MOUNT, M_WAITOK);
	bzero((char *)mp, sizeof(struct mount));

	/* Initialize the default IO constraints */
	mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
	mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
	mp->mnt_ioflags = 0;
	mp->mnt_realrootvp = NULLVP;
	mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;

	mount_lock_init(mp);
	TAILQ_INIT(&mp->mnt_vnodelist);
	TAILQ_INIT(&mp->mnt_workerqueue);
	TAILQ_INIT(&mp->mnt_newvnodes);

	(void)vfs_busy(mp, LK_NOWAIT);
	mp->mnt_op = &devfs_vfsops;
	mp->mnt_vtable = vfsp;
	mp->mnt_flag = 0;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
	vp->v_mountedhere = mp;
	mp->mnt_vnodecovered = vp;
	mp->mnt_vfsstat.f_owner = kauth_cred_getuid(kauth_cred_get());
	(void) copystr(mntname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN - 1, 0);
#if CONFIG_MACF
	mac_mount_label_init(mp);
	mac_mount_label_associate(ctx, mp);
#endif

	error = devfs_mount(mp, NULL, USER_ADDR_NULL, ctx);

	if (error) {
	    printf("devfs_kernel_mount: mount %s failed: %d", mntname, error);
	    mp->mnt_vtable->vfc_refcount--;

	    vfs_unbusy(mp);

	    mount_lock_destroy(mp);
#if CONFIG_MACF
	    mac_mount_label_destroy(mp);
#endif
	    FREE_ZONE(mp, sizeof (struct mount), M_MOUNT);
	    vnode_put(vp);
	    return (error);
	}
	vnode_ref(vp);
	vnode_put(vp);
	vfs_unbusy(mp);
	mount_list_add(mp);
	return (0);
}
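The covered vnode is left holding only a usecount (vnode_ref() then vnode_put()), so it survives while the mount is in place without pinning an iocount. A hedged sketch of the complementary release (helper name assumed; a real unmount path would also tear down the mount structure):

/* Hypothetical helper: detach and release the covered vnode. */
static void
release_covered_vnode(struct vnode *vp)
{
	if (vnode_get(vp) == 0) {	/* fresh iocount on top of our usecount */
		vp->v_mountedhere = NULL;
		vnode_put(vp);
	}
	vnode_rele(vp);			/* drop the usecount taken at mount time */
}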
Example #7
struct vcache *
osi_dnlc_lookup(struct vcache *adp, char *aname, int locktype)
{
    struct vcache *tvc;
    unsigned int key, skey;
    char *ts = aname;
    struct nc *tnc;
    int safety;
#ifdef AFS_DARWIN80_ENV
    vnode_t tvp;
#endif

    if (!afs_usednlc)
      return 0;

    dnlcHash(ts, key);		/* leaves ts pointing at the NULL */
    if (ts - aname >= AFSNCNAMESIZE)
      return 0;
    skey = key & (NHSIZE - 1);

    TRACE(osi_dnlc_lookupT, skey);
    dnlcstats.lookups++;

    ObtainReadLock(&afs_xvcache);
    ObtainReadLock(&afs_xdnlc);

    for (tvc = NULL, tnc = nameHash[skey], safety = 0; tnc;
	 tnc = tnc->next, safety++) {
	if ( /* (tnc->key == key)  && */ (tnc->dirp == adp)
	    && (!strcmp((char *)tnc->name, aname))) {
	    tvc = tnc->vp;
	    break;
	} else if (tnc->next == nameHash[skey]) {	/* end of list */
	    break;
	} else if (safety > NCSIZE) {
	    afs_warn("DNLC cycle");
	    dnlcstats.cycles++;
	    ReleaseReadLock(&afs_xdnlc);
	    ReleaseReadLock(&afs_xvcache);
	    osi_dnlc_purge();
	    return (0);
	}
    }

    ReleaseReadLock(&afs_xdnlc);

    if (!tvc) {
	ReleaseReadLock(&afs_xvcache);
	dnlcstats.misses++;
    } else {
	if ((tvc->f.states & CVInit)
#ifdef  AFS_DARWIN80_ENV
	    ||(tvc->f.states & CDeadVnode)
#endif
	    )
	{
	    ReleaseReadLock(&afs_xvcache);
	    dnlcstats.misses++;
	    osi_dnlc_remove(adp, aname, tvc);
	    return 0;
	}
#if defined(AFS_DARWIN80_ENV)
	tvp = AFSTOV(tvc);
	if (vnode_get(tvp)) {
	    ReleaseReadLock(&afs_xvcache);
	    dnlcstats.misses++;
	    osi_dnlc_remove(adp, aname, tvc);
	    return 0;
	}
	if (vnode_ref(tvp)) {
	    ReleaseReadLock(&afs_xvcache);
	    AFS_GUNLOCK();
	    vnode_put(tvp);
	    AFS_GLOCK();
	    dnlcstats.misses++;
	    osi_dnlc_remove(adp, aname, tvc);
	    return 0;
	}
#else
	osi_vnhold(tvc, 0);
#endif
	ReleaseReadLock(&afs_xvcache);

    }

    return tvc;
}
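On AFS_DARWIN80_ENV a hit returned by this function carries both an iocount (vnode_get) and a usecount (vnode_ref), so consumers release in two steps. A sketch of such a consumer (hypothetical; any names beyond the AFS ones shown above are assumptions):

/* Hypothetical consumer of osi_dnlc_lookup() on Darwin. */
static void
example_dnlc_consumer(struct vcache *adp, char *name)
{
	struct vcache *tvc = osi_dnlc_lookup(adp, name, 0);

	if (tvc) {
		vnode_t vp = AFSTOV(tvc);
		/* ... use the vcache entry ... */
		vnode_put(vp);	/* drop the iocount from vnode_get */
		vnode_rele(vp);	/* drop the usecount from vnode_ref */
	}
}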
Example #8
void
bsd_init(void)
{
	struct uthread *ut;
	unsigned int i;
	struct vfs_context context;
	kern_return_t	ret;
	struct ucred temp_cred;
	struct posix_cred temp_pcred;
#if NFSCLIENT || CONFIG_IMAGEBOOT
	boolean_t       netboot = FALSE;
#endif

#define bsd_init_kprintf(x...) /* kprintf("bsd_init: " x) */

	throttle_init();

	printf(copyright);
	
	bsd_init_kprintf("calling kmeminit\n");
	kmeminit();
	
	bsd_init_kprintf("calling parse_bsd_args\n");
	parse_bsd_args();

#if CONFIG_DEV_KMEM
	bsd_init_kprintf("calling dev_kmem_init\n");
	dev_kmem_init();
#endif

	/* Initialize kauth subsystem before instancing the first credential */
	bsd_init_kprintf("calling kauth_init\n");
	kauth_init();

	/* Initialize process and pgrp structures. */
	bsd_init_kprintf("calling procinit\n");
	procinit();

	/* Initialize the ttys (MUST be before kminit()/bsd_autoconf()!)*/
	tty_init();

	kernproc = &proc0;	/* implicitly bzero'ed */

	/* kernel_task->proc = kernproc; */
	set_bsdtask_info(kernel_task,(void *)kernproc);

	/* give kernproc a name */
	bsd_init_kprintf("calling process_name\n");
	process_name("kernel_task", kernproc);

	/* allocate proc lock group attribute and group */
	bsd_init_kprintf("calling lck_grp_attr_alloc_init\n");
	proc_lck_grp_attr= lck_grp_attr_alloc_init();

	proc_lck_grp = lck_grp_alloc_init("proc",  proc_lck_grp_attr);
#if CONFIG_FINE_LOCK_GROUPS
	proc_slock_grp = lck_grp_alloc_init("proc-slock",  proc_lck_grp_attr);
	proc_fdmlock_grp = lck_grp_alloc_init("proc-fdmlock",  proc_lck_grp_attr);
	proc_ucred_mlock_grp = lck_grp_alloc_init("proc-ucred-mlock",  proc_lck_grp_attr);
	proc_mlock_grp = lck_grp_alloc_init("proc-mlock",  proc_lck_grp_attr);
#endif
	/* Allocate proc lock attribute */
	proc_lck_attr = lck_attr_alloc_init();
#if 0
#if __PROC_INTERNAL_DEBUG
	lck_attr_setdebug(proc_lck_attr);
#endif
#endif

#if CONFIG_FINE_LOCK_GROUPS
	proc_list_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr);
	proc_klist_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_mlock, proc_mlock_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_ucred_mlock, proc_ucred_mlock_grp, proc_lck_attr);
	lck_spin_init(&kernproc->p_slock, proc_slock_grp, proc_lck_attr);
#else
	proc_list_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
	proc_klist_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_mlock, proc_lck_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_fdmlock, proc_lck_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_ucred_mlock, proc_lck_grp, proc_lck_attr);
	lck_spin_init(&kernproc->p_slock, proc_lck_grp, proc_lck_attr);
#endif

	assert(bsd_simul_execs != 0);
	execargs_cache_lock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
	execargs_cache_size = bsd_simul_execs;
	execargs_free_count = bsd_simul_execs;
	execargs_cache = (vm_offset_t *)kalloc(bsd_simul_execs * sizeof(vm_offset_t));
	bzero(execargs_cache, bsd_simul_execs * sizeof(vm_offset_t));
	
	if (current_task() != kernel_task)
		printf("bsd_init: We have a problem, "
				"current task is not kernel task\n");
	
	bsd_init_kprintf("calling get_bsdthread_info\n");
	ut = (uthread_t)get_bsdthread_info(current_thread());

#if CONFIG_MACF
	/*
	 * Initialize the MAC Framework
	 */
	mac_policy_initbsd();
	kernproc->p_mac_enforce = 0;

#if defined (__i386__) || defined (__x86_64__)
	/*
	 * We currently only support this on i386/x86_64, as that is the
	 * only lock code we have instrumented so far.
	 */
	check_policy_init(policy_check_flags);
#endif
#endif /* MAC */

	/* Initialize System Override call */
	init_system_override();
	
	/*
	 * Create process 0.
	 */
	proc_list_lock();
	LIST_INSERT_HEAD(&allproc, kernproc, p_list);
	kernproc->p_pgrp = &pgrp0;
	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
	LIST_INIT(&pgrp0.pg_members);
#ifdef CONFIG_FINE_LOCK_GROUPS
	lck_mtx_init(&pgrp0.pg_mlock, proc_mlock_grp, proc_lck_attr);
#else
	lck_mtx_init(&pgrp0.pg_mlock, proc_lck_grp, proc_lck_attr);
#endif
	/* There is no other BSD thread at this point, so this is safe without the pgrp lock */
	LIST_INSERT_HEAD(&pgrp0.pg_members, kernproc, p_pglist);
	kernproc->p_listflag |= P_LIST_INPGRP;
	kernproc->p_pgrpid = 0;
	kernproc->p_uniqueid = 0;

	pgrp0.pg_session = &session0;
	pgrp0.pg_membercnt = 1;

	session0.s_count = 1;
	session0.s_leader = kernproc;
	session0.s_listflags = 0;
#ifdef CONFIG_FINE_LOCK_GROUPS
	lck_mtx_init(&session0.s_mlock, proc_mlock_grp, proc_lck_attr);
#else
	lck_mtx_init(&session0.s_mlock, proc_lck_grp, proc_lck_attr);
#endif
	LIST_INSERT_HEAD(SESSHASH(0), &session0, s_hash);
	proc_list_unlock();

	kernproc->task = kernel_task;
	
	kernproc->p_stat = SRUN;
	kernproc->p_flag = P_SYSTEM;
	kernproc->p_lflag = 0;
	kernproc->p_ladvflag = 0;
	
#if DEVELOPMENT || DEBUG
	if (bootarg_disable_aslr)
		kernproc->p_flag |= P_DISABLE_ASLR;
#endif

	kernproc->p_nice = NZERO;
	kernproc->p_pptr = kernproc;

	TAILQ_INIT(&kernproc->p_uthlist);
	TAILQ_INSERT_TAIL(&kernproc->p_uthlist, ut, uu_list);
	
	kernproc->sigwait = FALSE;
	kernproc->sigwait_thread = THREAD_NULL;
	kernproc->exit_thread = THREAD_NULL;
	kernproc->p_csflags = CS_VALID;

	/*
	 * Create credential.  This also Initializes the audit information.
	 */
	bsd_init_kprintf("calling bzero\n");
	bzero(&temp_cred, sizeof(temp_cred));
	bzero(&temp_pcred, sizeof(temp_pcred));
	temp_pcred.cr_ngroups = 1;
	/* kern_proc, shouldn't call up to DS for group membership */
	temp_pcred.cr_flags = CRF_NOMEMBERD;
	temp_cred.cr_audit.as_aia_p = audit_default_aia_p;
	
	bsd_init_kprintf("calling kauth_cred_create\n");
	/*
	 * We have to label the temp cred before we create from it to
	 * properly set cr_ngroups, or the create will fail.
	 */
	posix_cred_label(&temp_cred, &temp_pcred);
	kernproc->p_ucred = kauth_cred_create(&temp_cred); 

	/* update cred on proc */
	PROC_UPDATE_CREDS_ONPROC(kernproc);

	/* give the (already existing) initial thread a reference on it */
	bsd_init_kprintf("calling kauth_cred_ref\n");
	kauth_cred_ref(kernproc->p_ucred);
	ut->uu_context.vc_ucred = kernproc->p_ucred;
	ut->uu_context.vc_thread = current_thread();

	TAILQ_INIT(&kernproc->p_aio_activeq);
	TAILQ_INIT(&kernproc->p_aio_doneq);
	kernproc->p_aio_total_count = 0;
	kernproc->p_aio_active_count = 0;

	bsd_init_kprintf("calling file_lock_init\n");
	file_lock_init();

#if CONFIG_MACF
	mac_cred_label_associate_kernel(kernproc->p_ucred);
#endif

	/* Create the file descriptor table. */
	kernproc->p_fd = &filedesc0;
	filedesc0.fd_cmask = cmask;
	filedesc0.fd_knlistsize = -1;
	filedesc0.fd_knlist = NULL;
	filedesc0.fd_knhash = NULL;
	filedesc0.fd_knhashmask = 0;

	/* Create the limits structures. */
	kernproc->p_limit = &limit0;
	for (i = 0; i < sizeof(kernproc->p_rlimit)/sizeof(kernproc->p_rlimit[0]); i++)
		limit0.pl_rlimit[i].rlim_cur = 
			limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE;
	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = maxprocperuid;
	limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
	limit0.pl_rlimit[RLIMIT_STACK] = vm_initial_limit_stack;
	limit0.pl_rlimit[RLIMIT_DATA] = vm_initial_limit_data;
	limit0.pl_rlimit[RLIMIT_CORE] = vm_initial_limit_core;
	limit0.pl_refcnt = 1;

	kernproc->p_stats = &pstats0;
	kernproc->p_sigacts = &sigacts0;

	/*
	 * Charge root for one process: launchd.
	 */
	bsd_init_kprintf("calling chgproccnt\n");
	(void)chgproccnt(0, 1);

	/*
	 *	Allocate a kernel submap for pageable memory
	 *	for temporary copying (execve()).
	 */
	{
		vm_offset_t	minimum;

		bsd_init_kprintf("calling kmem_suballoc\n");
		assert(bsd_pageable_map_size != 0);
		ret = kmem_suballoc(kernel_map,
				&minimum,
				(vm_size_t)bsd_pageable_map_size,
				TRUE,
				VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_BSD),
				&bsd_pageable_map);
		if (ret != KERN_SUCCESS) 
			panic("bsd_init: Failed to allocate bsd pageable map");
	}

	/*
	 * Initialize buffers and hash links for buffers
	 *
	 * SIDE EFFECT: Starts a thread for bcleanbuf_thread(), so must
	 *		happen after a credential has been associated with
	 *		the kernel task.
	 */
	bsd_init_kprintf("calling bsd_bufferinit\n");
	bsd_bufferinit();

	/* Initialize the execve() semaphore */
	bsd_init_kprintf("calling semaphore_create\n");

	if (ret != KERN_SUCCESS)
		panic("bsd_init: Failed to create execve semaphore");

	/*
	 * Initialize the calendar.
	 */
	bsd_init_kprintf("calling IOKitInitializeTime\n");
	IOKitInitializeTime();

	bsd_init_kprintf("calling ubc_init\n");
	ubc_init();

	/*
	 * Initialize device-switches.
	 */
	bsd_init_kprintf("calling devsw_init() \n");
	devsw_init();

	/* Initialize the file systems. */
	bsd_init_kprintf("calling vfsinit\n");
	vfsinit();

#if CONFIG_PROC_UUID_POLICY
	/* Initialize proc_uuid_policy subsystem */
	bsd_init_kprintf("calling proc_uuid_policy_init()\n");
	proc_uuid_policy_init();
#endif

#if SOCKETS
	/* Initialize per-CPU cache allocator */
	mcache_init();

	/* Initialize mbufs. */
	bsd_init_kprintf("calling mbinit\n");
	mbinit();
	net_str_id_init(); /* for mbuf tags */
#endif /* SOCKETS */

	/*
	 * Initializes security event auditing.
	 * XXX: Should/could this occur later?
	 */
#if CONFIG_AUDIT
	bsd_init_kprintf("calling audit_init\n");
 	audit_init();  
#endif

	/* Initialize kqueues */
	bsd_init_kprintf("calling knote_init\n");
	knote_init();

	/* Initialize for async IO */
	bsd_init_kprintf("calling aio_init\n");
	aio_init();

	/* Initialize pipes */
	bsd_init_kprintf("calling pipeinit\n");
	pipeinit();

	/* Initialize SysV shm subsystem locks; the subsystem proper is
	 * initialized through a sysctl.
	 */
#if SYSV_SHM
	bsd_init_kprintf("calling sysv_shm_lock_init\n");
	sysv_shm_lock_init();
#endif
#if SYSV_SEM
	bsd_init_kprintf("calling sysv_sem_lock_init\n");
	sysv_sem_lock_init();
#endif
#if SYSV_MSG
	bsd_init_kprintf("sysv_msg_lock_init\n");
	sysv_msg_lock_init();
#endif
	bsd_init_kprintf("calling pshm_lock_init\n");
	pshm_lock_init();
	bsd_init_kprintf("calling psem_lock_init\n");
	psem_lock_init();

	pthread_init();
	/* POSIX Shm and Sem */
	bsd_init_kprintf("calling pshm_cache_init\n");
	pshm_cache_init();
	bsd_init_kprintf("calling psem_cache_init\n");
	psem_cache_init();
	bsd_init_kprintf("calling time_zone_slock_init\n");
	time_zone_slock_init();
	bsd_init_kprintf("calling select_waitq_init\n");
	select_waitq_init();

	/*
	 * Initialize protocols.  Block reception of incoming packets
	 * until everything is ready.
	 */
	bsd_init_kprintf("calling sysctl_register_fixed\n");
	sysctl_register_fixed(); 
	bsd_init_kprintf("calling sysctl_mib_init\n");
	sysctl_mib_init();
#if NETWORKING
	bsd_init_kprintf("calling dlil_init\n");
	dlil_init();
	bsd_init_kprintf("calling proto_kpi_init\n");
	proto_kpi_init();
#endif /* NETWORKING */
#if SOCKETS
	bsd_init_kprintf("calling socketinit\n");
	socketinit();
	bsd_init_kprintf("calling domaininit\n");
	domaininit();
	iptap_init();
#if FLOW_DIVERT
	flow_divert_init();
#endif	/* FLOW_DIVERT */
#endif /* SOCKETS */

	kernproc->p_fd->fd_cdir = NULL;
	kernproc->p_fd->fd_rdir = NULL;

#if CONFIG_FREEZE
#ifndef CONFIG_MEMORYSTATUS
    #error "CONFIG_FREEZE defined without matching CONFIG_MEMORYSTATUS"
#endif
	/* Initialise background freezing */
	bsd_init_kprintf("calling memorystatus_freeze_init\n");
	memorystatus_freeze_init();
#endif

#if CONFIG_MEMORYSTATUS
	/* Initialize kernel memory status notifications */
	bsd_init_kprintf("calling memorystatus_init\n");
	memorystatus_init();
#endif /* CONFIG_MEMORYSTATUS */

	bsd_init_kprintf("calling macx_init\n");
	macx_init();

	bsd_init_kprintf("calling acct_init\n");
	acct_init();

#ifdef GPROF
	/* Initialize kernel profiling. */
	kmstartup();
#endif

	bsd_init_kprintf("calling bsd_autoconf\n");
	bsd_autoconf();

#if CONFIG_DTRACE
	dtrace_postinit();
#endif

	/*
	 * We attach the loopback interface *way* down here to ensure
	 * it happens after autoconf(), otherwise it becomes the
	 * "primary" interface.
	 */
#include <loop.h>
#if NLOOP > 0
	bsd_init_kprintf("calling loopattach\n");
	loopattach();			/* XXX */
#endif
#if NGIF
	/* Initialize gif interface (after lo0) */
	gif_init();
#endif

#if PFLOG
	/* Initialize packet filter log interface */
	pfloginit();
#endif /* PFLOG */

#if NETHER > 0
	/* Register the built-in dlil ethernet interface family */
	bsd_init_kprintf("calling ether_family_init\n");
	ether_family_init();
#endif /* ETHER */

#if NETWORKING
	/* Call any kext code that wants to run just after network init */
	bsd_init_kprintf("calling net_init_run\n");
	net_init_run();
	
#if CONTENT_FILTER
	cfil_init();
#endif

#if PACKET_MANGLER
	pkt_mnglr_init();
#endif	

#if NECP
	/* Initialize Network Extension Control Policies */
	necp_init();
#endif

	netagent_init();

	/* register user tunnel kernel control handler */
	utun_register_control();
#if IPSEC
	ipsec_register_control();
#endif /* IPSEC */
	netsrc_init();
	nstat_init();
	tcp_cc_init();
#if MPTCP
	mptcp_control_register();
#endif /* MPTCP */
#endif /* NETWORKING */

	bsd_init_kprintf("calling vnode_pager_bootstrap\n");
	vnode_pager_bootstrap();

	bsd_init_kprintf("calling inittodr\n");
	inittodr(0);

	/* Mount the root file system. */
	while( TRUE) {
		int err;

		bsd_init_kprintf("calling setconf\n");
		setconf();
#if NFSCLIENT
		netboot = (mountroot == netboot_mountroot);
#endif

		bsd_init_kprintf("vfs_mountroot\n");
		if (0 == (err = vfs_mountroot()))
			break;
		rootdevice[0] = '\0';
#if NFSCLIENT
		if (netboot) {
			PE_display_icon( 0, "noroot");  /* XXX a netboot-specific icon would be nicer */
			vc_progress_set(FALSE, 0);
			for (i=1; 1; i*=2) {
				printf("bsd_init: failed to mount network root, error %d, %s\n",
					err, PE_boot_args());
				printf("We are hanging here...\n");
				IOSleep(i*60*1000);
			}
			/*NOTREACHED*/
		}
#endif
		printf("cannot mount root, errno = %d\n", err);
		boothowto |= RB_ASKNAME;
	}

	IOSecureBSDRoot(rootdevice);

	context.vc_thread = current_thread();
	context.vc_ucred = kernproc->p_ucred;
	mountlist.tqh_first->mnt_flag |= MNT_ROOTFS;

	bsd_init_kprintf("calling VFS_ROOT\n");
	/* Get the vnode for '/'.  Set fdp->fd_fd.fd_cdir to reference it. */
	if (VFS_ROOT(mountlist.tqh_first, &rootvnode, &context))
		panic("bsd_init: cannot find root vnode: %s", PE_boot_args());
	rootvnode->v_flag |= VROOT;
	(void)vnode_ref(rootvnode);
	(void)vnode_put(rootvnode);
	filedesc0.fd_cdir = rootvnode;

#if NFSCLIENT
	if (netboot) {
		int err;

		netboot = TRUE;
		/* post mount setup */
		if ((err = netboot_setup()) != 0) {
			PE_display_icon( 0, "noroot");  /* XXX a netboot-specific icon would be nicer */
			vc_progress_set(FALSE, 0);
			for (i=1; 1; i*=2) {
				printf("bsd_init: NetBoot could not find root, error %d: %s\n",
					err, PE_boot_args());
				printf("We are hanging here...\n");
				IOSleep(i*60*1000);
			}
			/*NOTREACHED*/
		}
	}
#endif
	

#if CONFIG_IMAGEBOOT
	/*
	 * See if a system disk image is present. If so, mount it and
	 * switch the root vnode to point to it
	 */ 
	if (netboot == FALSE && imageboot_needed()) {
		/* 
		 * An image was found.  No turning back: we're booted
		 * with a kernel from the disk image.
		 */
		imageboot_setup(); 
	}
#endif /* CONFIG_IMAGEBOOT */
  
	/* set initial time; all other resource data is already zero'ed */
	microtime_with_abstime(&kernproc->p_start, &kernproc->p_stats->ps_start);

#if DEVFS
	{
	    char mounthere[] = "/dev";	/* !const because of internal casting */

	    bsd_init_kprintf("calling devfs_kernel_mount\n");
	    devfs_kernel_mount(mounthere);
	}
#endif /* DEVFS */

	/* Initialize signal state for process 0. */
	bsd_init_kprintf("calling siginit\n");
	siginit(kernproc);

	bsd_init_kprintf("calling bsd_utaskbootstrap\n");
	bsd_utaskbootstrap();

#if defined(__LP64__)
	kernproc->p_flag |= P_LP64;
#endif

	pal_kernel_announce();

	bsd_init_kprintf("calling mountroot_post_hook\n");

	/* invoke post-root-mount hook */
	if (mountroot_post_hook != NULL)
		mountroot_post_hook();

#if 0 /* not yet */
	consider_zone_gc(FALSE);
#endif


	bsd_init_kprintf("done\n");
}
Example #9
static errno_t
fuse_vfsop_mount(mount_t mp, __unused vnode_t devvp, user_addr_t udata,
                 vfs_context_t context)
{
    int err      = 0;
    int mntopts  = 0;
    bool mounted = false;

    uint32_t drandom  = 0;
    uint32_t max_read = ~0;

    size_t len;

    fuse_device_t      fdev = NULL;
    struct fuse_data  *data = NULL;
    fuse_mount_args    fusefs_args;
    struct vfsstatfs  *vfsstatfsp = vfs_statfs(mp);

    kern_return_t kr;
    thread_t      init_thread;

#if M_OSXFUSE_ENABLE_BIG_LOCK
    fuse_biglock_t    *biglock;
#endif

    fuse_trace_printf_vfsop();

    if (vfs_isupdate(mp)) {
        return ENOTSUP;
    }

    err = copyin(udata, &fusefs_args, sizeof(fusefs_args));
    if (err) {
        return EINVAL;
    }

    /*
     * Interesting flags that we can receive from mount or may want to
     * otherwise forcibly set include:
     *
     *     MNT_ASYNC
     *     MNT_AUTOMOUNTED
     *     MNT_DEFWRITE
     *     MNT_DONTBROWSE
     *     MNT_IGNORE_OWNERSHIP
     *     MNT_JOURNALED
     *     MNT_NODEV
     *     MNT_NOEXEC
     *     MNT_NOSUID
     *     MNT_NOUSERXATTR
     *     MNT_RDONLY
     *     MNT_SYNCHRONOUS
     *     MNT_UNION
     */

#if M_OSXFUSE_ENABLE_UNSUPPORTED
    vfs_setlocklocal(mp);
#endif /* M_OSXFUSE_ENABLE_UNSUPPORTED */

    /** Option Processing. **/

    if (fusefs_args.altflags & FUSE_MOPT_FSTYPENAME) {
        size_t typenamelen = strlen(fusefs_args.fstypename);
        if ((typenamelen <= 0) || (typenamelen > FUSE_FSTYPENAME_MAXLEN)) {
            return EINVAL;
        }
        snprintf(vfsstatfsp->f_fstypename, MFSTYPENAMELEN, "%s%s",
                 OSXFUSE_FSTYPENAME_PREFIX, fusefs_args.fstypename);
    }

    if ((fusefs_args.daemon_timeout > FUSE_MAX_DAEMON_TIMEOUT) ||
        (fusefs_args.daemon_timeout < FUSE_MIN_DAEMON_TIMEOUT)) {
        return EINVAL;
    }

    if (fusefs_args.altflags & FUSE_MOPT_SPARSE) {
        mntopts |= FSESS_SPARSE;
    }

    if (fusefs_args.altflags & FUSE_MOPT_SLOW_STATFS) {
        mntopts |= FSESS_SLOW_STATFS;
    }

    if (fusefs_args.altflags & FUSE_MOPT_AUTO_CACHE) {
        mntopts |= FSESS_AUTO_CACHE;
    }

    if (fusefs_args.altflags & FUSE_MOPT_AUTO_XATTR) {
        if (fusefs_args.altflags & FUSE_MOPT_NATIVE_XATTR) {
            return EINVAL;
        }
        mntopts |= FSESS_AUTO_XATTR;
    } else if (fusefs_args.altflags & FUSE_MOPT_NATIVE_XATTR) {
        mntopts |= FSESS_NATIVE_XATTR;
    }

    if (fusefs_args.altflags & FUSE_MOPT_NO_BROWSE) {
        vfs_setflags(mp, MNT_DONTBROWSE);
    }

    if (fusefs_args.altflags & FUSE_MOPT_JAIL_SYMLINKS) {
        mntopts |= FSESS_JAIL_SYMLINKS;
    }

    /*
     * Note that unlike Linux, which keeps allow_root in user-space and
     * passes allow_other in that case to the kernel, we let allow_root
     * reach the kernel. The 'if' ordering is important here.
     */
    if (fusefs_args.altflags & FUSE_MOPT_ALLOW_ROOT) {
        int is_member = 0;
        if ((kauth_cred_ismember_gid(kauth_cred_get(), fuse_admin_group,
                                     &is_member) == 0) && is_member) {
            mntopts |= FSESS_ALLOW_ROOT;
        } else {
            IOLog("OSXFUSE: caller not a member of OSXFUSE admin group (%d)\n",
                  fuse_admin_group);
            return EPERM;
        }
    } else if (fusefs_args.altflags & FUSE_MOPT_ALLOW_OTHER) {
        if (!fuse_allow_other && !fuse_vfs_context_issuser(context)) {
            int is_member = 0;
            if ((kauth_cred_ismember_gid(kauth_cred_get(), fuse_admin_group,
                                         &is_member) != 0) || !is_member) {
                return EPERM;
            }
        }
        mntopts |= FSESS_ALLOW_OTHER;
    }

    if (fusefs_args.altflags & FUSE_MOPT_NO_APPLEDOUBLE) {
        mntopts |= FSESS_NO_APPLEDOUBLE;
    }

    if (fusefs_args.altflags & FUSE_MOPT_NO_APPLEXATTR) {
        mntopts |= FSESS_NO_APPLEXATTR;
    }

    if ((fusefs_args.altflags & FUSE_MOPT_FSID) && (fusefs_args.fsid != 0)) {
        fsid_t   fsid;
        mount_t  other_mp;
        uint32_t target_dev;

        target_dev = FUSE_MAKEDEV(FUSE_CUSTOM_FSID_DEVICE_MAJOR,
                                  fusefs_args.fsid);

        fsid.val[0] = target_dev;
        fsid.val[1] = FUSE_CUSTOM_FSID_VAL1;

        other_mp = vfs_getvfs(&fsid);
        if (other_mp != NULL) {
            return EPERM;
        }

        vfsstatfsp->f_fsid.val[0] = target_dev;
        vfsstatfsp->f_fsid.val[1] = FUSE_CUSTOM_FSID_VAL1;

    } else {
        vfs_getnewfsid(mp);
    }

    if (fusefs_args.altflags & FUSE_MOPT_NO_LOCALCACHES) {
        mntopts |= FSESS_NO_ATTRCACHE;
        mntopts |= FSESS_NO_READAHEAD;
        mntopts |= FSESS_NO_UBC;
        mntopts |= FSESS_NO_VNCACHE;
    }

    if (fusefs_args.altflags & FUSE_MOPT_NO_ATTRCACHE) {
        mntopts |= FSESS_NO_ATTRCACHE;
    }

    if (fusefs_args.altflags & FUSE_MOPT_NO_READAHEAD) {
        mntopts |= FSESS_NO_READAHEAD;
    }

    if (fusefs_args.altflags & (FUSE_MOPT_NO_UBC | FUSE_MOPT_DIRECT_IO)) {
        mntopts |= FSESS_NO_UBC;
    }

    if (fusefs_args.altflags & FUSE_MOPT_NO_VNCACHE) {
        mntopts |= FSESS_NO_VNCACHE;
    }

    if (fusefs_args.altflags & FUSE_MOPT_NEGATIVE_VNCACHE) {
        if (mntopts & FSESS_NO_VNCACHE) {
            return EINVAL;
        }
        mntopts |= FSESS_NEGATIVE_VNCACHE;
    }

    if (fusefs_args.altflags & FUSE_MOPT_NO_SYNCWRITES) {

        /* Cannot mix 'nosyncwrites' with 'noubc' or 'noreadahead'. */
        if (mntopts & (FSESS_NO_READAHEAD | FSESS_NO_UBC)) {
            return EINVAL;
        }

        mntopts |= FSESS_NO_SYNCWRITES;
        vfs_clearflags(mp, MNT_SYNCHRONOUS);
        vfs_setflags(mp, MNT_ASYNC);

        /* We check for this only if we have nosyncwrites in the first place. */
        if (fusefs_args.altflags & FUSE_MOPT_NO_SYNCONCLOSE) {
            mntopts |= FSESS_NO_SYNCONCLOSE;
        }

    } else {
        vfs_clearflags(mp, MNT_ASYNC);
        vfs_setflags(mp, MNT_SYNCHRONOUS);
    }

    if (mntopts & FSESS_NO_UBC) {
        /* If no buffer cache, disallow exec from file system. */
        vfs_setflags(mp, MNT_NOEXEC);
    }

    vfs_setauthopaque(mp);
    vfs_setauthopaqueaccess(mp);

    if ((fusefs_args.altflags & FUSE_MOPT_DEFAULT_PERMISSIONS) &&
        (fusefs_args.altflags & FUSE_MOPT_DEFER_PERMISSIONS)) {
        return EINVAL;
    }

    if (fusefs_args.altflags & FUSE_MOPT_DEFAULT_PERMISSIONS) {
        mntopts |= FSESS_DEFAULT_PERMISSIONS;
        vfs_clearauthopaque(mp);
    }

    if (fusefs_args.altflags & FUSE_MOPT_DEFER_PERMISSIONS) {
        mntopts |= FSESS_DEFER_PERMISSIONS;
    }

    if (fusefs_args.altflags & FUSE_MOPT_EXTENDED_SECURITY) {
        mntopts |= FSESS_EXTENDED_SECURITY;
        vfs_setextendedsecurity(mp);
    }

    if (fusefs_args.altflags & FUSE_MOPT_LOCALVOL) {
        mntopts |= FSESS_LOCALVOL;
        vfs_setflags(mp, MNT_LOCAL);
    }

    /* done checking incoming option bits */

    err = 0;

    vfs_setfsprivate(mp, NULL);

    fdev = fuse_device_get(fusefs_args.rdev);
    if (!fdev) {
        return EINVAL;
    }

    fuse_device_lock(fdev);

    drandom = fuse_device_get_random(fdev);
    if (fusefs_args.random != drandom) {
        fuse_device_unlock(fdev);
        IOLog("OSXFUSE: failing mount because of mismatched random\n");
        return EINVAL;
    }

    data = fuse_device_get_mpdata(fdev);

    if (!data) {
        fuse_device_unlock(fdev);
        return ENXIO;
    }

#if M_OSXFUSE_ENABLE_BIG_LOCK
    biglock = data->biglock;
    fuse_biglock_lock(biglock);
#endif

    if (data->mount_state != FM_NOTMOUNTED) {
#if M_OSXFUSE_ENABLE_BIG_LOCK
        fuse_biglock_unlock(biglock);
#endif
        fuse_device_unlock(fdev);
        return EALREADY;
    }

    if (!(data->dataflags & FSESS_OPENED)) {
        fuse_device_unlock(fdev);
        err = ENXIO;
        goto out;
    }

    data->mount_state = FM_MOUNTED;
    OSAddAtomic(1, (SInt32 *)&fuse_mount_count);
    mounted = true;

    if (fdata_dead_get(data)) {
        fuse_device_unlock(fdev);
        err = ENOTCONN;
        goto out;
    }

    if (!data->daemoncred) {
        panic("OSXFUSE: daemon found but identity unknown");
    }

    if (fuse_vfs_context_issuser(context) &&
        kauth_cred_getuid(vfs_context_ucred(context)) != kauth_cred_getuid(data->daemoncred)) {
        fuse_device_unlock(fdev);
        err = EPERM;
        goto out;
    }

    data->mp = mp;
    data->fdev = fdev;
    data->dataflags |= mntopts;

    data->daemon_timeout.tv_sec =  fusefs_args.daemon_timeout;
    data->daemon_timeout.tv_nsec = 0;
    if (data->daemon_timeout.tv_sec) {
        data->daemon_timeout_p = &(data->daemon_timeout);
    } else {
        data->daemon_timeout_p = (struct timespec *)0;
    }

    data->max_read = max_read;
    data->fssubtype = fusefs_args.fssubtype;
    data->mountaltflags = fusefs_args.altflags;
    data->noimplflags = (uint64_t)0;

    data->blocksize = fuse_round_size(fusefs_args.blocksize,
                                      FUSE_MIN_BLOCKSIZE, FUSE_MAX_BLOCKSIZE);

    data->iosize = fuse_round_size(fusefs_args.iosize,
                                   FUSE_MIN_IOSIZE, FUSE_MAX_IOSIZE);

    if (data->iosize < data->blocksize) {
        data->iosize = data->blocksize;
    }

    data->userkernel_bufsize = FUSE_DEFAULT_USERKERNEL_BUFSIZE;

    copystr(fusefs_args.fsname, vfsstatfsp->f_mntfromname,
            MNAMELEN - 1, &len);
    bzero(vfsstatfsp->f_mntfromname + len, MNAMELEN - len);

    copystr(fusefs_args.volname, data->volname, MAXPATHLEN - 1, &len);
    bzero(data->volname + len, MAXPATHLEN - len);

    /* previous location of vfs_setioattr() */

    vfs_setfsprivate(mp, data);

    fuse_device_unlock(fdev);

    /* Send a handshake message to the daemon. */
    kr = kernel_thread_start(fuse_internal_init, data, &init_thread);
    if (kr != KERN_SUCCESS) {
        IOLog("OSXFUSE: could not start init thread\n");
        err = ENOTCONN;
    } else {
        thread_deallocate(init_thread);
    }

out:
    if (err) {
        vfs_setfsprivate(mp, NULL);

        fuse_device_lock(fdev);
        data = fuse_device_get_mpdata(fdev); /* again */
        if (mounted) {
            OSAddAtomic(-1, (SInt32 *)&fuse_mount_count);
        }
        if (data) {
            data->mount_state = FM_NOTMOUNTED;
            if (!(data->dataflags & FSESS_OPENED)) {
#if M_OSXFUSE_ENABLE_BIG_LOCK
                assert(biglock == data->biglock);
                fuse_biglock_unlock(biglock);
#endif
                fuse_device_close_final(fdev);
                /* data is gone now */
            }
        }
        fuse_device_unlock(fdev);
    } else {
        vnode_t fuse_rootvp = NULLVP;
        err = fuse_vfsop_root(mp, &fuse_rootvp, context);
        if (err) {
            goto out; /* go back and follow error path */
        }
        err = vnode_ref(fuse_rootvp);
#if M_OSXFUSE_ENABLE_BIG_LOCK
        /*
         * Even though fuse_rootvp will not be reclaimed when calling vnode_put,
         * because we incremented its usecount by calling vnode_ref, release
         * biglock just to be safe.
         */
        fuse_biglock_unlock(biglock);
#endif /* M_OSXFUSE_ENABLE_BIG_LOCK */
        (void)vnode_put(fuse_rootvp);
#if M_OSXFUSE_ENABLE_BIG_LOCK
        fuse_biglock_lock(biglock);
#endif
        if (err) {
            goto out; /* go back and follow error path */
        } else {
            struct vfsioattr ioattr;

            vfs_ioattr(mp, &ioattr);
            ioattr.io_maxreadcnt = ioattr.io_maxwritecnt = data->iosize;
            ioattr.io_segreadcnt = ioattr.io_segwritecnt = data->iosize / PAGE_SIZE;
            ioattr.io_maxsegreadsize = ioattr.io_maxsegwritesize = data->iosize;
            ioattr.io_devblocksize = data->blocksize;
            vfs_setioattr(mp, &ioattr);
        }
    }

#if M_OSXFUSE_ENABLE_BIG_LOCK
    fuse_device_lock(fdev);
    data = fuse_device_get_mpdata(fdev); /* ...and again */
    if(data) {
        assert(data->biglock == biglock);
        fuse_biglock_unlock(biglock);
    }
    fuse_device_unlock(fdev);
#endif

    return err;
}
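The tail of the success path pins the FUSE root with vnode_ref() and then drops the iocount obtained from fuse_vfsop_root(). Stripped of the biglock choreography, the idiom reduces to the following sketch (assumption: no OSXFUSE-specific locking; the helper name is invented):

/* Hypothetical helper: take a long-term reference on the mount's root vnode. */
static int
fuse_pin_root(mount_t mp, vfs_context_t context)
{
	vnode_t rootvp = NULLVP;
	int err = fuse_vfsop_root(mp, &rootvp, context);	/* returns with an iocount */

	if (err == 0) {
		err = vnode_ref(rootvp);	/* keep the root alive for the mount's lifetime */
		(void)vnode_put(rootvp);	/* drop the iocount either way */
	}
	return err;
}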
Example #10
/*
 * Look up a vnode/nfsnode by file handle.
 * Callers must check for mount points!!
 * In all cases, a pointer to a
 * nfsnode structure is returned.
 */
int
nfs_nget(
	mount_t mp,
	nfsnode_t dnp,
	struct componentname *cnp,
	u_char *fhp,
	int fhsize,
	struct nfs_vattr *nvap,
	u_int64_t *xidp,
	uint32_t auth,
	int flags,
	nfsnode_t *npp)
{
	nfsnode_t np;
	struct nfsnodehashhead *nhpp;
	vnode_t vp;
	int error, nfsvers;
	mount_t mp2;
	struct vnode_fsparam vfsp;
	uint32_t vid;

	FSDBG_TOP(263, mp, dnp, flags, npp);

	/* Check for unmount in progress */
	if (!mp || vfs_isforce(mp)) {
		*npp = NULL;
		error = ENXIO;
		FSDBG_BOT(263, mp, dnp, 0xd1e, error);
		return (error);
	}
	nfsvers = VFSTONFS(mp)->nm_vers;

	nhpp = NFSNOHASH(nfs_hash(fhp, fhsize));
loop:
	lck_mtx_lock(nfs_node_hash_mutex);
	for (np = nhpp->lh_first; np != 0; np = np->n_hash.le_next) {
		mp2 = (np->n_hflag & NHINIT) ? np->n_mount : NFSTOMP(np);
		if (mp != mp2 || np->n_fhsize != fhsize ||
		    bcmp(fhp, np->n_fhp, fhsize))
			continue;
		if (nvap && (nvap->nva_flags & NFS_FFLAG_TRIGGER_REFERRAL) &&
		    cnp && (cnp->cn_namelen > (fhsize - (int)sizeof(dnp)))) {
			/* The name was too long to fit in the file handle.  Check it against the node's name. */
			int namecmp = 0;
			const char *vname = vnode_getname(NFSTOV(np));
			if (vname) {
				if (cnp->cn_namelen != (int)strlen(vname))
					namecmp = 1;
				else
					namecmp = strncmp(vname, cnp->cn_nameptr, cnp->cn_namelen);
				vnode_putname(vname);
			}
			if (namecmp)  /* full name didn't match */
				continue;
		}
		FSDBG(263, dnp, np, np->n_flag, 0xcace0000);
		/* if the node is locked, sleep on it */
		if ((np->n_hflag & NHLOCKED) && !(flags & NG_NOCREATE)) {
			np->n_hflag |= NHLOCKWANT;
			FSDBG(263, dnp, np, np->n_flag, 0xcace2222);
			msleep(np, nfs_node_hash_mutex, PDROP | PINOD, "nfs_nget", NULL);
			FSDBG(263, dnp, np, np->n_flag, 0xcace3333);
			goto loop;
		}
		vp = NFSTOV(np);
		vid = vnode_vid(vp);
		lck_mtx_unlock(nfs_node_hash_mutex);
		if ((error = vnode_getwithvid(vp, vid))) {
			/*
			 * If vnode is being reclaimed or has already
			 * changed identity, no need to wait.
			 */
			FSDBG_BOT(263, dnp, *npp, 0xcace0d1e, error);
			return (error);
		}
		if ((error = nfs_node_lock(np))) {
			/* this only fails if the node is now unhashed */
			/* so let's see if we can find/create it again */
			FSDBG(263, dnp, *npp, 0xcaced1e2, error);
			vnode_put(vp);
			if (flags & NG_NOCREATE) {
				*npp = 0;
				FSDBG_BOT(263, dnp, *npp, 0xcaced1e0, ENOENT);
				return (ENOENT);
			}
			goto loop;
		}
		/* update attributes */
		if (nvap)
			error = nfs_loadattrcache(np, nvap, xidp, 0);
		if (error) {
			nfs_node_unlock(np);
			vnode_put(vp);
		} else {
			if (dnp && cnp && (flags & NG_MAKEENTRY))
				cache_enter(NFSTOV(dnp), vp, cnp);
			/*
			 * Update the vnode if the name and/or the parent has
			 * changed. We need to do this so that if getattrlist is
			 * called asking for ATTR_CMN_NAME, the "most" correct
			 * name is returned. In addition, for
			 * monitored vnodes we need to kick the vnode out of the
			 * name cache. We do this so that if there are hard
			 * links in the same directory the link will not be
			 * found and a lookup will get us here to return the
			 * name of the current link. In addition by removing the
			 * name from the name cache the old name will not be
			 * found after a rename done on another client or the
			 * server.  The principal reason to do this is because
			 * Finder is asking for notifications on a directory.
			 * The directory changes, Finder gets notified, reads
			 * the directory (which we have purged) and for each
			 * entry returned calls getattrlist with the name
			 * returned from readdir. getattrlist has to call
			 * namei/lookup to resolve the name; because it's not in
			 * the cache we end up here. We need to update the name
			 * so Finder will get the name it called us with.
			 *
			 * We had an imperfect solution with respect to case
			 * sensitivity.  There is a test that is run in
			 * FileBuster that does renames from some name to
			 * another name differing only in case. It then reads
			 * the directory looking for the new name, after it
			 * finds that new name, it asks getattrlist to verify
			 * that the name is the new name.  Usually that works,
			 * but renames generate fsevents and fseventsd will do a
			 * lookup on the name via lstat. Since that test renames
			 * old name to new name back and forth there is a race
			 * that an fsevent will be behind and will access the
			 * file by the old name, on a case insensitive file
			 * system that will work. Problem is if we do a case
			 * sensitive compare, we're going to change the name,
			 * which the test's getattrlist verification step is
			 * going to fail. So we will check the case sensitivity
			 * of the file system and do the appropriate compare. In
			 * a rare instance for non homogeneous file systems
			 * w.r.t. pathconf we will use case sensitive compares.
			 * That could break if the file system is actually case
			 * insensitive.
			 *
			 * Note that V2 does not know the case, so we just
			 * assume case sensitivity. 
			 *
			 * This is clearly not perfect due to races, but it is
			 * as good as it's going to get. You can defeat the
			 * handling of hard links simply by doing:
			 *
			 *	while :; do ls -l > /dev/null; done
			 *
			 * in a terminal window. Even a single ls -l can cause a
			 * race.
			 *
			 * <rant>What we really need is for the caller, which
			 * knows the name being used is valid since it got it
			 * from a readdir, to use that name and not ask for
			 * ATTR_CMN_NAME</rant>
			 */
			if (dnp && cnp && (vp != NFSTOV(dnp))) {
				int update_flags = (vnode_ismonitored((NFSTOV(dnp)))) ? VNODE_UPDATE_CACHE : 0;
				int (*cmp)(const char *s1, const char *s2, size_t n);

				cmp = nfs_case_insensitive(mp) ? strncasecmp : strncmp;

				if (vp->v_name && cnp->cn_namelen && (*cmp)(cnp->cn_nameptr, vp->v_name, cnp->cn_namelen))
					update_flags |= VNODE_UPDATE_NAME;
				if ((vp->v_name == NULL && cnp->cn_namelen != 0) || (vp->v_name != NULL && cnp->cn_namelen == 0))
					update_flags |= VNODE_UPDATE_NAME;
				if (vnode_parent(vp) != NFSTOV(dnp))
					update_flags |= VNODE_UPDATE_PARENT;
				if (update_flags) {
					NFS_NODE_DBG("vnode_update_identity old name %s new name %.*s update flags = %x\n",
						     vp->v_name, cnp->cn_namelen, cnp->cn_nameptr ? cnp->cn_nameptr : "", update_flags);
					vnode_update_identity(vp, NFSTOV(dnp), cnp->cn_nameptr, cnp->cn_namelen, 0, update_flags);
				}
			}

			*npp = np;
		}
		FSDBG_BOT(263, dnp, *npp, 0xcace0000, error);
		return(error);
	}

	FSDBG(263, mp, dnp, npp, 0xaaaaaaaa);

	if (flags & NG_NOCREATE) {
		lck_mtx_unlock(nfs_node_hash_mutex);
		*npp = 0;
		FSDBG_BOT(263, dnp, *npp, 0x80000001, ENOENT);
		return (ENOENT);
	}

	/*
	 * allocate and initialize nfsnode and stick it in the hash
	 * before calling getnewvnode().  Anyone finding it in the
	 * hash before initialization is complete will wait for it.
	 */
	MALLOC_ZONE(np, nfsnode_t, sizeof *np, M_NFSNODE, M_WAITOK);
	if (!np) {
		lck_mtx_unlock(nfs_node_hash_mutex);
		*npp = 0;
		FSDBG_BOT(263, dnp, *npp, 0x80000001, ENOMEM);
		return (ENOMEM);
	}
	bzero(np, sizeof *np);
	np->n_hflag |= (NHINIT | NHLOCKED);
	np->n_mount = mp;
	np->n_auth = auth;
	TAILQ_INIT(&np->n_opens);
	TAILQ_INIT(&np->n_lock_owners);
	TAILQ_INIT(&np->n_locks);
	np->n_dlink.tqe_next = NFSNOLIST;
	np->n_dreturn.tqe_next = NFSNOLIST;
	np->n_monlink.le_next = NFSNOLIST;

	/* ugh... need to keep track of ".zfs" directories to workaround server bugs */
	if ((nvap->nva_type == VDIR) && cnp && (cnp->cn_namelen == 4) &&
	    (cnp->cn_nameptr[0] == '.') && (cnp->cn_nameptr[1] == 'z') &&
	    (cnp->cn_nameptr[2] == 'f') && (cnp->cn_nameptr[3] == 's'))
		np->n_flag |= NISDOTZFS;
	if (dnp && (dnp->n_flag & NISDOTZFS))
		np->n_flag |= NISDOTZFSCHILD;

	if (dnp && cnp && ((cnp->cn_namelen != 2) ||
	    (cnp->cn_nameptr[0] != '.') || (cnp->cn_nameptr[1] != '.'))) {
		vnode_t dvp = NFSTOV(dnp);
		if (!vnode_get(dvp)) {
			if (!vnode_ref(dvp))
				np->n_parent = dvp;
			vnode_put(dvp);
		}
	}

	/* setup node's file handle */
	if (fhsize > NFS_SMALLFH) {
		MALLOC_ZONE(np->n_fhp, u_char *,
				fhsize, M_NFSBIGFH, M_WAITOK);
		if (!np->n_fhp) {
			lck_mtx_unlock(nfs_node_hash_mutex);
			FREE_ZONE(np, sizeof *np, M_NFSNODE);
			*npp = 0;
			FSDBG_BOT(263, dnp, *npp, 0x80000002, ENOMEM);
			return (ENOMEM);
		}
	} else {
Example #11
static int
vfs_mount_9p(mount_t mp, vnode_t devvp, user_addr_t data, vfs_context_t ctx)
{
#pragma unused(devvp)
	struct sockaddr *addr, *authaddr;
	struct vfsstatfs *sp;
	char authkey[DESKEYLEN+1];
	kauth_cred_t cred;
	user_args_9p args;
	mount_9p *nmp;
	size_t size;
	fid_9p fid;
	qid_9p qid;
	char *vers;
	int e;

	TRACE();
	nmp = NULL;
	addr = NULL;
	authaddr = NULL;
	fid = NOFID;

	if (vfs_isupdate(mp))
		return ENOTSUP;

	if (vfs_context_is64bit(ctx)) {
		if ((e=copyin(data, &args, sizeof(args))))
			goto error;
	} else {
		args_9p args32;
		if ((e=copyin(data, &args32, sizeof(args32))))
			goto error;
		args.spec			= CAST_USER_ADDR_T(args32.spec);
		args.addr			= CAST_USER_ADDR_T(args32.addr);
		args.addrlen		= args32.addrlen;
		args.authaddr		= CAST_USER_ADDR_T(args32.authaddr);
		args.authaddrlen	= args32.authaddrlen;
		args.volume			= CAST_USER_ADDR_T(args32.volume);
		args.uname			= CAST_USER_ADDR_T(args32.uname);
		args.aname			= CAST_USER_ADDR_T(args32.aname);
		args.authkey		= CAST_USER_ADDR_T(args32.authkey);
		args.flags			= args32.flags;
	}
	e = ENOMEM;
	nmp = malloc_9p(sizeof(*nmp));
	if (nmp == NULL)
		return e;

	nmp->mp = mp;
	TAILQ_INIT(&nmp->req);
	nmp->lck = lck_mtx_alloc_init(lck_grp_9p, LCK_ATTR_NULL);
	nmp->reqlck = lck_mtx_alloc_init(lck_grp_9p, LCK_ATTR_NULL);
	nmp->nodelck = lck_mtx_alloc_init(lck_grp_9p, LCK_ATTR_NULL);
	nmp->node = hashinit(desiredvnodes, M_TEMP, &nmp->nodelen);
	if (nmp->lck==NULL || nmp->reqlck==NULL || nmp->nodelck==NULL || nmp->node==NULL)
		goto error;

	if ((e=nameget_9p(args.volume, &nmp->volume)))
		goto error;
	if ((e=nameget_9p(args.uname, &nmp->uname)))
		goto error;
	if ((e=nameget_9p(args.aname, &nmp->aname)))
		goto error;

	cred = vfs_context_ucred(ctx);
	if (IS_VALID_CRED(cred)) {
		nmp->uid = kauth_cred_getuid(cred);
		nmp->gid = kauth_cred_getgid(cred);
	} else {
		nmp->uid = KAUTH_UID_NONE;
		nmp->gid = KAUTH_GID_NONE;
	}
	
	vfs_getnewfsid(mp);
	vfs_setfsprivate(mp, nmp);
	
	nmp->flags = args.flags;
	if ((e=addrget_9p(args.addr, args.addrlen, &addr)))
		goto error;
	if ((e=connect_9p(nmp, addr)))
		goto error;

	vers = VERSION9P;
	if (ISSET(nmp->flags, FLAG_DOTU))
		vers = VERSION9PDOTU;
	if ((e=version_9p(nmp, vers, &nmp->version)))
		goto error;
	if (ISSET(nmp->flags, FLAG_DOTU) && strcmp(VERSION9PDOTU, nmp->version)==0)
		SET(nmp->flags, F_DOTU);

	nmp->afid = NOFID;
	if (args.authaddr && args.authaddrlen && args.authkey) {
		if ((e=copyin(args.authkey, authkey, DESKEYLEN)))
			goto error;
		if ((e=addrget_9p(args.authaddr, args.authaddrlen, &authaddr)))
			goto error;
		if ((e=auth_9p(nmp, nmp->uname, nmp->aname, nmp->uid, &nmp->afid, &qid)))
			goto error;
		if (nmp->afid!=NOFID &&
			(e=authp9any_9p(nmp, nmp->afid, authaddr, nmp->uname, authkey)))
			goto error;
		bzero(authkey, DESKEYLEN);
	}
	if ((e=attach_9p(nmp, nmp->uname, nmp->aname, nmp->afid, nmp->uid, &fid, &qid)))
		goto error;

	if ((e=nget_9p(nmp, fid, qid, NULL, &nmp->root, NULL, ctx)))
		goto error;

	nunlock_9p(NTO9P(nmp->root));
	e = vnode_ref(nmp->root);
	vnode_put(nmp->root);
	if (e)
		goto error;

	vfs_setauthopaque(mp);
	vfs_clearauthopaqueaccess(mp);
	vfs_setlocklocal(mp);

	// init stats
	sp = vfs_statfs(nmp->mp);
	copyinstr(args.spec, sp->f_mntfromname, MNAMELEN-1, &size);
	bzero(sp->f_mntfromname+size, MNAMELEN-size);
	sp->f_bsize = PAGE_SIZE;
	sp->f_iosize = nmp->msize-IOHDRSZ;
	sp->f_blocks = sp->f_bfree = sp->f_bavail = sp->f_bused = 0;
	sp->f_files = 65535;
	sp->f_ffree = sp->f_files-2;
	sp->f_flags = vfs_flags(mp);
	
	free_9p(addr);
	free_9p(authaddr);
	return 0;

error:
	bzero(authkey, DESKEYLEN);
	free_9p(addr);
	free_9p(authaddr);
	if (nmp->so) {
		clunk_9p(nmp, fid);
		disconnect_9p(nmp);
	}
	freemount_9p(nmp);
	vfs_setfsprivate(mp, NULL);
	return e;
}
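
The root-vnode handling above (nunlock_9p(), vnode_ref(), vnode_put()) is the
usual way a mount trades the short-term iocount returned by its node-creation
routine for a usecount that lives as long as the mount. A minimal sketch of
that conversion, using only the vnode calls seen above (the function name is
hypothetical):

#include <sys/vnode.h>

/*
 * Sketch: retain a mount's root vnode for the life of the mount.
 * 'rootvp' is assumed to arrive with an iocount from vnode_create()
 * or an equivalent node-get routine; balance with vnode_rele() at
 * unmount time.
 */
static int
retain_root_sketch(vnode_t rootvp)
{
	int error;

	error = vnode_ref(rootvp);	/* take the long-term usecount */
	(void)vnode_put(rootvp);	/* always drop the iocount */
	return error;			/* nonzero: caller must unwind */
}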
Example No. 12
/* works like PFlushVolumeData */
void
darwin_notify_perms(struct unixuser *auser, int event)
{
    int i;
    struct afs_q *tq, *uq = NULL;
    struct vcache *tvc, *hnext;
    int isglock = ISAFS_GLOCK();
    struct vnode *vp;
    struct vnode_attr va;
    int isctxtowner = 0;

    if (!afs_darwin_fsevents)
	return;

    VATTR_INIT(&va);
    VATTR_SET(&va, va_mode, 0777);
    if (event & UTokensObtained)
	VATTR_SET(&va, va_uid, auser->uid);
    else
	VATTR_SET(&va, va_uid, -2); /* nobody */

    if (!isglock)
	AFS_GLOCK();
    if (!(vfs_context_owner == current_thread())) {
	get_vfs_context();
	isctxtowner = 1;
    }
loop:
    ObtainReadLock(&afs_xvcache);
    for (i = 0; i < VCSIZE; i++) {
	for (tq = afs_vhashTV[i].prev; tq != &afs_vhashTV[i]; tq = uq) {
	    uq = QPrev(tq);
	    tvc = QTOVH(tq);
	    if (tvc->f.states & CDeadVnode) {
		/* we can afford to be best-effort */
		continue;
	    }
	    /* no per-file acls, so only notify on directories */
	    if (!(vp = AFSTOV(tvc)) || !vnode_isdir(AFSTOV(tvc)))
		continue;
	    /* dynroot object. no callbacks. anonymous ACL. just no. */
	    if (afs_IsDynrootFid(&tvc->f.fid))
		continue;
	    /* no fake fsevents on mount point sources. leaks refs */
	    if (tvc->mvstat == 1)
		continue;
	    /* if it's being reclaimed, just pass */
	    if (vnode_get(vp))
		continue;
	    if (vnode_ref(vp)) {
		AFS_GUNLOCK();
		vnode_put(vp);
		AFS_GLOCK();
		continue;
	    }
	    ReleaseReadLock(&afs_xvcache);
	    /* Avoid potentially re-entering on this lock */
	    if (0 == NBObtainWriteLock(&tvc->lock, 234)) {
		tvc->f.states |= CEvent;
		AFS_GUNLOCK();
		vnode_setattr(vp, &va, afs_osi_ctxtp);
		tvc->f.states &= ~CEvent;
		vnode_put(vp);
		AFS_GLOCK();
		ReleaseWriteLock(&tvc->lock);
	    }
	    ObtainReadLock(&afs_xvcache);
	    uq = QPrev(tq);
	    /* our tvc ptr is still good until now */
	    AFS_FAST_RELE(tvc);
	}
    }
    ReleaseReadLock(&afs_xvcache);
    if (isctxtowner)
	put_vfs_context();
    if (!isglock)
	AFS_GUNLOCK();
}
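
The loop above is the Darwin-safe way to pin a vnode found by walking a
cache: vnode_get() takes an iocount and fails if the vnode is mid-reclaim,
vnode_ref() promotes it to a usecount, and global locks are dropped around
the blocking vnode_put(). The idiom in isolation, as a sketch (the function
name is hypothetical):

#include <sys/errno.h>
#include <sys/vnode.h>

/*
 * Sketch: pin a vnode discovered during a cache walk, use it, unpin.
 * vnode_get() fails while the vnode is being reclaimed, so failures
 * are treated as "skip this entry", exactly as in the loop above.
 */
static int
pin_and_use_sketch(vnode_t vp)
{
	if (vnode_get(vp) != 0)
		return EBUSY;		/* reclaim in progress: skip */
	if (vnode_ref(vp) != 0) {
		(void)vnode_put(vp);
		return EBUSY;		/* could not take a usecount */
	}
	/* ... safe to call vnode_setattr() etc. against vp here ... */
	(void)vnode_put(vp);		/* drop the iocount */
	vnode_rele(vp);			/* drop the usecount when done */
	return 0;
}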
Example No. 13
struct vcache *
osi_dnlc_lookup(struct vcache *adp, char *aname, int locktype)
{
    struct vcache *tvc;
    int LRUme;
    unsigned int key, skey;
    char *ts = aname;
    struct nc *tnc, *tnc1 = 0;
    int safety;
#ifdef AFS_DARWIN80_ENV
    vnode_t tvp;
#endif

    ma_critical_enter();

    if (!afs_usednlc) {
      ma_critical_exit();
      return 0;
    }

    dnlcHash(ts, key);		/* leaves ts pointing at the NUL terminator */
    if (ts - aname >= AFSNCNAMESIZE) {
      ma_critical_exit();
      return 0;
    }
    skey = key & (NHSIZE - 1);

    TRACE(osi_dnlc_lookupT, skey);
    dnlcstats.lookups++;

    ObtainReadLock(&afs_xvcache);
    ObtainReadLock(&afs_xdnlc);

    for (tvc = NULL, tnc = nameHash[skey], safety = 0; tnc;
	 tnc = tnc->next, safety++) {
	if ( /* (tnc->key == key)  && */ (tnc->dirp == adp)
	    && (!strcmp((char *)tnc->name, aname))) {
	    tvc = tnc->vp;
	    tnc1 = tnc;
	    break;
	} else if (tnc->next == nameHash[skey]) {	/* end of list */
	    break;
	} else if (safety > NCSIZE) {
	    afs_warn("DNLC cycle");
	    dnlcstats.cycles++;
	    ReleaseReadLock(&afs_xdnlc);
	    ReleaseReadLock(&afs_xvcache);
	    osi_dnlc_purge();
	    ma_critical_exit();
	    return (0);
	}
    }

    LRUme = 0;			/* (tnc != nameHash[skey]); */
    ReleaseReadLock(&afs_xdnlc);

    if (!tvc) {
	ReleaseReadLock(&afs_xvcache);
	dnlcstats.misses++;
    } else {
	if ((tvc->f.states & CVInit)
#ifdef  AFS_DARWIN80_ENV
	    ||(tvc->f.states & CDeadVnode)
#endif
	    )      
	{
	    ReleaseReadLock(&afs_xvcache);
	    dnlcstats.misses++;
	    osi_dnlc_remove(adp, aname, tvc);
	    ma_critical_exit();
	    return 0;
	}
#if defined(AFS_DARWIN80_ENV)
	tvp = AFSTOV(tvc);
	if (vnode_get(tvp)) {
	    ReleaseReadLock(&afs_xvcache);
	    dnlcstats.misses++;
	    osi_dnlc_remove(adp, aname, tvc);
	    ma_critical_exit();
	    return 0;
	}
	if (vnode_ref(tvp)) {
	    ReleaseReadLock(&afs_xvcache);
	    AFS_GUNLOCK();
	    vnode_put(tvp);
	    AFS_GLOCK();
	    dnlcstats.misses++;
	    osi_dnlc_remove(adp, aname, tvc);
	    ma_critical_exit();
	    return 0;
	}
#elif defined(AFS_FBSD_ENV)
	/* can't sleep in a critical section */
	ma_critical_exit();
	osi_vnhold(tvc, 0);
	ma_critical_enter();
#else
	osi_vnhold(tvc, 0);
#endif
	ReleaseReadLock(&afs_xvcache);

#ifdef	notdef
	/* 
	 * XXX If LRUme is ever non-zero, reorder the if statement, because
	 * AIX's cc with the optimizer on won't necessarily evaluate the
	 * conditions in order. XXX
	 */
	if (LRUme && (0 == NBObtainWriteLock(&afs_xdnlc))) {
	    /* don't block to do this */
	    /* tnc might have been moved during race condition, */
	    /* but it's always in a legit hash chain when a lock is granted, 
	     * or else it's on the freelist so prev == NULL, 
	     * so at worst this is redundant */
	    /* Now that we've got it held, and a lock on the dnlc, we 
	     * should check to be sure that there was no race, and 
	     * bail out if there was. */
	    if (tnc->prev) {
		/* special case for only two elements on list - relative ordering
		 * doesn't change */
		if (tnc->prev != tnc->next) {
		    /* remove from old location */
		    tnc->prev->next = tnc->next;
		    tnc->next->prev = tnc->prev;
		    /* insert into new location */
		    tnc->next = nameHash[skey];
		    tnc->prev = tnc->next->prev;
		    tnc->next->prev = tnc;
		    tnc->prev->next = tnc;
		}
		nameHash[skey] = tnc;
	    }
	    ReleaseWriteLock(&afs_xdnlc);
	}
#endif
    }

    ma_critical_exit();
    return tvc;
}
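
The hash chain walked here is circular (the last element points back to
nameHash[skey]), so the loop needs both an end-of-list test and a safety
counter against corruption. The same defensive traversal reduced to a
skeleton, with a hypothetical node type standing in for struct nc:

/* Sketch: bounded walk of a circular singly linked hash chain. */
struct cnode_sketch {
	struct cnode_sketch *next;
	int key;
};

static struct cnode_sketch *
walk_circular_sketch(struct cnode_sketch *head, int key, int limit)
{
	struct cnode_sketch *p;
	int safety = 0;

	for (p = head; p != NULL; p = p->next, safety++) {
		if (p->key == key)
			return p;	/* match */
		if (p->next == head)
			return NULL;	/* wrapped: end of circular list */
		if (safety > limit)
			return NULL;	/* cycle guard: corrupt chain */
	}
	return NULL;
}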
Example No. 14
/*
 * This function is called very early on in the Mach startup, from the
 * function start_kernel_threads() in osfmk/kern/startup.c.  It's called
 * in the context of the current (startup) task using a call to the
 * function kernel_thread_create() to jump into start_kernel_threads().
 * Internally, kernel_thread_create() calls thread_create_internal(),
 * which calls uthread_alloc().  The function of uthread_alloc() is
 * normally to allocate a uthread structure, and fill out the uu_sigmask,
 * uu_context fields.  It skips filling these out in the case of the "task"
 * being "kernel_task", because the order of operation is inverted.  To
 * account for that, we need to manually fill in at least the contents
 * of the uu_context.vc_ucred field so that the uthread structure can be
 * used like any other.
 */
void
bsd_init(void)
{
	struct uthread *ut;
	unsigned int i;
#if __i386__ || __x86_64__
	int error;
#endif	
	struct vfs_context context;
	kern_return_t	ret;
	struct ucred temp_cred;

#define bsd_init_kprintf(x...) /* kprintf("bsd_init: " x) */

	kernel_flock = funnel_alloc(KERNEL_FUNNEL);
	if (kernel_flock == (funnel_t *)0 ) {
		panic("bsd_init: Failed to allocate kernel funnel");
	}
        
	printf(copyright);
	
	bsd_init_kprintf("calling kmeminit\n");
	kmeminit();
	
	bsd_init_kprintf("calling parse_bsd_args\n");
	parse_bsd_args();

	/* Initialize kauth subsystem before instancing the first credential */
	bsd_init_kprintf("calling kauth_init\n");
	kauth_init();

	/* Initialize process and pgrp structures. */
	bsd_init_kprintf("calling procinit\n");
	procinit();

	/* Initialize the ttys (MUST be before kminit()/bsd_autoconf()!)*/
	tty_init();

	kernproc = &proc0;	/* implicitly bzero'ed */

	/* kernel_task->proc = kernproc; */
	set_bsdtask_info(kernel_task,(void *)kernproc);

	/* give kernproc a name */
	bsd_init_kprintf("calling process_name\n");
	process_name("kernel_task", kernproc);

	/* allocate proc lock group attribute and group */
	bsd_init_kprintf("calling lck_grp_attr_alloc_init\n");
	proc_lck_grp_attr= lck_grp_attr_alloc_init();

	proc_lck_grp = lck_grp_alloc_init("proc",  proc_lck_grp_attr);
#ifndef CONFIG_EMBEDDED
	proc_slock_grp = lck_grp_alloc_init("proc-slock",  proc_lck_grp_attr);
	proc_fdmlock_grp = lck_grp_alloc_init("proc-fdmlock",  proc_lck_grp_attr);
	proc_mlock_grp = lck_grp_alloc_init("proc-mlock",  proc_lck_grp_attr);
#endif
	/* Allocate proc lock attribute */
	proc_lck_attr = lck_attr_alloc_init();
#if 0
#if __PROC_INTERNAL_DEBUG
	lck_attr_setdebug(proc_lck_attr);
#endif
#endif

#ifdef CONFIG_EMBEDDED
	proc_list_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
	proc_klist_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_mlock, proc_lck_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_fdmlock, proc_lck_grp, proc_lck_attr);
	lck_spin_init(&kernproc->p_slock, proc_lck_grp, proc_lck_attr);
#else	
	proc_list_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr);
	proc_klist_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_mlock, proc_mlock_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
	lck_spin_init(&kernproc->p_slock, proc_slock_grp, proc_lck_attr);
#endif

	execargs_cache_lock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
	execargs_cache_size = bsd_simul_execs;
	execargs_free_count = bsd_simul_execs;
	execargs_cache = (vm_offset_t *)kalloc(bsd_simul_execs * sizeof(vm_offset_t));
	bzero(execargs_cache, bsd_simul_execs * sizeof(vm_offset_t));
	
	if (current_task() != kernel_task)
		printf("bsd_init: We have a problem, "
				"current task is not kernel task\n");
	
	bsd_init_kprintf("calling get_bsdthread_info\n");
	ut = (uthread_t)get_bsdthread_info(current_thread());

#if CONFIG_MACF
	/*
	 * Initialize the MAC Framework
	 */
	mac_policy_initbsd();
	kernproc->p_mac_enforce = 0;
#endif /* MAC */

	/*
	 * Create process 0.
	 */
	proc_list_lock();
	LIST_INSERT_HEAD(&allproc, kernproc, p_list);
	kernproc->p_pgrp = &pgrp0;
	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
	LIST_INIT(&pgrp0.pg_members);
#ifdef CONFIG_EMBEDDED
	lck_mtx_init(&pgrp0.pg_mlock, proc_lck_grp, proc_lck_attr);	
#else
	lck_mtx_init(&pgrp0.pg_mlock, proc_mlock_grp, proc_lck_attr);
#endif
	/* There is no other bsd thread at this point, so this is safe without the pgrp lock */
	LIST_INSERT_HEAD(&pgrp0.pg_members, kernproc, p_pglist);
	kernproc->p_listflag |= P_LIST_INPGRP;
	kernproc->p_pgrpid = 0;

	pgrp0.pg_session = &session0;
	pgrp0.pg_membercnt = 1;

	session0.s_count = 1;
	session0.s_leader = kernproc;
	session0.s_listflags = 0;
#ifdef CONFIG_EMBEDDED
	lck_mtx_init(&session0.s_mlock, proc_lck_grp, proc_lck_attr);
#else
	lck_mtx_init(&session0.s_mlock, proc_mlock_grp, proc_lck_attr);
#endif
	LIST_INSERT_HEAD(SESSHASH(0), &session0, s_hash);
	proc_list_unlock();

#if CONFIG_LCTX
	kernproc->p_lctx = NULL;
#endif

	kernproc->task = kernel_task;
	
	kernproc->p_stat = SRUN;
	kernproc->p_flag = P_SYSTEM;
	kernproc->p_nice = NZERO;
	kernproc->p_pptr = kernproc;

	TAILQ_INIT(&kernproc->p_uthlist);
	TAILQ_INSERT_TAIL(&kernproc->p_uthlist, ut, uu_list);
	
	kernproc->sigwait = FALSE;
	kernproc->sigwait_thread = THREAD_NULL;
	kernproc->exit_thread = THREAD_NULL;
	kernproc->p_csflags = CS_VALID;

	/*
	 * Create credential.  This also Initializes the audit information.
	 */
	bsd_init_kprintf("calling bzero\n");
	bzero(&temp_cred, sizeof(temp_cred));
	temp_cred.cr_ngroups = 1;

	temp_cred.cr_audit.as_aia_p = &audit_default_aia;
        /* XXX the following will go away with cr_au */
	temp_cred.cr_au.ai_auid = AU_DEFAUDITID;

	bsd_init_kprintf("calling kauth_cred_create\n");
	kernproc->p_ucred = kauth_cred_create(&temp_cred); 

	/* give the (already existing) initial thread a reference on it */
	bsd_init_kprintf("calling kauth_cred_ref\n");
	kauth_cred_ref(kernproc->p_ucred);
	ut->uu_context.vc_ucred = kernproc->p_ucred;
	ut->uu_context.vc_thread = current_thread();

	TAILQ_INIT(&kernproc->p_aio_activeq);
	TAILQ_INIT(&kernproc->p_aio_doneq);
	kernproc->p_aio_total_count = 0;
	kernproc->p_aio_active_count = 0;

	bsd_init_kprintf("calling file_lock_init\n");
	file_lock_init();

#if CONFIG_MACF
	mac_cred_label_associate_kernel(kernproc->p_ucred);
	mac_task_label_update_cred (kernproc->p_ucred, (struct task *) kernproc->task);
#endif

	/* Create the file descriptor table. */
	filedesc0.fd_refcnt = 1+1;	/* +1 so shutdown will not _FREE_ZONE */
	kernproc->p_fd = &filedesc0;
	filedesc0.fd_cmask = cmask;
	filedesc0.fd_knlistsize = -1;
	filedesc0.fd_knlist = NULL;
	filedesc0.fd_knhash = NULL;
	filedesc0.fd_knhashmask = 0;

	/* Create the limits structures. */
	kernproc->p_limit = &limit0;
	for (i = 0; i < sizeof(kernproc->p_rlimit)/sizeof(kernproc->p_rlimit[0]); i++)
		limit0.pl_rlimit[i].rlim_cur = 
			limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE;
	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = maxprocperuid;
	limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
	limit0.pl_rlimit[RLIMIT_STACK] = vm_initial_limit_stack;
	limit0.pl_rlimit[RLIMIT_DATA] = vm_initial_limit_data;
	limit0.pl_rlimit[RLIMIT_CORE] = vm_initial_limit_core;
	limit0.pl_refcnt = 1;

	kernproc->p_stats = &pstats0;
	kernproc->p_sigacts = &sigacts0;

	/*
	 * Charge root for two  processes: init and mach_init.
	 */
	bsd_init_kprintf("calling chgproccnt\n");
	(void)chgproccnt(0, 1);

	/*
	 *	Allocate a kernel submap for pageable memory
	 *	for temporary copying (execve()).
	 */
	{
		vm_offset_t	minimum;

		bsd_init_kprintf("calling kmem_suballoc\n");
		ret = kmem_suballoc(kernel_map,
				&minimum,
				(vm_size_t)bsd_pageable_map_size,
				TRUE,
				VM_FLAGS_ANYWHERE,
				&bsd_pageable_map);
		if (ret != KERN_SUCCESS) 
			panic("bsd_init: Failed to allocate bsd pageable map");
	}

	/*
	 * Initialize buffers and hash links for buffers
	 *
	 * SIDE EFFECT: Starts a thread for bcleanbuf_thread(), so must
	 *		happen after a credential has been associated with
	 *		the kernel task.
	 */
	bsd_init_kprintf("calling bsd_bufferinit\n");
	bsd_bufferinit();

	/* Initialize the execve() semaphore */
	bsd_init_kprintf("calling semaphore_create\n");

	/* XXX no semaphore is actually created here; this re-tests the
	 * earlier kmem_suballoc() result, which already panicked on failure */
	if (ret != KERN_SUCCESS)
		panic("bsd_init: Failed to create execve semaphore");

	/*
	 * Initialize the calendar.
	 */
	bsd_init_kprintf("calling IOKitInitializeTime\n");
	IOKitInitializeTime();

	if (turn_on_log_leaks && !new_nkdbufs)
		new_nkdbufs = 200000;
	start_kern_tracing(new_nkdbufs);
	if (turn_on_log_leaks)
		log_leaks = 1;

	bsd_init_kprintf("calling ubc_init\n");
	ubc_init();

	/* Initialize the file systems. */
	bsd_init_kprintf("calling vfsinit\n");
	vfsinit();

#if SOCKETS
	/* Initialize per-CPU cache allocator */
	mcache_init();

	/* Initialize mbuf's. */
	bsd_init_kprintf("calling mbinit\n");
	mbinit();
	net_str_id_init(); /* for mbuf tags */
#endif /* SOCKETS */

	/*
	 * Initializes security event auditing.
	 * XXX: Should/could this occur later?
	 */
#if CONFIG_AUDIT
	bsd_init_kprintf("calling audit_init\n");
 	audit_init();  
#endif

	/* Initialize kqueues */
	bsd_init_kprintf("calling knote_init\n");
	knote_init();

	/* Initialize for async IO */
	bsd_init_kprintf("calling aio_init\n");
	aio_init();

	/* Initialize pipes */
	bsd_init_kprintf("calling pipeinit\n");
	pipeinit();

	/* Initialize SysV shm subsystem locks; the subsystem proper is
	 * initialized through a sysctl.
	 */
#if SYSV_SHM
	bsd_init_kprintf("calling sysv_shm_lock_init\n");
	sysv_shm_lock_init();
#endif
#if SYSV_SEM
	bsd_init_kprintf("calling sysv_sem_lock_init\n");
	sysv_sem_lock_init();
#endif
#if SYSV_MSG
	bsd_init_kprintf("sysv_msg_lock_init\n");
	sysv_msg_lock_init();
#endif
	bsd_init_kprintf("calling pshm_lock_init\n");
	pshm_lock_init();
	bsd_init_kprintf("calling psem_lock_init\n");
	psem_lock_init();

	pthread_init();
	/* POSIX Shm and Sem */
	bsd_init_kprintf("calling pshm_cache_init\n");
	pshm_cache_init();
	bsd_init_kprintf("calling psem_cache_init\n");
	psem_cache_init();
	bsd_init_kprintf("calling time_zone_slock_init\n");
	time_zone_slock_init();

	/* Stack snapshot facility lock */
	stackshot_lock_init();
	/*
	 * Initialize protocols.  Block reception of incoming packets
	 * until everything is ready.
	 */
	bsd_init_kprintf("calling sysctl_register_fixed\n");
	sysctl_register_fixed(); 
	bsd_init_kprintf("calling sysctl_mib_init\n");
	sysctl_mib_init();
#if NETWORKING
	bsd_init_kprintf("calling dlil_init\n");
	dlil_init();
	bsd_init_kprintf("calling proto_kpi_init\n");
	proto_kpi_init();
#endif /* NETWORKING */
#if SOCKETS
	bsd_init_kprintf("calling socketinit\n");
	socketinit();
	bsd_init_kprintf("calling domaininit\n");
	domaininit();
#endif /* SOCKETS */

	kernproc->p_fd->fd_cdir = NULL;
	kernproc->p_fd->fd_rdir = NULL;

#if CONFIG_EMBEDDED
	/* Initialize kernel memory status notifications */
	bsd_init_kprintf("calling kern_memorystatus_init\n");
	kern_memorystatus_init();
#endif

#ifdef GPROF
	/* Initialize kernel profiling. */
	kmstartup();
#endif

	/* kick off timeout driven events by calling first time */
	thread_wakeup(&lbolt);
	timeout(lightning_bolt, 0, hz);

	bsd_init_kprintf("calling bsd_autoconf\n");
	bsd_autoconf();

#if CONFIG_DTRACE
	dtrace_postinit();
#endif

	/*
	 * We attach the loopback interface *way* down here to ensure
	 * it happens after autoconf(), otherwise it becomes the
	 * "primary" interface.
	 */
#include <loop.h>
#if NLOOP > 0
	bsd_init_kprintf("calling loopattach\n");
	loopattach();			/* XXX */
#endif

#if PFLOG
	/* Initialize packet filter log interface */
	pfloginit();
#endif /* PFLOG */

#if NETHER > 0
	/* Register the built-in dlil ethernet interface family */
	bsd_init_kprintf("calling ether_family_init\n");
	ether_family_init();
#endif /* ETHER */

#if NETWORKING
	/* Call any kext code that wants to run just after network init */
	bsd_init_kprintf("calling net_init_run\n");
	net_init_run();
	
	/* register user tunnel kernel control handler */
	utun_register_control();
#endif /* NETWORKING */

	bsd_init_kprintf("calling vnode_pager_bootstrap\n");
	vnode_pager_bootstrap();
#if 0
	/* XXX Hack for early debug stop */
	printf("\nabout to sleep for 10 seconds\n");
	IOSleep( 10 * 1000 );
	/* Debugger("hello"); */
#endif

	bsd_init_kprintf("calling inittodr\n");
	inittodr(0);

#if CONFIG_EMBEDDED
	{
		/* print out early VM statistics */
		kern_return_t kr1;
		vm_statistics_data_t stat;
		mach_msg_type_number_t count;

		count = HOST_VM_INFO_COUNT;
		kr1 = host_statistics(host_self(),
				      HOST_VM_INFO,
				      (host_info_t)&stat,
				      &count);
		kprintf("Mach Virtual Memory Statistics (page size of 4096) bytes\n"
			"Pages free:\t\t\t%u.\n"
			"Pages active:\t\t\t%u.\n"
			"Pages inactive:\t\t\t%u.\n"
			"Pages wired down:\t\t%u.\n"
			"\"Translation faults\":\t\t%u.\n"
			"Pages copy-on-write:\t\t%u.\n"
			"Pages zero filled:\t\t%u.\n"
			"Pages reactivated:\t\t%u.\n"
			"Pageins:\t\t\t%u.\n"
			"Pageouts:\t\t\t%u.\n"
			"Object cache: %u hits of %u lookups (%d%% hit rate)\n",

			stat.free_count,
			stat.active_count,
			stat.inactive_count,
			stat.wire_count,
			stat.faults,
			stat.cow_faults,
			stat.zero_fill_count,
			stat.reactivations,
			stat.pageins,
			stat.pageouts,
			stat.hits,
			stat.lookups,
			(stat.lookups == 0) ? 100 :
			                   ((stat.hits * 100) / stat.lookups));
	}
#endif /* CONFIG_EMBEDDED */
	
	/* Mount the root file system. */
	while (TRUE) {
		int err;

		bsd_init_kprintf("calling setconf\n");
		setconf();

		bsd_init_kprintf("vfs_mountroot\n");
		if (0 == (err = vfs_mountroot()))
			break;
		rootdevice[0] = '\0';
#if NFSCLIENT
		if (mountroot == netboot_mountroot) {
			PE_display_icon( 0, "noroot");  /* XXX a netboot-specific icon would be nicer */
			vc_progress_set(FALSE, 0);
			for (i=1; 1; i*=2) {
				printf("bsd_init: failed to mount network root, error %d, %s\n",
					err, PE_boot_args());
				printf("We are hanging here...\n");
				IOSleep(i*60*1000);
			}
			/*NOTREACHED*/
		}
#endif
		printf("cannot mount root, errno = %d\n", err);
		boothowto |= RB_ASKNAME;
	}

	IOSecureBSDRoot(rootdevice);

	context.vc_thread = current_thread();
	context.vc_ucred = kernproc->p_ucred;
	mountlist.tqh_first->mnt_flag |= MNT_ROOTFS;

	bsd_init_kprintf("calling VFS_ROOT\n");
	/* Get the vnode for '/'.  Set fdp->fd_fd.fd_cdir to reference it. */
	if (VFS_ROOT(mountlist.tqh_first, &rootvnode, &context))
		panic("bsd_init: cannot find root vnode: %s", PE_boot_args());
	rootvnode->v_flag |= VROOT;
	(void)vnode_ref(rootvnode);
	(void)vnode_put(rootvnode);
	filedesc0.fd_cdir = rootvnode;

#if NFSCLIENT
	if (mountroot == netboot_mountroot) {
		int err;
		/* post mount setup */
		if ((err = netboot_setup()) != 0) {
			PE_display_icon( 0, "noroot");  /* XXX a netboot-specific icon would be nicer */
			vc_progress_set(FALSE, 0);
			for (i=1; 1; i*=2) {
				printf("bsd_init: NetBoot could not find root, error %d: %s\n",
					err, PE_boot_args());
				printf("We are hanging here...\n");
				IOSleep(i*60*1000);
			}
			/*NOTREACHED*/
		}
	}
#endif
	

#if CONFIG_IMAGEBOOT
	/*
	 * See if a system disk image is present. If so, mount it and
	 * switch the root vnode to point to it
	 */ 
  
	if(imageboot_needed()) {
		int err;

		/* An image was found */
		if((err = imageboot_setup())) {
			/*
			 * this is not fatal. Keep trying to root
			 * off the original media
			 */
			printf("%s: imageboot could not find root, %d\n",
				__FUNCTION__, err);
		}
	}
#endif /* CONFIG_IMAGEBOOT */
  
	/* set initial time; all other resource data is  already zero'ed */
	microtime(&kernproc->p_start);
	kernproc->p_stats->p_start = kernproc->p_start;	/* for compat */

#if DEVFS
	{
	    char mounthere[] = "/dev";	/* !const because of internal casting */

	    bsd_init_kprintf("calling devfs_kernel_mount\n");
	    devfs_kernel_mount(mounthere);
	}
#endif /* DEVFS */
	
	/* Initialize signal state for process 0. */
	bsd_init_kprintf("calling siginit\n");
	siginit(kernproc);

	bsd_init_kprintf("calling bsd_utaskbootstrap\n");
	bsd_utaskbootstrap();

#if defined(__LP64__)
	kernproc->p_flag |= P_LP64;
	printf("Kernel is LP64\n");
#endif
#if __i386__ || __x86_64__
	/* this should be done after the root filesystem is mounted */
	error = set_archhandler(kernproc, CPU_TYPE_POWERPC);
	// 10/30/08 - gab: <rdar://problem/6324501>
	// if default 'translate' can't be found, see if the understudy is available
	if (ENOENT == error) {
		strlcpy(exec_archhandler_ppc.path, kRosettaStandIn_str, MAXPATHLEN);
		error = set_archhandler(kernproc, CPU_TYPE_POWERPC);
	}
	if (error) /* XXX make more generic */
		exec_archhandler_ppc.path[0] = 0;
#endif	

	bsd_init_kprintf("calling mountroot_post_hook\n");

	/* invoke post-root-mount hook */
	if (mountroot_post_hook != NULL)
		mountroot_post_hook();

#if 0 /* not yet */
	consider_zone_gc(FALSE);
#endif

	bsd_init_kprintf("done\n");
}
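
bsd_init() allocates lock-group attributes and groups once, then carves the
individual proc mutexes and spinlocks out of them. The one-time setup shape,
as a sketch using the same lck_* KPI (the my_* names are placeholders):

#include <kern/locks.h>

static lck_grp_attr_t	*my_grp_attr;
static lck_grp_t	*my_grp;
static lck_attr_t	*my_attr;
static lck_mtx_t	*my_mtx;

/* Sketch: one-time lock-group setup, then per-object mutexes. */
static void
lock_setup_sketch(void)
{
	my_grp_attr = lck_grp_attr_alloc_init();
	my_grp      = lck_grp_alloc_init("my-subsystem", my_grp_attr);
	my_attr     = lck_attr_alloc_init();
	my_mtx      = lck_mtx_alloc_init(my_grp, my_attr);
}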
Example No. 15
/*
 * forkproc
 *
 * Description:	Create a new process structure, given a parent process
 *		structure.
 *
 * Parameters:	parent_proc		The parent process
 *
 * Returns:	!NULL			The new process structure
 *		NULL			Error (insufficient free memory)
 *
 * Note:	When successful, the newly created process structure is
 *		partially initialized; if a caller needs to deconstruct the
 *		returned structure, they must call forkproc_free() to do so.
 */
proc_t
forkproc(proc_t parent_proc)
{
	proc_t child_proc;	/* Our new process */
	static int nextpid = 0, pidwrap = 0, nextpidversion = 0;
	int error = 0;
	struct session *sessp;
	uthread_t parent_uthread = (uthread_t)get_bsdthread_info(current_thread());

	MALLOC_ZONE(child_proc, proc_t , sizeof *child_proc, M_PROC, M_WAITOK);
	if (child_proc == NULL) {
		printf("forkproc: M_PROC zone exhausted\n");
		goto bad;
	}
	/* zero it out as we need to insert in hash */
	bzero(child_proc, sizeof *child_proc);

	MALLOC_ZONE(child_proc->p_stats, struct pstats *,
			sizeof *child_proc->p_stats, M_PSTATS, M_WAITOK);
	if (child_proc->p_stats == NULL) {
		printf("forkproc: M_SUBPROC zone exhausted (p_stats)\n");
		FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
		child_proc = NULL;
		goto bad;
	}
	MALLOC_ZONE(child_proc->p_sigacts, struct sigacts *,
			sizeof *child_proc->p_sigacts, M_SIGACTS, M_WAITOK);
	if (child_proc->p_sigacts == NULL) {
		printf("forkproc: M_SUBPROC zone exhausted (p_sigacts)\n");
		FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS);
		FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
		child_proc = NULL;
		goto bad;
	}

	/* allocate a callout for use by interval timers */
	child_proc->p_rcall = thread_call_allocate((thread_call_func_t)realitexpire, child_proc);
	if (child_proc->p_rcall == NULL) {
		FREE_ZONE(child_proc->p_sigacts, sizeof *child_proc->p_sigacts, M_SIGACTS);
		FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS);
		FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
		child_proc = NULL;
		goto bad;
	}


	/*
	 * Find an unused PID.  
	 */

	proc_list_lock();

	nextpid++;
retry:
	/*
	 * If the process ID prototype has wrapped around,
	 * restart somewhat above 0, as the low-numbered procs
	 * tend to include daemons that don't exit.
	 */
	if (nextpid >= PID_MAX) {
		nextpid = 100;
		pidwrap = 1;
	}
	if (pidwrap != 0) {

		/* the pid stays in the hash for both zombie and running states */
		if  (pfind_locked(nextpid) != PROC_NULL) {
			nextpid++;
			goto retry;
		}

		if (pgfind_internal(nextpid) != PGRP_NULL) {
			nextpid++;
			goto retry;
		}	
		if (session_find_internal(nextpid) != SESSION_NULL) {
			nextpid++;
			goto retry;
		}	
	}
	nprocs++;
	child_proc->p_pid = nextpid;
	child_proc->p_idversion = nextpidversion++;
#if 1
	if (child_proc->p_pid != 0) {
		if (pfind_locked(child_proc->p_pid) != PROC_NULL)
			panic("proc in the list already\n");
	}
#endif
	/* Insert in the hash */
	child_proc->p_listflag |= (P_LIST_INHASH | P_LIST_INCREATE);
	LIST_INSERT_HEAD(PIDHASH(child_proc->p_pid), child_proc, p_hash);
	proc_list_unlock();


	/*
	 * We've identified the PID we are going to use; initialize the new
	 * process structure.
	 */
	child_proc->p_stat = SIDL;
	child_proc->p_pgrpid = PGRPID_DEAD;

	/*
	 * The proc structure was zeroed at allocation time because it had
	 * to be inserted into the hash.  Copy the section that should be
	 * copied directly from the parent.
	 */
	bcopy(&parent_proc->p_startcopy, &child_proc->p_startcopy,
	    (unsigned) ((caddr_t)&child_proc->p_endcopy - (caddr_t)&child_proc->p_startcopy));

	/*
	 * Some flags are inherited from the parent.
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 * The p_stats and p_sigacts substructs are set in vm_fork.
	 */
	child_proc->p_flag = (parent_proc->p_flag & (P_LP64 | P_TRANSLATED | P_AFFINITY));
	if (parent_proc->p_flag & P_PROFIL)
		startprofclock(child_proc);
	/*
	 * Note that if the current thread has an assumed identity, this
	 * credential will be granted to the new process.
	 */
	child_proc->p_ucred = kauth_cred_get_with_ref();

#ifdef CONFIG_EMBEDDED
	lck_mtx_init(&child_proc->p_mlock, proc_lck_grp, proc_lck_attr);
	lck_mtx_init(&child_proc->p_fdmlock, proc_lck_grp, proc_lck_attr);
#if CONFIG_DTRACE
	lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
#endif
	lck_spin_init(&child_proc->p_slock, proc_lck_grp, proc_lck_attr);
#else /* !CONFIG_EMBEDDED */
	lck_mtx_init(&child_proc->p_mlock, proc_mlock_grp, proc_lck_attr);
	lck_mtx_init(&child_proc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
#if CONFIG_DTRACE
	lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
#endif
	lck_spin_init(&child_proc->p_slock, proc_slock_grp, proc_lck_attr);
#endif /* !CONFIG_EMBEDDED */
	klist_init(&child_proc->p_klist);

	if (child_proc->p_textvp != NULLVP) {
		/* bump references to the text vnode */
		/* Need to hold iocount across the ref call */
		if (vnode_getwithref(child_proc->p_textvp) == 0) {
			error = vnode_ref(child_proc->p_textvp);
			vnode_put(child_proc->p_textvp);
			if (error != 0)
				child_proc->p_textvp = NULLVP;
		}
	}

	/*
	 * Copy the parent's per-process open file table to the child; if
	 * there is a per-thread current working directory, set the child's
	 * per-process current working directory to that instead of the
	 * parent's.
	 *
	 * XXX may fail to copy descriptors to child
	 */
	child_proc->p_fd = fdcopy(parent_proc, parent_uthread->uu_cdir);

#if SYSV_SHM
	if (parent_proc->vm_shm) {
		/* XXX may fail to attach shm to child */
		(void)shmfork(parent_proc, child_proc);
	}
#endif
	/*
	 * inherit the limit structure to child
	 */
	proc_limitfork(parent_proc, child_proc);

	if (child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
		uint64_t rlim_cur = child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur;
		child_proc->p_rlim_cpu.tv_sec = (rlim_cur > __INT_MAX__) ? __INT_MAX__ : rlim_cur;
	}

	/* Initialize new process stats, including start time */
	/* <rdar://6640543> non-zeroed portion contains garbage AFAICT */
	bzero(&child_proc->p_stats->pstat_startzero,
	    (unsigned) ((caddr_t)&child_proc->p_stats->pstat_endzero -
	    (caddr_t)&child_proc->p_stats->pstat_startzero));
	bzero(&child_proc->p_stats->user_p_prof, sizeof(struct user_uprof));
	microtime(&child_proc->p_start);
	child_proc->p_stats->p_start = child_proc->p_start;     /* for compat */

	if (parent_proc->p_sigacts != NULL)
		(void)memcpy(child_proc->p_sigacts,
				parent_proc->p_sigacts, sizeof *child_proc->p_sigacts);
	else
		(void)memset(child_proc->p_sigacts, 0, sizeof *child_proc->p_sigacts);

	sessp = proc_session(parent_proc);
	if (sessp->s_ttyvp != NULL && parent_proc->p_flag & P_CONTROLT)
		OSBitOrAtomic(P_CONTROLT, &child_proc->p_flag);
	session_rele(sessp);

	/*
	 * Block all signals from reaching the process.
	 * No transition race should be occurring with the child yet,
	 * but indicate that the process is in (the creation) transition.
	 */
	proc_signalstart(child_proc, 0);
	proc_transstart(child_proc, 0);

	child_proc->p_pcaction = (parent_proc->p_pcaction) & P_PCMAX;
	TAILQ_INIT(&child_proc->p_uthlist);
	TAILQ_INIT(&child_proc->p_aio_activeq);
	TAILQ_INIT(&child_proc->p_aio_doneq);

	/* Inherit the parent flags for code sign */
	child_proc->p_csflags = parent_proc->p_csflags;

	/*
	 * All processes have work queue locks; cleaned up by
	 * reap_child_locked()
	 */
	workqueue_init_lock(child_proc);

	/*
	 * Copy work queue information
	 *
	 * Note: This should probably only happen in the case where we are
	 *	creating a child that is a copy of the parent; since this
	 *	routine is called in the non-duplication case of vfork()
	 *	or posix_spawn(), then this information should likely not
	 *	be duplicated.
	 *
	 * <rdar://6640553> Work queue pointers that no longer point to code
	 */
	child_proc->p_wqthread = parent_proc->p_wqthread;
	child_proc->p_threadstart = parent_proc->p_threadstart;
	child_proc->p_pthsize = parent_proc->p_pthsize;
	child_proc->p_targconc = parent_proc->p_targconc;
	if ((parent_proc->p_lflag & P_LREGISTER) != 0) {
		child_proc->p_lflag |= P_LREGISTER;
	}
	child_proc->p_dispatchqueue_offset = parent_proc->p_dispatchqueue_offset;
#if PSYNCH
	pth_proc_hashinit(child_proc);
#endif /* PSYNCH */

#if CONFIG_LCTX
	child_proc->p_lctx = NULL;
	/* Add new process to login context (if any). */
	if (parent_proc->p_lctx != NULL) {
		/*
		 * <rdar://6640564> This should probably be delayed in the
		 * vfork() or posix_spawn() cases.
		 */
		LCTX_LOCK(parent_proc->p_lctx);
		enterlctx(child_proc, parent_proc->p_lctx, 0);
	}
#endif

bad:
	return(child_proc);
}
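
The p_textvp handling above captures a general rule: vnode_ref() must be
called with an iocount held, so when only a bare pointer is available,
vnode_getwithref() supplies the iocount first. The same step in isolation,
as a sketch using the calls shown above:

#include <sys/errno.h>
#include <sys/vnode.h>

/*
 * Sketch: add a usecount to a vnode held only by pointer, as done for
 * p_textvp above. An iocount is taken first with vnode_getwithref().
 */
static int
ref_by_pointer_sketch(vnode_t vp)
{
	int error;

	if (vnode_getwithref(vp) != 0)
		return ENOENT;		/* vnode went away */
	error = vnode_ref(vp);		/* needs the iocount just taken */
	(void)vnode_put(vp);
	return error;
}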
Example No. 16
/*
 * Mount null layer
 */
static int
nullfs_mount(struct mount * mp, __unused vnode_t devvp, user_addr_t user_data, vfs_context_t ctx)
{
	int error                 = 0;
	struct vnode *lowerrootvp = NULL, *vp = NULL;
	struct vfsstatfs * sp   = NULL;
	struct null_mount * xmp = NULL;
	char data[MAXPATHLEN];
	size_t count;
	struct vfs_attr vfa;
	/* set defaults (arbitrary since this file system is readonly) */
	uint32_t bsize  = BLKDEV_IOSIZE;
	size_t iosize   = BLKDEV_IOSIZE;
	uint64_t blocks = 4711 * 4711;
	uint64_t bfree  = 0;
	uint64_t bavail = 0;
	uint64_t bused  = 4711;
	uint64_t files  = 4711;
	uint64_t ffree  = 0;

	kauth_cred_t cred = vfs_context_ucred(ctx);

	NULLFSDEBUG("nullfs_mount(mp = %p) %llx\n", (void *)mp, vfs_flags(mp));

	if (vfs_flags(mp) & MNT_ROOTFS)
		return (EOPNOTSUPP);

	/*
	 * Update is a no-op
	 */
	if (vfs_isupdate(mp)) {
		return ENOTSUP;
	}

	/* check entitlement */
	if (!IOTaskHasEntitlement(current_task(), NULLFS_ENTITLEMENT)) {
		return EPERM;
	}

	/*
	 * Get argument
	 */
	error = copyinstr(user_data, data, MAXPATHLEN - 1, &count);
	if (error) {
		NULLFSDEBUG("nullfs: error copying data form user %d\n", error);
		goto error;
	}

	/* This could happen if the system is configured for 32 bit inodes instead of
	 * 64 bit */
	if (count > MAX_MNT_FROM_LENGTH) {
		error = EINVAL;
		NULLFSDEBUG("nullfs: path to translocate too large for this system %d vs %d\n", count, MAX_MNT_FROM_LENGTH);
		goto error;
	}

	error = vnode_lookup(data, 0, &lowerrootvp, ctx);
	if (error) {
		NULLFSDEBUG("lookup %s -> %d\n", data, error);
		goto error;
	}

	/* lowerrootvp has an iocount after vnode_lookup; trade it for a usecount.
	   Keep that reference to signal that we want the thing we are mirroring
	   to stay around. Drop it in unmount. */
	error = vnode_ref(lowerrootvp);
	vnode_put(lowerrootvp);
	if (error)
	{
		// If vnode_ref failed, then null it out so it can't be used anymore in cleanup.
		lowerrootvp = NULL;
		goto error;
	}

	NULLFSDEBUG("mount %s\n", data);

	MALLOC(xmp, struct null_mount *, sizeof(*xmp), M_TEMP, M_WAITOK | M_ZERO);
	if (xmp == NULL) {
		error = ENOMEM;
		goto error;
	}

	/*
	 * Save reference to underlying FS
	 */
	xmp->nullm_lowerrootvp  = lowerrootvp;
	xmp->nullm_lowerrootvid = vnode_vid(lowerrootvp);

	error = null_getnewvnode(mp, NULL, NULL, &vp, NULL, 1);
	if (error) {
		goto error;
	}

	/* vp has an iocount on it from vnode_create; drop that for a usecount.
	 * This is our root vnode, so we drop the ref in unmount.
	 *
	 * Assume for now that, because we created this vnode and we aren't
	 * finished mounting, taking the ref cannot fail. */
	vnode_ref(vp);
	vnode_put(vp);

	error = nullfs_init_lck(&xmp->nullm_lock);
	if (error) {
		goto error;
	}

	xmp->nullm_rootvp = vp;

	/* read the flags the user set, but then ignore some of them, we will only
	   allow them if they are set on the lower file system */
	uint64_t flags      = vfs_flags(mp) & (~(MNT_IGNORE_OWNERSHIP | MNT_LOCAL));
	uint64_t lowerflags = vfs_flags(vnode_mount(lowerrootvp)) & (MNT_LOCAL | MNT_QUARANTINE | MNT_IGNORE_OWNERSHIP | MNT_NOEXEC);

	if (lowerflags) {
		flags |= lowerflags;
	}

	/* force these flags */
	flags |= (MNT_DONTBROWSE | MNT_MULTILABEL | MNT_NOSUID | MNT_RDONLY);
	vfs_setflags(mp, flags);

	vfs_setfsprivate(mp, xmp);
	vfs_getnewfsid(mp);
	vfs_setlocklocal(mp);

	/* fill in the stat block */
	sp = vfs_statfs(mp);
	strlcpy(sp->f_mntfromname, data, MAX_MNT_FROM_LENGTH);

	sp->f_flags = flags;

	xmp->nullm_flags = NULLM_CASEINSENSITIVE; /* default to case insensitive */

	error = nullfs_vfs_getlowerattr(vnode_mount(lowerrootvp), &vfa, ctx);
	if (error == 0) {
		if (VFSATTR_IS_SUPPORTED(&vfa, f_bsize)) {
			bsize = vfa.f_bsize;
		}
		if (VFSATTR_IS_SUPPORTED(&vfa, f_iosize)) {
			iosize = vfa.f_iosize;
		}
		if (VFSATTR_IS_SUPPORTED(&vfa, f_blocks)) {
			blocks = vfa.f_blocks;
		}
		if (VFSATTR_IS_SUPPORTED(&vfa, f_bfree)) {
			bfree = vfa.f_bfree;
		}
		if (VFSATTR_IS_SUPPORTED(&vfa, f_bavail)) {
			bavail = vfa.f_bavail;
		}
		if (VFSATTR_IS_SUPPORTED(&vfa, f_bused)) {
			bused = vfa.f_bused;
		}
		if (VFSATTR_IS_SUPPORTED(&vfa, f_files)) {
			files = vfa.f_files;
		}
		if (VFSATTR_IS_SUPPORTED(&vfa, f_ffree)) {
			ffree = vfa.f_ffree;
		}
		if (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) {
			if ((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & (VOL_CAP_FMT_CASE_SENSITIVE)) &&
			    (vfa.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & (VOL_CAP_FMT_CASE_SENSITIVE))) {
				xmp->nullm_flags &= ~NULLM_CASEINSENSITIVE;
			}
		}
	} else {
		goto error;
	}

	sp->f_bsize  = bsize;
	sp->f_iosize = iosize;
	sp->f_blocks = blocks;
	sp->f_bfree  = bfree;
	sp->f_bavail = bavail;
	sp->f_bused  = bused;
	sp->f_files  = files;
	sp->f_ffree  = ffree;

	/* Associate the mac label information from the mirrored filesystem with the
	 * mirror */
	MAC_PERFORM(mount_label_associate, cred, vnode_mount(lowerrootvp), vfs_mntlabel(mp));

	NULLFSDEBUG("nullfs_mount: lower %s, alias at %s\n", sp->f_mntfromname, sp->f_mntonname);
	return (0);

error:
	if (xmp) {
		FREE(xmp, M_TEMP);
	}
	if (lowerrootvp) {
		vnode_getwithref(lowerrootvp);
		vnode_rele(lowerrootvp);
		vnode_put(lowerrootvp);
	}
	if (vp) {
		/* we made the root vnode but the mount is failed, so clean it up */
		vnode_getwithref(vp);
		vnode_rele(vp);
		/* give vp back */
		vnode_recycle(vp);
		vnode_put(vp);
	}
	return error;
}
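
The error path above re-takes an iocount with vnode_getwithref() before
calling vnode_rele()/vnode_recycle(), keeping the vnode stable while the
references are unwound. That cleanup step, condensed into a sketch (the
function name is hypothetical):

#include <sys/vnode.h>

/*
 * Sketch: unwind a usecount taken with vnode_ref() on a failure path.
 * An iocount is re-taken first so the vnode stays stable while the
 * references are dropped; 'recycle' mirrors the root-vnode case above.
 */
static void
unwind_ref_sketch(vnode_t vp, int recycle)
{
	if (vnode_getwithref(vp) != 0)
		return;				/* vnode already gone */
	vnode_rele(vp);				/* undo the earlier vnode_ref() */
	if (recycle)
		(void)vnode_recycle(vp);	/* mark for reclaim */
	(void)vnode_put(vp);
}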
Example No. 17
static errno_t
fuse_vfsop_mount(mount_t mp, __unused vnode_t devvp, user_addr_t udata,
                 vfs_context_t context)
{
    int err      = 0;
    int mntopts  = 0;
    bool mounted = false;

    uint32_t max_read = ~0;

    size_t len;

    fuse_device_t      fdev = NULL;
    struct fuse_data  *data = NULL;
    fuse_mount_args    fusefs_args;
    struct vfsstatfs  *vfsstatfsp = vfs_statfs(mp);

#if M_FUSE4X_ENABLE_BIGLOCK
    lck_mtx_t         *biglock;
#endif

    fuse_trace_printf_vfsop();

    if (vfs_isupdate(mp)) {
        return ENOTSUP;
    }

    err = copyin(udata, &fusefs_args, sizeof(fusefs_args));
    if (err) {
        return EINVAL;
    }

    /*
     * Interesting flags that we can receive from mount or may want to
     * otherwise forcibly set include:
     *
     *     MNT_ASYNC
     *     MNT_AUTOMOUNTED
     *     MNT_DEFWRITE
     *     MNT_DONTBROWSE
     *     MNT_IGNORE_OWNERSHIP
     *     MNT_JOURNALED
     *     MNT_NODEV
     *     MNT_NOEXEC
     *     MNT_NOSUID
     *     MNT_NOUSERXATTR
     *     MNT_RDONLY
     *     MNT_SYNCHRONOUS
     *     MNT_UNION
     */

    err = ENOTSUP;

#if M_FUSE4X_ENABLE_UNSUPPORTED
    vfs_setlocklocal(mp);
#endif /* M_FUSE4X_ENABLE_UNSUPPORTED */

    /** Option Processing. **/

    if (*fusefs_args.fstypename) {
        size_t typenamelen = strlen(fusefs_args.fstypename);
        if (typenamelen > FUSE_FSTYPENAME_MAXLEN) {
            return EINVAL;
        }
        snprintf(vfsstatfsp->f_fstypename, MFSTYPENAMELEN, "%s%s",
                 FUSE_FSTYPENAME_PREFIX, fusefs_args.fstypename);
    }

    if (!*fusefs_args.fsname)
        return EINVAL;

    if ((fusefs_args.daemon_timeout > FUSE_MAX_DAEMON_TIMEOUT) ||
            (fusefs_args.daemon_timeout < FUSE_MIN_DAEMON_TIMEOUT)) {
        return EINVAL;
    }

    if ((fusefs_args.init_timeout > FUSE_MAX_INIT_TIMEOUT) ||
            (fusefs_args.init_timeout < FUSE_MIN_INIT_TIMEOUT)) {
        return EINVAL;
    }

    if (fusefs_args.altflags & FUSE_MOPT_SPARSE) {
        mntopts |= FSESS_SPARSE;
    }

    if (fusefs_args.altflags & FUSE_MOPT_AUTO_CACHE) {
        mntopts |= FSESS_AUTO_CACHE;
    }

    if (fusefs_args.altflags & FUSE_MOPT_AUTO_XATTR) {
        if (fusefs_args.altflags & FUSE_MOPT_NATIVE_XATTR) {
            return EINVAL;
        }
        mntopts |= FSESS_AUTO_XATTR;
    } else if (fusefs_args.altflags & FUSE_MOPT_NATIVE_XATTR) {
        mntopts |= FSESS_NATIVE_XATTR;
    }

    if (fusefs_args.altflags & FUSE_MOPT_JAIL_SYMLINKS) {
        mntopts |= FSESS_JAIL_SYMLINKS;
    }

    /*
     * Note that unlike Linux, which keeps allow_root in user-space and
     * passes allow_other in that case to the kernel, we let allow_root
     * reach the kernel. The 'if' ordering is important here.
     */
    if (fusefs_args.altflags & FUSE_MOPT_ALLOW_ROOT) {
        int is_member = 0;
        if ((kauth_cred_ismember_gid(kauth_cred_get(), fuse_admin_group, &is_member) != 0) || !is_member) {
            log("fuse4x: caller is not a member of fuse4x admin group. "
                "Either add user (id=%d) to group (id=%d), "
                "or set correct '" SYSCTL_FUSE4X_TUNABLES_ADMIN "' sysctl value.\n",
                kauth_cred_getuid(kauth_cred_get()), fuse_admin_group);
            return EPERM;
        }
        mntopts |= FSESS_ALLOW_ROOT;
    } else if (fusefs_args.altflags & FUSE_MOPT_ALLOW_OTHER) {
        if (!fuse_allow_other && !fuse_vfs_context_issuser(context)) {
            int is_member = 0;
            if ((kauth_cred_ismember_gid(kauth_cred_get(), fuse_admin_group, &is_member) != 0) || !is_member) {
                log("fuse4x: caller is not a member of fuse4x admin group. "
                    "Either add user (id=%d) to group (id=%d), "
                    "or set correct '" SYSCTL_FUSE4X_TUNABLES_ADMIN "' sysctl value.\n",
                    kauth_cred_getuid(kauth_cred_get()), fuse_admin_group);
                return EPERM;
            }
        }
        mntopts |= FSESS_ALLOW_OTHER;
    }

    if (fusefs_args.altflags & FUSE_MOPT_NO_APPLEDOUBLE) {
        mntopts |= FSESS_NO_APPLEDOUBLE;
    }

    if (fusefs_args.altflags & FUSE_MOPT_NO_APPLEXATTR) {
        mntopts |= FSESS_NO_APPLEXATTR;
    }

    if ((fusefs_args.altflags & FUSE_MOPT_FSID) && (fusefs_args.fsid != 0)) {
        fsid_t   fsid;
        mount_t  other_mp;
        uint32_t target_dev;

        target_dev = FUSE_MAKEDEV(FUSE_CUSTOM_FSID_DEVICE_MAJOR,
                                  fusefs_args.fsid);

        fsid.val[0] = target_dev;
        fsid.val[1] = FUSE_CUSTOM_FSID_VAL1;

        other_mp = vfs_getvfs(&fsid);
        if (other_mp != NULL) {
            return EPERM;
        }

        vfsstatfsp->f_fsid.val[0] = target_dev;
        vfsstatfsp->f_fsid.val[1] = FUSE_CUSTOM_FSID_VAL1;

    } else {
        vfs_getnewfsid(mp);
    }

    if (fusefs_args.altflags & FUSE_MOPT_NO_ATTRCACHE) {
        mntopts |= FSESS_NO_ATTRCACHE;
    }

    if (fusefs_args.altflags & FUSE_MOPT_NO_READAHEAD) {
        mntopts |= FSESS_NO_READAHEAD;
    }

    if (fusefs_args.altflags & (FUSE_MOPT_NO_UBC | FUSE_MOPT_DIRECT_IO)) {
        mntopts |= FSESS_NO_UBC;
    }

    if (fusefs_args.altflags & FUSE_MOPT_NO_VNCACHE) {
        mntopts |= FSESS_NO_VNCACHE;
    }

    if (fusefs_args.altflags & FUSE_MOPT_NEGATIVE_VNCACHE) {
        if (mntopts & FSESS_NO_VNCACHE) {
            return EINVAL;
        }
        mntopts |= FSESS_NEGATIVE_VNCACHE;
    }

    if (fusefs_args.altflags & FUSE_MOPT_NO_SYNCWRITES) {

        /* Cannot mix 'nosyncwrites' with 'noubc' or 'noreadahead'. */
        if (mntopts & (FSESS_NO_READAHEAD | FSESS_NO_UBC)) {
            log("fuse4x: cannot mix 'nosyncwrites' with 'noubc' or 'noreadahead'\n");
            return EINVAL;
        }

        mntopts |= FSESS_NO_SYNCWRITES;
        vfs_clearflags(mp, MNT_SYNCHRONOUS);
        vfs_setflags(mp, MNT_ASYNC);

        /* We check for this only if we have nosyncwrites in the first place. */
        if (fusefs_args.altflags & FUSE_MOPT_NO_SYNCONCLOSE) {
            mntopts |= FSESS_NO_SYNCONCLOSE;
        }

    } else {
        vfs_clearflags(mp, MNT_ASYNC);
        vfs_setflags(mp, MNT_SYNCHRONOUS);
    }

    if (mntopts & FSESS_NO_UBC) {
        /* If no buffer cache, disallow exec from file system. */
        vfs_setflags(mp, MNT_NOEXEC);
    }

    vfs_setauthopaque(mp);
    vfs_setauthopaqueaccess(mp);

    if ((fusefs_args.altflags & FUSE_MOPT_DEFAULT_PERMISSIONS) &&
            (fusefs_args.altflags & FUSE_MOPT_DEFER_PERMISSIONS)) {
        return EINVAL;
    }

    if (fusefs_args.altflags & FUSE_MOPT_DEFAULT_PERMISSIONS) {
        mntopts |= FSESS_DEFAULT_PERMISSIONS;
        vfs_clearauthopaque(mp);
    }

    if (fusefs_args.altflags & FUSE_MOPT_DEFER_PERMISSIONS) {
        mntopts |= FSESS_DEFER_PERMISSIONS;
    }

    if (fusefs_args.altflags & FUSE_MOPT_EXTENDED_SECURITY) {
        mntopts |= FSESS_EXTENDED_SECURITY;
        vfs_setextendedsecurity(mp);
    }

    if (fusefs_args.altflags & FUSE_MOPT_LOCALVOL) {
        vfs_setflags(mp, MNT_LOCAL);
    }
    /* done checking incoming option bits */

    err = 0;

    vfs_setfsprivate(mp, NULL);

    fdev = fuse_device_get(fusefs_args.rdev);
    if (!fdev) {
        log("fuse4x: invalid device file (number=%d)\n", fusefs_args.rdev);
        return EINVAL;
    }

    fuse_lck_mtx_lock(fdev->mtx);

    data = fdev->data;

    if (!data) {
        fuse_lck_mtx_unlock(fdev->mtx);
        return ENXIO;
    }

#if M_FUSE4X_ENABLE_BIGLOCK
    biglock = data->biglock;
    fuse_biglock_lock(biglock);
#endif

    if (data->dataflags & FSESS_MOUNTED) {
#if M_FUSE4X_ENABLE_BIGLOCK
        fuse_biglock_unlock(biglock);
#endif
        fuse_lck_mtx_unlock(fdev->mtx);
        return EALREADY;
    }

    if (!(data->dataflags & FSESS_OPENED)) {
        fuse_lck_mtx_unlock(fdev->mtx);
        err = ENXIO;
        goto out;
    }

    data->dataflags |= FSESS_MOUNTED;
    OSAddAtomic(1, (SInt32 *)&fuse_mount_count);
    mounted = true;

    if (fdata_dead_get(data)) {
        fuse_lck_mtx_unlock(fdev->mtx);
        err = ENOTCONN;
        goto out;
    }

    if (!data->daemoncred) {
        panic("fuse4x: daemon found but identity unknown");
    }

    if (fuse_vfs_context_issuser(context) &&
            kauth_cred_getuid(vfs_context_ucred(context)) != kauth_cred_getuid(data->daemoncred)) {
        fuse_lck_mtx_unlock(fdev->mtx);
        err = EPERM;
        log("fuse4x: fuse daemon running by user_id=%d does not have privileges to mount on directory %s owned by user_id=%d\n",
            kauth_cred_getuid(data->daemoncred), vfsstatfsp->f_mntonname, kauth_cred_getuid(vfs_context_ucred(context)));
        goto out;
    }

    data->mp = mp;
    data->fdev = fdev;
    data->dataflags |= mntopts;

    data->daemon_timeout.tv_sec =  fusefs_args.daemon_timeout;
    data->daemon_timeout.tv_nsec = 0;
    if (data->daemon_timeout.tv_sec) {
        data->daemon_timeout_p = &(data->daemon_timeout);
    } else {
        data->daemon_timeout_p = NULL;
    }

    data->init_timeout.tv_sec = fusefs_args.init_timeout;
    data->init_timeout.tv_nsec = 0;

    data->max_read = max_read;
    data->fssubtype = fusefs_args.fssubtype;
    data->mountaltflags = fusefs_args.altflags;
    data->noimplflags = (uint64_t)0;

    data->blocksize = fuse_round_size(fusefs_args.blocksize,
                                      FUSE_MIN_BLOCKSIZE, FUSE_MAX_BLOCKSIZE);

    data->iosize = fuse_round_size(fusefs_args.iosize,
                                   FUSE_MIN_IOSIZE, FUSE_MAX_IOSIZE);

    if (data->iosize < data->blocksize) {
        data->iosize = data->blocksize;
    }

    data->userkernel_bufsize = FUSE_DEFAULT_USERKERNEL_BUFSIZE;

    copystr(fusefs_args.fsname, vfsstatfsp->f_mntfromname,
            MNAMELEN - 1, &len);
    bzero(vfsstatfsp->f_mntfromname + len, MNAMELEN - len);

    copystr(fusefs_args.volname, data->volname, MAXPATHLEN - 1, &len);
    bzero(data->volname + len, MAXPATHLEN - len);

    /* previous location of vfs_setioattr() */

    vfs_setfsprivate(mp, data);

    fuse_lck_mtx_unlock(fdev->mtx);

    /* Send a handshake message to the daemon. */
    fuse_send_init(data, context);

    struct vfs_attr vfs_attr;
    VFSATTR_INIT(&vfs_attr);
    // Our vfs_getattr() doesn't look at most *_IS_ACTIVE()'s
    err = fuse_vfsop_getattr(mp, &vfs_attr, context);
    if (!err) {
        vfsstatfsp->f_bsize  = vfs_attr.f_bsize;
        vfsstatfsp->f_iosize = vfs_attr.f_iosize;
        vfsstatfsp->f_blocks = vfs_attr.f_blocks;
        vfsstatfsp->f_bfree  = vfs_attr.f_bfree;
        vfsstatfsp->f_bavail = vfs_attr.f_bavail;
        vfsstatfsp->f_bused  = vfs_attr.f_bused;
        vfsstatfsp->f_files  = vfs_attr.f_files;
        vfsstatfsp->f_ffree  = vfs_attr.f_ffree;
        // vfsstatfsp->f_fsid already handled above
        vfsstatfsp->f_owner  = kauth_cred_getuid(data->daemoncred);
        vfsstatfsp->f_flags  = vfs_flags(mp);
        // vfsstatfsp->f_fstypename already handled above
        // vfsstatfsp->f_mntonname handled elsewhere
        // vfsstatfsp->f_mnfromname already handled above
        vfsstatfsp->f_fssubtype = data->fssubtype;
    }
    if (fusefs_args.altflags & FUSE_MOPT_BLOCKSIZE) {
        vfsstatfsp->f_bsize = data->blocksize;
    } else {
        //data->blocksize = vfsstatfsp->f_bsize;
    }
    if (fusefs_args.altflags & FUSE_MOPT_IOSIZE) {
        vfsstatfsp->f_iosize = data->iosize;
    } else {
        //data->iosize = (uint32_t)vfsstatfsp->f_iosize;
        vfsstatfsp->f_iosize = data->iosize;
    }

out:
    if (err) {
        vfs_setfsprivate(mp, NULL);

        fuse_lck_mtx_lock(fdev->mtx);
        data = fdev->data; /* again */
        if (mounted) {
            OSAddAtomic(-1, (SInt32 *)&fuse_mount_count);
        }
        if (data) {
            data->dataflags &= ~FSESS_MOUNTED;
            if (!(data->dataflags & FSESS_OPENED)) {
#if M_FUSE4X_ENABLE_BIGLOCK
                assert(biglock == data->biglock);
                fuse_biglock_unlock(biglock);
#endif
                fuse_device_close_final(fdev);
                /* data is gone now */
            }
        }
        fuse_lck_mtx_unlock(fdev->mtx);
    } else {
        vnode_t fuse_rootvp = NULLVP;
        err = fuse_vfsop_root(mp, &fuse_rootvp, context);
        if (err) {
            goto out; /* go back and follow error path */
        }
        err = vnode_ref(fuse_rootvp);
        (void)vnode_put(fuse_rootvp);
        if (err) {
            goto out; /* go back and follow error path */
        } else {
            struct vfsioattr ioattr;

            vfs_ioattr(mp, &ioattr);
            ioattr.io_devblocksize = data->blocksize;
            vfs_setioattr(mp, &ioattr);
        }
    }

#if M_FUSE4X_ENABLE_BIGLOCK
    fuse_lck_mtx_lock(fdev->mtx);
    data = fdev->data; /* ...and again */
    if(data) {
        assert(data->biglock == biglock);
        fuse_biglock_unlock(biglock);
    }
    fuse_lck_mtx_unlock(fdev->mtx);
#endif

    return err;
}
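
The blocksize/iosize handling above clamps the daemon-supplied values into a
fixed window and then forces iosize to be at least blocksize. A hypothetical
equivalent of that clamp (the real fuse_round_size() may additionally round
to a power of two; only the clamping behavior is sketched here):

#include <sys/types.h>

/* Sketch: clamp a daemon-supplied size into [minsz, maxsz]. */
static uint32_t
clamp_size_sketch(uint32_t size, uint32_t minsz, uint32_t maxsz)
{
	if (size < minsz)
		return minsz;
	if (size > maxsz)
		return maxsz;
	return size;
}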
Example No. 18
int
cttyopen(dev_t dev, int flag, __unused int mode, proc_t p)
{
	vnode_t ttyvp = cttyvp(p);
	struct vfs_context context;
	int error = 0;
	int cttyflag, doclose = 0;
	struct session *sessp;

	if (ttyvp == NULL)
		return (ENXIO);

	context.vc_thread = current_thread();
	context.vc_ucred = kauth_cred_proc_ref(p);

	sessp = proc_session(p);
	session_lock(sessp);
	cttyflag = sessp->s_flags & S_CTTYREF;	
	session_unlock(sessp);

	/*
	 * A little hack--this device, used by many processes,
	 * happens to do an open on another device, which can 
	 * cause unhappiness if the second-level open blocks indefinitely 
	 * (as could be the case if the master side has hung up).  Since
	 * we know that this driver doesn't care about serializing
	 * opens and closes, we can drop the lock. To avoid an opencount
	 * leak, open the vnode only on the first open.
	 */
	if (cttyflag == 0) {
		devsw_unlock(dev, S_IFCHR);
		error = VNOP_OPEN(ttyvp, flag, &context);
		devsw_lock(dev, S_IFCHR);

		if (error) 
			goto out;
	
		/*
		 * If S_CTTYREF is set, some other thread did an open
		 * and was able to set the flag, now perform a close, else
		 * set the flag.
		 */
		session_lock(sessp);
		if (cttyflag == (sessp->s_flags & S_CTTYREF))
			sessp->s_flags |= S_CTTYREF;
		else
			doclose = 1;
		session_unlock(sessp);

		/*
		 * We have to take a reference here to make sure a close
		 * gets called during revoke. Note that once a controlling 
		 * tty gets opened by this driver, the only way close will
		 * get called is when the session leader, whose controlling
		 * tty is ttyvp, exits and the vnode is revoked. We cannot
		 * redirect close from this driver because the underlying
		 * controlling terminal might change and close may get
		 * redirected to a wrong vnode, causing a panic.
		 */
		if (doclose) {
			devsw_unlock(dev, S_IFCHR);
			VNOP_CLOSE(ttyvp, flag, &context);
			devsw_lock(dev, S_IFCHR);
		} else {
			error = vnode_ref(ttyvp);
		}
	}
out:
	session_rele(sessp);

	vnode_put(ttyvp);
	kauth_cred_unref(&context.vc_ucred);

	return (error);
}
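
cttyopen() resolves the open/open race with a double check: drop the devsw
lock around the blocking VNOP_OPEN(), re-examine the session flag under the
lock, and close again if another thread won. The shape of that protocol as a
sketch ('lost_race' is a placeholder for the locked S_CTTYREF test-and-set):

#include <sys/vnode.h>

/*
 * Sketch: first-opener-wins protocol around a blocking VNOP_OPEN().
 * 'lost_race' returns nonzero when another thread already kept the open.
 */
static int
open_once_sketch(vnode_t vp, int flag, vfs_context_t ctx,
		 int (*lost_race)(void))
{
	int error = VNOP_OPEN(vp, flag, ctx);	/* may block; locks dropped */

	if (error)
		return error;
	if (lost_race()) {
		(void)VNOP_CLOSE(vp, flag, ctx);  /* balance the extra open */
		return 0;
	}
	return vnode_ref(vp);	/* winner holds a usecount for revoke */
}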
Example No. 19
/**
 * Reset the volume-name to volume-id mapping cache.
 * @param flags AFS_VOLCHECK_* bits selecting what to invalidate.
 */
void
afs_CheckVolumeNames(int flags)
{
    afs_int32 i, j;
    struct volume *tv;
    unsigned int now;
    struct vcache *tvc;
    afs_int32 *volumeID, *cellID, vsize, nvols;
#ifdef AFS_DARWIN80_ENV
    vnode_t tvp;
#endif
    AFS_STATCNT(afs_CheckVolumeNames);

    nvols = 0;
    volumeID = cellID = NULL;
    vsize = 0;
    ObtainReadLock(&afs_xvolume);
    if (flags & AFS_VOLCHECK_EXPIRED) {
	/*
	 * allocate space to hold the volumeIDs and cellIDs, only if
	 * we will be invalidating the mountpoints later on
	 */
	for (i = 0; i < NVOLS; i++)
	    for (tv = afs_volumes[i]; tv; tv = tv->next)
		++vsize;

	volumeID = afs_osi_Alloc(2 * vsize * sizeof(*volumeID));
	cellID = (volumeID) ? volumeID + vsize : 0;
    }

    now = osi_Time();
    for (i = 0; i < NVOLS; i++) {
	for (tv = afs_volumes[i]; tv; tv = tv->next) {
	    if (flags & AFS_VOLCHECK_EXPIRED) {
		if (((tv->expireTime < (now + 10)) && (tv->states & VRO))
		    || (flags & AFS_VOLCHECK_FORCE)) {
		    afs_ResetVolumeInfo(tv);	/* also resets status */
		    if (volumeID) {
			volumeID[nvols] = tv->volume;
			cellID[nvols] = tv->cell;
		    }
		    ++nvols;
		    continue;
		}
	    }
	    /* ??? */
	    if (flags & (AFS_VOLCHECK_BUSY | AFS_VOLCHECK_FORCE)) {
		for (j = 0; j < AFS_MAXHOSTS; j++)
		    tv->status[j] = not_busy;
	    }

	}
    }
    ReleaseReadLock(&afs_xvolume);


    /* next ensure all mt points are re-evaluated */
    if (nvols || (flags & (AFS_VOLCHECK_FORCE | AFS_VOLCHECK_MTPTS))) {
loop:
	ObtainReadLock(&afs_xvcache);
	for (i = 0; i < VCSIZE; i++) {
	    for (tvc = afs_vhashT[i]; tvc; tvc = tvc->hnext) {

		/* if the volume of "mvid" of the vcache entry is among the
		 * ones we found earlier, then we re-evaluate it.  Also, if the
		 * force bit is set or we explicitly asked to reevaluate the
		 * mt-pts, we clean the cmvalid bit */

		if ((flags & (AFS_VOLCHECK_FORCE | AFS_VOLCHECK_MTPTS))
		    || (tvc->mvid
			&& inVolList(tvc->mvid, nvols, volumeID, cellID)))
		    tvc->f.states &= ~CMValid;

		/* If the volume that this file belongs to was reset earlier,
		 * then we should remove its callback.
		 * Again, if forced, always do it.
		 */
		if ((tvc->f.states & CRO)
		    && (inVolList(&tvc->f.fid, nvols, volumeID, cellID)
			|| (flags & AFS_VOLCHECK_FORCE))) {

                    if (tvc->f.states & CVInit) {
                        ReleaseReadLock(&afs_xvcache);
			afs_osi_Sleep(&tvc->f.states);
                        goto loop;
                    }
#ifdef AFS_DARWIN80_ENV
                    if (tvc->f.states & CDeadVnode) {
                        ReleaseReadLock(&afs_xvcache);
			afs_osi_Sleep(&tvc->f.states);
                        goto loop;
                    }
		    tvp = AFSTOV(tvc);
		    if (vnode_get(tvp))
			continue;
		    if (vnode_ref(tvp)) {
			AFS_GUNLOCK();
			/* AFSTOV(tvc) may be NULL */
			vnode_put(tvp);
			AFS_GLOCK();
			continue;
		    }
#else
		    AFS_FAST_HOLD(tvc);
#endif
		    ReleaseReadLock(&afs_xvcache);

		    ObtainWriteLock(&afs_xcbhash, 485);
		    /* LOCKXXX: We aren't holding tvc write lock? */
		    afs_DequeueCallback(tvc);
		    tvc->f.states &= ~CStatd;
		    ReleaseWriteLock(&afs_xcbhash);
		    if (tvc->f.fid.Fid.Vnode & 1 || (vType(tvc) == VDIR))
			osi_dnlc_purgedp(tvc);

#ifdef AFS_DARWIN80_ENV
		    vnode_put(AFSTOV(tvc));
		    /* our tvc ptr is still good until now */
		    AFS_FAST_RELE(tvc);
		    ObtainReadLock(&afs_xvcache);
#else
		    ObtainReadLock(&afs_xvcache);

		    /* our tvc ptr is still good until now */
		    AFS_FAST_RELE(tvc);
#endif
		}
	    }
	}
	osi_dnlc_purge();	/* definitely overkill, but it's safer this way. */
	ReleaseReadLock(&afs_xvcache);
    }

    if (volumeID)
	afs_osi_Free(volumeID, 2 * vsize * sizeof(*volumeID));

}				/*afs_CheckVolumeNames */
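
The scan above decides which vcache entries to invalidate with inVolList(), whose body is not shown here. The following is a minimal sketch of what such a helper presumably looks like, inferred only from how it is called (a VenusFid plus a count and two parallel afs_int32 ID arrays); it is not the verbatim OpenAFS implementation:

static int
inVolList(struct VenusFid *fid, afs_int32 nvols, afs_int32 *vID, afs_int32 *cID)
{
    afs_int32 i;

    /* if the ID arrays could not be allocated, be conservative: match all */
    if (nvols && (!vID || !cID))
	return 1;

    for (i = 0; i < nvols; i++) {
	if (fid->Fid.Volume == vID[i] && fid->Cell == cID[i])
	    return 1;
    }
    return 0;
}
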
Ejemplo n.º 20
0
static int
ClearCallBack(struct rx_connection *a_conn,
	      struct AFSFid *a_fid)
{
    struct vcache *tvc;
    int i;
    struct VenusFid localFid;
    struct volume *tv;
#ifdef AFS_DARWIN80_ENV
    vnode_t vp;
#endif

    AFS_STATCNT(ClearCallBack);

    AFS_ASSERT_GLOCK();

    /*
     * XXXX Don't hold any server locks here because of callback protocol XXX
     */
    localFid.Cell = 0;
    localFid.Fid.Volume = a_fid->Volume;
    localFid.Fid.Vnode = a_fid->Vnode;
    localFid.Fid.Unique = a_fid->Unique;
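    /*
     * The cell is not known yet; it is filled in below from the first
     * matching vcache so that afs_FindVolume() can locate the volume.
     */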

    /*
     * Volume ID of zero means don't do anything.
     */
    if (a_fid->Volume != 0) {
	if (a_fid->Vnode == 0) {
	    struct afs_q *tq, *uq;
	    /*
	     * Clear callback for the whole volume.  Zip through the
	     * hash chain, nullifying entries whose volume ID matches.
	     */
loop1:
		ObtainReadLock(&afs_xvcache);
		i = VCHashV(&localFid);
		for (tq = afs_vhashTV[i].prev; tq != &afs_vhashTV[i]; tq = uq) {
		    uq = QPrev(tq);
		    tvc = QTOVH(tq);
		    if (tvc->f.fid.Fid.Volume == a_fid->Volume) {
			tvc->callback = NULL;
			if (!localFid.Cell)
			    localFid.Cell = tvc->f.fid.Cell;
			tvc->dchint = NULL;	/* invalidate hints */
			if (tvc->f.states & CVInit) {
			    ReleaseReadLock(&afs_xvcache);
			    afs_osi_Sleep(&tvc->f.states);
			    goto loop1;
			}
#if     defined(AFS_SGI_ENV) || defined(AFS_SUN5_ENV)  || defined(AFS_HPUX_ENV) || defined(AFS_LINUX20_ENV)
			AFS_FAST_HOLD(tvc);
#else
#ifdef AFS_DARWIN80_ENV
			if (tvc->f.states & CDeadVnode) {
			    if (!(tvc->f.states & CBulkFetching)) {
				ReleaseReadLock(&afs_xvcache);
				afs_osi_Sleep(&tvc->f.states);
				goto loop1;
			    }
			}
			vp = AFSTOV(tvc);
			if (vnode_get(vp))
			    continue;
			if (vnode_ref(vp)) {
			    AFS_GUNLOCK();
			    vnode_put(vp);
			    AFS_GLOCK();
			    continue;
			}
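			/*
			 * A vnode that is mid-bulkstat or already dead should
			 * not be reused; mark it for recycling so reclaim can
			 * tear it down.
			 */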
			if (tvc->f.states & (CBulkFetching|CDeadVnode)) {
			    AFS_GUNLOCK();
			    vnode_recycle(AFSTOV(tvc));
			    AFS_GLOCK();
			}
#else
			AFS_FAST_HOLD(tvc);
#endif
#endif
			ReleaseReadLock(&afs_xvcache);
			ObtainWriteLock(&afs_xcbhash, 449);
			afs_DequeueCallback(tvc);
			tvc->f.states &= ~(CStatd | CUnique | CBulkFetching);
			afs_allCBs++;
			if (tvc->f.fid.Fid.Vnode & 1)
			    afs_oddCBs++;
			else
			    afs_evenCBs++;
			ReleaseWriteLock(&afs_xcbhash);
			if ((tvc->f.fid.Fid.Vnode & 1 || (vType(tvc) == VDIR)))
			    osi_dnlc_purgedp(tvc);
			afs_Trace3(afs_iclSetp, CM_TRACE_CALLBACK,
				   ICL_TYPE_POINTER, tvc, ICL_TYPE_INT32,
				   tvc->f.states, ICL_TYPE_INT32,
				   a_fid->Volume);
#ifdef AFS_DARWIN80_ENV
			vnode_put(AFSTOV(tvc));
#endif
			ObtainReadLock(&afs_xvcache);
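			/* recompute the predecessor: the chain may have
			 * changed while afs_xvcache was dropped */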
			uq = QPrev(tq);
			AFS_FAST_RELE(tvc);
		    } else if ((tvc->f.states & CMValid)
			       && (tvc->mvid->Fid.Volume == a_fid->Volume)) {
			tvc->f.states &= ~CMValid;
			if (!localFid.Cell)
			    localFid.Cell = tvc->mvid->Cell;
		    }
		}
		ReleaseReadLock(&afs_xvcache);

	    /*
	     * XXXX Don't hold any locks here XXXX
	     */
	    tv = afs_FindVolume(&localFid, 0);
	    if (tv) {
		afs_ResetVolumeInfo(tv);
		afs_PutVolume(tv, 0);
		/* invalidate mtpoint? */
	    }
	} /*Clear callbacks for whole volume */
	else {
	    /*
	     * Clear callbacks just for the one file.
	     */
	    struct vcache *uvc;
	    afs_allCBs++;
	    if (a_fid->Vnode & 1)
		afs_oddCBs++;	/*Could do this on volume basis, too */
	    else
		afs_evenCBs++;	/*A particular fid was specified */
loop2:
	    ObtainReadLock(&afs_xvcache);
	    i = VCHash(&localFid);
	    for (tvc = afs_vhashT[i]; tvc; tvc = uvc) {
		uvc = tvc->hnext;
		if (tvc->f.fid.Fid.Vnode == a_fid->Vnode
		    && tvc->f.fid.Fid.Volume == a_fid->Volume
		    && tvc->f.fid.Fid.Unique == a_fid->Unique) {
		    tvc->callback = NULL;
		    tvc->dchint = NULL;	/* invalidate hints */
		    if (tvc->f.states & CVInit) {
			ReleaseReadLock(&afs_xvcache);
			afs_osi_Sleep(&tvc->f.states);
			goto loop2;
		    }
#if     defined(AFS_SGI_ENV) || defined(AFS_SUN5_ENV)  || defined(AFS_HPUX_ENV) || defined(AFS_LINUX20_ENV)
		    AFS_FAST_HOLD(tvc);
#else
#ifdef AFS_DARWIN80_ENV
		    if (tvc->f.states & CDeadVnode) {
			if (!(tvc->f.states & CBulkFetching)) {
			    ReleaseReadLock(&afs_xvcache);
			    afs_osi_Sleep(&tvc->f.states);
			    goto loop2;
			}
		    }
		    vp = AFSTOV(tvc);
		    if (vnode_get(vp))
			continue;
		    if (vnode_ref(vp)) {
			AFS_GUNLOCK();
			vnode_put(vp);
			AFS_GLOCK();
			continue;
		    }
		    if (tvc->f.states & (CBulkFetching|CDeadVnode)) {
			AFS_GUNLOCK();
			vnode_recycle(AFSTOV(tvc));
			AFS_GLOCK();
		    }
#else
		    AFS_FAST_HOLD(tvc);
#endif
#endif
		    ReleaseReadLock(&afs_xvcache);
		    ObtainWriteLock(&afs_xcbhash, 450);
		    afs_DequeueCallback(tvc);
		    tvc->f.states &= ~(CStatd | CUnique | CBulkFetching);
		    ReleaseWriteLock(&afs_xcbhash);
		    if ((tvc->f.fid.Fid.Vnode & 1 || (vType(tvc) == VDIR)))
			osi_dnlc_purgedp(tvc);
		    afs_Trace3(afs_iclSetp, CM_TRACE_CALLBACK,
			       ICL_TYPE_POINTER, tvc, ICL_TYPE_INT32,
			       tvc->f.states, ICL_TYPE_LONG, 0);
#ifdef CBDEBUG
		    lastCallBack_vnode = a_fid->Vnode;
		    lastCallBack_dv = tvc->mstat.DataVersion.low;
		    osi_GetuTime(&lastCallBack_time);
#endif /* CBDEBUG */
#ifdef AFS_DARWIN80_ENV
		    vnode_put(AFSTOV(tvc));
#endif
		    ObtainReadLock(&afs_xvcache);
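		    /* recompute the next pointer: the chain may have changed
		     * while afs_xvcache was dropped */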
		    uvc = tvc->hnext;
		    AFS_FAST_RELE(tvc);
		}
	    }			/*Walk through hash table */
	    ReleaseReadLock(&afs_xvcache);
	}			/*Clear callbacks for one file */
    }				/* Fid has non-zero volume ID */

    /*
     * Always return a predictable value.
     */
    return (0);

}				/*ClearCallBack */
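
ClearCallBack is the workhorse behind the callback-break RPC: the server-side stub receives an array of fids and breaks each one in turn. Below is a simplified, hedged sketch of such a dispatch loop; the real OpenAFS handler (SRXAFSCB_CallBack) also updates statistics and does bookkeeping that is elided here, so treat this as an illustration of the calling pattern rather than the exact handler:

afs_int32
SRXAFSCB_CallBack(struct rx_call *a_call, AFSCBFids *a_fids, AFSCBs *a_cbs)
{
    int i;
    struct AFSFid *tfid;
    struct rx_connection *tconn = rx_ConnectionOf(a_call);

    AFS_GLOCK();
    tfid = (struct AFSFid *)a_fids->AFSCBFids_val;
    /* break the callback on every fid the fileserver handed us */
    for (i = 0; i < a_fids->AFSCBFids_len; i++)
	ClearCallBack(tconn, &tfid[i]);
    AFS_GUNLOCK();

    return 0;
}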