int imageboot_setup() { dev_t dev; int error = 0; char *root_path = NULL; DBG_TRACE("%s: entry\n", __FUNCTION__); MALLOC_ZONE(root_path, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); if (root_path == NULL) return (ENOMEM); if(PE_parse_boot_argn("rp", root_path, MAXPATHLEN) == FALSE) { error = ENOENT; goto done; } printf("%s: root image url is %s\n", __FUNCTION__, root_path); error = di_root_image(root_path, rootdevice, &dev); if(error) { printf("%s: di_root_image failed: %d\n", __FUNCTION__, error); goto done; } rootdev = dev; mountroot = NULL; printf("%s: root device 0x%x\n", __FUNCTION__, rootdev); error = vfs_mountroot(); if (error == 0 && rootvnode != NULL) { struct vnode *tvp; struct vnode *newdp; /* * Get the vnode for '/'. * Set fdp->fd_fd.fd_cdir to reference it. */ if (VFS_ROOT(TAILQ_LAST(&mountlist,mntlist), &newdp, vfs_context_kernel())) panic("%s: cannot find root vnode", __FUNCTION__); vnode_ref(newdp); vnode_put(newdp); tvp = rootvnode; vnode_rele(tvp); filedesc0.fd_cdir = newdp; rootvnode = newdp; mount_list_lock(); TAILQ_REMOVE(&mountlist, TAILQ_FIRST(&mountlist), mnt_list); mount_list_unlock(); mountlist.tqh_first->mnt_flag |= MNT_ROOTFS; DBG_TRACE("%s: root switched\n", __FUNCTION__); } done: FREE_ZONE(root_path, MAXPATHLEN, M_NAMEI); DBG_TRACE("%s: exit\n", __FUNCTION__); return (error); }
/* * Make a new or get existing nullfs node. * Vp is the alias vnode, lowervp is the lower vnode. * * lowervp is assumed to have an iocount on it from the caller */ int null_nodeget( struct mount * mp, struct vnode * lowervp, struct vnode * dvp, struct vnode ** vpp, struct componentname * cnp, int root) { struct vnode * vp; int error; /* Lookup the hash firstly. */ error = null_hashget(mp, lowervp, vpp); /* ENOENT means it wasn't found, EIO is a failure we should bail from, 0 is it * was found */ if (error != ENOENT) { /* null_hashget checked the vid, so if we got something here its legit to * the best of our knowledge*/ /* if we found something then there is an iocount on vpp, if we didn't find something then vpp shouldn't be used by the caller */ return error; } /* * We do not serialize vnode creation, instead we will check for * duplicates later, when adding new vnode to hash. */ error = vnode_ref(lowervp); // take a ref on lowervp so we let the system know we care about it if(error) { // Failed to get a reference on the lower vp so bail. Lowervp may be gone already. return error; } error = null_getnewvnode(mp, lowervp, dvp, &vp, cnp, root); if (error) { vnode_rele(lowervp); return (error); } /* * Atomically insert our new node into the hash or vget existing * if someone else has beaten us to it. */ error = null_hashins(mp, VTONULL(vp), vpp); if (error || *vpp != NULL) { /* recycle will call reclaim which will get rid of the internals */ vnode_recycle(vp); vnode_put(vp); /* if we found vpp, then null_hashins put an iocount on it */ return error; } /* vp has an iocount from null_getnewvnode */ *vpp = vp; return (0); }
int osi_lookupname(char *aname, enum uio_seg seg, int followlink, struct vnode **vpp) { vfs_context_t ctx; int code, flags; flags = 0; if (!followlink) flags |= VNODE_LOOKUP_NOFOLLOW; ctx=vfs_context_create(NULL); code = vnode_lookup(aname, flags, vpp, ctx); if (!code) { /* get a usecount */ vnode_ref(*vpp); vnode_put(*vpp); } vfs_context_rele(ctx); return code; }
/* * Routine: macx_swapon * Function: * Syscall interface to add a file to backing store */ int macx_swapon( struct macx_swapon_args *args) { int size = args->size; vnode_t vp = (vnode_t)NULL; struct nameidata nd, *ndp; register int error; kern_return_t kr; mach_port_t backing_store; memory_object_default_t default_pager; int i; boolean_t funnel_state; off_t file_size; vfs_context_t ctx = vfs_context_current(); struct proc *p = current_proc(); int dp_cluster_size; AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPON); AUDIT_ARG(value32, args->priority); funnel_state = thread_funnel_set(kernel_flock, TRUE); ndp = &nd; if ((error = suser(kauth_cred_get(), 0))) goto swapon_bailout; /* * Get a vnode for the paging area. */ NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, ((IS_64BIT_PROCESS(p)) ? UIO_USERSPACE64 : UIO_USERSPACE32), (user_addr_t) args->filename, ctx); if ((error = namei(ndp))) goto swapon_bailout; nameidone(ndp); vp = ndp->ni_vp; if (vp->v_type != VREG) { error = EINVAL; goto swapon_bailout; } /* get file size */ if ((error = vnode_size(vp, &file_size, ctx)) != 0) goto swapon_bailout; #if CONFIG_MACF vnode_lock(vp); error = mac_system_check_swapon(vfs_context_ucred(ctx), vp); vnode_unlock(vp); if (error) goto swapon_bailout; #endif /* resize to desired size if it's too small */ if ((file_size < (off_t)size) && ((error = vnode_setsize(vp, (off_t)size, 0, ctx)) != 0)) goto swapon_bailout; if (default_pager_init_flag == 0) { start_def_pager(NULL); default_pager_init_flag = 1; } /* add new backing store to list */ i = 0; while(bs_port_table[i].vp != 0) { if(i == MAX_BACKING_STORE) break; i++; } if(i == MAX_BACKING_STORE) { error = ENOMEM; goto swapon_bailout; } /* remember the vnode. This vnode has namei() reference */ bs_port_table[i].vp = vp; /* * Look to see if we are already paging to this file. */ /* make certain the copy send of kernel call will work */ default_pager = MEMORY_OBJECT_DEFAULT_NULL; kr = host_default_memory_manager(host_priv_self(), &default_pager, 0); if(kr != KERN_SUCCESS) { error = EAGAIN; bs_port_table[i].vp = 0; goto swapon_bailout; } if (vp->v_mount->mnt_kern_flag & MNTK_SSD) { /* * keep the cluster size small since the * seek cost is effectively 0 which means * we don't care much about fragmentation */ dp_isssd = TRUE; dp_cluster_size = 2 * PAGE_SIZE; } else { /* * use the default cluster size */ dp_isssd = FALSE; dp_cluster_size = 0; } kr = default_pager_backing_store_create(default_pager, -1, /* default priority */ dp_cluster_size, &backing_store); memory_object_default_deallocate(default_pager); if(kr != KERN_SUCCESS) { error = ENOMEM; bs_port_table[i].vp = 0; goto swapon_bailout; } /* Mark this vnode as being used for swapfile */ vnode_lock_spin(vp); SET(vp->v_flag, VSWAP); vnode_unlock(vp); /* * NOTE: we are able to supply PAGE_SIZE here instead of * an actual record size or block number because: * a: we do not support offsets from the beginning of the * file (allowing for non page size/record modulo offsets. * b: because allow paging will be done modulo page size */ kr = default_pager_add_file(backing_store, (vnode_ptr_t) vp, PAGE_SIZE, (int)(file_size/PAGE_SIZE)); if(kr != KERN_SUCCESS) { bs_port_table[i].vp = 0; if(kr == KERN_INVALID_ARGUMENT) error = EINVAL; else error = ENOMEM; /* This vnode is not to be used for swapfile */ vnode_lock_spin(vp); CLR(vp->v_flag, VSWAP); vnode_unlock(vp); goto swapon_bailout; } bs_port_table[i].bs = (void *)backing_store; error = 0; ubc_setthreadcred(vp, p, current_thread()); /* * take a long term reference on the vnode to keep * vnreclaim() away from this vnode. */ vnode_ref(vp); swapon_bailout: if (vp) { vnode_put(vp); } (void) thread_funnel_set(kernel_flock, FALSE); AUDIT_MACH_SYSCALL_EXIT(error); return(error); }
static int zfs_vfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context) { char *osname = NULL; size_t osnamelen = 0; int error = 0; int canwrite; /* * Get the objset name (the "special" mount argument). * The filesystem that we mount as root is defined in the * "zfs-bootfs" property. */ if (data) { user_addr_t fspec = USER_ADDR_NULL; #ifndef __APPLE__ if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(), DDI_PROP_DONTPASS, "zfs-bootfs", &zfs_bootpath) != DDI_SUCCESS) return (EIO); error = parse_bootpath(zfs_bootpath, rootfs.bo_name); ddi_prop_free(zfs_bootpath); #endif osname = kmem_alloc(MAXPATHLEN, KM_SLEEP); if (vfs_context_is64bit(context)) { if ( (error = copyin(data, (caddr_t)&fspec, sizeof(fspec))) ) goto out; } else { #ifdef ZFS_LEOPARD_ONLY char *tmp; #else user32_addr_t tmp; #endif if ( (error = copyin(data, (caddr_t)&tmp, sizeof(tmp))) ) goto out; /* munge into LP64 addr */ fspec = CAST_USER_ADDR_T(tmp); } if ( (error = copyinstr(fspec, osname, MAXPATHLEN, &osnamelen)) ) goto out; } #if 0 if (mvp->v_type != VDIR) return (ENOTDIR); mutex_enter(&mvp->v_lock); if ((uap->flags & MS_REMOUNT) == 0 && (uap->flags & MS_OVERLAY) == 0 && (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { mutex_exit(&mvp->v_lock); return (EBUSY); } mutex_exit(&mvp->v_lock); /* * ZFS does not support passing unparsed data in via MS_DATA. * Users should use the MS_OPTIONSTR interface; this means * that all option parsing is already done and the options struct * can be interrogated. */ if ((uap->flags & MS_DATA) && uap->datalen > 0) return (EINVAL); /* * Get the objset name (the "special" mount argument). */ if (error = pn_get(uap->spec, fromspace, &spn)) return (error); osname = spn.pn_path; #endif /* * Check for mount privilege? * * If we don't have privilege then see if * we have local permission to allow it */ #ifndef __APPLE__ error = secpolicy_fs_mount(cr, mvp, vfsp); if (error) { error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr); if (error == 0) { vattr_t vattr; /* * Make sure user is the owner of the mount point * or has sufficient privileges. */ vattr.va_mask = AT_UID; if (error = VOP_GETATTR(mvp, &vattr, 0, cr)) { goto out; } if (error = secpolicy_vnode_owner(cr, vattr.va_uid)) { goto out; } if (error = VOP_ACCESS(mvp, VWRITE, 0, cr)) { goto out; } secpolicy_fs_mount_clearopts(cr, vfsp); } else { goto out; } } #endif error = zfs_domount(mp, 0, osname, context); if (error) printf("zfs_vfs_mount: error %d\n", error); if (error == 0) { zfsvfs_t *zfsvfs = NULL; /* Make the Finder treat sub file systems just like a folder */ if (strpbrk(osname, "/")) vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_DONTBROWSE)); /* Indicate to VFS that we support ACLs. */ vfs_setextendedsecurity(mp); /* Advisory locking should be handled at the VFS layer */ vfs_setlocklocal(mp); /* * Mac OS X needs a file system modify time * * We use the mtime of the "com.apple.system.mtime" * extended attribute, which is associated with the * file system root directory. * * Here we need to take a ref on z_mtime_vp to keep it around. * If the attribute isn't there, attempt to create it. */ zfsvfs = vfs_fsprivate(mp); if (zfsvfs->z_mtime_vp == NULL) { struct vnode * rvp; struct vnode *xdvp = NULLVP; struct vnode *xvp = NULLVP; znode_t *rootzp; timestruc_t modify_time; cred_t *cr; timestruc_t now; int flag; int result; if (zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp) != 0) { goto out; } rvp = ZTOV(rootzp); cr = (cred_t *)vfs_context_ucred(context); /* Grab the hidden attribute directory vnode. */ result = zfs_get_xattrdir(rootzp, &xdvp, cr, CREATE_XATTR_DIR); vnode_put(rvp); /* all done with root vnode */ rvp = NULL; if (result) { goto out; } /* * HACK - workaround missing vnode_setnoflush() KPI... * * We tag zfsvfs so that zfs_attach_vnode() can then set * vnfs_marksystem when the vnode gets created. */ zfsvfs->z_last_unmount_time = 0xBADC0DE; zfsvfs->z_last_mtime_synced = VTOZ(xdvp)->z_id; flag = vfs_isrdonly(mp) ? 0 : ZEXISTS; /* Lookup or create the named attribute. */ if ( zfs_obtain_xattr(VTOZ(xdvp), ZFS_MTIME_XATTR, S_IRUSR | S_IWUSR, cr, &xvp, flag) ) { zfsvfs->z_last_unmount_time = 0; zfsvfs->z_last_mtime_synced = 0; vnode_put(xdvp); goto out; } gethrestime(&now); ZFS_TIME_ENCODE(&now, VTOZ(xvp)->z_phys->zp_mtime); vnode_put(xdvp); vnode_ref(xvp); zfsvfs->z_mtime_vp = xvp; ZFS_TIME_DECODE(&modify_time, VTOZ(xvp)->z_phys->zp_mtime); zfsvfs->z_last_unmount_time = modify_time.tv_sec; zfsvfs->z_last_mtime_synced = modify_time.tv_sec; /* * Keep this referenced vnode from impeding an unmount. * * XXX vnode_setnoflush() is MIA from KPI (see workaround above). */ #if 0 vnode_setnoflush(xvp); #endif vnode_put(xvp); } } out: if (osname) { kmem_free(osname, MAXPATHLEN); } return (error); }
/* * Function: devfs_kernel_mount * Purpose: * Mount devfs at the given mount point from within the kernel. */ int devfs_kernel_mount(char * mntname) { struct mount *mp; int error; struct nameidata nd; struct vnode * vp; vfs_context_t ctx = vfs_context_kernel(); struct vfstable *vfsp; /* Find our vfstable entry */ for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (!strncmp(vfsp->vfc_name, "devfs", sizeof(vfsp->vfc_name))) break; if (!vfsp) { panic("Could not find entry in vfsconf for devfs.\n"); } /* * Get vnode to be covered */ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, CAST_USER_ADDR_T(mntname), ctx); if ((error = namei(&nd))) { printf("devfs_kernel_mount: failed to find directory '%s', %d", mntname, error); return (error); } nameidone(&nd); vp = nd.ni_vp; if ((error = VNOP_FSYNC(vp, MNT_WAIT, ctx))) { printf("devfs_kernel_mount: vnop_fsync failed: %d\n", error); vnode_put(vp); return (error); } if ((error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0))) { printf("devfs_kernel_mount: buf_invalidateblks failed: %d\n", error); vnode_put(vp); return (error); } if (vnode_isdir(vp) == 0) { printf("devfs_kernel_mount: '%s' is not a directory\n", mntname); vnode_put(vp); return (ENOTDIR); } if ((vnode_mountedhere(vp))) { vnode_put(vp); return (EBUSY); } /* * Allocate and initialize the filesystem. */ MALLOC_ZONE(mp, struct mount *, sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, sizeof(struct mount)); /* Initialize the default IO constraints */ mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS; mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32; mp->mnt_ioflags = 0; mp->mnt_realrootvp = NULLVP; mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL; mount_lock_init(mp); TAILQ_INIT(&mp->mnt_vnodelist); TAILQ_INIT(&mp->mnt_workerqueue); TAILQ_INIT(&mp->mnt_newvnodes); (void)vfs_busy(mp, LK_NOWAIT); mp->mnt_op = &devfs_vfsops; mp->mnt_vtable = vfsp; mp->mnt_flag = 0; mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN); vp->v_mountedhere = mp; mp->mnt_vnodecovered = vp; mp->mnt_vfsstat.f_owner = kauth_cred_getuid(kauth_cred_get()); (void) copystr(mntname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN - 1, 0); #if CONFIG_MACF mac_mount_label_init(mp); mac_mount_label_associate(ctx, mp); #endif error = devfs_mount(mp, NULL, USER_ADDR_NULL, ctx); if (error) { printf("devfs_kernel_mount: mount %s failed: %d", mntname, error); mp->mnt_vtable->vfc_refcount--; vfs_unbusy(mp); mount_lock_destroy(mp); #if CONFIG_MACF mac_mount_label_destroy(mp); #endif FREE_ZONE(mp, sizeof (struct mount), M_MOUNT); vnode_put(vp); return (error); } vnode_ref(vp); vnode_put(vp); vfs_unbusy(mp); mount_list_add(mp); return (0); }
struct vcache * osi_dnlc_lookup(struct vcache *adp, char *aname, int locktype) { struct vcache *tvc; unsigned int key, skey; char *ts = aname; struct nc *tnc; int safety; #ifdef AFS_DARWIN80_ENV vnode_t tvp; #endif if (!afs_usednlc) return 0; dnlcHash(ts, key); /* leaves ts pointing at the NULL */ if (ts - aname >= AFSNCNAMESIZE) return 0; skey = key & (NHSIZE - 1); TRACE(osi_dnlc_lookupT, skey); dnlcstats.lookups++; ObtainReadLock(&afs_xvcache); ObtainReadLock(&afs_xdnlc); for (tvc = NULL, tnc = nameHash[skey], safety = 0; tnc; tnc = tnc->next, safety++) { if ( /* (tnc->key == key) && */ (tnc->dirp == adp) && (!strcmp((char *)tnc->name, aname))) { tvc = tnc->vp; break; } else if (tnc->next == nameHash[skey]) { /* end of list */ break; } else if (safety > NCSIZE) { afs_warn("DNLC cycle"); dnlcstats.cycles++; ReleaseReadLock(&afs_xdnlc); ReleaseReadLock(&afs_xvcache); osi_dnlc_purge(); return (0); } } ReleaseReadLock(&afs_xdnlc); if (!tvc) { ReleaseReadLock(&afs_xvcache); dnlcstats.misses++; } else { if ((tvc->f.states & CVInit) #ifdef AFS_DARWIN80_ENV ||(tvc->f.states & CDeadVnode) #endif ) { ReleaseReadLock(&afs_xvcache); dnlcstats.misses++; osi_dnlc_remove(adp, aname, tvc); return 0; } #if defined(AFS_DARWIN80_ENV) tvp = AFSTOV(tvc); if (vnode_get(tvp)) { ReleaseReadLock(&afs_xvcache); dnlcstats.misses++; osi_dnlc_remove(adp, aname, tvc); return 0; } if (vnode_ref(tvp)) { ReleaseReadLock(&afs_xvcache); AFS_GUNLOCK(); vnode_put(tvp); AFS_GLOCK(); dnlcstats.misses++; osi_dnlc_remove(adp, aname, tvc); return 0; } #else osi_vnhold(tvc, 0); #endif ReleaseReadLock(&afs_xvcache); } return tvc; }
void bsd_init(void) { struct uthread *ut; unsigned int i; struct vfs_context context; kern_return_t ret; struct ucred temp_cred; struct posix_cred temp_pcred; #if NFSCLIENT || CONFIG_IMAGEBOOT boolean_t netboot = FALSE; #endif #define bsd_init_kprintf(x...) /* kprintf("bsd_init: " x) */ throttle_init(); printf(copyright); bsd_init_kprintf("calling kmeminit\n"); kmeminit(); bsd_init_kprintf("calling parse_bsd_args\n"); parse_bsd_args(); #if CONFIG_DEV_KMEM bsd_init_kprintf("calling dev_kmem_init\n"); dev_kmem_init(); #endif /* Initialize kauth subsystem before instancing the first credential */ bsd_init_kprintf("calling kauth_init\n"); kauth_init(); /* Initialize process and pgrp structures. */ bsd_init_kprintf("calling procinit\n"); procinit(); /* Initialize the ttys (MUST be before kminit()/bsd_autoconf()!)*/ tty_init(); kernproc = &proc0; /* implicitly bzero'ed */ /* kernel_task->proc = kernproc; */ set_bsdtask_info(kernel_task,(void *)kernproc); /* give kernproc a name */ bsd_init_kprintf("calling process_name\n"); process_name("kernel_task", kernproc); /* allocate proc lock group attribute and group */ bsd_init_kprintf("calling lck_grp_attr_alloc_init\n"); proc_lck_grp_attr= lck_grp_attr_alloc_init(); proc_lck_grp = lck_grp_alloc_init("proc", proc_lck_grp_attr); #if CONFIG_FINE_LOCK_GROUPS proc_slock_grp = lck_grp_alloc_init("proc-slock", proc_lck_grp_attr); proc_fdmlock_grp = lck_grp_alloc_init("proc-fdmlock", proc_lck_grp_attr); proc_ucred_mlock_grp = lck_grp_alloc_init("proc-ucred-mlock", proc_lck_grp_attr); proc_mlock_grp = lck_grp_alloc_init("proc-mlock", proc_lck_grp_attr); #endif /* Allocate proc lock attribute */ proc_lck_attr = lck_attr_alloc_init(); #if 0 #if __PROC_INTERNAL_DEBUG lck_attr_setdebug(proc_lck_attr); #endif #endif #if CONFIG_FINE_LOCK_GROUPS proc_list_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr); proc_klist_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr); lck_mtx_init(&kernproc->p_mlock, proc_mlock_grp, proc_lck_attr); lck_mtx_init(&kernproc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr); lck_mtx_init(&kernproc->p_ucred_mlock, proc_ucred_mlock_grp, proc_lck_attr); lck_spin_init(&kernproc->p_slock, proc_slock_grp, proc_lck_attr); #else proc_list_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr); proc_klist_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr); lck_mtx_init(&kernproc->p_mlock, proc_lck_grp, proc_lck_attr); lck_mtx_init(&kernproc->p_fdmlock, proc_lck_grp, proc_lck_attr); lck_mtx_init(&kernproc->p_ucred_mlock, proc_lck_grp, proc_lck_attr); lck_spin_init(&kernproc->p_slock, proc_lck_grp, proc_lck_attr); #endif assert(bsd_simul_execs != 0); execargs_cache_lock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr); execargs_cache_size = bsd_simul_execs; execargs_free_count = bsd_simul_execs; execargs_cache = (vm_offset_t *)kalloc(bsd_simul_execs * sizeof(vm_offset_t)); bzero(execargs_cache, bsd_simul_execs * sizeof(vm_offset_t)); if (current_task() != kernel_task) printf("bsd_init: We have a problem, " "current task is not kernel task\n"); bsd_init_kprintf("calling get_bsdthread_info\n"); ut = (uthread_t)get_bsdthread_info(current_thread()); #if CONFIG_MACF /* * Initialize the MAC Framework */ mac_policy_initbsd(); kernproc->p_mac_enforce = 0; #if defined (__i386__) || defined (__x86_64__) /* * We currently only support this on i386/x86_64, as that is the * only lock code we have instrumented so far. */ check_policy_init(policy_check_flags); #endif #endif /* MAC */ /* Initialize System Override call */ init_system_override(); /* * Create process 0. */ proc_list_lock(); LIST_INSERT_HEAD(&allproc, kernproc, p_list); kernproc->p_pgrp = &pgrp0; LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash); LIST_INIT(&pgrp0.pg_members); #ifdef CONFIG_FINE_LOCK_GROUPS lck_mtx_init(&pgrp0.pg_mlock, proc_mlock_grp, proc_lck_attr); #else lck_mtx_init(&pgrp0.pg_mlock, proc_lck_grp, proc_lck_attr); #endif /* There is no other bsd thread this point and is safe without pgrp lock */ LIST_INSERT_HEAD(&pgrp0.pg_members, kernproc, p_pglist); kernproc->p_listflag |= P_LIST_INPGRP; kernproc->p_pgrpid = 0; kernproc->p_uniqueid = 0; pgrp0.pg_session = &session0; pgrp0.pg_membercnt = 1; session0.s_count = 1; session0.s_leader = kernproc; session0.s_listflags = 0; #ifdef CONFIG_FINE_LOCK_GROUPS lck_mtx_init(&session0.s_mlock, proc_mlock_grp, proc_lck_attr); #else lck_mtx_init(&session0.s_mlock, proc_lck_grp, proc_lck_attr); #endif LIST_INSERT_HEAD(SESSHASH(0), &session0, s_hash); proc_list_unlock(); kernproc->task = kernel_task; kernproc->p_stat = SRUN; kernproc->p_flag = P_SYSTEM; kernproc->p_lflag = 0; kernproc->p_ladvflag = 0; #if DEVELOPMENT || DEBUG if (bootarg_disable_aslr) kernproc->p_flag |= P_DISABLE_ASLR; #endif kernproc->p_nice = NZERO; kernproc->p_pptr = kernproc; TAILQ_INIT(&kernproc->p_uthlist); TAILQ_INSERT_TAIL(&kernproc->p_uthlist, ut, uu_list); kernproc->sigwait = FALSE; kernproc->sigwait_thread = THREAD_NULL; kernproc->exit_thread = THREAD_NULL; kernproc->p_csflags = CS_VALID; /* * Create credential. This also Initializes the audit information. */ bsd_init_kprintf("calling bzero\n"); bzero(&temp_cred, sizeof(temp_cred)); bzero(&temp_pcred, sizeof(temp_pcred)); temp_pcred.cr_ngroups = 1; /* kern_proc, shouldn't call up to DS for group membership */ temp_pcred.cr_flags = CRF_NOMEMBERD; temp_cred.cr_audit.as_aia_p = audit_default_aia_p; bsd_init_kprintf("calling kauth_cred_create\n"); /* * We have to label the temp cred before we create from it to * properly set cr_ngroups, or the create will fail. */ posix_cred_label(&temp_cred, &temp_pcred); kernproc->p_ucred = kauth_cred_create(&temp_cred); /* update cred on proc */ PROC_UPDATE_CREDS_ONPROC(kernproc); /* give the (already exisiting) initial thread a reference on it */ bsd_init_kprintf("calling kauth_cred_ref\n"); kauth_cred_ref(kernproc->p_ucred); ut->uu_context.vc_ucred = kernproc->p_ucred; ut->uu_context.vc_thread = current_thread(); TAILQ_INIT(&kernproc->p_aio_activeq); TAILQ_INIT(&kernproc->p_aio_doneq); kernproc->p_aio_total_count = 0; kernproc->p_aio_active_count = 0; bsd_init_kprintf("calling file_lock_init\n"); file_lock_init(); #if CONFIG_MACF mac_cred_label_associate_kernel(kernproc->p_ucred); #endif /* Create the file descriptor table. */ kernproc->p_fd = &filedesc0; filedesc0.fd_cmask = cmask; filedesc0.fd_knlistsize = -1; filedesc0.fd_knlist = NULL; filedesc0.fd_knhash = NULL; filedesc0.fd_knhashmask = 0; /* Create the limits structures. */ kernproc->p_limit = &limit0; for (i = 0; i < sizeof(kernproc->p_rlimit)/sizeof(kernproc->p_rlimit[0]); i++) limit0.pl_rlimit[i].rlim_cur = limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY; limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE; limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = maxprocperuid; limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc; limit0.pl_rlimit[RLIMIT_STACK] = vm_initial_limit_stack; limit0.pl_rlimit[RLIMIT_DATA] = vm_initial_limit_data; limit0.pl_rlimit[RLIMIT_CORE] = vm_initial_limit_core; limit0.pl_refcnt = 1; kernproc->p_stats = &pstats0; kernproc->p_sigacts = &sigacts0; /* * Charge root for one process: launchd. */ bsd_init_kprintf("calling chgproccnt\n"); (void)chgproccnt(0, 1); /* * Allocate a kernel submap for pageable memory * for temporary copying (execve()). */ { vm_offset_t minimum; bsd_init_kprintf("calling kmem_suballoc\n"); assert(bsd_pageable_map_size != 0); ret = kmem_suballoc(kernel_map, &minimum, (vm_size_t)bsd_pageable_map_size, TRUE, VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_BSD), &bsd_pageable_map); if (ret != KERN_SUCCESS) panic("bsd_init: Failed to allocate bsd pageable map"); } /* * Initialize buffers and hash links for buffers * * SIDE EFFECT: Starts a thread for bcleanbuf_thread(), so must * happen after a credential has been associated with * the kernel task. */ bsd_init_kprintf("calling bsd_bufferinit\n"); bsd_bufferinit(); /* Initialize the execve() semaphore */ bsd_init_kprintf("calling semaphore_create\n"); if (ret != KERN_SUCCESS) panic("bsd_init: Failed to create execve semaphore"); /* * Initialize the calendar. */ bsd_init_kprintf("calling IOKitInitializeTime\n"); IOKitInitializeTime(); bsd_init_kprintf("calling ubc_init\n"); ubc_init(); /* * Initialize device-switches. */ bsd_init_kprintf("calling devsw_init() \n"); devsw_init(); /* Initialize the file systems. */ bsd_init_kprintf("calling vfsinit\n"); vfsinit(); #if CONFIG_PROC_UUID_POLICY /* Initial proc_uuid_policy subsystem */ bsd_init_kprintf("calling proc_uuid_policy_init()\n"); proc_uuid_policy_init(); #endif #if SOCKETS /* Initialize per-CPU cache allocator */ mcache_init(); /* Initialize mbuf's. */ bsd_init_kprintf("calling mbinit\n"); mbinit(); net_str_id_init(); /* for mbuf tags */ #endif /* SOCKETS */ /* * Initializes security event auditing. * XXX: Should/could this occur later? */ #if CONFIG_AUDIT bsd_init_kprintf("calling audit_init\n"); audit_init(); #endif /* Initialize kqueues */ bsd_init_kprintf("calling knote_init\n"); knote_init(); /* Initialize for async IO */ bsd_init_kprintf("calling aio_init\n"); aio_init(); /* Initialize pipes */ bsd_init_kprintf("calling pipeinit\n"); pipeinit(); /* Initialize SysV shm subsystem locks; the subsystem proper is * initialized through a sysctl. */ #if SYSV_SHM bsd_init_kprintf("calling sysv_shm_lock_init\n"); sysv_shm_lock_init(); #endif #if SYSV_SEM bsd_init_kprintf("calling sysv_sem_lock_init\n"); sysv_sem_lock_init(); #endif #if SYSV_MSG bsd_init_kprintf("sysv_msg_lock_init\n"); sysv_msg_lock_init(); #endif bsd_init_kprintf("calling pshm_lock_init\n"); pshm_lock_init(); bsd_init_kprintf("calling psem_lock_init\n"); psem_lock_init(); pthread_init(); /* POSIX Shm and Sem */ bsd_init_kprintf("calling pshm_cache_init\n"); pshm_cache_init(); bsd_init_kprintf("calling psem_cache_init\n"); psem_cache_init(); bsd_init_kprintf("calling time_zone_slock_init\n"); time_zone_slock_init(); bsd_init_kprintf("calling select_waitq_init\n"); select_waitq_init(); /* * Initialize protocols. Block reception of incoming packets * until everything is ready. */ bsd_init_kprintf("calling sysctl_register_fixed\n"); sysctl_register_fixed(); bsd_init_kprintf("calling sysctl_mib_init\n"); sysctl_mib_init(); #if NETWORKING bsd_init_kprintf("calling dlil_init\n"); dlil_init(); bsd_init_kprintf("calling proto_kpi_init\n"); proto_kpi_init(); #endif /* NETWORKING */ #if SOCKETS bsd_init_kprintf("calling socketinit\n"); socketinit(); bsd_init_kprintf("calling domaininit\n"); domaininit(); iptap_init(); #if FLOW_DIVERT flow_divert_init(); #endif /* FLOW_DIVERT */ #endif /* SOCKETS */ kernproc->p_fd->fd_cdir = NULL; kernproc->p_fd->fd_rdir = NULL; #if CONFIG_FREEZE #ifndef CONFIG_MEMORYSTATUS #error "CONFIG_FREEZE defined without matching CONFIG_MEMORYSTATUS" #endif /* Initialise background freezing */ bsd_init_kprintf("calling memorystatus_freeze_init\n"); memorystatus_freeze_init(); #endif #if CONFIG_MEMORYSTATUS /* Initialize kernel memory status notifications */ bsd_init_kprintf("calling memorystatus_init\n"); memorystatus_init(); #endif /* CONFIG_MEMORYSTATUS */ bsd_init_kprintf("calling macx_init\n"); macx_init(); bsd_init_kprintf("calling acct_init\n"); acct_init(); #ifdef GPROF /* Initialize kernel profiling. */ kmstartup(); #endif bsd_init_kprintf("calling bsd_autoconf\n"); bsd_autoconf(); #if CONFIG_DTRACE dtrace_postinit(); #endif /* * We attach the loopback interface *way* down here to ensure * it happens after autoconf(), otherwise it becomes the * "primary" interface. */ #include <loop.h> #if NLOOP > 0 bsd_init_kprintf("calling loopattach\n"); loopattach(); /* XXX */ #endif #if NGIF /* Initialize gif interface (after lo0) */ gif_init(); #endif #if PFLOG /* Initialize packet filter log interface */ pfloginit(); #endif /* PFLOG */ #if NETHER > 0 /* Register the built-in dlil ethernet interface family */ bsd_init_kprintf("calling ether_family_init\n"); ether_family_init(); #endif /* ETHER */ #if NETWORKING /* Call any kext code that wants to run just after network init */ bsd_init_kprintf("calling net_init_run\n"); net_init_run(); #if CONTENT_FILTER cfil_init(); #endif #if PACKET_MANGLER pkt_mnglr_init(); #endif #if NECP /* Initialize Network Extension Control Policies */ necp_init(); #endif netagent_init(); /* register user tunnel kernel control handler */ utun_register_control(); #if IPSEC ipsec_register_control(); #endif /* IPSEC */ netsrc_init(); nstat_init(); tcp_cc_init(); #if MPTCP mptcp_control_register(); #endif /* MPTCP */ #endif /* NETWORKING */ bsd_init_kprintf("calling vnode_pager_bootstrap\n"); vnode_pager_bootstrap(); bsd_init_kprintf("calling inittodr\n"); inittodr(0); /* Mount the root file system. */ while( TRUE) { int err; bsd_init_kprintf("calling setconf\n"); setconf(); #if NFSCLIENT netboot = (mountroot == netboot_mountroot); #endif bsd_init_kprintf("vfs_mountroot\n"); if (0 == (err = vfs_mountroot())) break; rootdevice[0] = '\0'; #if NFSCLIENT if (netboot) { PE_display_icon( 0, "noroot"); /* XXX a netboot-specific icon would be nicer */ vc_progress_set(FALSE, 0); for (i=1; 1; i*=2) { printf("bsd_init: failed to mount network root, error %d, %s\n", err, PE_boot_args()); printf("We are hanging here...\n"); IOSleep(i*60*1000); } /*NOTREACHED*/ } #endif printf("cannot mount root, errno = %d\n", err); boothowto |= RB_ASKNAME; } IOSecureBSDRoot(rootdevice); context.vc_thread = current_thread(); context.vc_ucred = kernproc->p_ucred; mountlist.tqh_first->mnt_flag |= MNT_ROOTFS; bsd_init_kprintf("calling VFS_ROOT\n"); /* Get the vnode for '/'. Set fdp->fd_fd.fd_cdir to reference it. */ if (VFS_ROOT(mountlist.tqh_first, &rootvnode, &context)) panic("bsd_init: cannot find root vnode: %s", PE_boot_args()); rootvnode->v_flag |= VROOT; (void)vnode_ref(rootvnode); (void)vnode_put(rootvnode); filedesc0.fd_cdir = rootvnode; #if NFSCLIENT if (netboot) { int err; netboot = TRUE; /* post mount setup */ if ((err = netboot_setup()) != 0) { PE_display_icon( 0, "noroot"); /* XXX a netboot-specific icon would be nicer */ vc_progress_set(FALSE, 0); for (i=1; 1; i*=2) { printf("bsd_init: NetBoot could not find root, error %d: %s\n", err, PE_boot_args()); printf("We are hanging here...\n"); IOSleep(i*60*1000); } /*NOTREACHED*/ } } #endif #if CONFIG_IMAGEBOOT /* * See if a system disk image is present. If so, mount it and * switch the root vnode to point to it */ if (netboot == FALSE && imageboot_needed()) { /* * An image was found. No turning back: we're booted * with a kernel from the disk image. */ imageboot_setup(); } #endif /* CONFIG_IMAGEBOOT */ /* set initial time; all other resource data is already zero'ed */ microtime_with_abstime(&kernproc->p_start, &kernproc->p_stats->ps_start); #if DEVFS { char mounthere[] = "/dev"; /* !const because of internal casting */ bsd_init_kprintf("calling devfs_kernel_mount\n"); devfs_kernel_mount(mounthere); } #endif /* DEVFS */ /* Initialize signal state for process 0. */ bsd_init_kprintf("calling siginit\n"); siginit(kernproc); bsd_init_kprintf("calling bsd_utaskbootstrap\n"); bsd_utaskbootstrap(); #if defined(__LP64__) kernproc->p_flag |= P_LP64; #endif pal_kernel_announce(); bsd_init_kprintf("calling mountroot_post_hook\n"); /* invoke post-root-mount hook */ if (mountroot_post_hook != NULL) mountroot_post_hook(); #if 0 /* not yet */ consider_zone_gc(FALSE); #endif bsd_init_kprintf("done\n"); }
static errno_t fuse_vfsop_mount(mount_t mp, __unused vnode_t devvp, user_addr_t udata, vfs_context_t context) { int err = 0; int mntopts = 0; bool mounted = false; uint32_t drandom = 0; uint32_t max_read = ~0; size_t len; fuse_device_t fdev = NULL; struct fuse_data *data = NULL; fuse_mount_args fusefs_args; struct vfsstatfs *vfsstatfsp = vfs_statfs(mp); kern_return_t kr; thread_t init_thread; #if M_OSXFUSE_ENABLE_BIG_LOCK fuse_biglock_t *biglock; #endif fuse_trace_printf_vfsop(); if (vfs_isupdate(mp)) { return ENOTSUP; } err = copyin(udata, &fusefs_args, sizeof(fusefs_args)); if (err) { return EINVAL; } /* * Interesting flags that we can receive from mount or may want to * otherwise forcibly set include: * * MNT_ASYNC * MNT_AUTOMOUNTED * MNT_DEFWRITE * MNT_DONTBROWSE * MNT_IGNORE_OWNERSHIP * MNT_JOURNALED * MNT_NODEV * MNT_NOEXEC * MNT_NOSUID * MNT_NOUSERXATTR * MNT_RDONLY * MNT_SYNCHRONOUS * MNT_UNION */ #if M_OSXFUSE_ENABLE_UNSUPPORTED vfs_setlocklocal(mp); #endif /* M_OSXFUSE_ENABLE_UNSUPPORTED */ /** Option Processing. **/ if (fusefs_args.altflags & FUSE_MOPT_FSTYPENAME) { size_t typenamelen = strlen(fusefs_args.fstypename); if ((typenamelen <= 0) || (typenamelen > FUSE_FSTYPENAME_MAXLEN)) { return EINVAL; } snprintf(vfsstatfsp->f_fstypename, MFSTYPENAMELEN, "%s%s", OSXFUSE_FSTYPENAME_PREFIX, fusefs_args.fstypename); } if ((fusefs_args.daemon_timeout > FUSE_MAX_DAEMON_TIMEOUT) || (fusefs_args.daemon_timeout < FUSE_MIN_DAEMON_TIMEOUT)) { return EINVAL; } if (fusefs_args.altflags & FUSE_MOPT_SPARSE) { mntopts |= FSESS_SPARSE; } if (fusefs_args.altflags & FUSE_MOPT_SLOW_STATFS) { mntopts |= FSESS_SLOW_STATFS; } if (fusefs_args.altflags & FUSE_MOPT_AUTO_CACHE) { mntopts |= FSESS_AUTO_CACHE; } if (fusefs_args.altflags & FUSE_MOPT_AUTO_XATTR) { if (fusefs_args.altflags & FUSE_MOPT_NATIVE_XATTR) { return EINVAL; } mntopts |= FSESS_AUTO_XATTR; } else if (fusefs_args.altflags & FUSE_MOPT_NATIVE_XATTR) { mntopts |= FSESS_NATIVE_XATTR; } if (fusefs_args.altflags & FUSE_MOPT_NO_BROWSE) { vfs_setflags(mp, MNT_DONTBROWSE); } if (fusefs_args.altflags & FUSE_MOPT_JAIL_SYMLINKS) { mntopts |= FSESS_JAIL_SYMLINKS; } /* * Note that unlike Linux, which keeps allow_root in user-space and * passes allow_other in that case to the kernel, we let allow_root * reach the kernel. The 'if' ordering is important here. */ if (fusefs_args.altflags & FUSE_MOPT_ALLOW_ROOT) { int is_member = 0; if ((kauth_cred_ismember_gid(kauth_cred_get(), fuse_admin_group, &is_member) == 0) && is_member) { mntopts |= FSESS_ALLOW_ROOT; } else { IOLog("OSXFUSE: caller not a member of OSXFUSE admin group (%d)\n", fuse_admin_group); return EPERM; } } else if (fusefs_args.altflags & FUSE_MOPT_ALLOW_OTHER) { if (!fuse_allow_other && !fuse_vfs_context_issuser(context)) { int is_member = 0; if ((kauth_cred_ismember_gid(kauth_cred_get(), fuse_admin_group, &is_member) != 0) || !is_member) { return EPERM; } } mntopts |= FSESS_ALLOW_OTHER; } if (fusefs_args.altflags & FUSE_MOPT_NO_APPLEDOUBLE) { mntopts |= FSESS_NO_APPLEDOUBLE; } if (fusefs_args.altflags & FUSE_MOPT_NO_APPLEXATTR) { mntopts |= FSESS_NO_APPLEXATTR; } if ((fusefs_args.altflags & FUSE_MOPT_FSID) && (fusefs_args.fsid != 0)) { fsid_t fsid; mount_t other_mp; uint32_t target_dev; target_dev = FUSE_MAKEDEV(FUSE_CUSTOM_FSID_DEVICE_MAJOR, fusefs_args.fsid); fsid.val[0] = target_dev; fsid.val[1] = FUSE_CUSTOM_FSID_VAL1; other_mp = vfs_getvfs(&fsid); if (other_mp != NULL) { return EPERM; } vfsstatfsp->f_fsid.val[0] = target_dev; vfsstatfsp->f_fsid.val[1] = FUSE_CUSTOM_FSID_VAL1; } else { vfs_getnewfsid(mp); } if (fusefs_args.altflags & FUSE_MOPT_NO_LOCALCACHES) { mntopts |= FSESS_NO_ATTRCACHE; mntopts |= FSESS_NO_READAHEAD; mntopts |= FSESS_NO_UBC; mntopts |= FSESS_NO_VNCACHE; } if (fusefs_args.altflags & FUSE_MOPT_NO_ATTRCACHE) { mntopts |= FSESS_NO_ATTRCACHE; } if (fusefs_args.altflags & FUSE_MOPT_NO_READAHEAD) { mntopts |= FSESS_NO_READAHEAD; } if (fusefs_args.altflags & (FUSE_MOPT_NO_UBC | FUSE_MOPT_DIRECT_IO)) { mntopts |= FSESS_NO_UBC; } if (fusefs_args.altflags & FUSE_MOPT_NO_VNCACHE) { mntopts |= FSESS_NO_VNCACHE; } if (fusefs_args.altflags & FUSE_MOPT_NEGATIVE_VNCACHE) { if (mntopts & FSESS_NO_VNCACHE) { return EINVAL; } mntopts |= FSESS_NEGATIVE_VNCACHE; } if (fusefs_args.altflags & FUSE_MOPT_NO_SYNCWRITES) { /* Cannot mix 'nosyncwrites' with 'noubc' or 'noreadahead'. */ if (mntopts & (FSESS_NO_READAHEAD | FSESS_NO_UBC)) { return EINVAL; } mntopts |= FSESS_NO_SYNCWRITES; vfs_clearflags(mp, MNT_SYNCHRONOUS); vfs_setflags(mp, MNT_ASYNC); /* We check for this only if we have nosyncwrites in the first place. */ if (fusefs_args.altflags & FUSE_MOPT_NO_SYNCONCLOSE) { mntopts |= FSESS_NO_SYNCONCLOSE; } } else { vfs_clearflags(mp, MNT_ASYNC); vfs_setflags(mp, MNT_SYNCHRONOUS); } if (mntopts & FSESS_NO_UBC) { /* If no buffer cache, disallow exec from file system. */ vfs_setflags(mp, MNT_NOEXEC); } vfs_setauthopaque(mp); vfs_setauthopaqueaccess(mp); if ((fusefs_args.altflags & FUSE_MOPT_DEFAULT_PERMISSIONS) && (fusefs_args.altflags & FUSE_MOPT_DEFER_PERMISSIONS)) { return EINVAL; } if (fusefs_args.altflags & FUSE_MOPT_DEFAULT_PERMISSIONS) { mntopts |= FSESS_DEFAULT_PERMISSIONS; vfs_clearauthopaque(mp); } if (fusefs_args.altflags & FUSE_MOPT_DEFER_PERMISSIONS) { mntopts |= FSESS_DEFER_PERMISSIONS; } if (fusefs_args.altflags & FUSE_MOPT_EXTENDED_SECURITY) { mntopts |= FSESS_EXTENDED_SECURITY; vfs_setextendedsecurity(mp); } if (fusefs_args.altflags & FUSE_MOPT_LOCALVOL) { mntopts |= FSESS_LOCALVOL; vfs_setflags(mp, MNT_LOCAL); } /* done checking incoming option bits */ err = 0; vfs_setfsprivate(mp, NULL); fdev = fuse_device_get(fusefs_args.rdev); if (!fdev) { return EINVAL; } fuse_device_lock(fdev); drandom = fuse_device_get_random(fdev); if (fusefs_args.random != drandom) { fuse_device_unlock(fdev); IOLog("OSXFUSE: failing mount because of mismatched random\n"); return EINVAL; } data = fuse_device_get_mpdata(fdev); if (!data) { fuse_device_unlock(fdev); return ENXIO; } #if M_OSXFUSE_ENABLE_BIG_LOCK biglock = data->biglock; fuse_biglock_lock(biglock); #endif if (data->mount_state != FM_NOTMOUNTED) { #if M_OSXFUSE_ENABLE_BIG_LOCK fuse_biglock_unlock(biglock); #endif fuse_device_unlock(fdev); return EALREADY; } if (!(data->dataflags & FSESS_OPENED)) { fuse_device_unlock(fdev); err = ENXIO; goto out; } data->mount_state = FM_MOUNTED; OSAddAtomic(1, (SInt32 *)&fuse_mount_count); mounted = true; if (fdata_dead_get(data)) { fuse_device_unlock(fdev); err = ENOTCONN; goto out; } if (!data->daemoncred) { panic("OSXFUSE: daemon found but identity unknown"); } if (fuse_vfs_context_issuser(context) && kauth_cred_getuid(vfs_context_ucred(context)) != kauth_cred_getuid(data->daemoncred)) { fuse_device_unlock(fdev); err = EPERM; goto out; } data->mp = mp; data->fdev = fdev; data->dataflags |= mntopts; data->daemon_timeout.tv_sec = fusefs_args.daemon_timeout; data->daemon_timeout.tv_nsec = 0; if (data->daemon_timeout.tv_sec) { data->daemon_timeout_p = &(data->daemon_timeout); } else { data->daemon_timeout_p = (struct timespec *)0; } data->max_read = max_read; data->fssubtype = fusefs_args.fssubtype; data->mountaltflags = fusefs_args.altflags; data->noimplflags = (uint64_t)0; data->blocksize = fuse_round_size(fusefs_args.blocksize, FUSE_MIN_BLOCKSIZE, FUSE_MAX_BLOCKSIZE); data->iosize = fuse_round_size(fusefs_args.iosize, FUSE_MIN_IOSIZE, FUSE_MAX_IOSIZE); if (data->iosize < data->blocksize) { data->iosize = data->blocksize; } data->userkernel_bufsize = FUSE_DEFAULT_USERKERNEL_BUFSIZE; copystr(fusefs_args.fsname, vfsstatfsp->f_mntfromname, MNAMELEN - 1, &len); bzero(vfsstatfsp->f_mntfromname + len, MNAMELEN - len); copystr(fusefs_args.volname, data->volname, MAXPATHLEN - 1, &len); bzero(data->volname + len, MAXPATHLEN - len); /* previous location of vfs_setioattr() */ vfs_setfsprivate(mp, data); fuse_device_unlock(fdev); /* Send a handshake message to the daemon. */ kr = kernel_thread_start(fuse_internal_init, data, &init_thread); if (kr != KERN_SUCCESS) { IOLog("OSXFUSE: could not start init thread\n"); err = ENOTCONN; } else { thread_deallocate(init_thread); } out: if (err) { vfs_setfsprivate(mp, NULL); fuse_device_lock(fdev); data = fuse_device_get_mpdata(fdev); /* again */ if (mounted) { OSAddAtomic(-1, (SInt32 *)&fuse_mount_count); } if (data) { data->mount_state = FM_NOTMOUNTED; if (!(data->dataflags & FSESS_OPENED)) { #if M_OSXFUSE_ENABLE_BIG_LOCK assert(biglock == data->biglock); fuse_biglock_unlock(biglock); #endif fuse_device_close_final(fdev); /* data is gone now */ } } fuse_device_unlock(fdev); } else { vnode_t fuse_rootvp = NULLVP; err = fuse_vfsop_root(mp, &fuse_rootvp, context); if (err) { goto out; /* go back and follow error path */ } err = vnode_ref(fuse_rootvp); #if M_OSXFUSE_ENABLE_BIG_LOCK /* * Even though fuse_rootvp will not be reclaimed when calling vnode_put * because we incremented its usecount by calling vnode_ref release * biglock just to be safe. */ fuse_biglock_unlock(biglock); #endif /* M_OSXFUSE_ENABLE_BIG_LOCK */ (void)vnode_put(fuse_rootvp); #if M_OSXFUSE_ENABLE_BIG_LOCK fuse_biglock_lock(biglock); #endif if (err) { goto out; /* go back and follow error path */ } else { struct vfsioattr ioattr; vfs_ioattr(mp, &ioattr); ioattr.io_maxreadcnt = ioattr.io_maxwritecnt = data->iosize; ioattr.io_segreadcnt = ioattr.io_segwritecnt = data->iosize / PAGE_SIZE; ioattr.io_maxsegreadsize = ioattr.io_maxsegwritesize = data->iosize; ioattr.io_devblocksize = data->blocksize; vfs_setioattr(mp, &ioattr); } } #if M_OSXFUSE_ENABLE_BIG_LOCK fuse_device_lock(fdev); data = fuse_device_get_mpdata(fdev); /* ...and again */ if(data) { assert(data->biglock == biglock); fuse_biglock_unlock(biglock); } fuse_device_unlock(fdev); #endif return err; }
/* * Look up a vnode/nfsnode by file handle. * Callers must check for mount points!! * In all cases, a pointer to a * nfsnode structure is returned. */ int nfs_nget( mount_t mp, nfsnode_t dnp, struct componentname *cnp, u_char *fhp, int fhsize, struct nfs_vattr *nvap, u_int64_t *xidp, uint32_t auth, int flags, nfsnode_t *npp) { nfsnode_t np; struct nfsnodehashhead *nhpp; vnode_t vp; int error, nfsvers; mount_t mp2; struct vnode_fsparam vfsp; uint32_t vid; FSDBG_TOP(263, mp, dnp, flags, npp); /* Check for unmount in progress */ if (!mp || vfs_isforce(mp)) { *npp = NULL; error = ENXIO; FSDBG_BOT(263, mp, dnp, 0xd1e, error); return (error); } nfsvers = VFSTONFS(mp)->nm_vers; nhpp = NFSNOHASH(nfs_hash(fhp, fhsize)); loop: lck_mtx_lock(nfs_node_hash_mutex); for (np = nhpp->lh_first; np != 0; np = np->n_hash.le_next) { mp2 = (np->n_hflag & NHINIT) ? np->n_mount : NFSTOMP(np); if (mp != mp2 || np->n_fhsize != fhsize || bcmp(fhp, np->n_fhp, fhsize)) continue; if (nvap && (nvap->nva_flags & NFS_FFLAG_TRIGGER_REFERRAL) && cnp && (cnp->cn_namelen > (fhsize - (int)sizeof(dnp)))) { /* The name was too long to fit in the file handle. Check it against the node's name. */ int namecmp = 0; const char *vname = vnode_getname(NFSTOV(np)); if (vname) { if (cnp->cn_namelen != (int)strlen(vname)) namecmp = 1; else namecmp = strncmp(vname, cnp->cn_nameptr, cnp->cn_namelen); vnode_putname(vname); } if (namecmp) /* full name didn't match */ continue; } FSDBG(263, dnp, np, np->n_flag, 0xcace0000); /* if the node is locked, sleep on it */ if ((np->n_hflag & NHLOCKED) && !(flags & NG_NOCREATE)) { np->n_hflag |= NHLOCKWANT; FSDBG(263, dnp, np, np->n_flag, 0xcace2222); msleep(np, nfs_node_hash_mutex, PDROP | PINOD, "nfs_nget", NULL); FSDBG(263, dnp, np, np->n_flag, 0xcace3333); goto loop; } vp = NFSTOV(np); vid = vnode_vid(vp); lck_mtx_unlock(nfs_node_hash_mutex); if ((error = vnode_getwithvid(vp, vid))) { /* * If vnode is being reclaimed or has already * changed identity, no need to wait. */ FSDBG_BOT(263, dnp, *npp, 0xcace0d1e, error); return (error); } if ((error = nfs_node_lock(np))) { /* this only fails if the node is now unhashed */ /* so let's see if we can find/create it again */ FSDBG(263, dnp, *npp, 0xcaced1e2, error); vnode_put(vp); if (flags & NG_NOCREATE) { *npp = 0; FSDBG_BOT(263, dnp, *npp, 0xcaced1e0, ENOENT); return (ENOENT); } goto loop; } /* update attributes */ if (nvap) error = nfs_loadattrcache(np, nvap, xidp, 0); if (error) { nfs_node_unlock(np); vnode_put(vp); } else { if (dnp && cnp && (flags & NG_MAKEENTRY)) cache_enter(NFSTOV(dnp), vp, cnp); /* * Update the vnode if the name/and or the parent has * changed. We need to do this so that if getattrlist is * called asking for ATTR_CMN_NAME, that the "most" * correct name is being returned. In addition for * monitored vnodes we need to kick the vnode out of the * name cache. We do this so that if there are hard * links in the same directory the link will not be * found and a lookup will get us here to return the * name of the current link. In addition by removing the * name from the name cache the old name will not be * found after a rename done on another client or the * server. The principle reason to do this is because * Finder is asking for notifications on a directory. * The directory changes, Finder gets notified, reads * the directory (which we have purged) and for each * entry returned calls getattrlist with the name * returned from readdir. gettattrlist has to call * namei/lookup to resolve the name, because its not in * the cache we end up here. We need to update the name * so Finder will get the name it called us with. * * We had an imperfect solution with respect to case * sensitivity. There is a test that is run in * FileBuster that does renames from some name to * another name differing only in case. It then reads * the directory looking for the new name, after it * finds that new name, it ask gettattrlist to verify * that the name is the new name. Usually that works, * but renames generate fsevents and fseventsd will do a * lookup on the name via lstat. Since that test renames * old name to new name back and forth there is a race * that an fsevent will be behind and will access the * file by the old name, on a case insensitive file * system that will work. Problem is if we do a case * sensitive compare, we're going to change the name, * which the test's getattrlist verification step is * going to fail. So we will check the case sensitivity * of the file system and do the appropriate compare. In * a rare instance for non homogeneous file systems * w.r.t. pathconf we will use case sensitive compares. * That could break if the file system is actually case * insensitive. * * Note that V2 does not know the case, so we just * assume case sensitivity. * * This is clearly not perfect due to races, but this is * as good as its going to get. You can defeat the * handling of hard links simply by doing: * * while :; do ls -l > /dev/null; done * * in a terminal window. Even a single ls -l can cause a * race. * * <rant>What we really need is for the caller, that * knows the name being used is valid since it got it * from a readdir to use that name and not ask for the * ATTR_CMN_NAME</rant> */ if (dnp && cnp && (vp != NFSTOV(dnp))) { int update_flags = (vnode_ismonitored((NFSTOV(dnp)))) ? VNODE_UPDATE_CACHE : 0; int (*cmp)(const char *s1, const char *s2, size_t n); cmp = nfs_case_insensitive(mp) ? strncasecmp : strncmp; if (vp->v_name && cnp->cn_namelen && (*cmp)(cnp->cn_nameptr, vp->v_name, cnp->cn_namelen)) update_flags |= VNODE_UPDATE_NAME; if ((vp->v_name == NULL && cnp->cn_namelen != 0) || (vp->v_name != NULL && cnp->cn_namelen == 0)) update_flags |= VNODE_UPDATE_NAME; if (vnode_parent(vp) != NFSTOV(dnp)) update_flags |= VNODE_UPDATE_PARENT; if (update_flags) { NFS_NODE_DBG("vnode_update_identity old name %s new name %.*s update flags = %x\n", vp->v_name, cnp->cn_namelen, cnp->cn_nameptr ? cnp->cn_nameptr : "", update_flags); vnode_update_identity(vp, NFSTOV(dnp), cnp->cn_nameptr, cnp->cn_namelen, 0, update_flags); } } *npp = np; } FSDBG_BOT(263, dnp, *npp, 0xcace0000, error); return(error); } FSDBG(263, mp, dnp, npp, 0xaaaaaaaa); if (flags & NG_NOCREATE) { lck_mtx_unlock(nfs_node_hash_mutex); *npp = 0; FSDBG_BOT(263, dnp, *npp, 0x80000001, ENOENT); return (ENOENT); } /* * allocate and initialize nfsnode and stick it in the hash * before calling getnewvnode(). Anyone finding it in the * hash before initialization is complete will wait for it. */ MALLOC_ZONE(np, nfsnode_t, sizeof *np, M_NFSNODE, M_WAITOK); if (!np) { lck_mtx_unlock(nfs_node_hash_mutex); *npp = 0; FSDBG_BOT(263, dnp, *npp, 0x80000001, ENOMEM); return (ENOMEM); } bzero(np, sizeof *np); np->n_hflag |= (NHINIT | NHLOCKED); np->n_mount = mp; np->n_auth = auth; TAILQ_INIT(&np->n_opens); TAILQ_INIT(&np->n_lock_owners); TAILQ_INIT(&np->n_locks); np->n_dlink.tqe_next = NFSNOLIST; np->n_dreturn.tqe_next = NFSNOLIST; np->n_monlink.le_next = NFSNOLIST; /* ugh... need to keep track of ".zfs" directories to workaround server bugs */ if ((nvap->nva_type == VDIR) && cnp && (cnp->cn_namelen == 4) && (cnp->cn_nameptr[0] == '.') && (cnp->cn_nameptr[1] == 'z') && (cnp->cn_nameptr[2] == 'f') && (cnp->cn_nameptr[3] == 's')) np->n_flag |= NISDOTZFS; if (dnp && (dnp->n_flag & NISDOTZFS)) np->n_flag |= NISDOTZFSCHILD; if (dnp && cnp && ((cnp->cn_namelen != 2) || (cnp->cn_nameptr[0] != '.') || (cnp->cn_nameptr[1] != '.'))) { vnode_t dvp = NFSTOV(dnp); if (!vnode_get(dvp)) { if (!vnode_ref(dvp)) np->n_parent = dvp; vnode_put(dvp); } } /* setup node's file handle */ if (fhsize > NFS_SMALLFH) { MALLOC_ZONE(np->n_fhp, u_char *, fhsize, M_NFSBIGFH, M_WAITOK); if (!np->n_fhp) { lck_mtx_unlock(nfs_node_hash_mutex); FREE_ZONE(np, sizeof *np, M_NFSNODE); *npp = 0; FSDBG_BOT(263, dnp, *npp, 0x80000002, ENOMEM); return (ENOMEM); } } else {
static int vfs_mount_9p(mount_t mp, vnode_t devvp, user_addr_t data, vfs_context_t ctx) { #pragma unused(devvp) struct sockaddr *addr, *authaddr; struct vfsstatfs *sp; char authkey[DESKEYLEN+1]; kauth_cred_t cred; user_args_9p args; mount_9p *nmp; size_t size; fid_9p fid; qid_9p qid; char *vers; int e; TRACE(); nmp = NULL; addr = NULL; authaddr = NULL; fid = NOFID; if (vfs_isupdate(mp)) return ENOTSUP; if (vfs_context_is64bit(ctx)) { if ((e=copyin(data, &args, sizeof(args)))) goto error; } else { args_9p args32; if ((e=copyin(data, &args32, sizeof(args32)))) goto error; args.spec = CAST_USER_ADDR_T(args32.spec); args.addr = CAST_USER_ADDR_T(args32.addr); args.addrlen = args32.addrlen; args.authaddr = CAST_USER_ADDR_T(args32.authaddr); args.authaddrlen = args32.authaddrlen; args.volume = CAST_USER_ADDR_T(args32.volume); args.uname = CAST_USER_ADDR_T(args32.uname); args.aname = CAST_USER_ADDR_T(args32.aname); args.authkey = CAST_USER_ADDR_T(args32.authkey); args.flags = args32.flags; } e = ENOMEM; nmp = malloc_9p(sizeof(*nmp)); if (nmp == NULL) return e; nmp->mp = mp; TAILQ_INIT(&nmp->req); nmp->lck = lck_mtx_alloc_init(lck_grp_9p, LCK_ATTR_NULL); nmp->reqlck = lck_mtx_alloc_init(lck_grp_9p, LCK_ATTR_NULL); nmp->nodelck = lck_mtx_alloc_init(lck_grp_9p, LCK_ATTR_NULL); nmp->node = hashinit(desiredvnodes, M_TEMP, &nmp->nodelen); if (nmp->lck==NULL || nmp->reqlck==NULL || nmp->nodelck==NULL || nmp->node==NULL) goto error; if ((e=nameget_9p(args.volume, &nmp->volume))) goto error; if ((e=nameget_9p(args.uname, &nmp->uname))) goto error; if ((e=nameget_9p(args.aname, &nmp->aname))) goto error; cred = vfs_context_ucred(ctx); if (IS_VALID_CRED(cred)) { nmp->uid = kauth_cred_getuid(cred); nmp->gid = kauth_cred_getgid(cred); } else { nmp->uid = KAUTH_UID_NONE; nmp->gid = KAUTH_GID_NONE; } vfs_getnewfsid(mp); vfs_setfsprivate(mp, nmp); nmp->flags = args.flags; if ((e=addrget_9p(args.addr, args.addrlen, &addr))) goto error; if ((e=connect_9p(nmp, addr))) goto error; vers = VERSION9P; if (ISSET(nmp->flags, FLAG_DOTU)) vers = VERSION9PDOTU; if ((e=version_9p(nmp, vers, &nmp->version))) goto error; if (ISSET(nmp->flags, FLAG_DOTU) && strcmp(VERSION9PDOTU, nmp->version)==0) SET(nmp->flags, F_DOTU); nmp->afid = NOFID; if (args.authaddr && args.authaddrlen && args.authkey) { if ((e=copyin(args.authkey, authkey, DESKEYLEN))) goto error; if ((e=addrget_9p(args.authaddr, args.authaddrlen, &authaddr))) goto error; if ((e=auth_9p(nmp, nmp->uname, nmp->aname, nmp->uid, &nmp->afid, &qid))) goto error; if (nmp->afid!=NOFID && (e=authp9any_9p(nmp, nmp->afid, authaddr, nmp->uname, authkey))) goto error; bzero(authkey, DESKEYLEN); } if ((e=attach_9p(nmp, nmp->uname, nmp->aname, nmp->afid, nmp->uid, &fid, &qid))) goto error; if ((e=nget_9p(nmp, fid, qid, NULL, &nmp->root, NULL, ctx))) goto error; nunlock_9p(NTO9P(nmp->root)); e = vnode_ref(nmp->root); vnode_put(nmp->root); if (e) goto error; vfs_setauthopaque(mp); vfs_clearauthopaqueaccess(mp); vfs_setlocklocal(mp); // init stats sp = vfs_statfs(nmp->mp); copyinstr(args.spec, sp->f_mntfromname, MNAMELEN-1, &size); bzero(sp->f_mntfromname+size, MNAMELEN-size); sp->f_bsize = PAGE_SIZE; sp->f_iosize = nmp->msize-IOHDRSZ; sp->f_blocks = sp->f_bfree = sp->f_bavail = sp->f_bused = 0; sp->f_files = 65535; sp->f_ffree = sp->f_files-2; sp->f_flags = vfs_flags(mp); free_9p(addr); free_9p(authaddr); return 0; error: bzero(authkey, DESKEYLEN); free_9p(addr); free_9p(authaddr); if (nmp->so) { clunk_9p(nmp, fid); disconnect_9p(nmp); } freemount_9p(nmp); vfs_setfsprivate(mp, NULL); return e; }
/* works like PFlushVolumeData */ void darwin_notify_perms(struct unixuser *auser, int event) { int i; struct afs_q *tq, *uq = NULL; struct vcache *tvc, *hnext; int isglock = ISAFS_GLOCK(); struct vnode *vp; struct vnode_attr va; int isctxtowner = 0; if (!afs_darwin_fsevents) return; VATTR_INIT(&va); VATTR_SET(&va, va_mode, 0777); if (event & UTokensObtained) VATTR_SET(&va, va_uid, auser->uid); else VATTR_SET(&va, va_uid, -2); /* nobody */ if (!isglock) AFS_GLOCK(); if (!(vfs_context_owner == current_thread())) { get_vfs_context(); isctxtowner = 1; } loop: ObtainReadLock(&afs_xvcache); for (i = 0; i < VCSIZE; i++) { for (tq = afs_vhashTV[i].prev; tq != &afs_vhashTV[i]; tq = uq) { uq = QPrev(tq); tvc = QTOVH(tq); if (tvc->f.states & CDeadVnode) { /* we can afford to be best-effort */ continue; } /* no per-file acls, so only notify on directories */ if (!(vp = AFSTOV(tvc)) || !vnode_isdir(AFSTOV(tvc))) continue; /* dynroot object. no callbacks. anonymous ACL. just no. */ if (afs_IsDynrootFid(&tvc->f.fid)) continue; /* no fake fsevents on mount point sources. leaks refs */ if (tvc->mvstat == 1) continue; /* if it's being reclaimed, just pass */ if (vnode_get(vp)) continue; if (vnode_ref(vp)) { AFS_GUNLOCK(); vnode_put(vp); AFS_GLOCK(); continue; } ReleaseReadLock(&afs_xvcache); /* Avoid potentially re-entering on this lock */ if (0 == NBObtainWriteLock(&tvc->lock, 234)) { tvc->f.states |= CEvent; AFS_GUNLOCK(); vnode_setattr(vp, &va, afs_osi_ctxtp); tvc->f.states &= ~CEvent; vnode_put(vp); AFS_GLOCK(); ReleaseWriteLock(&tvc->lock); } ObtainReadLock(&afs_xvcache); uq = QPrev(tq); /* our tvc ptr is still good until now */ AFS_FAST_RELE(tvc); } } ReleaseReadLock(&afs_xvcache); if (isctxtowner) put_vfs_context(); if (!isglock) AFS_GUNLOCK(); }
struct vcache * osi_dnlc_lookup(struct vcache *adp, char *aname, int locktype) { struct vcache *tvc; int LRUme; unsigned int key, skey; char *ts = aname; struct nc *tnc, *tnc1 = 0; int safety; #ifdef AFS_DARWIN80_ENV vnode_t tvp; #endif ma_critical_enter(); if (!afs_usednlc) { ma_critical_exit(); return 0; } dnlcHash(ts, key); /* leaves ts pointing at the NULL */ if (ts - aname >= AFSNCNAMESIZE) { ma_critical_exit(); return 0; } skey = key & (NHSIZE - 1); TRACE(osi_dnlc_lookupT, skey); dnlcstats.lookups++; ObtainReadLock(&afs_xvcache); ObtainReadLock(&afs_xdnlc); for (tvc = NULL, tnc = nameHash[skey], safety = 0; tnc; tnc = tnc->next, safety++) { if ( /* (tnc->key == key) && */ (tnc->dirp == adp) && (!strcmp((char *)tnc->name, aname))) { tvc = tnc->vp; tnc1 = tnc; break; } else if (tnc->next == nameHash[skey]) { /* end of list */ break; } else if (safety > NCSIZE) { afs_warn("DNLC cycle"); dnlcstats.cycles++; ReleaseReadLock(&afs_xdnlc); ReleaseReadLock(&afs_xvcache); osi_dnlc_purge(); ma_critical_exit(); return (0); } } LRUme = 0; /* (tnc != nameHash[skey]); */ ReleaseReadLock(&afs_xdnlc); if (!tvc) { ReleaseReadLock(&afs_xvcache); dnlcstats.misses++; } else { if ((tvc->f.states & CVInit) #ifdef AFS_DARWIN80_ENV ||(tvc->f.states & CDeadVnode) #endif ) { ReleaseReadLock(&afs_xvcache); dnlcstats.misses++; osi_dnlc_remove(adp, aname, tvc); ma_critical_exit(); return 0; } #if defined(AFS_DARWIN80_ENV) tvp = AFSTOV(tvc); if (vnode_get(tvp)) { ReleaseReadLock(&afs_xvcache); dnlcstats.misses++; osi_dnlc_remove(adp, aname, tvc); ma_critical_exit(); return 0; } if (vnode_ref(tvp)) { ReleaseReadLock(&afs_xvcache); AFS_GUNLOCK(); vnode_put(tvp); AFS_GLOCK(); dnlcstats.misses++; osi_dnlc_remove(adp, aname, tvc); ma_critical_exit(); return 0; } #elif defined(AFS_FBSD_ENV) /* can't sleep in a critical section */ ma_critical_exit(); osi_vnhold(tvc, 0); ma_critical_enter(); #else osi_vnhold(tvc, 0); #endif ReleaseReadLock(&afs_xvcache); #ifdef notdef /* * XX If LRUme ever is non-zero change the if statement around because * aix's cc with optimizer on won't necessarily check things in order XX */ if (LRUme && (0 == NBObtainWriteLock(&afs_xdnlc))) { /* don't block to do this */ /* tnc might have been moved during race condition, */ /* but it's always in a legit hash chain when a lock is granted, * or else it's on the freelist so prev == NULL, * so at worst this is redundant */ /* Now that we've got it held, and a lock on the dnlc, we * should check to be sure that there was no race, and * bail out if there was. */ if (tnc->prev) { /* special case for only two elements on list - relative ordering * doesn't change */ if (tnc->prev != tnc->next) { /* remove from old location */ tnc->prev->next = tnc->next; tnc->next->prev = tnc->prev; /* insert into new location */ tnc->next = nameHash[skey]; tnc->prev = tnc->next->prev; tnc->next->prev = tnc; tnc->prev->next = tnc; } nameHash[skey] = tnc; } ReleaseWriteLock(&afs_xdnlc); } #endif } ma_critical_exit(); return tvc; }
/* * This function is called very early on in the Mach startup, from the * function start_kernel_threads() in osfmk/kern/startup.c. It's called * in the context of the current (startup) task using a call to the * function kernel_thread_create() to jump into start_kernel_threads(). * Internally, kernel_thread_create() calls thread_create_internal(), * which calls uthread_alloc(). The function of uthread_alloc() is * normally to allocate a uthread structure, and fill out the uu_sigmask, * uu_context fields. It skips filling these out in the case of the "task" * being "kernel_task", because the order of operation is inverted. To * account for that, we need to manually fill in at least the contents * of the uu_context.vc_ucred field so that the uthread structure can be * used like any other. */ void bsd_init(void) { struct uthread *ut; unsigned int i; #if __i386__ || __x86_64__ int error; #endif struct vfs_context context; kern_return_t ret; struct ucred temp_cred; #define bsd_init_kprintf(x...) /* kprintf("bsd_init: " x) */ kernel_flock = funnel_alloc(KERNEL_FUNNEL); if (kernel_flock == (funnel_t *)0 ) { panic("bsd_init: Failed to allocate kernel funnel"); } printf(copyright); bsd_init_kprintf("calling kmeminit\n"); kmeminit(); bsd_init_kprintf("calling parse_bsd_args\n"); parse_bsd_args(); /* Initialize kauth subsystem before instancing the first credential */ bsd_init_kprintf("calling kauth_init\n"); kauth_init(); /* Initialize process and pgrp structures. */ bsd_init_kprintf("calling procinit\n"); procinit(); /* Initialize the ttys (MUST be before kminit()/bsd_autoconf()!)*/ tty_init(); kernproc = &proc0; /* implicitly bzero'ed */ /* kernel_task->proc = kernproc; */ set_bsdtask_info(kernel_task,(void *)kernproc); /* give kernproc a name */ bsd_init_kprintf("calling process_name\n"); process_name("kernel_task", kernproc); /* allocate proc lock group attribute and group */ bsd_init_kprintf("calling lck_grp_attr_alloc_init\n"); proc_lck_grp_attr= lck_grp_attr_alloc_init(); proc_lck_grp = lck_grp_alloc_init("proc", proc_lck_grp_attr); #ifndef CONFIG_EMBEDDED proc_slock_grp = lck_grp_alloc_init("proc-slock", proc_lck_grp_attr); proc_fdmlock_grp = lck_grp_alloc_init("proc-fdmlock", proc_lck_grp_attr); proc_mlock_grp = lck_grp_alloc_init("proc-mlock", proc_lck_grp_attr); #endif /* Allocate proc lock attribute */ proc_lck_attr = lck_attr_alloc_init(); #if 0 #if __PROC_INTERNAL_DEBUG lck_attr_setdebug(proc_lck_attr); #endif #endif #ifdef CONFIG_EMBEDDED proc_list_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr); proc_klist_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr); lck_mtx_init(&kernproc->p_mlock, proc_lck_grp, proc_lck_attr); lck_mtx_init(&kernproc->p_fdmlock, proc_lck_grp, proc_lck_attr); lck_spin_init(&kernproc->p_slock, proc_lck_grp, proc_lck_attr); #else proc_list_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr); proc_klist_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr); lck_mtx_init(&kernproc->p_mlock, proc_mlock_grp, proc_lck_attr); lck_mtx_init(&kernproc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr); lck_spin_init(&kernproc->p_slock, proc_slock_grp, proc_lck_attr); #endif execargs_cache_lock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr); execargs_cache_size = bsd_simul_execs; execargs_free_count = bsd_simul_execs; execargs_cache = (vm_offset_t *)kalloc(bsd_simul_execs * sizeof(vm_offset_t)); bzero(execargs_cache, bsd_simul_execs * sizeof(vm_offset_t)); if (current_task() != kernel_task) printf("bsd_init: We have a problem, " "current task is not kernel task\n"); bsd_init_kprintf("calling get_bsdthread_info\n"); ut = (uthread_t)get_bsdthread_info(current_thread()); #if CONFIG_MACF /* * Initialize the MAC Framework */ mac_policy_initbsd(); kernproc->p_mac_enforce = 0; #endif /* MAC */ /* * Create process 0. */ proc_list_lock(); LIST_INSERT_HEAD(&allproc, kernproc, p_list); kernproc->p_pgrp = &pgrp0; LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash); LIST_INIT(&pgrp0.pg_members); #ifdef CONFIG_EMBEDDED lck_mtx_init(&pgrp0.pg_mlock, proc_lck_grp, proc_lck_attr); #else lck_mtx_init(&pgrp0.pg_mlock, proc_mlock_grp, proc_lck_attr); #endif /* There is no other bsd thread this point and is safe without pgrp lock */ LIST_INSERT_HEAD(&pgrp0.pg_members, kernproc, p_pglist); kernproc->p_listflag |= P_LIST_INPGRP; kernproc->p_pgrpid = 0; pgrp0.pg_session = &session0; pgrp0.pg_membercnt = 1; session0.s_count = 1; session0.s_leader = kernproc; session0.s_listflags = 0; #ifdef CONFIG_EMBEDDED lck_mtx_init(&session0.s_mlock, proc_lck_grp, proc_lck_attr); #else lck_mtx_init(&session0.s_mlock, proc_mlock_grp, proc_lck_attr); #endif LIST_INSERT_HEAD(SESSHASH(0), &session0, s_hash); proc_list_unlock(); #if CONFIG_LCTX kernproc->p_lctx = NULL; #endif kernproc->task = kernel_task; kernproc->p_stat = SRUN; kernproc->p_flag = P_SYSTEM; kernproc->p_nice = NZERO; kernproc->p_pptr = kernproc; TAILQ_INIT(&kernproc->p_uthlist); TAILQ_INSERT_TAIL(&kernproc->p_uthlist, ut, uu_list); kernproc->sigwait = FALSE; kernproc->sigwait_thread = THREAD_NULL; kernproc->exit_thread = THREAD_NULL; kernproc->p_csflags = CS_VALID; /* * Create credential. This also Initializes the audit information. */ bsd_init_kprintf("calling bzero\n"); bzero(&temp_cred, sizeof(temp_cred)); temp_cred.cr_ngroups = 1; temp_cred.cr_audit.as_aia_p = &audit_default_aia; /* XXX the following will go away with cr_au */ temp_cred.cr_au.ai_auid = AU_DEFAUDITID; bsd_init_kprintf("calling kauth_cred_create\n"); kernproc->p_ucred = kauth_cred_create(&temp_cred); /* give the (already exisiting) initial thread a reference on it */ bsd_init_kprintf("calling kauth_cred_ref\n"); kauth_cred_ref(kernproc->p_ucred); ut->uu_context.vc_ucred = kernproc->p_ucred; ut->uu_context.vc_thread = current_thread(); TAILQ_INIT(&kernproc->p_aio_activeq); TAILQ_INIT(&kernproc->p_aio_doneq); kernproc->p_aio_total_count = 0; kernproc->p_aio_active_count = 0; bsd_init_kprintf("calling file_lock_init\n"); file_lock_init(); #if CONFIG_MACF mac_cred_label_associate_kernel(kernproc->p_ucred); mac_task_label_update_cred (kernproc->p_ucred, (struct task *) kernproc->task); #endif /* Create the file descriptor table. */ filedesc0.fd_refcnt = 1+1; /* +1 so shutdown will not _FREE_ZONE */ kernproc->p_fd = &filedesc0; filedesc0.fd_cmask = cmask; filedesc0.fd_knlistsize = -1; filedesc0.fd_knlist = NULL; filedesc0.fd_knhash = NULL; filedesc0.fd_knhashmask = 0; /* Create the limits structures. */ kernproc->p_limit = &limit0; for (i = 0; i < sizeof(kernproc->p_rlimit)/sizeof(kernproc->p_rlimit[0]); i++) limit0.pl_rlimit[i].rlim_cur = limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY; limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE; limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = maxprocperuid; limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc; limit0.pl_rlimit[RLIMIT_STACK] = vm_initial_limit_stack; limit0.pl_rlimit[RLIMIT_DATA] = vm_initial_limit_data; limit0.pl_rlimit[RLIMIT_CORE] = vm_initial_limit_core; limit0.pl_refcnt = 1; kernproc->p_stats = &pstats0; kernproc->p_sigacts = &sigacts0; /* * Charge root for two processes: init and mach_init. */ bsd_init_kprintf("calling chgproccnt\n"); (void)chgproccnt(0, 1); /* * Allocate a kernel submap for pageable memory * for temporary copying (execve()). */ { vm_offset_t minimum; bsd_init_kprintf("calling kmem_suballoc\n"); ret = kmem_suballoc(kernel_map, &minimum, (vm_size_t)bsd_pageable_map_size, TRUE, VM_FLAGS_ANYWHERE, &bsd_pageable_map); if (ret != KERN_SUCCESS) panic("bsd_init: Failed to allocate bsd pageable map"); } /* * Initialize buffers and hash links for buffers * * SIDE EFFECT: Starts a thread for bcleanbuf_thread(), so must * happen after a credential has been associated with * the kernel task. */ bsd_init_kprintf("calling bsd_bufferinit\n"); bsd_bufferinit(); /* Initialize the execve() semaphore */ bsd_init_kprintf("calling semaphore_create\n"); if (ret != KERN_SUCCESS) panic("bsd_init: Failed to create execve semaphore"); /* * Initialize the calendar. */ bsd_init_kprintf("calling IOKitInitializeTime\n"); IOKitInitializeTime(); if (turn_on_log_leaks && !new_nkdbufs) new_nkdbufs = 200000; start_kern_tracing(new_nkdbufs); if (turn_on_log_leaks) log_leaks = 1; bsd_init_kprintf("calling ubc_init\n"); ubc_init(); /* Initialize the file systems. */ bsd_init_kprintf("calling vfsinit\n"); vfsinit(); #if SOCKETS /* Initialize per-CPU cache allocator */ mcache_init(); /* Initialize mbuf's. */ bsd_init_kprintf("calling mbinit\n"); mbinit(); net_str_id_init(); /* for mbuf tags */ #endif /* SOCKETS */ /* * Initializes security event auditing. * XXX: Should/could this occur later? */ #if CONFIG_AUDIT bsd_init_kprintf("calling audit_init\n"); audit_init(); #endif /* Initialize kqueues */ bsd_init_kprintf("calling knote_init\n"); knote_init(); /* Initialize for async IO */ bsd_init_kprintf("calling aio_init\n"); aio_init(); /* Initialize pipes */ bsd_init_kprintf("calling pipeinit\n"); pipeinit(); /* Initialize SysV shm subsystem locks; the subsystem proper is * initialized through a sysctl. */ #if SYSV_SHM bsd_init_kprintf("calling sysv_shm_lock_init\n"); sysv_shm_lock_init(); #endif #if SYSV_SEM bsd_init_kprintf("calling sysv_sem_lock_init\n"); sysv_sem_lock_init(); #endif #if SYSV_MSG bsd_init_kprintf("sysv_msg_lock_init\n"); sysv_msg_lock_init(); #endif bsd_init_kprintf("calling pshm_lock_init\n"); pshm_lock_init(); bsd_init_kprintf("calling psem_lock_init\n"); psem_lock_init(); pthread_init(); /* POSIX Shm and Sem */ bsd_init_kprintf("calling pshm_cache_init\n"); pshm_cache_init(); bsd_init_kprintf("calling psem_cache_init\n"); psem_cache_init(); bsd_init_kprintf("calling time_zone_slock_init\n"); time_zone_slock_init(); /* Stack snapshot facility lock */ stackshot_lock_init(); /* * Initialize protocols. Block reception of incoming packets * until everything is ready. */ bsd_init_kprintf("calling sysctl_register_fixed\n"); sysctl_register_fixed(); bsd_init_kprintf("calling sysctl_mib_init\n"); sysctl_mib_init(); #if NETWORKING bsd_init_kprintf("calling dlil_init\n"); dlil_init(); bsd_init_kprintf("calling proto_kpi_init\n"); proto_kpi_init(); #endif /* NETWORKING */ #if SOCKETS bsd_init_kprintf("calling socketinit\n"); socketinit(); bsd_init_kprintf("calling domaininit\n"); domaininit(); #endif /* SOCKETS */ kernproc->p_fd->fd_cdir = NULL; kernproc->p_fd->fd_rdir = NULL; #if CONFIG_EMBEDDED /* Initialize kernel memory status notifications */ bsd_init_kprintf("calling kern_memorystatus_init\n"); kern_memorystatus_init(); #endif #ifdef GPROF /* Initialize kernel profiling. */ kmstartup(); #endif /* kick off timeout driven events by calling first time */ thread_wakeup(&lbolt); timeout(lightning_bolt, 0, hz); bsd_init_kprintf("calling bsd_autoconf\n"); bsd_autoconf(); #if CONFIG_DTRACE dtrace_postinit(); #endif /* * We attach the loopback interface *way* down here to ensure * it happens after autoconf(), otherwise it becomes the * "primary" interface. */ #include <loop.h> #if NLOOP > 0 bsd_init_kprintf("calling loopattach\n"); loopattach(); /* XXX */ #endif #if PFLOG /* Initialize packet filter log interface */ pfloginit(); #endif /* PFLOG */ #if NETHER > 0 /* Register the built-in dlil ethernet interface family */ bsd_init_kprintf("calling ether_family_init\n"); ether_family_init(); #endif /* ETHER */ #if NETWORKING /* Call any kext code that wants to run just after network init */ bsd_init_kprintf("calling net_init_run\n"); net_init_run(); /* register user tunnel kernel control handler */ utun_register_control(); #endif /* NETWORKING */ bsd_init_kprintf("calling vnode_pager_bootstrap\n"); vnode_pager_bootstrap(); #if 0 /* XXX Hack for early debug stop */ printf("\nabout to sleep for 10 seconds\n"); IOSleep( 10 * 1000 ); /* Debugger("hello"); */ #endif bsd_init_kprintf("calling inittodr\n"); inittodr(0); #if CONFIG_EMBEDDED { /* print out early VM statistics */ kern_return_t kr1; vm_statistics_data_t stat; mach_msg_type_number_t count; count = HOST_VM_INFO_COUNT; kr1 = host_statistics(host_self(), HOST_VM_INFO, (host_info_t)&stat, &count); kprintf("Mach Virtual Memory Statistics (page size of 4096) bytes\n" "Pages free:\t\t\t%u.\n" "Pages active:\t\t\t%u.\n" "Pages inactive:\t\t\t%u.\n" "Pages wired down:\t\t%u.\n" "\"Translation faults\":\t\t%u.\n" "Pages copy-on-write:\t\t%u.\n" "Pages zero filled:\t\t%u.\n" "Pages reactivated:\t\t%u.\n" "Pageins:\t\t\t%u.\n" "Pageouts:\t\t\t%u.\n" "Object cache: %u hits of %u lookups (%d%% hit rate)\n", stat.free_count, stat.active_count, stat.inactive_count, stat.wire_count, stat.faults, stat.cow_faults, stat.zero_fill_count, stat.reactivations, stat.pageins, stat.pageouts, stat.hits, stat.lookups, (stat.hits == 0) ? 100 : ((stat.lookups * 100) / stat.hits)); } #endif /* CONFIG_EMBEDDED */ /* Mount the root file system. */ while( TRUE) { int err; bsd_init_kprintf("calling setconf\n"); setconf(); bsd_init_kprintf("vfs_mountroot\n"); if (0 == (err = vfs_mountroot())) break; rootdevice[0] = '\0'; #if NFSCLIENT if (mountroot == netboot_mountroot) { PE_display_icon( 0, "noroot"); /* XXX a netboot-specific icon would be nicer */ vc_progress_set(FALSE, 0); for (i=1; 1; i*=2) { printf("bsd_init: failed to mount network root, error %d, %s\n", err, PE_boot_args()); printf("We are hanging here...\n"); IOSleep(i*60*1000); } /*NOTREACHED*/ } #endif printf("cannot mount root, errno = %d\n", err); boothowto |= RB_ASKNAME; } IOSecureBSDRoot(rootdevice); context.vc_thread = current_thread(); context.vc_ucred = kernproc->p_ucred; mountlist.tqh_first->mnt_flag |= MNT_ROOTFS; bsd_init_kprintf("calling VFS_ROOT\n"); /* Get the vnode for '/'. Set fdp->fd_fd.fd_cdir to reference it. */ if (VFS_ROOT(mountlist.tqh_first, &rootvnode, &context)) panic("bsd_init: cannot find root vnode: %s", PE_boot_args()); rootvnode->v_flag |= VROOT; (void)vnode_ref(rootvnode); (void)vnode_put(rootvnode); filedesc0.fd_cdir = rootvnode; #if NFSCLIENT if (mountroot == netboot_mountroot) { int err; /* post mount setup */ if ((err = netboot_setup()) != 0) { PE_display_icon( 0, "noroot"); /* XXX a netboot-specific icon would be nicer */ vc_progress_set(FALSE, 0); for (i=1; 1; i*=2) { printf("bsd_init: NetBoot could not find root, error %d: %s\n", err, PE_boot_args()); printf("We are hanging here...\n"); IOSleep(i*60*1000); } /*NOTREACHED*/ } } #endif #if CONFIG_IMAGEBOOT /* * See if a system disk image is present. If so, mount it and * switch the root vnode to point to it */ if(imageboot_needed()) { int err; /* An image was found */ if((err = imageboot_setup())) { /* * this is not fatal. Keep trying to root * off the original media */ printf("%s: imageboot could not find root, %d\n", __FUNCTION__, err); } } #endif /* CONFIG_IMAGEBOOT */ /* set initial time; all other resource data is already zero'ed */ microtime(&kernproc->p_start); kernproc->p_stats->p_start = kernproc->p_start; /* for compat */ #if DEVFS { char mounthere[] = "/dev"; /* !const because of internal casting */ bsd_init_kprintf("calling devfs_kernel_mount\n"); devfs_kernel_mount(mounthere); } #endif /* DEVFS */ /* Initialize signal state for process 0. */ bsd_init_kprintf("calling siginit\n"); siginit(kernproc); bsd_init_kprintf("calling bsd_utaskbootstrap\n"); bsd_utaskbootstrap(); #if defined(__LP64__) kernproc->p_flag |= P_LP64; printf("Kernel is LP64\n"); #endif #if __i386__ || __x86_64__ /* this should be done after the root filesystem is mounted */ error = set_archhandler(kernproc, CPU_TYPE_POWERPC); // 10/30/08 - gab: <rdar://problem/6324501> // if default 'translate' can't be found, see if the understudy is available if (ENOENT == error) { strlcpy(exec_archhandler_ppc.path, kRosettaStandIn_str, MAXPATHLEN); error = set_archhandler(kernproc, CPU_TYPE_POWERPC); } if (error) /* XXX make more generic */ exec_archhandler_ppc.path[0] = 0; #endif bsd_init_kprintf("calling mountroot_post_hook\n"); /* invoke post-root-mount hook */ if (mountroot_post_hook != NULL) mountroot_post_hook(); #if 0 /* not yet */ consider_zone_gc(FALSE); #endif bsd_init_kprintf("done\n"); }
/* * forkproc * * Description: Create a new process structure, given a parent process * structure. * * Parameters: parent_proc The parent process * * Returns: !NULL The new process structure * NULL Error (insufficient free memory) * * Note: When successful, the newly created process structure is * partially initialized; if a caller needs to deconstruct the * returned structure, they must call forkproc_free() to do so. */ proc_t forkproc(proc_t parent_proc) { proc_t child_proc; /* Our new process */ static int nextpid = 0, pidwrap = 0, nextpidversion = 0; int error = 0; struct session *sessp; uthread_t parent_uthread = (uthread_t)get_bsdthread_info(current_thread()); MALLOC_ZONE(child_proc, proc_t , sizeof *child_proc, M_PROC, M_WAITOK); if (child_proc == NULL) { printf("forkproc: M_PROC zone exhausted\n"); goto bad; } /* zero it out as we need to insert in hash */ bzero(child_proc, sizeof *child_proc); MALLOC_ZONE(child_proc->p_stats, struct pstats *, sizeof *child_proc->p_stats, M_PSTATS, M_WAITOK); if (child_proc->p_stats == NULL) { printf("forkproc: M_SUBPROC zone exhausted (p_stats)\n"); FREE_ZONE(child_proc, sizeof *child_proc, M_PROC); child_proc = NULL; goto bad; } MALLOC_ZONE(child_proc->p_sigacts, struct sigacts *, sizeof *child_proc->p_sigacts, M_SIGACTS, M_WAITOK); if (child_proc->p_sigacts == NULL) { printf("forkproc: M_SUBPROC zone exhausted (p_sigacts)\n"); FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS); FREE_ZONE(child_proc, sizeof *child_proc, M_PROC); child_proc = NULL; goto bad; } /* allocate a callout for use by interval timers */ child_proc->p_rcall = thread_call_allocate((thread_call_func_t)realitexpire, child_proc); if (child_proc->p_rcall == NULL) { FREE_ZONE(child_proc->p_sigacts, sizeof *child_proc->p_sigacts, M_SIGACTS); FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS); FREE_ZONE(child_proc, sizeof *child_proc, M_PROC); child_proc = NULL; goto bad; } /* * Find an unused PID. */ proc_list_lock(); nextpid++; retry: /* * If the process ID prototype has wrapped around, * restart somewhat above 0, as the low-numbered procs * tend to include daemons that don't exit. */ if (nextpid >= PID_MAX) { nextpid = 100; pidwrap = 1; } if (pidwrap != 0) { /* if the pid stays in hash both for zombie and runniing state */ if (pfind_locked(nextpid) != PROC_NULL) { nextpid++; goto retry; } if (pgfind_internal(nextpid) != PGRP_NULL) { nextpid++; goto retry; } if (session_find_internal(nextpid) != SESSION_NULL) { nextpid++; goto retry; } } nprocs++; child_proc->p_pid = nextpid; child_proc->p_idversion = nextpidversion++; #if 1 if (child_proc->p_pid != 0) { if (pfind_locked(child_proc->p_pid) != PROC_NULL) panic("proc in the list already\n"); } #endif /* Insert in the hash */ child_proc->p_listflag |= (P_LIST_INHASH | P_LIST_INCREATE); LIST_INSERT_HEAD(PIDHASH(child_proc->p_pid), child_proc, p_hash); proc_list_unlock(); /* * We've identified the PID we are going to use; initialize the new * process structure. */ child_proc->p_stat = SIDL; child_proc->p_pgrpid = PGRPID_DEAD; /* * The zero'ing of the proc was at the allocation time due to need * for insertion to hash. Copy the section that is to be copied * directly from the parent. */ bcopy(&parent_proc->p_startcopy, &child_proc->p_startcopy, (unsigned) ((caddr_t)&child_proc->p_endcopy - (caddr_t)&child_proc->p_startcopy)); /* * Some flags are inherited from the parent. * Duplicate sub-structures as needed. * Increase reference counts on shared objects. * The p_stats and p_sigacts substructs are set in vm_fork. */ child_proc->p_flag = (parent_proc->p_flag & (P_LP64 | P_TRANSLATED | P_AFFINITY)); if (parent_proc->p_flag & P_PROFIL) startprofclock(child_proc); /* * Note that if the current thread has an assumed identity, this * credential will be granted to the new process. */ child_proc->p_ucred = kauth_cred_get_with_ref(); #ifdef CONFIG_EMBEDDED lck_mtx_init(&child_proc->p_mlock, proc_lck_grp, proc_lck_attr); lck_mtx_init(&child_proc->p_fdmlock, proc_lck_grp, proc_lck_attr); #if CONFIG_DTRACE lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr); #endif lck_spin_init(&child_proc->p_slock, proc_lck_grp, proc_lck_attr); #else /* !CONFIG_EMBEDDED */ lck_mtx_init(&child_proc->p_mlock, proc_mlock_grp, proc_lck_attr); lck_mtx_init(&child_proc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr); #if CONFIG_DTRACE lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr); #endif lck_spin_init(&child_proc->p_slock, proc_slock_grp, proc_lck_attr); #endif /* !CONFIG_EMBEDDED */ klist_init(&child_proc->p_klist); if (child_proc->p_textvp != NULLVP) { /* bump references to the text vnode */ /* Need to hold iocount across the ref call */ if (vnode_getwithref(child_proc->p_textvp) == 0) { error = vnode_ref(child_proc->p_textvp); vnode_put(child_proc->p_textvp); if (error != 0) child_proc->p_textvp = NULLVP; } } /* * Copy the parents per process open file table to the child; if * there is a per-thread current working directory, set the childs * per-process current working directory to that instead of the * parents. * * XXX may fail to copy descriptors to child */ child_proc->p_fd = fdcopy(parent_proc, parent_uthread->uu_cdir); #if SYSV_SHM if (parent_proc->vm_shm) { /* XXX may fail to attach shm to child */ (void)shmfork(parent_proc, child_proc); } #endif /* * inherit the limit structure to child */ proc_limitfork(parent_proc, child_proc); if (child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { uint64_t rlim_cur = child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur; child_proc->p_rlim_cpu.tv_sec = (rlim_cur > __INT_MAX__) ? __INT_MAX__ : rlim_cur; } /* Intialize new process stats, including start time */ /* <rdar://6640543> non-zeroed portion contains garbage AFAICT */ bzero(&child_proc->p_stats->pstat_startzero, (unsigned) ((caddr_t)&child_proc->p_stats->pstat_endzero - (caddr_t)&child_proc->p_stats->pstat_startzero)); bzero(&child_proc->p_stats->user_p_prof, sizeof(struct user_uprof)); microtime(&child_proc->p_start); child_proc->p_stats->p_start = child_proc->p_start; /* for compat */ if (parent_proc->p_sigacts != NULL) (void)memcpy(child_proc->p_sigacts, parent_proc->p_sigacts, sizeof *child_proc->p_sigacts); else (void)memset(child_proc->p_sigacts, 0, sizeof *child_proc->p_sigacts); sessp = proc_session(parent_proc); if (sessp->s_ttyvp != NULL && parent_proc->p_flag & P_CONTROLT) OSBitOrAtomic(P_CONTROLT, &child_proc->p_flag); session_rele(sessp); /* * block all signals to reach the process. * no transition race should be occuring with the child yet, * but indicate that the process is in (the creation) transition. */ proc_signalstart(child_proc, 0); proc_transstart(child_proc, 0); child_proc->p_pcaction = (parent_proc->p_pcaction) & P_PCMAX; TAILQ_INIT(&child_proc->p_uthlist); TAILQ_INIT(&child_proc->p_aio_activeq); TAILQ_INIT(&child_proc->p_aio_doneq); /* Inherit the parent flags for code sign */ child_proc->p_csflags = parent_proc->p_csflags; /* * All processes have work queue locks; cleaned up by * reap_child_locked() */ workqueue_init_lock(child_proc); /* * Copy work queue information * * Note: This should probably only happen in the case where we are * creating a child that is a copy of the parent; since this * routine is called in the non-duplication case of vfork() * or posix_spawn(), then this information should likely not * be duplicated. * * <rdar://6640553> Work queue pointers that no longer point to code */ child_proc->p_wqthread = parent_proc->p_wqthread; child_proc->p_threadstart = parent_proc->p_threadstart; child_proc->p_pthsize = parent_proc->p_pthsize; child_proc->p_targconc = parent_proc->p_targconc; if ((parent_proc->p_lflag & P_LREGISTER) != 0) { child_proc->p_lflag |= P_LREGISTER; } child_proc->p_dispatchqueue_offset = parent_proc->p_dispatchqueue_offset; #if PSYNCH pth_proc_hashinit(child_proc); #endif /* PSYNCH */ #if CONFIG_LCTX child_proc->p_lctx = NULL; /* Add new process to login context (if any). */ if (parent_proc->p_lctx != NULL) { /* * <rdar://6640564> This should probably be delayed in the * vfork() or posix_spawn() cases. */ LCTX_LOCK(parent_proc->p_lctx); enterlctx(child_proc, parent_proc->p_lctx, 0); } #endif bad: return(child_proc); }
/* * Mount null layer */ static int nullfs_mount(struct mount * mp, __unused vnode_t devvp, user_addr_t user_data, vfs_context_t ctx) { int error = 0; struct vnode *lowerrootvp = NULL, *vp = NULL; struct vfsstatfs * sp = NULL; struct null_mount * xmp = NULL; char data[MAXPATHLEN]; size_t count; struct vfs_attr vfa; /* set defaults (arbitrary since this file system is readonly) */ uint32_t bsize = BLKDEV_IOSIZE; size_t iosize = BLKDEV_IOSIZE; uint64_t blocks = 4711 * 4711; uint64_t bfree = 0; uint64_t bavail = 0; uint64_t bused = 4711; uint64_t files = 4711; uint64_t ffree = 0; kauth_cred_t cred = vfs_context_ucred(ctx); NULLFSDEBUG("nullfs_mount(mp = %p) %llx\n", (void *)mp, vfs_flags(mp)); if (vfs_flags(mp) & MNT_ROOTFS) return (EOPNOTSUPP); /* * Update is a no-op */ if (vfs_isupdate(mp)) { return ENOTSUP; } /* check entitlement */ if (!IOTaskHasEntitlement(current_task(), NULLFS_ENTITLEMENT)) { return EPERM; } /* * Get argument */ error = copyinstr(user_data, data, MAXPATHLEN - 1, &count); if (error) { NULLFSDEBUG("nullfs: error copying data form user %d\n", error); goto error; } /* This could happen if the system is configured for 32 bit inodes instead of * 64 bit */ if (count > MAX_MNT_FROM_LENGTH) { error = EINVAL; NULLFSDEBUG("nullfs: path to translocate too large for this system %d vs %d\n", count, MAX_MNT_FROM_LENGTH); goto error; } error = vnode_lookup(data, 0, &lowerrootvp, ctx); if (error) { NULLFSDEBUG("lookup %s -> %d\n", data, error); goto error; } /* lowervrootvp has an iocount after vnode_lookup, drop that for a usecount. Keep this to signal what we want to keep around the thing we are mirroring. Drop it in unmount.*/ error = vnode_ref(lowerrootvp); vnode_put(lowerrootvp); if (error) { // If vnode_ref failed, then null it out so it can't be used anymore in cleanup. lowerrootvp = NULL; goto error; } NULLFSDEBUG("mount %s\n", data); MALLOC(xmp, struct null_mount *, sizeof(*xmp), M_TEMP, M_WAITOK | M_ZERO); if (xmp == NULL) { error = ENOMEM; goto error; } /* * Save reference to underlying FS */ xmp->nullm_lowerrootvp = lowerrootvp; xmp->nullm_lowerrootvid = vnode_vid(lowerrootvp); error = null_getnewvnode(mp, NULL, NULL, &vp, NULL, 1); if (error) { goto error; } /* vp has an iocount on it from vnode_create. drop that for a usecount. This * is our root vnode so we drop the ref in unmount * * Assuming for now that because we created this vnode and we aren't finished mounting we can get a ref*/ vnode_ref(vp); vnode_put(vp); error = nullfs_init_lck(&xmp->nullm_lock); if (error) { goto error; } xmp->nullm_rootvp = vp; /* read the flags the user set, but then ignore some of them, we will only allow them if they are set on the lower file system */ uint64_t flags = vfs_flags(mp) & (~(MNT_IGNORE_OWNERSHIP | MNT_LOCAL)); uint64_t lowerflags = vfs_flags(vnode_mount(lowerrootvp)) & (MNT_LOCAL | MNT_QUARANTINE | MNT_IGNORE_OWNERSHIP | MNT_NOEXEC); if (lowerflags) { flags |= lowerflags; } /* force these flags */ flags |= (MNT_DONTBROWSE | MNT_MULTILABEL | MNT_NOSUID | MNT_RDONLY); vfs_setflags(mp, flags); vfs_setfsprivate(mp, xmp); vfs_getnewfsid(mp); vfs_setlocklocal(mp); /* fill in the stat block */ sp = vfs_statfs(mp); strlcpy(sp->f_mntfromname, data, MAX_MNT_FROM_LENGTH); sp->f_flags = flags; xmp->nullm_flags = NULLM_CASEINSENSITIVE; /* default to case insensitive */ error = nullfs_vfs_getlowerattr(vnode_mount(lowerrootvp), &vfa, ctx); if (error == 0) { if (VFSATTR_IS_SUPPORTED(&vfa, f_bsize)) { bsize = vfa.f_bsize; } if (VFSATTR_IS_SUPPORTED(&vfa, f_iosize)) { iosize = vfa.f_iosize; } if (VFSATTR_IS_SUPPORTED(&vfa, f_blocks)) { blocks = vfa.f_blocks; } if (VFSATTR_IS_SUPPORTED(&vfa, f_bfree)) { bfree = vfa.f_bfree; } if (VFSATTR_IS_SUPPORTED(&vfa, f_bavail)) { bavail = vfa.f_bavail; } if (VFSATTR_IS_SUPPORTED(&vfa, f_bused)) { bused = vfa.f_bused; } if (VFSATTR_IS_SUPPORTED(&vfa, f_files)) { files = vfa.f_files; } if (VFSATTR_IS_SUPPORTED(&vfa, f_ffree)) { ffree = vfa.f_ffree; } if (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) { if ((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & (VOL_CAP_FMT_CASE_SENSITIVE)) && (vfa.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & (VOL_CAP_FMT_CASE_SENSITIVE))) { xmp->nullm_flags &= ~NULLM_CASEINSENSITIVE; } } } else { goto error; } sp->f_bsize = bsize; sp->f_iosize = iosize; sp->f_blocks = blocks; sp->f_bfree = bfree; sp->f_bavail = bavail; sp->f_bused = bused; sp->f_files = files; sp->f_ffree = ffree; /* Associate the mac label information from the mirrored filesystem with the * mirror */ MAC_PERFORM(mount_label_associate, cred, vnode_mount(lowerrootvp), vfs_mntlabel(mp)); NULLFSDEBUG("nullfs_mount: lower %s, alias at %s\n", sp->f_mntfromname, sp->f_mntonname); return (0); error: if (xmp) { FREE(xmp, M_TEMP); } if (lowerrootvp) { vnode_getwithref(lowerrootvp); vnode_rele(lowerrootvp); vnode_put(lowerrootvp); } if (vp) { /* we made the root vnode but the mount is failed, so clean it up */ vnode_getwithref(vp); vnode_rele(vp); /* give vp back */ vnode_recycle(vp); vnode_put(vp); } return error; }
static errno_t fuse_vfsop_mount(mount_t mp, __unused vnode_t devvp, user_addr_t udata, vfs_context_t context) { int err = 0; int mntopts = 0; bool mounted = false; uint32_t max_read = ~0; size_t len; fuse_device_t fdev = NULL; struct fuse_data *data = NULL; fuse_mount_args fusefs_args; struct vfsstatfs *vfsstatfsp = vfs_statfs(mp); #if M_FUSE4X_ENABLE_BIGLOCK lck_mtx_t *biglock; #endif fuse_trace_printf_vfsop(); if (vfs_isupdate(mp)) { return ENOTSUP; } err = copyin(udata, &fusefs_args, sizeof(fusefs_args)); if (err) { return EINVAL; } /* * Interesting flags that we can receive from mount or may want to * otherwise forcibly set include: * * MNT_ASYNC * MNT_AUTOMOUNTED * MNT_DEFWRITE * MNT_DONTBROWSE * MNT_IGNORE_OWNERSHIP * MNT_JOURNALED * MNT_NODEV * MNT_NOEXEC * MNT_NOSUID * MNT_NOUSERXATTR * MNT_RDONLY * MNT_SYNCHRONOUS * MNT_UNION */ err = ENOTSUP; #if M_FUSE4X_ENABLE_UNSUPPORTED vfs_setlocklocal(mp); #endif /* M_FUSE4X_ENABLE_UNSUPPORTED */ /** Option Processing. **/ if (*fusefs_args.fstypename) { size_t typenamelen = strlen(fusefs_args.fstypename); if (typenamelen > FUSE_FSTYPENAME_MAXLEN) { return EINVAL; } snprintf(vfsstatfsp->f_fstypename, MFSTYPENAMELEN, "%s%s", FUSE_FSTYPENAME_PREFIX, fusefs_args.fstypename); } if (!*fusefs_args.fsname) return EINVAL; if ((fusefs_args.daemon_timeout > FUSE_MAX_DAEMON_TIMEOUT) || (fusefs_args.daemon_timeout < FUSE_MIN_DAEMON_TIMEOUT)) { return EINVAL; } if ((fusefs_args.init_timeout > FUSE_MAX_INIT_TIMEOUT) || (fusefs_args.init_timeout < FUSE_MIN_INIT_TIMEOUT)) { return EINVAL; } if (fusefs_args.altflags & FUSE_MOPT_SPARSE) { mntopts |= FSESS_SPARSE; } if (fusefs_args.altflags & FUSE_MOPT_AUTO_CACHE) { mntopts |= FSESS_AUTO_CACHE; } if (fusefs_args.altflags & FUSE_MOPT_AUTO_XATTR) { if (fusefs_args.altflags & FUSE_MOPT_NATIVE_XATTR) { return EINVAL; } mntopts |= FSESS_AUTO_XATTR; } else if (fusefs_args.altflags & FUSE_MOPT_NATIVE_XATTR) { mntopts |= FSESS_NATIVE_XATTR; } if (fusefs_args.altflags & FUSE_MOPT_JAIL_SYMLINKS) { mntopts |= FSESS_JAIL_SYMLINKS; } /* * Note that unlike Linux, which keeps allow_root in user-space and * passes allow_other in that case to the kernel, we let allow_root * reach the kernel. The 'if' ordering is important here. */ if (fusefs_args.altflags & FUSE_MOPT_ALLOW_ROOT) { int is_member = 0; if ((kauth_cred_ismember_gid(kauth_cred_get(), fuse_admin_group, &is_member) != 0) || !is_member) { log("fuse4x: caller is not a member of fuse4x admin group. " "Either add user (id=%d) to group (id=%d), " "or set correct '" SYSCTL_FUSE4X_TUNABLES_ADMIN "' sysctl value.\n", kauth_cred_getuid(kauth_cred_get()), fuse_admin_group); return EPERM; } mntopts |= FSESS_ALLOW_ROOT; } else if (fusefs_args.altflags & FUSE_MOPT_ALLOW_OTHER) { if (!fuse_allow_other && !fuse_vfs_context_issuser(context)) { int is_member = 0; if ((kauth_cred_ismember_gid(kauth_cred_get(), fuse_admin_group, &is_member) != 0) || !is_member) { log("fuse4x: caller is not a member of fuse4x admin group. " "Either add user (id=%d) to group (id=%d), " "or set correct '" SYSCTL_FUSE4X_TUNABLES_ADMIN "' sysctl value.\n", kauth_cred_getuid(kauth_cred_get()), fuse_admin_group); return EPERM; } } mntopts |= FSESS_ALLOW_OTHER; } if (fusefs_args.altflags & FUSE_MOPT_NO_APPLEDOUBLE) { mntopts |= FSESS_NO_APPLEDOUBLE; } if (fusefs_args.altflags & FUSE_MOPT_NO_APPLEXATTR) { mntopts |= FSESS_NO_APPLEXATTR; } if ((fusefs_args.altflags & FUSE_MOPT_FSID) && (fusefs_args.fsid != 0)) { fsid_t fsid; mount_t other_mp; uint32_t target_dev; target_dev = FUSE_MAKEDEV(FUSE_CUSTOM_FSID_DEVICE_MAJOR, fusefs_args.fsid); fsid.val[0] = target_dev; fsid.val[1] = FUSE_CUSTOM_FSID_VAL1; other_mp = vfs_getvfs(&fsid); if (other_mp != NULL) { return EPERM; } vfsstatfsp->f_fsid.val[0] = target_dev; vfsstatfsp->f_fsid.val[1] = FUSE_CUSTOM_FSID_VAL1; } else { vfs_getnewfsid(mp); } if (fusefs_args.altflags & FUSE_MOPT_NO_ATTRCACHE) { mntopts |= FSESS_NO_ATTRCACHE; } if (fusefs_args.altflags & FUSE_MOPT_NO_READAHEAD) { mntopts |= FSESS_NO_READAHEAD; } if (fusefs_args.altflags & (FUSE_MOPT_NO_UBC | FUSE_MOPT_DIRECT_IO)) { mntopts |= FSESS_NO_UBC; } if (fusefs_args.altflags & FUSE_MOPT_NO_VNCACHE) { mntopts |= FSESS_NO_VNCACHE; } if (fusefs_args.altflags & FUSE_MOPT_NEGATIVE_VNCACHE) { if (mntopts & FSESS_NO_VNCACHE) { return EINVAL; } mntopts |= FSESS_NEGATIVE_VNCACHE; } if (fusefs_args.altflags & FUSE_MOPT_NO_SYNCWRITES) { /* Cannot mix 'nosyncwrites' with 'noubc' or 'noreadahead'. */ if (mntopts & (FSESS_NO_READAHEAD | FSESS_NO_UBC)) { log("fuse4x: cannot mix 'nosyncwrites' with 'noubc' or 'noreadahead'\n"); return EINVAL; } mntopts |= FSESS_NO_SYNCWRITES; vfs_clearflags(mp, MNT_SYNCHRONOUS); vfs_setflags(mp, MNT_ASYNC); /* We check for this only if we have nosyncwrites in the first place. */ if (fusefs_args.altflags & FUSE_MOPT_NO_SYNCONCLOSE) { mntopts |= FSESS_NO_SYNCONCLOSE; } } else { vfs_clearflags(mp, MNT_ASYNC); vfs_setflags(mp, MNT_SYNCHRONOUS); } if (mntopts & FSESS_NO_UBC) { /* If no buffer cache, disallow exec from file system. */ vfs_setflags(mp, MNT_NOEXEC); } vfs_setauthopaque(mp); vfs_setauthopaqueaccess(mp); if ((fusefs_args.altflags & FUSE_MOPT_DEFAULT_PERMISSIONS) && (fusefs_args.altflags & FUSE_MOPT_DEFER_PERMISSIONS)) { return EINVAL; } if (fusefs_args.altflags & FUSE_MOPT_DEFAULT_PERMISSIONS) { mntopts |= FSESS_DEFAULT_PERMISSIONS; vfs_clearauthopaque(mp); } if (fusefs_args.altflags & FUSE_MOPT_DEFER_PERMISSIONS) { mntopts |= FSESS_DEFER_PERMISSIONS; } if (fusefs_args.altflags & FUSE_MOPT_EXTENDED_SECURITY) { mntopts |= FSESS_EXTENDED_SECURITY; vfs_setextendedsecurity(mp); } if (fusefs_args.altflags & FUSE_MOPT_LOCALVOL) { vfs_setflags(mp, MNT_LOCAL); } /* done checking incoming option bits */ err = 0; vfs_setfsprivate(mp, NULL); fdev = fuse_device_get(fusefs_args.rdev); if (!fdev) { log("fuse4x: invalid device file (number=%d)\n", fusefs_args.rdev); return EINVAL; } fuse_lck_mtx_lock(fdev->mtx); data = fdev->data; if (!data) { fuse_lck_mtx_unlock(fdev->mtx); return ENXIO; } #if M_FUSE4X_ENABLE_BIGLOCK biglock = data->biglock; fuse_biglock_lock(biglock); #endif if (data->dataflags & FSESS_MOUNTED) { #if M_FUSE4X_ENABLE_BIGLOCK fuse_biglock_unlock(biglock); #endif fuse_lck_mtx_unlock(fdev->mtx); return EALREADY; } if (!(data->dataflags & FSESS_OPENED)) { fuse_lck_mtx_unlock(fdev->mtx); err = ENXIO; goto out; } data->dataflags |= FSESS_MOUNTED; OSAddAtomic(1, (SInt32 *)&fuse_mount_count); mounted = true; if (fdata_dead_get(data)) { fuse_lck_mtx_unlock(fdev->mtx); err = ENOTCONN; goto out; } if (!data->daemoncred) { panic("fuse4x: daemon found but identity unknown"); } if (fuse_vfs_context_issuser(context) && kauth_cred_getuid(vfs_context_ucred(context)) != kauth_cred_getuid(data->daemoncred)) { fuse_lck_mtx_unlock(fdev->mtx); err = EPERM; log("fuse4x: fuse daemon running by user_id=%d does not have privileges to mount on directory %s owned by user_id=%d\n", kauth_cred_getuid(data->daemoncred), vfsstatfsp->f_mntonname, kauth_cred_getuid(vfs_context_ucred(context))); goto out; } data->mp = mp; data->fdev = fdev; data->dataflags |= mntopts; data->daemon_timeout.tv_sec = fusefs_args.daemon_timeout; data->daemon_timeout.tv_nsec = 0; if (data->daemon_timeout.tv_sec) { data->daemon_timeout_p = &(data->daemon_timeout); } else { data->daemon_timeout_p = NULL; } data->init_timeout.tv_sec = fusefs_args.init_timeout; data->init_timeout.tv_nsec = 0; data->max_read = max_read; data->fssubtype = fusefs_args.fssubtype; data->mountaltflags = fusefs_args.altflags; data->noimplflags = (uint64_t)0; data->blocksize = fuse_round_size(fusefs_args.blocksize, FUSE_MIN_BLOCKSIZE, FUSE_MAX_BLOCKSIZE); data->iosize = fuse_round_size(fusefs_args.iosize, FUSE_MIN_IOSIZE, FUSE_MAX_IOSIZE); if (data->iosize < data->blocksize) { data->iosize = data->blocksize; } data->userkernel_bufsize = FUSE_DEFAULT_USERKERNEL_BUFSIZE; copystr(fusefs_args.fsname, vfsstatfsp->f_mntfromname, MNAMELEN - 1, &len); bzero(vfsstatfsp->f_mntfromname + len, MNAMELEN - len); copystr(fusefs_args.volname, data->volname, MAXPATHLEN - 1, &len); bzero(data->volname + len, MAXPATHLEN - len); /* previous location of vfs_setioattr() */ vfs_setfsprivate(mp, data); fuse_lck_mtx_unlock(fdev->mtx); /* Send a handshake message to the daemon. */ fuse_send_init(data, context); struct vfs_attr vfs_attr; VFSATTR_INIT(&vfs_attr); // Our vfs_getattr() doesn't look at most *_IS_ACTIVE()'s err = fuse_vfsop_getattr(mp, &vfs_attr, context); if (!err) { vfsstatfsp->f_bsize = vfs_attr.f_bsize; vfsstatfsp->f_iosize = vfs_attr.f_iosize; vfsstatfsp->f_blocks = vfs_attr.f_blocks; vfsstatfsp->f_bfree = vfs_attr.f_bfree; vfsstatfsp->f_bavail = vfs_attr.f_bavail; vfsstatfsp->f_bused = vfs_attr.f_bused; vfsstatfsp->f_files = vfs_attr.f_files; vfsstatfsp->f_ffree = vfs_attr.f_ffree; // vfsstatfsp->f_fsid already handled above vfsstatfsp->f_owner = kauth_cred_getuid(data->daemoncred); vfsstatfsp->f_flags = vfs_flags(mp); // vfsstatfsp->f_fstypename already handled above // vfsstatfsp->f_mntonname handled elsewhere // vfsstatfsp->f_mnfromname already handled above vfsstatfsp->f_fssubtype = data->fssubtype; } if (fusefs_args.altflags & FUSE_MOPT_BLOCKSIZE) { vfsstatfsp->f_bsize = data->blocksize; } else { //data->blocksize = vfsstatfsp->f_bsize; } if (fusefs_args.altflags & FUSE_MOPT_IOSIZE) { vfsstatfsp->f_iosize = data->iosize; } else { //data->iosize = (uint32_t)vfsstatfsp->f_iosize; vfsstatfsp->f_iosize = data->iosize; } out: if (err) { vfs_setfsprivate(mp, NULL); fuse_lck_mtx_lock(fdev->mtx); data = fdev->data; /* again */ if (mounted) { OSAddAtomic(-1, (SInt32 *)&fuse_mount_count); } if (data) { data->dataflags &= ~FSESS_MOUNTED; if (!(data->dataflags & FSESS_OPENED)) { #if M_FUSE4X_ENABLE_BIGLOCK assert(biglock == data->biglock); fuse_biglock_unlock(biglock); #endif fuse_device_close_final(fdev); /* data is gone now */ } } fuse_lck_mtx_unlock(fdev->mtx); } else { vnode_t fuse_rootvp = NULLVP; err = fuse_vfsop_root(mp, &fuse_rootvp, context); if (err) { goto out; /* go back and follow error path */ } err = vnode_ref(fuse_rootvp); (void)vnode_put(fuse_rootvp); if (err) { goto out; /* go back and follow error path */ } else { struct vfsioattr ioattr; vfs_ioattr(mp, &ioattr); ioattr.io_devblocksize = data->blocksize; vfs_setioattr(mp, &ioattr); } } #if M_FUSE4X_ENABLE_BIGLOCK fuse_lck_mtx_lock(fdev->mtx); data = fdev->data; /* ...and again */ if(data) { assert(data->biglock == biglock); fuse_biglock_unlock(biglock); } fuse_lck_mtx_unlock(fdev->mtx); #endif return err; }
int cttyopen(dev_t dev, int flag, __unused int mode, proc_t p) { vnode_t ttyvp = cttyvp(p); struct vfs_context context; int error = 0; int cttyflag, doclose = 0; struct session *sessp; if (ttyvp == NULL) return (ENXIO); context.vc_thread = current_thread(); context.vc_ucred = kauth_cred_proc_ref(p); sessp = proc_session(p); session_lock(sessp); cttyflag = sessp->s_flags & S_CTTYREF; session_unlock(sessp); /* * A little hack--this device, used by many processes, * happens to do an open on another device, which can * cause unhappiness if the second-level open blocks indefinitely * (as could be the case if the master side has hung up). Since * we know that this driver doesn't care about the serializing * opens and closes, we can drop the lock. To avoid opencount leak, * open the vnode only for the first time. */ if (cttyflag == 0) { devsw_unlock(dev, S_IFCHR); error = VNOP_OPEN(ttyvp, flag, &context); devsw_lock(dev, S_IFCHR); if (error) goto out; /* * If S_CTTYREF is set, some other thread did an open * and was able to set the flag, now perform a close, else * set the flag. */ session_lock(sessp); if (cttyflag == (sessp->s_flags & S_CTTYREF)) sessp->s_flags |= S_CTTYREF; else doclose = 1; session_unlock(sessp); /* * We have to take a reference here to make sure a close * gets called during revoke. Note that once a controlling * tty gets opened by this driver, the only way close will * get called is when the session leader , whose controlling * tty is ttyvp, exits and vnode is revoked. We cannot * redirect close from this driver because underlying controlling * terminal might change and close may get redirected to a * wrong vnode causing panic. */ if (doclose) { devsw_unlock(dev, S_IFCHR); VNOP_CLOSE(ttyvp, flag, &context); devsw_lock(dev, S_IFCHR); } else { error = vnode_ref(ttyvp); } } out: session_rele(sessp); vnode_put(ttyvp); kauth_cred_unref(&context.vc_ucred); return (error); }
/** * Reset volume name to volume id mapping cache. * @param flags */ void afs_CheckVolumeNames(int flags) { afs_int32 i, j; struct volume *tv; unsigned int now; struct vcache *tvc; afs_int32 *volumeID, *cellID, vsize, nvols; #ifdef AFS_DARWIN80_ENV vnode_t tvp; #endif AFS_STATCNT(afs_CheckVolumeNames); nvols = 0; volumeID = cellID = NULL; vsize = 0; ObtainReadLock(&afs_xvolume); if (flags & AFS_VOLCHECK_EXPIRED) { /* * allocate space to hold the volumeIDs and cellIDs, only if * we will be invalidating the mountpoints later on */ for (i = 0; i < NVOLS; i++) for (tv = afs_volumes[i]; tv; tv = tv->next) ++vsize; volumeID = afs_osi_Alloc(2 * vsize * sizeof(*volumeID)); cellID = (volumeID) ? volumeID + vsize : 0; } now = osi_Time(); for (i = 0; i < NVOLS; i++) { for (tv = afs_volumes[i]; tv; tv = tv->next) { if (flags & AFS_VOLCHECK_EXPIRED) { if (((tv->expireTime < (now + 10)) && (tv->states & VRO)) || (flags & AFS_VOLCHECK_FORCE)) { afs_ResetVolumeInfo(tv); /* also resets status */ if (volumeID) { volumeID[nvols] = tv->volume; cellID[nvols] = tv->cell; } ++nvols; continue; } } /* ??? */ if (flags & (AFS_VOLCHECK_BUSY | AFS_VOLCHECK_FORCE)) { for (j = 0; j < AFS_MAXHOSTS; j++) tv->status[j] = not_busy; } } } ReleaseReadLock(&afs_xvolume); /* next ensure all mt points are re-evaluated */ if (nvols || (flags & (AFS_VOLCHECK_FORCE | AFS_VOLCHECK_MTPTS))) { loop: ObtainReadLock(&afs_xvcache); for (i = 0; i < VCSIZE; i++) { for (tvc = afs_vhashT[i]; tvc; tvc = tvc->hnext) { /* if the volume of "mvid" of the vcache entry is among the * ones we found earlier, then we re-evaluate it. Also, if the * force bit is set or we explicitly asked to reevaluate the * mt-pts, we clean the cmvalid bit */ if ((flags & (AFS_VOLCHECK_FORCE | AFS_VOLCHECK_MTPTS)) || (tvc->mvid && inVolList(tvc->mvid, nvols, volumeID, cellID))) tvc->f.states &= ~CMValid; /* If the volume that this file belongs to was reset earlier, * then we should remove its callback. * Again, if forced, always do it. */ if ((tvc->f.states & CRO) && (inVolList(&tvc->f.fid, nvols, volumeID, cellID) || (flags & AFS_VOLCHECK_FORCE))) { if (tvc->f.states & CVInit) { ReleaseReadLock(&afs_xvcache); afs_osi_Sleep(&tvc->f.states); goto loop; } #ifdef AFS_DARWIN80_ENV if (tvc->f.states & CDeadVnode) { ReleaseReadLock(&afs_xvcache); afs_osi_Sleep(&tvc->f.states); goto loop; } tvp = AFSTOV(tvc); if (vnode_get(tvp)) continue; if (vnode_ref(tvp)) { AFS_GUNLOCK(); /* AFSTOV(tvc) may be NULL */ vnode_put(tvp); AFS_GLOCK(); continue; } #else AFS_FAST_HOLD(tvc); #endif ReleaseReadLock(&afs_xvcache); ObtainWriteLock(&afs_xcbhash, 485); /* LOCKXXX: We aren't holding tvc write lock? */ afs_DequeueCallback(tvc); tvc->f.states &= ~CStatd; ReleaseWriteLock(&afs_xcbhash); if (tvc->f.fid.Fid.Vnode & 1 || (vType(tvc) == VDIR)) osi_dnlc_purgedp(tvc); #ifdef AFS_DARWIN80_ENV vnode_put(AFSTOV(tvc)); /* our tvc ptr is still good until now */ AFS_FAST_RELE(tvc); ObtainReadLock(&afs_xvcache); #else ObtainReadLock(&afs_xvcache); /* our tvc ptr is still good until now */ AFS_FAST_RELE(tvc); #endif } } } osi_dnlc_purge(); /* definitely overkill, but it's safer this way. */ ReleaseReadLock(&afs_xvcache); } if (volumeID) afs_osi_Free(volumeID, 2 * vsize * sizeof(*volumeID)); } /*afs_CheckVolumeNames */
static int ClearCallBack(struct rx_connection *a_conn, struct AFSFid *a_fid) { struct vcache *tvc; int i; struct VenusFid localFid; struct volume *tv; #ifdef AFS_DARWIN80_ENV vnode_t vp; #endif AFS_STATCNT(ClearCallBack); AFS_ASSERT_GLOCK(); /* * XXXX Don't hold any server locks here because of callback protocol XXX */ localFid.Cell = 0; localFid.Fid.Volume = a_fid->Volume; localFid.Fid.Vnode = a_fid->Vnode; localFid.Fid.Unique = a_fid->Unique; /* * Volume ID of zero means don't do anything. */ if (a_fid->Volume != 0) { if (a_fid->Vnode == 0) { struct afs_q *tq, *uq; /* * Clear callback for the whole volume. Zip through the * hash chain, nullifying entries whose volume ID matches. */ loop1: ObtainReadLock(&afs_xvcache); i = VCHashV(&localFid); for (tq = afs_vhashTV[i].prev; tq != &afs_vhashTV[i]; tq = uq) { uq = QPrev(tq); tvc = QTOVH(tq); if (tvc->f.fid.Fid.Volume == a_fid->Volume) { tvc->callback = NULL; if (!localFid.Cell) localFid.Cell = tvc->f.fid.Cell; tvc->dchint = NULL; /* invalidate hints */ if (tvc->f.states & CVInit) { ReleaseReadLock(&afs_xvcache); afs_osi_Sleep(&tvc->f.states); goto loop1; } #if defined(AFS_SGI_ENV) || defined(AFS_SUN5_ENV) || defined(AFS_HPUX_ENV) || defined(AFS_LINUX20_ENV) AFS_FAST_HOLD(tvc); #else #ifdef AFS_DARWIN80_ENV if (tvc->f.states & CDeadVnode) { if (!(tvc->f.states & CBulkFetching)) { ReleaseReadLock(&afs_xvcache); afs_osi_Sleep(&tvc->f.states); goto loop1; } } vp = AFSTOV(tvc); if (vnode_get(vp)) continue; if (vnode_ref(vp)) { AFS_GUNLOCK(); vnode_put(vp); AFS_GLOCK(); continue; } if (tvc->f.states & (CBulkFetching|CDeadVnode)) { AFS_GUNLOCK(); vnode_recycle(AFSTOV(tvc)); AFS_GLOCK(); } #else AFS_FAST_HOLD(tvc); #endif #endif ReleaseReadLock(&afs_xvcache); ObtainWriteLock(&afs_xcbhash, 449); afs_DequeueCallback(tvc); tvc->f.states &= ~(CStatd | CUnique | CBulkFetching); afs_allCBs++; if (tvc->f.fid.Fid.Vnode & 1) afs_oddCBs++; else afs_evenCBs++; ReleaseWriteLock(&afs_xcbhash); if ((tvc->f.fid.Fid.Vnode & 1 || (vType(tvc) == VDIR))) osi_dnlc_purgedp(tvc); afs_Trace3(afs_iclSetp, CM_TRACE_CALLBACK, ICL_TYPE_POINTER, tvc, ICL_TYPE_INT32, tvc->f.states, ICL_TYPE_INT32, a_fid->Volume); #ifdef AFS_DARWIN80_ENV vnode_put(AFSTOV(tvc)); #endif ObtainReadLock(&afs_xvcache); uq = QPrev(tq); AFS_FAST_RELE(tvc); } else if ((tvc->f.states & CMValid) && (tvc->mvid->Fid.Volume == a_fid->Volume)) { tvc->f.states &= ~CMValid; if (!localFid.Cell) localFid.Cell = tvc->mvid->Cell; } } ReleaseReadLock(&afs_xvcache); /* * XXXX Don't hold any locks here XXXX */ tv = afs_FindVolume(&localFid, 0); if (tv) { afs_ResetVolumeInfo(tv); afs_PutVolume(tv, 0); /* invalidate mtpoint? */ } } /*Clear callbacks for whole volume */ else { /* * Clear callbacks just for the one file. */ struct vcache *uvc; afs_allCBs++; if (a_fid->Vnode & 1) afs_oddCBs++; /*Could do this on volume basis, too */ else afs_evenCBs++; /*A particular fid was specified */ loop2: ObtainReadLock(&afs_xvcache); i = VCHash(&localFid); for (tvc = afs_vhashT[i]; tvc; tvc = uvc) { uvc = tvc->hnext; if (tvc->f.fid.Fid.Vnode == a_fid->Vnode && tvc->f.fid.Fid.Volume == a_fid->Volume && tvc->f.fid.Fid.Unique == a_fid->Unique) { tvc->callback = NULL; tvc->dchint = NULL; /* invalidate hints */ if (tvc->f.states & CVInit) { ReleaseReadLock(&afs_xvcache); afs_osi_Sleep(&tvc->f.states); goto loop2; } #if defined(AFS_SGI_ENV) || defined(AFS_SUN5_ENV) || defined(AFS_HPUX_ENV) || defined(AFS_LINUX20_ENV) AFS_FAST_HOLD(tvc); #else #ifdef AFS_DARWIN80_ENV if (tvc->f.states & CDeadVnode) { if (!(tvc->f.states & CBulkFetching)) { ReleaseReadLock(&afs_xvcache); afs_osi_Sleep(&tvc->f.states); goto loop2; } } vp = AFSTOV(tvc); if (vnode_get(vp)) continue; if (vnode_ref(vp)) { AFS_GUNLOCK(); vnode_put(vp); AFS_GLOCK(); continue; } if (tvc->f.states & (CBulkFetching|CDeadVnode)) { AFS_GUNLOCK(); vnode_recycle(AFSTOV(tvc)); AFS_GLOCK(); } #else AFS_FAST_HOLD(tvc); #endif #endif ReleaseReadLock(&afs_xvcache); ObtainWriteLock(&afs_xcbhash, 450); afs_DequeueCallback(tvc); tvc->f.states &= ~(CStatd | CUnique | CBulkFetching); ReleaseWriteLock(&afs_xcbhash); if ((tvc->f.fid.Fid.Vnode & 1 || (vType(tvc) == VDIR))) osi_dnlc_purgedp(tvc); afs_Trace3(afs_iclSetp, CM_TRACE_CALLBACK, ICL_TYPE_POINTER, tvc, ICL_TYPE_INT32, tvc->f.states, ICL_TYPE_LONG, 0); #ifdef CBDEBUG lastCallBack_vnode = afid->Vnode; lastCallBack_dv = tvc->mstat.DataVersion.low; osi_GetuTime(&lastCallBack_time); #endif /* CBDEBUG */ #ifdef AFS_DARWIN80_ENV vnode_put(AFSTOV(tvc)); #endif ObtainReadLock(&afs_xvcache); uvc = tvc->hnext; AFS_FAST_RELE(tvc); } } /*Walk through hash table */ ReleaseReadLock(&afs_xvcache); } /*Clear callbacks for one file */ } /*Fid has non-zero volume ID */ /* * Always return a predictable value. */ return (0); } /*ClearCallBack */