/*
 * Query domain table for a given domain.
 *
 * If domain isn't found and addok is set, it is added to AVL trees and
 * the zsb->z_fuid_dirty flag will be set to TRUE.  It will then be
 * necessary for the caller or another thread to detect the dirty table
 * and sync out the changes.
 */
int
zfs_fuid_find_by_domain(zfs_sb_t *zsb, const char *domain,
    char **retdomain, boolean_t addok)
{
	fuid_domain_t searchnode, *findnode;
	avl_index_t loc;
	krw_t rw = RW_READER;

	/*
	 * If the dummy "nobody" domain then return an index of 0
	 * to cause the created FUID to be a standard POSIX id
	 * for the user nobody.
	 */
	if (domain[0] == '\0') {
		if (retdomain)
			*retdomain = nulldomain;
		return (0);
	}

	searchnode.f_ksid = ksid_lookupdomain(domain);
	if (retdomain)
		*retdomain = searchnode.f_ksid->kd_name;
	if (!zsb->z_fuid_loaded)
		zfs_fuid_init(zsb);

retry:
	rw_enter(&zsb->z_fuid_lock, rw);
	findnode = avl_find(&zsb->z_fuid_domain, &searchnode, &loc);

	if (findnode) {
		rw_exit(&zsb->z_fuid_lock);
		ksiddomain_rele(searchnode.f_ksid);
		return (findnode->f_idx);
	} else if (addok) {
		fuid_domain_t *domnode;
		uint64_t retidx;

		if (rw == RW_READER && !rw_tryupgrade(&zsb->z_fuid_lock)) {
			rw_exit(&zsb->z_fuid_lock);
			rw = RW_WRITER;
			goto retry;
		}

		domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP);
		domnode->f_ksid = searchnode.f_ksid;

		retidx = domnode->f_idx = avl_numnodes(&zsb->z_fuid_idx) + 1;

		avl_add(&zsb->z_fuid_domain, domnode);
		avl_add(&zsb->z_fuid_idx, domnode);
		zsb->z_fuid_dirty = B_TRUE;
		rw_exit(&zsb->z_fuid_lock);
		return (retidx);
	} else {
		rw_exit(&zsb->z_fuid_lock);
		return (-1);
	}
}
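/*
 * zfs_fuid_find_by_domain() above uses the "retry with a writer lock"
 * shape: the lock mode lives in a krw_t variable, the lookup runs under a
 * reader lock first, and when rw_tryupgrade() loses the race the whole
 * lookup is simply repeated under a writer lock.  The sketch below is a
 * minimal, hypothetical distillation of that shape using the illumos
 * krwlock_t interfaces; the table_*() routines are placeholders, not
 * functions from the code above.
 */
#include <sys/types.h>
#include <sys/rwlock.h>

extern void *table_find(void *table, const void *key);		/* placeholder */
extern void *table_insert(void *table, const void *key);	/* placeholder */
extern uint64_t table_index(const void *node);			/* placeholder */

static uint64_t
lookup_or_insert(krwlock_t *lock, void *table, const void *key)
{
	krw_t rw = RW_READER;
	void *node;
	uint64_t idx;

retry:
	rw_enter(lock, rw);
	node = table_find(table, key);
	if (node == NULL) {
		if (rw == RW_READER && !rw_tryupgrade(lock)) {
			/*
			 * Lost the upgrade race: drop the reader lock and
			 * redo the lookup under a writer lock, since the
			 * table may change while no lock is held.
			 */
			rw_exit(lock);
			rw = RW_WRITER;
			goto retry;
		}
		node = table_insert(table, key);
	}
	idx = table_index(node);
	rw_exit(lock);
	return (idx);
}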
/*
 * Build directory vnodes based on the profile and the global
 * dev instance.
 */
void
prof_filldir(sdev_node_t *ddv)
{
	sdev_node_t *gdir;

	ASSERT(RW_READ_HELD(&ddv->sdev_contents));

	if (!prof_dev_needupdate(ddv)) {
		ASSERT(RW_READ_HELD(&ddv->sdev_contents));
		return;
	}

	/*
	 * Upgrade to writer lock
	 */
	if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
		/*
		 * We need to drop the read lock and re-acquire it as a
		 * write lock. While we do this the condition may change so we
		 * need to re-check condition
		 */
		rw_exit(&ddv->sdev_contents);
		rw_enter(&ddv->sdev_contents, RW_WRITER);
		if (!prof_dev_needupdate(ddv)) {
			/* Downgrade back to the read lock before returning */
			rw_downgrade(&ddv->sdev_contents);
			return;
		}
	}
	/* At this point we should have a write lock */
	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));

	sdcmn_err10(("devtree_gen (%s): %ld -> %ld\n",
	    ddv->sdev_path, ddv->sdev_devtree_gen, devtree_gen));

	gdir = ddv->sdev_origin;

	if (gdir != NULL)
		sdcmn_err10(("sdev_dir_gen (%s): %ld -> %ld\n",
		    ddv->sdev_path, ddv->sdev_ldir_gen,
		    gdir->sdev_gdir_gen));

	/* update flags and generation number so next filldir is quick */
	if ((ddv->sdev_flags & SDEV_BUILD) == SDEV_BUILD) {
		ddv->sdev_flags &= ~SDEV_BUILD;
	}
	ddv->sdev_devtree_gen = devtree_gen;
	if (gdir != NULL)
		ddv->sdev_ldir_gen = gdir->sdev_gdir_gen;

	prof_make_symlinks(ddv);
	prof_make_maps(ddv);
	prof_make_names(ddv);

	rw_downgrade(&ddv->sdev_contents);
}
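/*
 * prof_filldir() above is the canonical "upgrade or re-acquire and
 * re-check" shape: try the cheap upgrade first and, if it fails, drop the
 * reader lock, take the writer lock, and re-evaluate the condition because
 * it may have changed while no lock was held.  A minimal, hypothetical
 * sketch of that shape follows; needs_update()/do_update() are placeholder
 * callbacks, not functions from the code above.
 */
#include <sys/types.h>
#include <sys/rwlock.h>
#include <sys/debug.h>

static void
update_if_needed(krwlock_t *lock, boolean_t (*needs_update)(void *),
    void (*do_update)(void *), void *arg)
{
	ASSERT(RW_READ_HELD(lock));

	if (!needs_update(arg))
		return;				/* reader lock still held */

	if (rw_tryupgrade(lock) == 0) {
		/*
		 * Upgrade lost the race: re-acquire as writer and re-check
		 * the condition, since it may have changed while the lock
		 * was dropped.
		 */
		rw_exit(lock);
		rw_enter(lock, RW_WRITER);
		if (!needs_update(arg)) {
			rw_downgrade(lock);
			return;
		}
	}

	ASSERT(RW_WRITE_HELD(lock));
	do_update(arg);
	rw_downgrade(lock);		/* callers expect the reader lock back */
}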
static int
zap_tryupgradedir(zap_t *zap, dmu_tx_t *tx)
{
	if (RW_WRITE_HELD(&zap->zap_rwlock))
		return (1);
	if (rw_tryupgrade(&zap->zap_rwlock)) {
		dmu_buf_will_dirty(zap->zap_dbuf, tx);
		return (1);
	}
	return (0);
}
static int
splat_rwlock_test6(struct file *file, void *arg)
{
	rw_priv_t *rwp;
	int rc = -EINVAL;

	rwp = (rw_priv_t *)kmalloc(sizeof(*rwp), GFP_KERNEL);
	if (rwp == NULL)
		return -ENOMEM;

	splat_init_rw_priv(rwp, file);

	rw_enter(&rwp->rw_rwlock, RW_READER);
	if (!RW_READ_HELD(&rwp->rw_rwlock)) {
		splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME,
		    "rwlock should be read lock: %d\n",
		    RW_READ_HELD(&rwp->rw_rwlock));
		goto out;
	}

#if defined(CONFIG_RWSEM_GENERIC_SPINLOCK)
	/* With one reader upgrade should never fail */
	rc = rw_tryupgrade(&rwp->rw_rwlock);
	if (!rc) {
		splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME,
		    "rwlock contended preventing upgrade: %d\n",
		    RW_READ_HELD(&rwp->rw_rwlock));
		goto out;
	}

	if (RW_READ_HELD(&rwp->rw_rwlock) || !RW_WRITE_HELD(&rwp->rw_rwlock)) {
		splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "rwlock should "
		    "have 0 (not %d) reader and 1 (not %d) writer\n",
		    RW_READ_HELD(&rwp->rw_rwlock),
		    RW_WRITE_HELD(&rwp->rw_rwlock));
		goto out;
	}

	rc = 0;
	splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "%s",
	    "rwlock properly upgraded\n");
#else
	rc = 0;
	splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "%s",
	    "rw_tryupgrade() is disabled for this arch\n");
#endif
out:
	rw_exit(&rwp->rw_rwlock);
	rw_destroy(&rwp->rw_rwlock);
	kfree(rwp);

	return rc;
}
/*
 * Clean pts sdev_nodes that are no longer valid.
 */
static void
devpts_prunedir(struct sdev_node *ddv)
{
	struct vnode *vp;
	struct sdev_node *dv, *next = NULL;
	int (*vtor)(struct sdev_node *) = NULL;

	ASSERT(ddv->sdev_flags & SDEV_VTOR);

	vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
	ASSERT(vtor);

	if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
		rw_exit(&ddv->sdev_contents);
		rw_enter(&ddv->sdev_contents, RW_WRITER);
	}

	for (dv = ddv->sdev_dot; dv; dv = next) {
		next = dv->sdev_next;

		/* skip stale nodes */
		if (dv->sdev_flags & SDEV_STALE)
			continue;

		/* validate and prune only ready nodes */
		if (dv->sdev_state != SDEV_READY)
			continue;

		switch (vtor(dv)) {
		case SDEV_VTOR_VALID:
		case SDEV_VTOR_SKIP:
			continue;
		case SDEV_VTOR_INVALID:
			sdcmn_err7(("prunedir: destroy invalid "
			    "node: %s(%p)\n", dv->sdev_name, (void *)dv));
			break;
		}

		vp = SDEVTOV(dv);
		if (vp->v_count > 0)
			continue;

		SDEV_HOLD(dv);

		/* remove the cache node */
		(void) sdev_cache_update(ddv, &dv, dv->sdev_name,
		    SDEV_CACHE_DELETE);
	}
	rw_downgrade(&ddv->sdev_contents);
}
/*
 * This is the predictive prefetch entry point.  It associates dnode access
 * specified with blkid and nblks arguments with a prefetch stream, predicts
 * further accesses based on those stats and initiates speculative prefetch.
 * fetch_data argument specifies whether actual data blocks should be fetched:
 *   FALSE -- prefetch only indirect blocks for predicted data blocks;
 *   TRUE -- prefetch predicted data blocks plus following indirect blocks.
 */
void
dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data)
{
	zstream_t *zs;
	int64_t pf_start, ipf_start, ipf_istart, ipf_iend;
	int64_t pf_ahead_blks, max_blks;
	int epbs, max_dist_blks, pf_nblks, ipf_nblks;
	uint64_t end_of_access_blkid = blkid + nblks;

	if (zfs_prefetch_disable)
		return;

	/*
	 * As a fast path for small (single-block) files, ignore access
	 * to the first block.
	 */
	if (blkid == 0)
		return;

	rw_enter(&zf->zf_rwlock, RW_READER);

	for (zs = list_head(&zf->zf_stream); zs != NULL;
	    zs = list_next(&zf->zf_stream, zs)) {
		if (blkid == zs->zs_blkid) {
			mutex_enter(&zs->zs_lock);
			/*
			 * zs_blkid could have changed before we
			 * acquired zs_lock; re-check them here.
			 */
			if (blkid != zs->zs_blkid) {
				mutex_exit(&zs->zs_lock);
				continue;
			}
			break;
		}
	}

	if (zs == NULL) {
		/*
		 * This access is not part of any existing stream.  Create
		 * a new stream for it.
		 */
		ZFETCHSTAT_BUMP(zfetchstat_misses);
		if (rw_tryupgrade(&zf->zf_rwlock))
			dmu_zfetch_stream_create(zf, end_of_access_blkid);
		rw_exit(&zf->zf_rwlock);
		return;
	}

	/*
	 * This access was to a block that we issued a prefetch for on
	 * behalf of this stream.  Issue further prefetches for this stream.
	 *
	 * Normally, we start prefetching where we stopped
	 * prefetching last (zs_pf_blkid).  But when we get our first
	 * hit on this stream, zs_pf_blkid == zs_blkid, we don't
	 * want to prefetch the block we just accessed.  In this case,
	 * start just after the block we just accessed.
	 */
	pf_start = MAX(zs->zs_pf_blkid, end_of_access_blkid);

	/*
	 * Double our amount of prefetched data, but don't let the
	 * prefetch get further ahead than zfetch_max_distance.
	 */
	if (fetch_data) {
		max_dist_blks =
		    zfetch_max_distance >> zf->zf_dnode->dn_datablkshift;
		/*
		 * Previously, we were (zs_pf_blkid - blkid) ahead.  We
		 * want to now be double that, so read that amount again,
		 * plus the amount we are catching up by (i.e. the amount
		 * read just now).
		 */
		pf_ahead_blks = zs->zs_pf_blkid - blkid + nblks;
		max_blks = max_dist_blks - (pf_start - end_of_access_blkid);
		pf_nblks = MIN(pf_ahead_blks, max_blks);
	} else {
int
rumpuser_rw_tryupgrade(struct rumpuser_rw *rw)
{

	return rw_tryupgrade(rw);
}
/*
 * Find the policy that matches this device.
 */
static devplcy_t *
match_policy(devplcyent_t *de, dev_t dev, vtype_t spec)
{
	char *mname = NULL;
	minor_t min = getminor(dev);

	for (; de != NULL; de = de->dpe_next) {
		if (de->dpe_flags & DPE_ALLMINOR)
			break;

		if (de->dpe_flags & DPE_EXPANDED) {
			if (min >= de->dpe_lomin && min <= de->dpe_himin &&
			    spec == de->dpe_spec) {
				break;
			} else {
				continue;
			}
		}

		/*
		 * We now need the minor name to match string or
		 * simple regexp.  Could we use csp->s_dip and not
		 * allocate a string here?
		 */
		if (mname == NULL &&
		    ddi_lyr_get_minor_name(dev, spec, &mname) != DDI_SUCCESS)
			/* mname can be set after the function fails */
			return (dfltpolicy);

		/* Simple wildcard, with only one ``*'' */
		if (de->dpe_flags & DPE_WILDC) {
			int plen = de->dpe_len - 1;
			int slen = strlen(mname);
			char *pp = de->dpe_expr;
			char *sp = mname;

			/* string must be at least as long as pattern w/o '*' */
			if (slen < plen - 1)
				continue;

			/* skip prefix */
			while (*pp == *sp && *pp != '\0') {
				pp++;
				sp++;
			}

			/* matched single '*' */
			if (*pp == '\0')
				if (*sp == '\0')
					break;
				else
					continue;

			if (*pp != '*')
				continue;

			pp++;

			/*
			 * skip characters matched by '*': difference of
			 * length of s and length of pattern sans '*'
			 */
			sp += slen - (plen - 1);
			if (strcmp(pp, sp) == 0)	/* match! */
				break;

		} else if (strcmp(de->dpe_expr, mname) == 0) {
			/* Store minor number, if no contention */
			if (rw_tryupgrade(&policyrw)) {
				de->dpe_lomin = de->dpe_himin = min;
				de->dpe_spec = spec;
				de->dpe_flags |= DPE_EXPANDED;
			}
			break;
		}
	}

	if (mname != NULL)
		kmem_free(mname, strlen(mname) + 1);

	return (de != NULL ? de->dpe_plcy : dfltpolicy);
}
static int
nvpflush_one(nvfd_t *nvfd)
{
	int rval = DDI_SUCCESS;
	nvlist_t *nvl;

	rw_enter(&nvfd->nvf_lock, RW_READER);

	if (!NVF_IS_DIRTY(nvfd) || NVF_IS_READONLY(nvfd)) {
		rw_exit(&nvfd->nvf_lock);
		return (DDI_SUCCESS);
	}

	if (rw_tryupgrade(&nvfd->nvf_lock) == 0) {
		KFIOERR((CE_CONT, "nvpflush: "
		    "%s rw upgrade failed\n", nvfd->nvf_name));
		rw_exit(&nvfd->nvf_lock);
		return (DDI_FAILURE);
	}

	if (((nvfd->nvf_nvp2nvl)(nvfd, &nvl)) != DDI_SUCCESS) {
		KFIOERR((CE_CONT, "nvpflush: "
		    "%s nvlist construction failed\n", nvfd->nvf_name));
		rw_exit(&nvfd->nvf_lock);
		return (DDI_FAILURE);
	}

	NVF_CLEAR_DIRTY(nvfd);
	nvfd->nvf_flags |= NVF_FLUSHING;
	rw_exit(&nvfd->nvf_lock);

	rval = e_fwrite_nvlist(nvfd, nvl);
	nvlist_free(nvl);

	rw_enter(&nvfd->nvf_lock, RW_WRITER);
	nvfd->nvf_flags &= ~NVF_FLUSHING;
	if (rval == DDI_FAILURE) {
		if (NVF_IS_READONLY(nvfd)) {
			rval = DDI_SUCCESS;
			nvfd->nvf_flags &= ~(NVF_ERROR | NVF_DIRTY);
		} else if ((nvfd->nvf_flags & NVF_ERROR) == 0) {
			cmn_err(CE_CONT,
			    "%s: update failed\n", nvfd->nvf_name);
			nvfd->nvf_flags |= NVF_ERROR | NVF_DIRTY;
		}
	} else {
		if (nvfd->nvf_flags & NVF_CREATE_MSG) {
			cmn_err(CE_CONT, "!Creating %s\n", nvfd->nvf_name);
			nvfd->nvf_flags &= ~NVF_CREATE_MSG;
		}
		if (nvfd->nvf_flags & NVF_REBUILD_MSG) {
			cmn_err(CE_CONT, "!Rebuilding %s\n", nvfd->nvf_name);
			nvfd->nvf_flags &= ~NVF_REBUILD_MSG;
		}
		if (nvfd->nvf_flags & NVF_ERROR) {
			cmn_err(CE_CONT, "%s: update now ok\n",
			    nvfd->nvf_name);
			nvfd->nvf_flags &= ~NVF_ERROR;
		}
		/*
		 * The file may need to be flushed again if the cached
		 * data was touched while writing the earlier contents.
		 */
		if (NVF_IS_DIRTY(nvfd))
			rval = DDI_FAILURE;
	}

	rw_exit(&nvfd->nvf_lock);
	return (rval);
}
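/*
 * nvpflush_one() above shows a third way to handle a failed upgrade:
 * rather than blocking for the writer lock, give up and report failure so
 * the caller can schedule another flush attempt later.  A minimal,
 * hypothetical sketch of that "fail fast" shape follows; is_dirty() and
 * do_flush() are placeholder callbacks, not functions from the code above,
 * and the 0/-1 return values stand in for DDI_SUCCESS/DDI_FAILURE.
 */
#include <sys/types.h>
#include <sys/rwlock.h>

static int
flush_if_dirty(krwlock_t *lock, void *state,
    boolean_t (*is_dirty)(void *), int (*do_flush)(void *))
{
	rw_enter(lock, RW_READER);
	if (!is_dirty(state)) {
		rw_exit(lock);
		return (0);		/* nothing to do */
	}
	if (rw_tryupgrade(lock) == 0) {
		/* Contended: report failure so the caller can retry later. */
		rw_exit(lock);
		return (-1);
	}
	/* Writer lock held: safe to snapshot and clear the dirty state. */
	(void) do_flush(state);
	rw_exit(lock);
	return (0);
}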
int
sam_refresh_shared_reader_ino(
	sam_node_t *ip,		/* Pointer to the inode */
	boolean_t writelock,	/* Inode WRITER lock held, */
				/* otherwise READER lock held. */
	cred_t *credp)		/* credentials. */
{
	sam_id_t id;
	struct sam_perm_inode *permip;
	buf_t *bp;
	int refresh = 0;
	int error;

	if ((ip->updtime + ip->mp->mt.fi_invalid) > SAM_SECOND()) {
		return (0);
	}
	if (!writelock) {
		/*
		 * Acquire inode lock before buffer lock.  Recheck the update
		 * time.
		 */
		if (!rw_tryupgrade(&ip->inode_rwl)) {
			RW_UNLOCK_OS(&ip->inode_rwl, RW_READER);
			RW_LOCK_OS(&ip->inode_rwl, RW_WRITER);
			if ((ip->updtime + ip->mp->mt.fi_invalid) >
			    SAM_SECOND()) {
				error = 0;
				goto out;
			}
		}
	}
	id = ip->di.id;
	if ((error = sam_read_ino(ip->mp, id.ino, &bp, &permip))) {
		goto out;
	}
	if ((permip->di.mode != 0) && (permip->di.id.ino == ip->di.id.ino) &&
	    (permip->di.id.gen == ip->di.id.gen)) {
		if ((permip->di.modify_time.tv_sec !=
		    ip->di.modify_time.tv_sec) ||
		    (permip->di.modify_time.tv_nsec !=
		    ip->di.modify_time.tv_nsec) ||
		    (permip->di.change_time.tv_sec !=
		    ip->di.change_time.tv_sec) ||
		    (permip->di.change_time.tv_nsec !=
		    ip->di.change_time.tv_nsec) ||
		    (permip->di.residence_time != ip->di.residence_time) ||
		    (permip->di.rm.size != ip->di.rm.size) ||
		    (permip->di.mode != ip->di.mode)) {
			refresh = 1;
		} else {
			ip->di.uid = permip->di.uid;
			ip->di.gid = permip->di.gid;
		}
	} else {
		refresh = 1;
		error = ENOENT;		/* This inode has been removed */
	}
	if (refresh) {
		vnode_t *vp = SAM_ITOV(ip);

		/*
		 * If a refresh is needed on a directory inode,
		 * invalidate associated dnlc entries.
		 */
		if (S_ISDIR(ip->di.mode)) {
			sam_invalidate_dnlc(vp);
		}
		/*
		 * Move shared_writer's inode copy into inode.  Set size
		 * and invalidate pages.  Set shared_reader update time.
		 */
		ip->di = permip->di;	/* Move disk ino to incore ino */
		ip->di2 = permip->di2;
		brelse(bp);
		vp->v_type = IFTOVT(S_ISREQ(ip->di.mode) ?
		    S_IFREG : ip->di.mode);
		sam_set_size(ip);
		(void) VOP_PUTPAGE_OS(vp, 0, 0, B_INVAL, credp, NULL);
		if (ip->di.status.b.acl) {
			(void) sam_acl_inactive(ip);
			error = sam_get_acl(ip, &ip->aclp);
		}
		ip->updtime = SAM_SECOND();
	} else {
		ip->updtime = SAM_SECOND();
		brelse(bp);
	}

out:
	if (!writelock) {
		rw_downgrade(&ip->inode_rwl);
	}
	return (error);
}
static int
auto_lookup(
	vnode_t *dvp,
	char *nm,
	vnode_t **vpp,
	pathname_t *pnp,
	int flags,
	vnode_t *rdir,
	cred_t *cred,
	caller_context_t *ct,
	int *direntflags,
	pathname_t *realpnp)
{
	int error = 0;
	vnode_t *newvp = NULL;
	vfs_t *vfsp;
	fninfo_t *dfnip;
	fnnode_t *dfnp = NULL;
	fnnode_t *fnp = NULL;
	char *searchnm;
	int operation;		/* either AUTOFS_LOOKUP or AUTOFS_MOUNT */

	dfnip = vfstofni(dvp->v_vfsp);
	AUTOFS_DPRINT((3, "auto_lookup: dvp=%p (%s) name=%s\n",
	    (void *)dvp, dfnip->fi_map, nm));

	if (nm[0] == 0) {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}

	if (error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct))
		return (error);

	if (nm[0] == '.' && nm[1] == 0) {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}

	if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) {
		fnnode_t *pdfnp;

		pdfnp = (vntofn(dvp))->fn_parent;
		ASSERT(pdfnp != NULL);

		/*
		 * Since it is legitimate to have the VROOT flag set for the
		 * subdirectories of the indirect map in autofs filesystem,
		 * rootfnnodep is checked against fnnode of dvp instead of
		 * just checking whether VROOT flag is set in dvp
		 */
		if (pdfnp == pdfnp->fn_globals->fng_rootfnnodep) {
			vnode_t *vp;

			vfs_rlock_wait(dvp->v_vfsp);
			if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED) {
				vfs_unlock(dvp->v_vfsp);
				return (EIO);
			}
			vp = dvp->v_vfsp->vfs_vnodecovered;
			VN_HOLD(vp);
			vfs_unlock(dvp->v_vfsp);
			error = VOP_LOOKUP(vp, nm, vpp, pnp, flags, rdir, cred,
			    ct, direntflags, realpnp);
			VN_RELE(vp);
			return (error);
		} else {
			*vpp = fntovn(pdfnp);
			VN_HOLD(*vpp);
			return (0);
		}
	}

top:
	dfnp = vntofn(dvp);
	searchnm = nm;
	operation = 0;

	ASSERT(vn_matchops(dvp, auto_vnodeops));

	AUTOFS_DPRINT((3, "auto_lookup: dvp=%p dfnp=%p\n", (void *)dvp,
	    (void *)dfnp));

	/*
	 * If a lookup or mount of this node is in progress, wait for it
	 * to finish, and return whatever result it got.
	 */
	mutex_enter(&dfnp->fn_lock);
	if (dfnp->fn_flags & (MF_LOOKUP | MF_INPROG)) {
		mutex_exit(&dfnp->fn_lock);
		error = auto_wait4mount(dfnp);
		if (error == AUTOFS_SHUTDOWN)
			error = ENOENT;
		if (error == EAGAIN)
			goto top;
		if (error)
			return (error);
	} else
		mutex_exit(&dfnp->fn_lock);

	error = vn_vfsrlock_wait(dvp);
	if (error)
		return (error);
	vfsp = vn_mountedvfs(dvp);
	if (vfsp != NULL) {
		error = VFS_ROOT(vfsp, &newvp);
		vn_vfsunlock(dvp);
		if (!error) {
			error = VOP_LOOKUP(newvp, nm, vpp, pnp,
			    flags, rdir, cred, ct, direntflags, realpnp);
			VN_RELE(newvp);
		}
		return (error);
	}
	vn_vfsunlock(dvp);

	rw_enter(&dfnp->fn_rwlock, RW_READER);
	error = auto_search(dfnp, nm, &fnp, cred);
	if (error) {
		if (dfnip->fi_flags & MF_DIRECT) {
			/*
			 * direct map.
			 */
			if (dfnp->fn_dirents) {
				/*
				 * Mount previously triggered.
				 * 'nm' not found
				 */
				error = ENOENT;
			} else {
				/*
				 * I need to contact the daemon to trigger
				 * the mount. 'dfnp' will be the mountpoint.
				 */
				operation = AUTOFS_MOUNT;
				VN_HOLD(fntovn(dfnp));
				fnp = dfnp;
				error = 0;
			}
		} else if (dvp == dfnip->fi_rootvp) {
			/*
			 * 'dfnp' is the root of the indirect AUTOFS.
			 */
			if (rw_tryupgrade(&dfnp->fn_rwlock) == 0) {
				/*
				 * Could not acquire writer lock, release
				 * reader, and wait until available. We
				 * need to search for 'nm' again, since we
				 * had to release the lock before reacquiring
				 * it.
				 */
				rw_exit(&dfnp->fn_rwlock);
				rw_enter(&dfnp->fn_rwlock, RW_WRITER);
				error = auto_search(dfnp, nm, &fnp, cred);
			}

			ASSERT(RW_WRITE_HELD(&dfnp->fn_rwlock));
			if (error) {
				/*
				 * create node being looked-up and request
				 * mount on it.
				 */
				error = auto_enter(dfnp, nm, &fnp, kcred);
				if (!error)
					operation = AUTOFS_LOOKUP;
			}
		} else if ((dfnp->fn_dirents == NULL) &&
		    ((dvp->v_flag & VROOT) == 0) &&
		    ((fntovn(dfnp->fn_parent))->v_flag & VROOT)) {
			/*
			 * dfnp is the actual 'mountpoint' of indirect map,
			 * it is the equivalent of a direct mount,
			 * ie, /home/'user1'
			 */
			operation = AUTOFS_MOUNT;
			VN_HOLD(fntovn(dfnp));
			fnp = dfnp;
			error = 0;
			searchnm = dfnp->fn_name;
		}
	}

	if (error == EAGAIN) {
		rw_exit(&dfnp->fn_rwlock);
		goto top;
	}
	if (error) {
		rw_exit(&dfnp->fn_rwlock);
		return (error);
	}

	/*
	 * We now have the actual fnnode we're interested in.
	 * The 'MF_LOOKUP' indicates another thread is currently
	 * performing a daemon lookup of this node, therefore we
	 * wait for its completion.
	 * The 'MF_INPROG' indicates another thread is currently
	 * performing a daemon mount of this node, we wait for it
	 * to be done if we are performing a MOUNT. We don't
	 * wait for it if we are performing a LOOKUP.
	 * We can release the reader/writer lock as soon as we acquire
	 * the mutex, since the state of the lock can only change by
	 * first acquiring the mutex.
	 */
	mutex_enter(&fnp->fn_lock);
	rw_exit(&dfnp->fn_rwlock);
	if ((fnp->fn_flags & MF_LOOKUP) ||
	    ((operation == AUTOFS_MOUNT) && (fnp->fn_flags & MF_INPROG))) {
		mutex_exit(&fnp->fn_lock);
		error = auto_wait4mount(fnp);
		VN_RELE(fntovn(fnp));
		if (error == AUTOFS_SHUTDOWN)
			error = ENOENT;
		if (error && error != EAGAIN)
			return (error);
		goto top;
	}

	if (operation == 0) {
		/*
		 * got the fnnode, check for any errors
		 * on the previous operation on that node.
		 */
		error = fnp->fn_error;
		if ((error == EINTR) || (error == EAGAIN)) {
			/*
			 * previous operation on this node was
			 * not completed, do a lookup now.
			 */
			operation = AUTOFS_LOOKUP;
		} else {
			/*
			 * previous operation completed. Return
			 * a pointer to the node only if there was
			 * no error.
			 */
			mutex_exit(&fnp->fn_lock);
			if (!error)
				*vpp = fntovn(fnp);
			else
				VN_RELE(fntovn(fnp));
			return (error);
		}
	}

	/*
	 * Since I got to this point, it means I'm the one
	 * responsible for triggering the mount/look-up of this node.
	 */
	switch (operation) {
	case AUTOFS_LOOKUP:
		AUTOFS_BLOCK_OTHERS(fnp, MF_LOOKUP);
		fnp->fn_error = 0;
		mutex_exit(&fnp->fn_lock);
		error = auto_lookup_aux(fnp, searchnm, cred);
		if (!error) {
			/*
			 * Return this vnode
			 */
			*vpp = fntovn(fnp);
		} else {
			/*
			 * release our reference to this vnode
			 * and return error
			 */
			VN_RELE(fntovn(fnp));
		}
		break;
	case AUTOFS_MOUNT:
		AUTOFS_BLOCK_OTHERS(fnp, MF_INPROG);
		fnp->fn_error = 0;
		mutex_exit(&fnp->fn_lock);
		/*
		 * auto_new_mount_thread fires up a new thread which
		 * calls automountd finishing up the work
		 */
		auto_new_mount_thread(fnp, searchnm, cred);

		/*
		 * At this point, we are simply another thread
		 * waiting for the mount to complete
		 */
		error = auto_wait4mount(fnp);
		if (error == AUTOFS_SHUTDOWN)
			error = ENOENT;

		/*
		 * now release our reference to this vnode
		 */
		VN_RELE(fntovn(fnp));
		if (!error)
			goto top;
		break;
	default:
		auto_log(dfnp->fn_globals->fng_verbose,
		    dfnp->fn_globals->fng_zoneid, CE_WARN,
		    "auto_lookup: unknown operation %d", operation);
	}

	AUTOFS_DPRINT((5, "auto_lookup: name=%s *vpp=%p return=%d\n",
	    nm, (void *)*vpp, error));

	return (error);
}
/*ARGSUSED*/
int
ufs_rdwr_data(
	vnode_t *vnodep,
	u_offset_t offset,
	size_t len,
	fdbuffer_t *fdbp,
	int flags,
	cred_t *credp)
{
	struct inode *ip = VTOI(vnodep);
	struct fs *fs;
	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
	struct buf *bp;
	krw_t rwtype = RW_READER;
	u_offset_t offset1 = offset;	/* Initial offset */
	size_t iolen;
	int curlen = 0;
	int pplen;
	daddr_t bn;
	int contig = 0;
	int error = 0;
	int nbytes;			/* Number bytes this IO */
	int offsetn;			/* Start point this IO */
	int iswrite = flags & B_WRITE;
	int io_started = 0;		/* No IO started */
	struct ulockfs *ulp;
	uint_t protp = PROT_ALL;

	error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap,
	    !iswrite, &protp);
	if (error) {
		if (flags & B_ASYNC) {
			fdb_ioerrdone(fdbp, error);
		}
		return (error);
	}
	fs = ufsvfsp->vfs_fs;
	iolen = len;

	DEBUGF((CE_CONT, "?ufs_rdwr: %s vp: %p pages:%p off %llx len %lx"
	    " isize: %llx fdb: %p\n",
	    flags & B_READ ? "READ" : "WRITE", (void *)vnodep,
	    (void *)vnodep->v_pages, offset1, iolen, ip->i_size,
	    (void *)fdbp));

	rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER);
	rw_enter(&ip->i_contents, rwtype);

	ASSERT(offset1 < ip->i_size);

	if ((offset1 + iolen) > ip->i_size) {
		iolen = ip->i_size - offset1;
	}
	while (!error && curlen < iolen) {

		contig = 0;

		if ((error = bmap_read(ip, offset1, &bn, &contig)) != 0) {
			break;
		}
		ASSERT(!(bn == UFS_HOLE && iswrite));
		if (bn == UFS_HOLE) {
			/*
			 * If the above assertion is true,
			 * then the following if statement can never be true.
			 */
			if (iswrite && (rwtype == RW_READER)) {
				rwtype = RW_WRITER;
				if (!rw_tryupgrade(&ip->i_contents)) {
					rw_exit(&ip->i_contents);
					rw_enter(&ip->i_contents, rwtype);
					continue;
				}
			}
			offsetn = blkoff(fs, offset1);
			pplen = P2ROUNDUP(len, PAGESIZE);
			nbytes = MIN((pplen - curlen),
			    (fs->fs_bsize - offsetn));
			ASSERT(nbytes > 0);

			/*
			 * We may be reading or writing.
			 */
			DEBUGF((CE_CONT, "?ufs_rdwr_data: hole %llx - %lx\n",
			    offset1, (iolen - curlen)));

			if (iswrite) {
				printf("**WARNING: ignoring hole in write\n");
				error = ENOSPC;
			} else {
				fdb_add_hole(fdbp, offset1 - offset, nbytes);
			}
			offset1 += nbytes;
			curlen += nbytes;
			continue;
		}
		ASSERT(contig > 0);
		pplen = P2ROUNDUP(len, PAGESIZE);

		contig = MIN(contig, len - curlen);
		contig = P2ROUNDUP(contig, DEV_BSIZE);

		bp = fdb_iosetup(fdbp, offset1 - offset, contig, vnodep, flags);

		bp->b_edev = ip->i_dev;
		bp->b_dev = cmpdev(ip->i_dev);
		bp->b_blkno = bn;
		bp->b_file = ip->i_vnode;
		bp->b_offset = (offset_t)offset1;

		if (ufsvfsp->vfs_snapshot) {
			fssnap_strategy(&ufsvfsp->vfs_snapshot, bp);
		} else {
			(void) bdev_strategy(bp);
		}
		io_started = 1;

		offset1 += contig;
		curlen += contig;
		if (iswrite)
			lwp_stat_update(LWP_STAT_OUBLK, 1);
		else
			lwp_stat_update(LWP_STAT_INBLK, 1);

		if ((flags & B_ASYNC) == 0) {
			error = biowait(bp);
			fdb_iodone(bp);
		}

		DEBUGF((CE_CONT, "?loop ufs_rdwr_data.. off %llx len %lx\n",
		    offset1, (iolen - curlen)));
	}

	DEBUGF((CE_CONT, "?ufs_rdwr_data: off %llx len %lx pages: %p ------\n",
	    offset1, (iolen - curlen), (void *)vnodep->v_pages));

	rw_exit(&ip->i_contents);
	rw_exit(&ip->i_ufsvfs->vfs_dqrwlock);

	if (flags & B_ASYNC) {
		/*
		 * Show that no more asynchronous IO will be added
		 */
		fdb_ioerrdone(fdbp, error);
	}
	if (ulp) {
		ufs_lockfs_end(ulp);
	}
	if (io_started && flags & B_ASYNC) {
		return (0);
	} else {
		return (error);
	}
}
int
priv_getbyname(const char *name, uint_t flag)
{
	int i;
	int wheld = 0;
	int len;
	char *p;

	if (flag != 0 && flag != PRIV_ALLOC)
		return (-EINVAL);

	if (strncasecmp(name, "priv_", 5) == 0)
		name += 5;

	rw_enter(&privinfo_lock, RW_READER);
rescan:
	for (i = 0; i < nprivs; i++)
		if (strcasecmp(priv_names[i], name) == 0) {
			rw_exit(&privinfo_lock);
			return (i);
		}

	if (!wheld) {
		if (!(flag & PRIV_ALLOC)) {
			rw_exit(&privinfo_lock);
			return (-EINVAL);
		}

		/* check length, validity and available space */
		len = strlen(name) + 1;

		if (len > PRIVNAME_MAX) {
			rw_exit(&privinfo_lock);
			return (-ENAMETOOLONG);
		}

		for (p = (char *)name; *p != '\0'; p++) {
			char c = *p;

			if (!((c >= 'A' && c <= 'Z') ||
			    (c >= 'a' && c <= 'z') ||
			    (c >= '0' && c <= '9') ||
			    c == '_')) {
				rw_exit(&privinfo_lock);
				return (-EINVAL);
			}
		}

		if (!rw_tryupgrade(&privinfo_lock)) {
			rw_exit(&privinfo_lock);
			rw_enter(&privinfo_lock, RW_WRITER);
			wheld = 1;
			/* Someone may have added our privilege */
			goto rescan;
		}
	}

	if (nprivs == MAX_PRIVILEGE || len + privbytes > maxprivbytes) {
		rw_exit(&privinfo_lock);
		return (-ENOMEM);
	}

	priv_names[i] = p = priv_str + privbytes;

	bcopy(name, p, len);

	/* make the priv_names[i] and privilege name globally visible */
	membar_producer();

	/* adjust priv count and bytes count */
	priv_ninfo->cnt = priv_info->priv_max = ++nprivs;
	privbytes += len;

	rw_exit(&privinfo_lock);
	return (i);
}
/*
 * Query domain table for a given domain.
 *
 * If domain isn't found it is added to AVL trees and
 * the results are pushed out to disk.
 */
int
zfs_fuid_find_by_domain(zfsvfs_t *zfsvfs, const char *domain,
    char **retdomain, dmu_tx_t *tx)
{
	fuid_domain_t searchnode, *findnode;
	avl_index_t loc;
	krw_t rw = RW_READER;

	/*
	 * If the dummy "nobody" domain then return an index of 0
	 * to cause the created FUID to be a standard POSIX id
	 * for the user nobody.
	 */
	if (domain[0] == '\0') {
		*retdomain = nulldomain;
		return (0);
	}

	searchnode.f_ksid = ksid_lookupdomain(domain);
	if (retdomain) {
		*retdomain = searchnode.f_ksid->kd_name;
	}
	if (!zfsvfs->z_fuid_loaded)
		zfs_fuid_init(zfsvfs, tx);

retry:
	rw_enter(&zfsvfs->z_fuid_lock, rw);
	findnode = avl_find(&zfsvfs->z_fuid_domain, &searchnode, &loc);

	if (findnode) {
		rw_exit(&zfsvfs->z_fuid_lock);
		ksiddomain_rele(searchnode.f_ksid);
		return (findnode->f_idx);
	} else {
		fuid_domain_t *domnode;
		nvlist_t *nvp;
		nvlist_t **fuids;
		uint64_t retidx;
		size_t nvsize = 0;
		char *packed;
		dmu_buf_t *db;
		int i = 0;

		if (rw == RW_READER && !rw_tryupgrade(&zfsvfs->z_fuid_lock)) {
			rw_exit(&zfsvfs->z_fuid_lock);
			rw = RW_WRITER;
			goto retry;
		}

		domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP);
		domnode->f_ksid = searchnode.f_ksid;

		retidx = domnode->f_idx = avl_numnodes(&zfsvfs->z_fuid_idx) + 1;

		avl_add(&zfsvfs->z_fuid_domain, domnode);
		avl_add(&zfsvfs->z_fuid_idx, domnode);
		/*
		 * Now resync the on-disk nvlist.
		 */
		VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);

		domnode = avl_first(&zfsvfs->z_fuid_domain);
		fuids = kmem_alloc(retidx * sizeof (void *), KM_SLEEP);
		while (domnode) {
			VERIFY(nvlist_alloc(&fuids[i],
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
			VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX,
			    domnode->f_idx) == 0);
			VERIFY(nvlist_add_uint64(fuids[i],
			    FUID_OFFSET, 0) == 0);
			VERIFY(nvlist_add_string(fuids[i++], FUID_DOMAIN,
			    domnode->f_ksid->kd_name) == 0);
			domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode);
		}
		VERIFY(nvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY,
		    fuids, retidx) == 0);
		for (i = 0; i != retidx; i++)
			nvlist_free(fuids[i]);
		kmem_free(fuids, retidx * sizeof (void *));
		VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0);
		packed = kmem_alloc(nvsize, KM_SLEEP);
		VERIFY(nvlist_pack(nvp, &packed, &nvsize,
		    NV_ENCODE_XDR, KM_SLEEP) == 0);
		nvlist_free(nvp);
		zfsvfs->z_fuid_size = nvsize;
		dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0,
		    zfsvfs->z_fuid_size, packed, tx);
		kmem_free(packed, zfsvfs->z_fuid_size);
		VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj,
		    FTAG, &db));
		dmu_buf_will_dirty(db, tx);
		*(uint64_t *)db->db_data = zfsvfs->z_fuid_size;
		dmu_buf_rele(db, FTAG);

		rw_exit(&zfsvfs->z_fuid_lock);
		return (retidx);
	}
}
static void
devvt_cleandir(struct vnode *dvp, struct cred *cred)
{
	struct sdev_node *sdvp = VTOSDEV(dvp);
	struct sdev_node *dv, *next = NULL;
	int min, cnt;
	char found = 0;

	mutex_enter(&vc_lock);
	cnt = VC_INSTANCES_COUNT;
	mutex_exit(&vc_lock);

	/* We have to fool warlock this way, otherwise it will complain */
#ifndef	__lock_lint
	if (rw_tryupgrade(&sdvp->sdev_contents) == 0) {
		rw_exit(&sdvp->sdev_contents);
		rw_enter(&sdvp->sdev_contents, RW_WRITER);
	}
#else
	rw_enter(&sdvp->sdev_contents, RW_WRITER);
#endif

	/* 1. prune invalid nodes and rebuild stale symlinks */
	devvt_prunedir(sdvp);

	/* 2. create missing nodes */
	for (min = 0; min < cnt; min++) {
		char nm[16];

		if (vt_minor_valid(min) == B_FALSE)
			continue;

		(void) snprintf(nm, sizeof (nm), "%d", min);
		found = 0;
		for (dv = SDEV_FIRST_ENTRY(sdvp); dv; dv = next) {
			next = SDEV_NEXT_ENTRY(sdvp, dv);

			/* validate only ready nodes */
			if (dv->sdev_state != SDEV_READY)
				continue;
			if (strcmp(nm, dv->sdev_name) == 0) {
				found = 1;
				break;
			}
		}
		if (!found) {
			devvt_create_snode(sdvp, nm, cred, SDEV_VATTR);
		}
	}

	/* 3. create active link node and console user link node */
	found = 0;
	for (dv = SDEV_FIRST_ENTRY(sdvp); dv; dv = next) {
		next = SDEV_NEXT_ENTRY(sdvp, dv);

		/* validate only ready nodes */
		if (dv->sdev_state != SDEV_READY)
			continue;
		if ((strcmp(dv->sdev_name, DEVVT_ACTIVE_NAME) == 0))
			found |= 0x01;
		if ((strcmp(dv->sdev_name, DEVVT_CONSUSER_NAME) == 0))
			found |= 0x02;

		if ((found & 0x01) && (found & 0x02))
			break;
	}

	if (!(found & 0x01))
		devvt_create_snode(sdvp, DEVVT_ACTIVE_NAME, cred, SDEV_VLINK);
	if (!(found & 0x02))
		devvt_create_snode(sdvp, DEVVT_CONSUSER_NAME, cred, SDEV_VLINK);

#ifndef	__lock_lint
	rw_downgrade(&sdvp->sdev_contents);
#else
	rw_exit(&sdvp->sdev_contents);
#endif
}
/*
 * This is the predictive prefetch entry point.  It associates dnode access
 * specified with blkid and nblks arguments with a prefetch stream, predicts
 * further accesses based on those stats and initiates speculative prefetch.
 * fetch_data argument specifies whether actual data blocks should be fetched:
 *   FALSE -- prefetch only indirect blocks for predicted data blocks;
 *   TRUE -- prefetch predicted data blocks plus following indirect blocks.
 */
void
dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data)
{
	zstream_t *zs;
	int64_t pf_start, ipf_start, ipf_istart, ipf_iend;
	int64_t pf_ahead_blks, max_blks;
	int epbs, max_dist_blks, pf_nblks, ipf_nblks;
	uint64_t end_of_access_blkid = blkid + nblks;
	spa_t *spa = zf->zf_dnode->dn_objset->os_spa;

	if (zfs_prefetch_disable)
		return;

	/*
	 * If we haven't yet loaded the indirect vdevs' mappings, we
	 * can only read from blocks that we carefully ensure are on
	 * concrete vdevs (or previously-loaded indirect vdevs).  So we
	 * can't allow the predictive prefetcher to attempt reads of other
	 * blocks (e.g. of the MOS's dnode object).
	 */
	if (!spa_indirect_vdevs_loaded(spa))
		return;

	/*
	 * As a fast path for small (single-block) files, ignore access
	 * to the first block.
	 */
	if (blkid == 0)
		return;

	rw_enter(&zf->zf_rwlock, RW_READER);

	/*
	 * Find matching prefetch stream.  Depending on whether the accesses
	 * are block-aligned, first block of the new access may either follow
	 * the last block of the previous access, or be equal to it.
	 */
	for (zs = list_head(&zf->zf_stream); zs != NULL;
	    zs = list_next(&zf->zf_stream, zs)) {
		if (blkid == zs->zs_blkid || blkid + 1 == zs->zs_blkid) {
			mutex_enter(&zs->zs_lock);
			/*
			 * zs_blkid could have changed before we
			 * acquired zs_lock; re-check them here.
			 */
			if (blkid == zs->zs_blkid) {
				break;
			} else if (blkid + 1 == zs->zs_blkid) {
				blkid++;
				nblks--;
				if (nblks == 0) {
					/* Already prefetched this before. */
					mutex_exit(&zs->zs_lock);
					rw_exit(&zf->zf_rwlock);
					return;
				}
				break;
			}
			mutex_exit(&zs->zs_lock);
		}
	}

	if (zs == NULL) {
		/*
		 * This access is not part of any existing stream.  Create
		 * a new stream for it.
		 */
		ZFETCHSTAT_BUMP(zfetchstat_misses);
		if (rw_tryupgrade(&zf->zf_rwlock))
			dmu_zfetch_stream_create(zf, end_of_access_blkid);
		rw_exit(&zf->zf_rwlock);
		return;
	}

	/*
	 * This access was to a block that we issued a prefetch for on
	 * behalf of this stream.  Issue further prefetches for this stream.
	 *
	 * Normally, we start prefetching where we stopped
	 * prefetching last (zs_pf_blkid).  But when we get our first
	 * hit on this stream, zs_pf_blkid == zs_blkid, we don't
	 * want to prefetch the block we just accessed.  In this case,
	 * start just after the block we just accessed.
	 */
	pf_start = MAX(zs->zs_pf_blkid, end_of_access_blkid);

	/*
	 * Double our amount of prefetched data, but don't let the
	 * prefetch get further ahead than zfetch_max_distance.
	 */
	if (fetch_data) {
		max_dist_blks =
		    zfetch_max_distance >> zf->zf_dnode->dn_datablkshift;
		/*
		 * Previously, we were (zs_pf_blkid - blkid) ahead.  We
		 * want to now be double that, so read that amount again,
		 * plus the amount we are catching up by (i.e. the amount
		 * read just now).
		 */
		pf_ahead_blks = zs->zs_pf_blkid - blkid + nblks;
		max_blks = max_dist_blks - (pf_start - end_of_access_blkid);
		pf_nblks = MIN(pf_ahead_blks, max_blks);
	} else {
/*
 * If DV_BUILD is set, we call into nexus driver to do a BUS_CONFIG_ALL.
 * Otherwise, simply return cached dv_node's.  Hotplug code always calls
 * devfs_clean() to invalidate the dv_node cache.
 */
static int
devfs_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred,
    int *eofp)
{
	struct dv_node *ddv, *dv;
	struct dirent64 *de, *bufp;
	offset_t diroff;
	offset_t soff;
	size_t reclen, movesz;
	int error;
	struct vattr va;
	size_t bufsz;

	ddv = VTODV(dvp);
	dcmn_err2(("devfs_readdir %s: offset %lld len %ld\n",
	    ddv->dv_name, uiop->uio_loffset, uiop->uio_iov->iov_len));

	ASSERT(ddv->dv_attr || ddv->dv_attrvp);
	ASSERT(RW_READ_HELD(&ddv->dv_contents));

	if (uiop->uio_loffset >= MAXOFF_T) {
		if (eofp)
			*eofp = 1;
		return (0);
	}

	if (uiop->uio_iovcnt != 1)
		return (EINVAL);

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	/* Load the initial contents */
	if (ddv->dv_flags & DV_BUILD) {
		if (!rw_tryupgrade(&ddv->dv_contents)) {
			rw_exit(&ddv->dv_contents);
			rw_enter(&ddv->dv_contents, RW_WRITER);
		}

		/* recheck and fill */
		if (ddv->dv_flags & DV_BUILD)
			dv_filldir(ddv);

		rw_downgrade(&ddv->dv_contents);
	}

	soff = uiop->uio_offset;
	bufsz = uiop->uio_iov->iov_len;
	de = bufp = kmem_alloc(bufsz, KM_SLEEP);
	movesz = 0;
	dv = (struct dv_node *)-1;

	/*
	 * Move as many entries into the uio structure as it will take.
	 * Special case "." and "..".
	 */
	diroff = 0;
	if (soff == 0) {			/* . */
		reclen = DIRENT64_RECLEN(strlen("."));
		if ((movesz + reclen) > bufsz)
			goto full;
		de->d_ino = (ino64_t)ddv->dv_ino;
		de->d_off = (off64_t)diroff + 1;
		de->d_reclen = (ushort_t)reclen;

		/* use strncpy(9f) to zero out uninitialized bytes */

		(void) strncpy(de->d_name, ".", DIRENT64_NAMELEN(reclen));
		movesz += reclen;
		de = (dirent64_t *)((char *)de + reclen);
		dcmn_err3(("devfs_readdir: A: diroff %lld, soff %lld: '%s' "
		    "reclen %lu\n", diroff, soff, ".", reclen));
	}

	diroff++;
	if (soff <= 1) {			/* .. */
		reclen = DIRENT64_RECLEN(strlen(".."));
		if ((movesz + reclen) > bufsz)
			goto full;
		de->d_ino = (ino64_t)ddv->dv_dotdot->dv_ino;
		de->d_off = (off64_t)diroff + 1;
		de->d_reclen = (ushort_t)reclen;

		/* use strncpy(9f) to zero out uninitialized bytes */

		(void) strncpy(de->d_name, "..", DIRENT64_NAMELEN(reclen));
		movesz += reclen;
		de = (dirent64_t *)((char *)de + reclen);
		dcmn_err3(("devfs_readdir: B: diroff %lld, soff %lld: '%s' "
		    "reclen %lu\n", diroff, soff, "..", reclen));
	}

	diroff++;
	for (dv = ddv->dv_dot; dv; dv = dv->dv_next, diroff++) {
		/*
		 * although DDM_INTERNAL_PATH minor nodes are skipped for
		 * readdirs outside the kernel, they still occupy directory
		 * offsets
		 */
		if (diroff < soff ||
		    ((dv->dv_flags & DV_INTERNAL) && (cred != kcred)))
			continue;

		reclen = DIRENT64_RECLEN(strlen(dv->dv_name));
		if ((movesz + reclen) > bufsz) {
			dcmn_err3(("devfs_readdir: C: diroff "
			    "%lld, soff %lld: '%s' reclen %lu\n",
			    diroff, soff, dv->dv_name, reclen));
			goto full;
		}
		de->d_ino = (ino64_t)dv->dv_ino;
		de->d_off = (off64_t)diroff + 1;
		de->d_reclen = (ushort_t)reclen;

		/* use strncpy(9f) to zero out uninitialized bytes */

		ASSERT(strlen(dv->dv_name) + 1 <=
		    DIRENT64_NAMELEN(reclen));
		(void) strncpy(de->d_name, dv->dv_name,
		    DIRENT64_NAMELEN(reclen));

		movesz += reclen;
		de = (dirent64_t *)((char *)de + reclen);
		dcmn_err4(("devfs_readdir: D: diroff "
		    "%lld, soff %lld: '%s' reclen %lu\n", diroff, soff,
		    dv->dv_name, reclen));
	}

	/* the buffer is full, or we exhausted everything */
full:	dcmn_err3(("devfs_readdir: moving %lu bytes: "
	    "diroff %lld, soff %lld, dv %p\n",
	    movesz, diroff, soff, (void *)dv));

	if ((movesz == 0) && dv)
		error = EINVAL;		/* cannot be represented */
	else {
		error = uiomove(bufp, movesz, UIO_READ, uiop);
		if (error == 0) {
			if (eofp)
				*eofp = dv ? 0 : 1;
			uiop->uio_offset = diroff;
		}

		va.va_mask = AT_ATIME;
		gethrestime(&va.va_atime);
		rw_exit(&ddv->dv_contents);
		(void) devfs_setattr(dvp, &va, 0, cred, NULL);
		rw_enter(&ddv->dv_contents, RW_READER);
	}

	kmem_free(bufp, bufsz);
	return (error);
}