/*
 * Program the macaddress and vlans of the vsw interface.
 *
 * Returns 0 on success, non-zero mac-layer error on failure.
 */
static int
vsw_set_if_hw_addr(vsw_t *vswp)
{
	mac_diag_t	diag;
	uint8_t		*macaddr;
	uint8_t		primary_addr[ETHERADDRL];
	uint16_t	vid = VLAN_ID_NONE;
	int		rv = 0;	/* FIX: was uninitialized when addr_set is already B_TRUE */
	uint16_t	mac_flags = MAC_UNICAST_TAG_DISABLE |
	    MAC_UNICAST_STRIP_DISABLE;

	D1(vswp, "%s: enter", __func__);

	ASSERT(RW_WRITE_HELD(&vswp->maccl_rwlock));
	if (vswp->mch == NULL)
		return (0);

	macaddr = (uint8_t *)vswp->if_addr.ether_addr_octet;

	/* check if it is the primary macaddr of the card. */
	mac_unicast_primary_get(vswp->mh, primary_addr);
	if (ether_cmp((void *)primary_addr, (void *)macaddr) == 0) {
		mac_flags |= MAC_UNICAST_PRIMARY;
	}

	/*
	 * If the interface has a specific 'pvid', then
	 * register with that vlan-id, otherwise register
	 * with VLAN_ID_NONE.
	 */
	if (vswp->pvid != vswp->default_vlan_id) {
		vid = vswp->pvid;
	}

	if (!(vswp->smode & VSW_LAYER2_PROMISC)) {
		mac_flags |= MAC_UNICAST_HW;
	}

	if (vswp->addr_set == B_FALSE) {
		vswp->muh = NULL;
		rv = mac_unicast_add(vswp->mch, macaddr, mac_flags,
		    &vswp->muh, vid, &diag);

		if (rv != 0) {
			/* FIX: added missing space between split literals */
			cmn_err(CE_WARN, "vsw%d: Failed to program "
			    "macaddr,vid(%s, %d) err=%d",
			    vswp->instance, ether_sprintf((void *)macaddr),
			    vid, rv);
			return (rv);
		}
		vswp->addr_set = B_TRUE;

		D2(vswp, "%s:programmed macaddr(%s) vid(%d) into device %s",
		    __func__, ether_sprintf((void *)macaddr), vid,
		    vswp->physname);
	}

	/* Add vlans to the MAC layer */
	vsw_mac_add_vlans(vswp, vswp->mch, macaddr, mac_flags,
	    vswp->vids, vswp->nvids);

	/* Configure bandwidth to the MAC layer */
	vsw_maccl_set_bandwidth(vswp, NULL, VSW_LOCALDEV, vswp->bandwidth);

	mac_rx_set(vswp->mch, vsw_if_rx_cb, (void *)vswp);

	D1(vswp, "%s: exit", __func__);
	return (rv);
}
/*
 * Search directory 'tdp' for an entry named namep/namelen.
 *
 * On a match, *ipp is set to the held inode and slotp describes the
 * matching entry (status EXIST).  While scanning, deleted entries are
 * examined for a reusable slot of exactly the required size (status
 * FOUND).  If neither is found, slotp describes where a new entry
 * should be appended (status NONE path at the end).
 *
 * Caller must hold tdp->i_rwlock as writer.  'buf' is a caller-supplied
 * scratch buffer of at least one logical block.
 * Returns 0 on success, errno otherwise.
 */
int
ud_dircheckforname(struct ud_inode *tdp,
	char *namep, int32_t namelen, struct slot *slotp,
	struct ud_inode **ipp, uint8_t *buf, struct cred *cr)
{
	struct udf_vfs *udf_vfsp;
	uint32_t dirsize, offset;
	struct fbuf *fbp;
	struct file_id *fid;
	/*
	 * FIX: sz_req initialized to 0.  It was only computed when
	 * slotp->status != FOUND, yet the deleted-entry scan below could
	 * compare against it in either case (matched == 0), reading an
	 * uninitialized value.
	 */
	int32_t sz, error = 0, sz_req = 0, matched = 0;
	uint8_t *nm;
	uint8_t *dname;
	int32_t id_len;

	ud_printf("ud_dircheckforname\n");
	ASSERT(RW_WRITE_HELD(&tdp->i_rwlock));

	fbp = NULL;
	dname = (uint8_t *)kmem_zalloc(1024, KM_SLEEP);
	udf_vfsp = tdp->i_udf;

	offset = 0;
	dirsize = tdp->i_size;

	if (slotp->status != FOUND) {
		int32_t temp;

		temp = 1024;	/* size of dname allocated above */
		if ((error = ud_compress(namelen, &temp,
		    (uint8_t *)namep, dname)) != 0) {
			goto end;
		}
		/* required slot size, rounded up to a 4-byte boundary */
		sz_req = F_LEN + temp;
		sz_req = (sz_req + 3) & ~3;
	}

	while (offset < dirsize) {
		if ((error = ud_get_next_fid(tdp, &fbp,
		    offset, &fid, &nm, buf)) != 0) {
			break;
		}
		if ((error = ud_uncompress(fid->fid_idlen,
		    &id_len, nm, dname)) != 0) {
			break;
		}
		if ((fid->fid_flags & FID_DELETED) == 0) {
			/* Check for name match (or "."/".." parent entry) */
			if (((namelen == id_len) &&
			    (strncmp(namep, (caddr_t)dname, namelen) == 0)) ||
			    ((fid->fid_flags & FID_PARENT) &&
			    (namep[0] == '.' &&
			    (namelen == 1 ||
			    (namelen == 2 && namep[1] == '.'))))) {
				tdp->i_diroff = offset;
				if ((fid->fid_flags & FID_PARENT) &&
				    (namelen == 1) && (namep[0] == '.')) {
					struct vnode *vp = ITOV(tdp);

					*ipp = tdp;
					VN_HOLD(vp);
				} else {
					uint16_t prn;
					uint32_t loc;

					prn = SWAP_16(
					    fid->fid_icb.lad_ext_prn);
					loc = SWAP_32(
					    fid->fid_icb.lad_ext_loc);
					if ((error = ud_iget(tdp->i_vfs, prn,
					    loc, ipp, NULL, cr)) != 0) {
						fbrelse(fbp, S_OTHER);
						goto end;
					}
				}
				slotp->status = EXIST;
				slotp->offset = offset;
				slotp->size = FID_LEN(fid);
				slotp->fbp = fbp;
				slotp->ep = fid;
				slotp->endoff = 0;
				goto end;
			}
		} else {
			/*
			 * see if we need to find an
			 * empty slot and the current slot
			 * matches
			 */
			if ((slotp->status != FOUND) || (matched == 0)) {
				sz = FID_LEN(fid);
				if (sz == sz_req) {
					slotp->status = FOUND;
					slotp->offset = offset;
					slotp->size = sz;
				}
				if (matched == 0) {
					if ((namelen == id_len) &&
					    (strncmp(namep, (caddr_t)dname,
					    namelen) == 0)) {
						matched = 1;
						slotp->status = FOUND;
						slotp->offset = offset;
						slotp->size = sz;
					}
				}
			}
		}
		offset += FID_LEN(fid);
	}
	if (fbp) {
		fbrelse(fbp, S_OTHER);
	}
	if (slotp->status == NONE) {
		/*
		 * We didn't find a slot; the new directory entry should be put
		 * at the end of the directory.  Return an indication of where
		 * this is, and set "endoff" to zero; since we're going to have
		 * to extend the directory, we're certainly not going to
		 * truncate it.
		 */
		slotp->offset = dirsize;
		if (tdp->i_desc_type == ICB_FLAG_ONE_AD) {
			slotp->size = tdp->i_max_emb - tdp->i_size;
		} else {
			/*
			 * FIX: parenthesized (offset & lbmask).  Without the
			 * parens, '-' binds tighter than '&' and the
			 * expression computed (lbsize - offset) & lbmask,
			 * not the space remaining in the last logical block.
			 */
			slotp->size = udf_vfsp->udf_lbsize -
			    (slotp->offset & udf_vfsp->udf_lbmask);
		}
		slotp->endoff = 0;
	}

	*ipp = NULL;
end:
	kmem_free((caddr_t)dname, 1024);
	return (error);
}
/*
 * Program the macaddress and vlans of a port.
 *
 * Returns 0 on success, non-zero mac-layer error on failure.
 */
static int
vsw_set_port_hw_addr(vsw_port_t *port)
{
	vsw_t		*vswp = port->p_vswp;
	mac_diag_t	diag;
	uint8_t		*macaddr;
	uint16_t	vid = VLAN_ID_NONE;
	int		rv = 0;	/* FIX: was uninitialized when addr_set is already B_TRUE */
	uint16_t	mac_flags = MAC_UNICAST_TAG_DISABLE |
	    MAC_UNICAST_STRIP_DISABLE;

	D1(vswp, "%s: enter", __func__);

	ASSERT(RW_WRITE_HELD(&port->maccl_rwlock));
	if (port->p_mch == NULL)
		return (0);

	/*
	 * If the port has a specific 'pvid', then
	 * register with that vlan-id, otherwise register
	 * with VLAN_ID_NONE.
	 */
	if (port->pvid != vswp->default_vlan_id) {
		vid = port->pvid;
	}
	macaddr = (uint8_t *)port->p_macaddr.ether_addr_octet;

	if (!(vswp->smode & VSW_LAYER2_PROMISC)) {
		mac_flags |= MAC_UNICAST_HW;
	}

	if (port->addr_set == B_FALSE) {
		port->p_muh = NULL;
		rv = mac_unicast_add(port->p_mch, macaddr, mac_flags,
		    &port->p_muh, vid, &diag);

		if (rv != 0) {
			/* FIX: added missing space between split literals */
			cmn_err(CE_WARN, "vsw%d: Failed to program "
			    "macaddr,vid(%s, %d) err=%d",
			    vswp->instance, ether_sprintf((void *)macaddr),
			    vid, rv);
			return (rv);
		}
		port->addr_set = B_TRUE;

		D2(vswp, "%s:programmed macaddr(%s) vid(%d) into device %s",
		    __func__, ether_sprintf((void *)macaddr), vid,
		    vswp->physname);
	}

	/* Add vlans to the MAC layer */
	vsw_mac_add_vlans(vswp, port->p_mch, macaddr,
	    mac_flags, port->vids, port->nvids);

	/* Configure bandwidth to the MAC layer */
	vsw_maccl_set_bandwidth(NULL, port, VSW_VNETPORT, port->p_bandwidth);

	mac_rx_set(port->p_mch, vsw_port_rx_cb, (void *)port);

	D1(vswp, "%s: exit", __func__);
	return (rv);
}
static mdeg_clnt_t * mdeg_alloc_clnt(void) { mdeg_clnt_t *clnt; int idx; mdeg_clnt_t *newtbl; uint_t newmaxclnts; uint_t newtblsz; uint_t oldtblsz; ASSERT(RW_WRITE_HELD(&mdeg.rwlock)); /* search for an unused slot in the table */ for (idx = 0; idx < mdeg.maxclnts; idx++) { clnt = &mdeg.tbl[idx]; if (!clnt->valid) { break; } } /* found any empty slot */ if (idx != mdeg.maxclnts) { goto found; } /* * There was no free space in the table. Grow * the table to double its current size. */ MDEG_DBG("client table full:\n"); MDEG_DUMP_TABLE(); newmaxclnts = mdeg.maxclnts * 2; newtblsz = newmaxclnts * sizeof (mdeg_clnt_t); newtbl = kmem_zalloc(newtblsz, KM_SLEEP); /* copy old table data to the new table */ oldtblsz = mdeg.maxclnts * sizeof (mdeg_clnt_t); bcopy(mdeg.tbl, newtbl, oldtblsz); /* * Since the old table was full, the first free entry * will be just past the end of the old table data in * the new table. */ clnt = &newtbl[mdeg.maxclnts]; /* clean up the old table */ kmem_free(mdeg.tbl, oldtblsz); mdeg.tbl = newtbl; mdeg.maxclnts = newmaxclnts; found: ASSERT(clnt->valid == 0); clnt->hdl = MDEG_ALLOC_HDL(idx, MDEG_HDL2COUNT(clnt->hdl)); return (clnt); }
/*
 * Remove the entry 'namep' from directory 'dp'.
 *
 * 'oip', if non-NULL, must match the inode found for the name.  'cdir'
 * is the caller's current directory (rmdir of it is rejected).  'op'
 * selects the semantics: DR_REMOVE (unlink), DR_RMDIR, or DR_RENAME
 * (removal of the displaced target during rename).
 *
 * Caller must hold dp->i_rwlock as writer.  Returns 0 or errno.
 *
 * Locking i_contents in this
 * function seems to be really weird
 */
int
ud_dirremove(
	struct ud_inode *dp,
	char *namep,
	struct ud_inode *oip,
	struct vnode *cdir,
	enum dr_op op,
	struct cred *cr,
	caller_context_t *ctp)
{
	struct udf_vfs *udf_vfsp;
	int32_t namelen, err = 0;
	struct slot slot;
	struct ud_inode *ip;
	mode_t mode;
	struct file_id *fid;
	uint8_t *buf = NULL;
	uint32_t tbno;

	ud_printf("ud_dirremove\n");

	ASSERT(RW_WRITE_HELD(&dp->i_rwlock));

	udf_vfsp = dp->i_udf;
	namelen = (int)strlen(namep);
	if (namelen == 0) {
		cmn_err(CE_WARN, "name length == 0 in ud_dirremove");
		return (EINVAL);
	}

	/*
	 * return err when removing . and ..
	 */
	if (namep[0] == '.') {
		if (namelen == 1) {
			return (EINVAL);
		} else if (namelen == 2 && namep[1] == '.') {
			return (EEXIST); /* SIGH should be ENOTEMPTY */
		}
	}

	ASSERT(RW_WRITE_HELD(&dp->i_rwlock));

	/*
	 * Check accessibility of directory.
	 */
	if (dp->i_type != VDIR) {
		return (ENOTDIR);
	}

	ip = NULL;
	/* status FOUND tells the lookup not to hunt for an empty slot */
	slot.status = FOUND;
	slot.offset = 0;
	slot.size = 0;
	slot.fbp = NULL;
	slot.ep = NULL;
	slot.endoff = 0;

	/*
	 * Execute access is required to search the directory.
	 * Access for write is interpreted as allowing
	 * deletion of files in the directory.
	 */
	if (err = ud_iaccess(dp, IEXEC|IWRITE, cr)) {
		return (err);
	}

	buf = (uint8_t *)kmem_zalloc(udf_vfsp->udf_lbsize, KM_SLEEP);

	rw_enter(&dp->i_contents, RW_WRITER);

	if (err = ud_dircheckforname(dp,
	    namep, namelen, &slot, &ip, buf, cr)) {
		goto out_novfs;
	}
	if (ip == NULL) {
		err = ENOENT;
		goto out_novfs;
	}
	/* caller expected a specific inode; bail if the name resolved elsewhere */
	if (oip && oip != ip) {
		err = ENOENT;
		goto out_novfs;
	}

	if ((mode = ip->i_type) == VDIR) {
		/*
		 * vn_vfswlock() prevents races between mount and rmdir.
		 */
		if (vn_vfswlock(ITOV(ip))) {
			err = EBUSY;
			goto out_novfs;
		}
		if (vn_mountedvfs(ITOV(ip)) != NULL && op != DR_RENAME) {
			err = EBUSY;
			goto out;
		}
		/*
		 * If we are removing a directory, get a lock on it.
		 * If the directory is empty, it will stay empty until
		 * we can remove it.
		 */
		rw_enter(&ip->i_rwlock, RW_READER);
	}

	/* We must be holding i_contents */
	rw_enter(&ip->i_contents, RW_READER);
	if (err = ud_sticky_remove_access(dp, ip, cr)) {
		rw_exit(&ip->i_contents);
		if (mode == VDIR) {
			rw_exit(&ip->i_rwlock);
		}
		goto out;
	}
	if (op == DR_RMDIR) {
		/*
		 * For rmdir(2), some special checks are required.
		 * (a) Don't remove any alias of the parent (e.g. ".").
		 * (b) Don't remove the current directory.
		 * (c) Make sure the entry is (still) a directory.
		 * (d) Make sure the directory is empty.
		 */
		if (dp == ip || ITOV(ip) == cdir) {
			err = EINVAL;
		} else if (ip->i_type != VDIR) {
			err = ENOTDIR;
		} else if ((ip->i_nlink != 1) ||
		    (!ud_dirempty(ip, dp->i_uniqid, cr))) {
			/*
			 * Directories do not have an
			 * entry for "." so only one link
			 * will be there
			 */
			err = EEXIST; /* SIGH should be ENOTEMPTY */
		}
		if (err) {
			rw_exit(&ip->i_contents);
			if (mode == VDIR) {
				rw_exit(&ip->i_rwlock);
			}
			goto out;
		}
	} else if (op == DR_REMOVE) {
		/*
		 * unlink(2) requires a different check: allow only
		 * privileged processes to unlink a directory.
		 */
		struct vnode *vp = ITOV(ip);

		if (vp->v_type == VDIR &&
		    secpolicy_fs_linkdir(cr, vp->v_vfsp)) {
			err = EPERM;
			rw_exit(&ip->i_contents);
			/* v_type == VDIR implies mode == VDIR, so i_rwlock is held */
			rw_exit(&ip->i_rwlock);
			goto out;
		}
	}

	rw_exit(&ip->i_contents);

	/*
	 * Remove the cache'd entry, if any.
	 */
	dnlc_remove(ITOV(dp), namep);

	/*
	 * We can collapse all the directory
	 * entries that are deleted into one big entry
	 * but the better way is to
	 * defer it till next directory entry
	 * creation. where we can do this
	 * in a more efficient way
	 */
	fid = slot.ep;

	/*
	 * If this is the last entry
	 * just truncate the file instead
	 * of marking it deleted
	 */
	if ((slot.offset + FID_LEN(fid)) == dp->i_size) {
		fbrelse(slot.fbp, S_OTHER);
		if ((err = ud_itrunc(dp, slot.offset, 0, cr)) != 0) {
			goto out;
		}
	} else {
		fid->fid_flags |= FID_DELETED;

		if ((err = ud_ip_off2bno(dp, slot.offset, &tbno)) != 0) {
			goto out;
		}

		/* re-checksum the modified file identifier descriptor */
		ud_make_tag(dp->i_udf, &fid->fid_tag,
		    UD_FILE_ID_DESC, tbno, FID_LEN(fid));

		err = ud_write_fid(dp, &slot, buf);
	}
	slot.fbp = NULL;

	/*
	 * If we were removing a directory, it is 'gone' now so we can
	 * unlock it.
	 */
	if (mode == VDIR) {
		rw_exit(&ip->i_rwlock);
	}

	mutex_enter(&dp->i_tlock);
	dp->i_flag |= IUPD|ICHG;
	mutex_exit(&dp->i_tlock);
	mutex_enter(&ip->i_tlock);
	ip->i_flag |= ICHG;
	mutex_exit(&ip->i_tlock);

	if (err != 0) {
		goto out;
	}

	rw_enter(&ip->i_contents, RW_WRITER);

	/*
	 * Now dispose of the inode.
	 */
	if (ip->i_nlink > 0) {
		if ((op == DR_RMDIR) && (ip->i_type == VDIR)) {
			/*
			 * Decrement by 1 because there is no "."
			 * Clear the inode, but there may be other hard
			 * links so don't free the inode.
			 * Decrement the dp linkcount because we're
			 * trashing the ".." entry.
			 */
			ip->i_nlink --;
			dp->i_nlink--;
			dnlc_remove(ITOV(ip), ".");
			dnlc_remove(ITOV(ip), "..");
			/*
			 * (void) ud_itrunc(ip, 0, 0, cr);
			 */
		} else {
			ip->i_nlink--;
		}
	}
	ITIMES_NOLOCK(dp);
	ITIMES_NOLOCK(ip);
	rw_exit(&ip->i_contents);

out:
	if (mode == VDIR) {
		vn_vfsunlock(ITOV(ip));
	}
out_novfs:
	ASSERT(RW_WRITE_HELD(&dp->i_contents));

	if (slot.fbp != NULL) {
		fbrelse(slot.fbp, S_OTHER);
	}
	rw_exit(&dp->i_contents);

	if (ip) {
		/*
		 * If no errors, send any events after locks are dropped,
		 * but before the VN_RELE().
		 */
		if (err == 0) {
			if (op == DR_REMOVE) {
				vnevent_remove(ITOV(ip), ITOV(dp), namep, ctp);
			} else if (op == DR_RMDIR) {
				vnevent_rmdir(ITOV(ip), ITOV(dp), namep, ctp);
			}
		}
		VN_RELE(ITOV(ip));
	}

	kmem_free(buf, udf_vfsp->udf_lbsize);
	return (err);
}
/*
 * Refresh the client's incore inode from a server-supplied inode
 * record ('irec'), discarding stale (out-of-order) updates.
 *
 * Caller must hold ip->inode_rwl as writer.  Returns 0 (the local
 * 'error' is never set to anything else in this function).
 */
int
sam_reset_client_ino(
	sam_node_t *ip,		/* Pointer to inode table */
	int cmd,
	sam_ino_record_t *irec)	/* Inode instance info */
{
	offset_t real_size;
	sam_timestruc_t modify_time;
	sam_time_t residence_time;
	int error = 0;
#ifdef linux
	struct inode *li;
#endif /* linux */
#ifdef sun
	int was_rmchardev = 0;
	vnode_t *vp = SAM_ITOV(ip);
#endif

	ASSERT(RW_WRITE_HELD(&ip->inode_rwl));

	/*
	 * Check for out of order sequence inode information. Only reset the
	 * inode if the inode sequence number indicates this is the first time
	 * we are resetting the inode (ip->cl_ino_seqno = 0), the server changed
	 * (failover or remount), we are forcing the reset (ino_seqno = 0), or
	 * this is a newer copy of the inode information.
	 */
	if ((ip->cl_ino_seqno != 0) && (ip->cl_srvr_ord == irec->in.srvr_ord) &&
	    (ip->cl_ino_gen == irec->in.ino_gen) && (irec->in.seqno != 0)) {
		if (SAM_SEQUENCE_LATER(ip->cl_ino_seqno, irec->in.seqno)) {
			/* Stale record: trace both seqnos and ignore it. */
			TRACE(T_SAM_CL_SEQNO, SAM_ITOP(ip), ip->cl_ino_seqno,
			    ip->cl_ino_gen, ip->cl_srvr_ord);
			TRACE(T_SAM_SR_SEQNO, SAM_ITOP(ip), irec->in.seqno,
			    irec->in.ino_gen, irec->in.srvr_ord);
			return (0);
		}
	}
	ip->cl_ino_seqno = irec->in.seqno;
	ip->cl_ino_gen = irec->in.ino_gen;
	ip->cl_srvr_ord = irec->in.srvr_ord;

	/* Snapshot pre-update values for the change detection at the end. */
	modify_time.tv_sec = ip->di.modify_time.tv_sec;
	modify_time.tv_nsec = ip->di.modify_time.tv_nsec;
	residence_time = ip->di.residence_time;
	real_size = ip->di.rm.size;
#ifdef linux
	if (S_ISLNK(ip->di.mode) && (ip->di.ext_attrs & ext_sln)) {
		real_size = ip->di.psize.symlink;
	}
#endif /* linux */
#ifdef sun
	/* Remember if this was a removable-media char device before reset. */
	if (ip->di.rm.ui.flags & RM_CHAR_DEV_FILE && (vp->v_type == VCHR)) {
		was_rmchardev = 1;
	}
#endif
	ASSERT(((ip->di.id.ino == irec->di.id.ino) &&
	    (ip->di.id.gen == irec->di.id.gen)) || (ip->di.mode == 0));
	ip->di = irec->di;	/* Move disk image to incore inode */
	ip->di2 = irec->di2;
	ip->updtime = SAM_SECOND();

	/*
	 * Update server returned attributes - incore inode information.
	 *
	 * Update file size only if we were not the "owner" of the size
	 * at the time it was transmitted by the server. This avoids a
	 * race condition where we could get a stale size. Note, for
	 * the first append, even though we are the owner, trust the size
	 * from the server. In all other cases where we are the owner, need
	 * to restore the on-disk size from the client's size.
	 */
#ifdef sun
	if ((irec->sr_attr.size_owner != ip->mp->ms.m_client_ord) ||
	    (irec->sr_attr.actions & SR_FORCE_SIZE)) {
		ip->size = irec->sr_attr.current_size;
		if (SAM_IS_OBJECT_FILE(ip)) {
			/*
			 * When we move the disk image into the incore inode,
			 * this is the first time the object file bit is set.
			 * This is why the layout was not set in sam_get_ino.
			 * If this is a reset for a NAME_create, we need to
			 * create the object layout. Otherwise the object
			 * layout exists and we just need to set the end
			 * of object (eoo) for each stripe. Note, the end of
			 * object is set if we are creating the object layout.
			 */
			if (ip->olp == NULL) {
				sam_osd_create_obj_layout(ip);
			} else {
				(void) sam_set_end_of_obj(ip,
				    ip->di.rm.size, 1);
			}
		}
	} else {
		ip->di.rm.size = real_size;
	}
	if (was_rmchardev) {
		mutex_enter(&vp->v_lock);
		if (!(ip->di.rm.ui.flags & RM_CHAR_DEV_FILE) &&
		    (vp->v_type == VCHR) && (ip->no_opens == 0) &&
		    (cmd != SAM_CMD_LEASE)) {
			/*
			 * The RM_CHAR_DEV_FILE flag was removed. If it's not
			 * open locally and didn't just get a lease clean it up.
			 * If it's open locally unpredictable things may happen.
			 */
			sam_detach_aiofile(vp);
		}
		mutex_exit(&vp->v_lock);
	}
#endif /* sun */
#ifdef linux
	li = SAM_SITOLI(ip);
	if ((irec->sr_attr.size_owner != ip->mp->ms.m_client_ord) ||
	    (irec->sr_attr.actions & SR_FORCE_SIZE)) {
		ip->size = irec->sr_attr.current_size;
		if (li) {
			/* Offline files report the nominal size, not staged. */
			if (ip->di.status.b.offline) {
				rfs_i_size_write(li, real_size);
			} else {
				rfs_i_size_write(li, ip->size);
			}
		}
	} else {
		ip->di.rm.size = real_size;
	}
	if (li) {
		/*
		 * Update the Linux inode
		 */
		li->i_mode = ip->di.mode;
		li->i_nlink = ip->di.nlink;
		li->i_uid = ip->di.uid;
		li->i_gid = ip->di.gid;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
		li->i_atime.tv_sec = ip->di.access_time.tv_sec;
		li->i_mtime.tv_sec = ip->di.modify_time.tv_sec;
		li->i_ctime.tv_sec = ip->di.change_time.tv_sec;
#else
		li->i_atime = ip->di.access_time.tv_sec;
		li->i_mtime = ip->di.modify_time.tv_sec;
		li->i_ctime = ip->di.change_time.tv_sec;
#endif
		li->i_blocks = (u_longlong_t)ip->di.blocks *
		    (u_longlong_t)(SAM_BLK/DEV_BSIZE);
	}
#endif /* linux */

	/*
	 * Server controls stage size (amount of file online) and allocated
	 * size. Always update these.
	 */
	ip->stage_size = irec->sr_attr.stage_size;
	ip->cl_allocsz = irec->sr_attr.alloc_size;
	if (ip->di.status.b.direct_map) {
		/*
		 * clear since we can't ever extend it
		 */
		ip->cl_alloc_unit = 0;
	}
	if (ip->di.status.b.offline) {
		ip->stage_err = irec->sr_attr.stage_err;
	} else {
		ip->stage_err = 0;
	}

	/*
	 * If file changed since we last updated this inode, and this is not a
	 * lease command and we don't hold any SAM_DATA_MODIFYING_LEASES, then
	 * stale indirect blocks & pages. The server controls this explicitly
	 * for lease commands.
	 */
	if ((cmd != SAM_CMD_LEASE) &&
	    !(ip->cl_leases & SAM_DATA_MODIFYING_LEASES)) {
		if ((ip->di.modify_time.tv_sec != modify_time.tv_sec) ||
		    (ip->di.modify_time.tv_nsec != modify_time.tv_nsec) ||
		    (residence_time != ip->di.residence_time) ||
		    (real_size != ip->di.rm.size)) {
			irec->sr_attr.actions |=
			    (SR_INVAL_PAGES | SR_STALE_INDIRECT);
			irec->sr_attr.offset = 0;
			if (irec->sr_attr.size_owner !=
			    ip->mp->ms.m_client_ord) {
				sam_set_size(ip);
			}
		}
	}
	sam_clear_map_cache(ip);
	return (error);
}
/*
 * Reopen zfs_sb_t and release VFS ops.
 *
 * Counterpart of the teardown path: re-acquires the objset, revalidates
 * the ZPL version and SA setup, re-establishes active znodes, and then
 * drops the teardown locks.  On error the filesystem is force-unmounted.
 *
 * Caller must hold z_teardown_lock (writer) and
 * z_teardown_inactive_lock (writer); both are released here.
 */
int
zfs_resume_fs(zfs_sb_t *zsb, const char *osname)
{
	int err, err2;
	znode_t *zp;
	uint64_t sa_obj = 0;

	ASSERT(RRM_WRITE_HELD(&zsb->z_teardown_lock));
	ASSERT(RW_WRITE_HELD(&zsb->z_teardown_inactive_lock));

	/*
	 * We already own this, so just hold and rele it to update the
	 * objset_t, as the one we had before may have been evicted.
	 */
	VERIFY0(dmu_objset_hold(osname, zsb, &zsb->z_os));
	VERIFY3P(zsb->z_os->os_dsl_dataset->ds_owner, ==, zsb);
	VERIFY(dsl_dataset_long_held(zsb->z_os->os_dsl_dataset));
	dmu_objset_rele(zsb->z_os, zsb);

	/*
	 * Make sure version hasn't changed
	 */
	err = zfs_get_zplprop(zsb->z_os, ZFS_PROP_VERSION,
	    &zsb->z_version);
	if (err)
		goto bail;

	/* Locate the SA attribute registry (only required for ZPL >= SA). */
	err = zap_lookup(zsb->z_os, MASTER_NODE_OBJ,
	    ZFS_SA_ATTRS, 8, 1, &sa_obj);
	if (err && zsb->z_version >= ZPL_VERSION_SA)
		goto bail;

	if ((err = sa_setup(zsb->z_os, sa_obj,
	    zfs_attr_table, ZPL_END, &zsb->z_attr_table)) != 0)
		goto bail;

	if (zsb->z_version >= ZPL_VERSION_SA)
		sa_register_update_callback(zsb->z_os, zfs_sa_upgrade);

	VERIFY(zfs_sb_setup(zsb, B_FALSE) == 0);

	zfs_set_fuid_feature(zsb);
	/* record resume time so atime updates after rollback are ordered */
	zsb->z_rollback_time = jiffies;

	/*
	 * Attempt to re-establish all the active inodes with their
	 * dbufs.  If a zfs_rezget() fails, then we unhash the inode
	 * and mark it stale. This prevents a collision if a new
	 * inode/object is created which must use the same inode
	 * number.  The stale inode will be be released when the
	 * VFS prunes the dentry holding the remaining references
	 * on the stale inode.
	 */
	mutex_enter(&zsb->z_znodes_lock);
	for (zp = list_head(&zsb->z_all_znodes); zp;
	    zp = list_next(&zsb->z_all_znodes, zp)) {
		err2 = zfs_rezget(zp);
		if (err2) {
			remove_inode_hash(ZTOI(zp));
			zp->z_is_stale = B_TRUE;
		}
	}
	mutex_exit(&zsb->z_znodes_lock);

bail:
	/* release the VFS ops */
	rw_exit(&zsb->z_teardown_inactive_lock);
	rrm_exit(&zsb->z_teardown_lock, FTAG);

	if (err) {
		/*
		 * Since we couldn't setup the sa framework, try to force
		 * unmount this file system.
		 */
		if (zsb->z_os)
			(void) zfs_umount(zsb->z_sb);
	}
	return (err);
}
/*
 * Initialize a freshly-created directory inode 'ip' whose parent is
 * 'dp': allocate its first block (unless embedded), bump the parent's
 * link count for the forthcoming ".." reference, and write the
 * mandatory FID_PARENT ("..") file identifier descriptor.
 *
 * Caller must hold ip->i_contents (writer) and dp->i_rwlock (writer);
 * ip->i_contents is dropped and re-acquired around the fbread/fbwrite.
 * Returns 0 or errno.
 *
 * NOTE(review): the EMLINK return below happens after space may already
 * have been allocated for ip — presumably the caller's cleanup frees it;
 * verify against callers.
 */
/* ARGSUSED2 */
int
ud_dirmakedirect(struct ud_inode *ip,
	struct ud_inode *dp, struct cred *cr)
{
	int32_t err;
	uint32_t blkno, size, parent_len, tbno;
	struct fbuf *fbp;
	struct file_id *fid;
	struct icb_ext *iext;

	ud_printf("ud_dirmakedirect\n");

	ASSERT(RW_WRITE_HELD(&ip->i_contents));
	ASSERT(RW_WRITE_HELD(&dp->i_rwlock));

	parent_len = sizeof (struct file_id);

	if ((ip->i_desc_type != ICB_FLAG_ONE_AD) ||
	    (parent_len > ip->i_max_emb)) {
		ASSERT(ip->i_ext);
		/*
		 * Allocate space for the directory we're creating.
		 */
		if ((err = ud_alloc_space(ip->i_vfs, ip->i_icb_prn,
		    0, 1, &blkno, &size, 0, 0)) != 0) {
			return (err);
		}
		/*
		 * init with the size of
		 * directory with just the
		 * parent
		 */
		ip->i_size = sizeof (struct file_id);
		ip->i_flag |= IUPD|ICHG|IATTCHG;
		iext = ip->i_ext;
		iext->ib_prn = ip->i_icb_prn;
		iext->ib_block = blkno;
		iext->ib_count = ip->i_size;
		iext->ib_offset = 0;
		ip->i_ext_used = 1;
	} else {
		/* embedded directory: data lives inside the ICB itself */
		ip->i_size = sizeof (struct file_id);
		ip->i_flag |= IUPD|ICHG|IATTCHG;
	}

	ITIMES_NOLOCK(ip);

	/*
	 * Update the dp link count and write out the change.
	 * This reflects the ".." entry we'll soon write.
	 */
	if (dp->i_nlink == MAXLINK) {
		return (EMLINK);
	}
	dp->i_nlink++;
	dp->i_flag |= ICHG;
	ud_iupdat(dp, 1);

	/*
	 * Initialize directory with ".."
	 * Since the parent directory is locked, we don't have to
	 * worry about anything changing when we drop the write
	 * lock on (ip).
	 */
	rw_exit(&ip->i_contents);
	if ((err = fbread(ITOV(ip), (offset_t)0,
	    ip->i_udf->udf_lbsize, S_WRITE, &fbp)) != 0) {
		rw_enter(&ip->i_contents, RW_WRITER);
		return (err);
	}

	bzero(fbp->fb_addr, ip->i_udf->udf_lbsize);

	fid = (struct file_id *)fbp->fb_addr;
	fid->fid_ver = SWAP_16(1);
	fid->fid_flags = FID_DIR | FID_PARENT;
	fid->fid_icb.lad_ext_len = SWAP_32(dp->i_udf->udf_lbsize);
	fid->fid_icb.lad_ext_loc = SWAP_32(dp->i_icb_block);
	fid->fid_icb.lad_ext_prn = SWAP_16(dp->i_icb_prn);

	/*
	 * fid_idlen, fid_iulen and fid_spec are zero
	 * due to bzero above
	 */

	if ((err = ud_ip_off2bno(ip, 0, &tbno)) == 0) {
		/* stamp the descriptor tag (checksum/CRC) before writing */
		ud_make_tag(ip->i_udf, &fid->fid_tag,
		    UD_FILE_ID_DESC, tbno, FID_LEN(fid));
	}

	err = ud_fbwrite(fbp, ip);
	rw_enter(&ip->i_contents, RW_WRITER);

	return (err);
}
/*
 * Rename: rewrite the existing target entry (described by 'slotp' in
 * directory 'tdp', currently pointing at 'tip') so it refers to the
 * source inode 'sip', then dispose of the displaced target.
 *
 * Caller must hold the filesystem rename lock and tdp->i_rwlock
 * (writer); 'buf' is the caller's scratch block and slotp->ep the
 * in-buffer file identifier descriptor.  Returns 0, errno, or the
 * internal ESAME kludge when source and target are the same inode.
 */
int
ud_dirrename(struct ud_inode *sdp,
	struct ud_inode *sip, struct ud_inode *tdp,
	struct ud_inode *tip, char *namep, uint8_t *buf,
	struct slot *slotp, struct cred *cr)
{
	int32_t error = 0, doingdirectory;
	struct file_id *fid;

	ud_printf("ud_dirrename\n");
	ASSERT(sdp->i_udf != NULL);
	ASSERT(MUTEX_HELD(&sdp->i_udf->udf_rename_lck));
	ASSERT(RW_WRITE_HELD(&tdp->i_rwlock));
	ASSERT(buf);
	ASSERT(slotp->ep);
	fid = slotp->ep;

	/*
	 * Short circuit rename of something to itself.
	 */
	if (sip->i_icb_lbano == tip->i_icb_lbano) {
		return (ESAME);	/* special KLUDGE error code */
	}
	/*
	 * Everything is protected under the vfs_rename_lock so the ordering
	 * of i_contents locks doesn't matter here.
	 */
	rw_enter(&sip->i_contents, RW_READER);
	rw_enter(&tip->i_contents, RW_READER);

	/*
	 * Check that everything is on the same filesystem.
	 */
	if ((ITOV(tip)->v_vfsp != ITOV(tdp)->v_vfsp) ||
	    (ITOV(tip)->v_vfsp != ITOV(sip)->v_vfsp)) {
		error = EXDEV;	/* XXX archaic */
		goto out;
	}

	/*
	 * Must have write permission to rewrite target entry.
	 */
	if ((error = ud_iaccess(tdp, IWRITE, cr)) != 0 ||
	    (error = ud_sticky_remove_access(tdp, tip, cr)) != 0)
		goto out;

	/*
	 * Ensure source and target are compatible (both directories
	 * or both not directories).  If target is a directory it must
	 * be empty and have no links to it; in addition it must not
	 * be a mount point, and both the source and target must be
	 * writable.
	 */
	doingdirectory = (sip->i_type == VDIR);
	if (tip->i_type == VDIR) {
		if (!doingdirectory) {
			error = EISDIR;
			goto out;
		}
		/*
		 * vn_vfswlock will prevent mounts from using the directory
		 * until we are done.
		 */
		if (vn_vfswlock(ITOV(tip))) {
			error = EBUSY;
			goto out;
		}
		if (vn_mountedvfs(ITOV(tip)) != NULL) {
			vn_vfsunlock(ITOV(tip));
			error = EBUSY;
			goto out;
		}
		if (!ud_dirempty(tip, tdp->i_uniqid, cr) ||
		    tip->i_nlink > 2) {
			vn_vfsunlock(ITOV(tip));
			error = EEXIST;	/* SIGH should be ENOTEMPTY */
			goto out;
		}
	} else if (doingdirectory) {
		error = ENOTDIR;
		goto out;
	}

	/*
	 * Rewrite the inode pointer for target name entry
	 * from the target inode (ip) to the source inode (sip).
	 * This prevents the target entry from disappearing
	 * during a crash. Mark the directory inode to reflect the changes.
	 */
	dnlc_remove(ITOV(tdp), namep);
	fid->fid_icb.lad_ext_prn = SWAP_16(sip->i_icb_prn);
	fid->fid_icb.lad_ext_loc = SWAP_32(sip->i_icb_block);
	dnlc_enter(ITOV(tdp), namep, ITOV(sip));

	/* re-stamp the descriptor tag after rewriting the ICB pointer */
	ud_make_tag(tdp->i_udf, &fid->fid_tag, UD_FILE_ID_DESC,
	    SWAP_32(fid->fid_tag.tag_loc), FID_LEN(fid));
	error = ud_write_fid(tdp, slotp, buf);

	if (error) {
		if (doingdirectory) {
			vn_vfsunlock(ITOV(tip));
		}
		goto out;
	}

	/*
	 * Upgrade to write lock on tip
	 */
	rw_exit(&tip->i_contents);
	rw_enter(&tip->i_contents, RW_WRITER);

	mutex_enter(&tdp->i_tlock);
	tdp->i_flag |= IUPD|ICHG;
	mutex_exit(&tdp->i_tlock);
	/*
	 * Decrement the link count of the target inode.
	 * Fix the ".." entry in sip to point to dp.
	 * This is done after the new entry is on the disk.
	 */
	tip->i_nlink--;
	mutex_enter(&tip->i_tlock);
	tip->i_flag |= ICHG;
	mutex_exit(&tip->i_tlock);

	if (doingdirectory) {
		/*
		 * The entry for tip no longer exists so I can unlock the
		 * vfslock.
		 */
		vn_vfsunlock(ITOV(tip));

		/*
		 * Decrement target link count once more if it was a directory.
		 */
		if (tip->i_nlink != 0) {
			/* FIX: message named the wrong function (ud_direnter) */
			cmn_err(CE_WARN,
			"ud_dirrename: target directory link count != 0");
			rw_exit(&tip->i_contents);
			rw_exit(&sip->i_contents);
			return (EINVAL);
		}
		/*
		 * Renaming a directory with the parent different
		 * requires that ".." be rewritten.  The window is
		 * still there for ".." to be inconsistent, but this
		 * is unavoidable, and a lot shorter than when it was
		 * done in a user process.  We decrement the link
		 * count in the new parent as appropriate to reflect
		 * the just-removed target.  If the parent is the
		 * same, this is appropriate since the original
		 * directory is going away.  If the new parent is
		 * different, dirfixdotdot() will bump the link count
		 * back.
		 */
		tdp->i_nlink--;
		mutex_enter(&tdp->i_tlock);
		tdp->i_flag |= ICHG;
		mutex_exit(&tdp->i_tlock);
		ITIMES_NOLOCK(tdp);
		if (sdp != tdp) {
			rw_exit(&tip->i_contents);
			rw_exit(&sip->i_contents);
			error = ud_dirfixdotdot(sip, sdp, tdp);
			return (error);
		}
	}

out:
	rw_exit(&tip->i_contents);
	rw_exit(&sip->i_contents);
	return (error);
}
/*
 * Verify that moving a directory at ICB block 'blkno' under 'target'
 * would not create a cycle: walk from 'target' up through successive
 * ".." (FID_PARENT) entries toward the root, failing with EINVAL if
 * 'blkno' is found on the path (i.e. it is an ancestor of target).
 *
 * Caller must hold the rename lock and target->i_rwlock (writer).
 * Intermediate inodes are acquired with ud_iget() and released as the
 * walk ascends.  Returns 0 when the move is safe, errno otherwise.
 *
 * NOTE(review): several 'break's in the loop (bad tag, deleted or
 * non-parent first entry) exit with err == 0 — presumably treating a
 * corrupt directory as "no cycle found"; confirm this is intended.
 */
int
ud_dircheckpath(int32_t blkno,
	struct ud_inode *target, struct cred *cr)
{
	int32_t err = 0;
	struct vfs *vfsp;
	struct udf_vfs *udf_vfsp;
	struct fbuf *fbp;
	struct file_id *fid;
	struct ud_inode *ip, *tip;
	uint16_t prn;
	uint32_t lbno, dummy, tbno;
	daddr_t parent_icb_loc;

	ud_printf("ud_dircheckpath\n");

	udf_vfsp = target->i_udf;
	ip = target;

	ASSERT(udf_vfsp != NULL);
	ASSERT(MUTEX_HELD(&target->i_udf->udf_rename_lck));
	ASSERT(RW_WRITE_HELD(&ip->i_rwlock));

	if (ip->i_icb_lbano == blkno) {
		err = EINVAL;
		goto out;
	}
	if (ip->i_icb_lbano == udf_vfsp->udf_root_blkno) {
		goto out;
	}

	/*
	 * Search back through the directory tree, using the PARENT entries
	 * Fail any attempt to move a directory into an ancestor directory.
	 */
	for (;;) {
		if ((err = fbread(ITOV(ip), 0,
		    udf_vfsp->udf_lbsize, S_READ, &fbp)) != 0) {
			break;
		}

		if ((err = ud_ip_off2bno(ip, 0, &tbno)) != 0) {
			break;
		}
		fid = (struct file_id *)fbp->fb_addr;

		/* IS this a valid file_identifier */
		if (ud_verify_tag_and_desc(&fid->fid_tag, UD_FILE_ID_DESC,
		    tbno, 1, udf_vfsp->udf_lbsize) != 0) {
			break;
		}
		if ((fid->fid_flags & FID_DELETED) != 0) {
			break;
		}
		if ((fid->fid_flags & FID_PARENT) == 0) {
			/*
			 * This cannot happen unless
			 * something is grossly wrong
			 * First entry has to be parent
			 */
			break;
		}
		prn = SWAP_16(fid->fid_icb.lad_ext_prn);
		lbno = SWAP_32(fid->fid_icb.lad_ext_loc);
		parent_icb_loc =
		    ud_xlate_to_daddr(udf_vfsp, prn, lbno, 1, &dummy);
		ASSERT(dummy == 1);
		if (parent_icb_loc == blkno) {
			err = EINVAL;
			break;
		}
		vfsp = ip->i_vfs;
		udf_vfsp = ip->i_udf;
		if (parent_icb_loc == udf_vfsp->udf_root_blkno) {
			break;
		}
		if (fbp != NULL) {
			fbrelse(fbp, S_OTHER);
			fbp = NULL;
		}
		/* release the current level before ascending (not the target) */
		if (ip != target) {
			rw_exit(&ip->i_rwlock);
			VN_RELE(ITOV(ip));
		}

		/*
		 * Race to get the inode.
		 */
		if (err = ud_iget(vfsp, prn, lbno, &tip, NULL, cr)) {
			ip = NULL;
			break;
		}
		ip = tip;
		rw_enter(&ip->i_rwlock, RW_READER);
	}
	if (fbp) {
		fbrelse(fbp, S_OTHER);
	}

out:
	if (ip) {
		if (ip != target) {
			rw_exit(&ip->i_rwlock);
			VN_RELE(ITOV(ip));
		}
	}
	return (err);
}
/*
 * Enter the file sip in the directory tdp with name namep.
 *
 * Builds a file identifier descriptor for 'sip' (compressing the name
 * per the UDF OSTA scheme) and writes it into the slot described by
 * 'slotp'.  For a directory being renamed (DE_RENAME), the ".." entry
 * of 'sip' is repointed to 'tdp' first.
 *
 * Caller must hold tdp->i_rwlock as writer.  Returns 0 or errno.
 */
int
ud_diraddentry(struct ud_inode *tdp, char *namep,
	enum de_op op, int32_t namelen, struct slot *slotp,
	struct ud_inode *sip, struct ud_inode *sdp, struct cred *cr)
{
	struct udf_vfs *udf_vfsp;
	int32_t error, temp;
	struct file_id *fid;
	uint8_t *buf = NULL;

	ASSERT(RW_WRITE_HELD(&tdp->i_rwlock));

	ud_printf("ud_diraddentry\n");

	udf_vfsp = sip->i_udf;

	/*
	 * Check inode to be linked to see if it is in the
	 * same filesystem.
	 */
	if (ITOV(tdp)->v_vfsp != ITOV(sip)->v_vfsp) {
		error = EXDEV;
		goto bad;
	}

	if ((op == DE_RENAME) && (sip->i_type == VDIR)) {
		if ((error = ud_dirfixdotdot(sip, sdp, tdp)) != 0) {
			goto bad;
		}
	}

	buf = (uint8_t *)kmem_zalloc(udf_vfsp->udf_lbsize, KM_SLEEP);

	/*
	 * Fill in entry data.
	 */
	fid = (struct file_id *)buf;
	fid->fid_ver = SWAP_16(1);
	if (sip->i_type == VDIR) {
		fid->fid_flags = FID_DIR;
	} else {
		fid->fid_flags = 0;
	}
	/* FIX: fid_iulen was redundantly assigned twice; set it once */
	fid->fid_iulen = 0;
	fid->fid_icb.lad_ext_len = SWAP_32(sip->i_udf->udf_lbsize);
	fid->fid_icb.lad_ext_loc = SWAP_32(sip->i_icb_block);
	fid->fid_icb.lad_ext_prn = SWAP_16(sip->i_icb_prn);

	/* compress the name into fid_spec; temp caps the compressed length */
	temp = udf_vfsp->udf_lbsize - F_LEN;
	if ((error = ud_compress(namelen, &temp,
	    (uint8_t *)namep, fid->fid_spec)) == 0) {
		fid->fid_idlen = (uint8_t)temp;
		error = ud_dirprepareentry(tdp, slotp, buf, cr);
	}

	kmem_free(buf, udf_vfsp->udf_lbsize);

bad:
	return (error);
}
/*
 * Rename support: overwrite the existing target entry 'where' so that it
 * refers to 'fromxp', then drop the displaced target 'to'.  If the target
 * is a directory it must be empty, unmounted, and is truncated; a rename
 * across parents additionally rewrites the child's "..".
 *
 * Caller must hold toparent->xn_rwlock as writer.  Returns 0, EXDEV,
 * ESAME (source == target kludge), EBUSY, EEXIST, ENOTDIR, EISDIR, or an
 * access error.
 */
static int
xdirrename(
	struct xmemnode *fromparent,	/* parent directory of source */
	struct xmemnode *fromxp,	/* source xmemnode */
	struct xmemnode *toparent,	/* parent directory of target */
	char *nm,			/* entry we are trying to change */
	struct xmemnode *to,		/* target xmemnode */
	struct xdirent *where,		/* target xmemnode directory entry */
	struct cred *cred)		/* credentials */
{
	int error = 0;
	int doingdirectory;
	timestruc_t now;

#if defined(lint)
	nm = nm;
#endif
	ASSERT(RW_WRITE_HELD(&toparent->xn_rwlock));

	rw_enter(&fromxp->xn_rwlock, RW_READER);
	rw_enter(&to->xn_rwlock, RW_READER);

	/*
	 * Check that everything is on the same filesystem.
	 */
	if (to->xn_vnode->v_vfsp != toparent->xn_vnode->v_vfsp ||
	    to->xn_vnode->v_vfsp != fromxp->xn_vnode->v_vfsp) {
		error = EXDEV;
		goto out;
	}

	/*
	 * Short circuit rename of something to itself.
	 */
	if (fromxp == to) {
		error = ESAME;		/* special KLUDGE error code */
		goto out;
	}

	/*
	 * Must have write permission to rewrite target entry.
	 * NOTE(review): the check is on 'fromparent', not 'toparent' —
	 * confirm this matches the intended permission model.
	 */
	if (error = xmem_xaccess(fromparent, VWRITE, cred))
		goto out;

	/*
	 * If the parent directory is "sticky", then the user must own
	 * either the parent directory or the destination of the rename,
	 * or else must have permission to write the destination.
	 * Otherwise the destination may not be changed (except by the
	 * privileged users). This implements append-only directories.
	 */
	if (error = xmem_sticky_remove_access(toparent, to, cred))
		goto out;

	/*
	 * Ensure source and target are compatible (both directories
	 * or both not directories). If target is a directory it must
	 * be empty and have no links to it; in addition it must not
	 * be a mount point, and both the source and target must be
	 * writable.
	 */
	doingdirectory = (fromxp->xn_type == VDIR);
	if (to->xn_type == VDIR) {
		if (!doingdirectory) {
			error = EISDIR;
			goto out;
		}
		/*
		 * vn_vfswlock will prevent mounts from using the directory
		 * until we are done.
		 */
		if (vn_vfswlock(XNTOV(to))) {
			error = EBUSY;
			goto out;
		}
		if (vn_mountedvfs(XNTOV(to)) != NULL) {
			vn_vfsunlock(XNTOV(to));
			error = EBUSY;
			goto out;
		}
		/* Target dir must be empty: only "." and ".." allowed. */
		mutex_enter(&to->xn_tlock);
		if (to->xn_dirents > 2 || to->xn_nlink > 2) {
			mutex_exit(&to->xn_tlock);
			vn_vfsunlock(XNTOV(to));
			error = EEXIST; /* SIGH should be ENOTEMPTY */
			/*
			 * Update atime because checking xn_dirents is
			 * logically equivalent to reading the directory
			 */
			gethrestime(&to->xn_atime);
			goto out;
		}
		mutex_exit(&to->xn_tlock);
	} else if (doingdirectory) {
		error = ENOTDIR;
		goto out;
	}

	/* Point the existing directory slot at the source node. */
	where->xd_xmemnode = fromxp;
	gethrestime(&now);
	toparent->xn_mtime = now;
	toparent->xn_ctime = now;

	/*
	 * Upgrade to write lock on "to" (i.e., the target xmemnode).
	 */
	rw_exit(&to->xn_rwlock);
	rw_enter(&to->xn_rwlock, RW_WRITER);

	/*
	 * Decrement the link count of the target xmemnode.
	 */
	DECR_COUNT(&to->xn_nlink, &to->xn_tlock);
	to->xn_ctime = now;

	if (doingdirectory) {
		/*
		 * The entry for "to" no longer exists so release the vfslock.
		 */
		vn_vfsunlock(XNTOV(to));

		/*
		 * Decrement the target link count and delete all entires.
		 */
		xdirtrunc(to);
		ASSERT(to->xn_nlink == 0);

		/*
		 * Renaming a directory with the parent different
		 * requires that ".." be rewritten. The window is
		 * still there for ".." to be inconsistent, but this
		 * is unavoidable, and a lot shorter than when it was
		 * done in a user process.
		 */
		if (fromparent != toparent)
			xdirfixdotdot(fromxp, fromparent, toparent);
	}
out:
	rw_exit(&to->xn_rwlock);
	rw_exit(&fromxp->xn_rwlock);
	return (error);
}
/*
 * Delete entry xp of name "nm" from dir.
 * Free dir entry space and decrement link count on xmemnode(s).
 *
 * Rejects "." (EINVAL) and ".." (EEXIST, matching ufs), revalidates that
 * the entry still names 'xp' (a concurrent rename may have replaced it),
 * then unhashes and unlinks the xdirent, frees it, updates directory
 * size/times, and drops xp's link count (truncating it for DR_RMDIR).
 *
 * Caller must hold both dir->xn_rwlock and xp->xn_rwlock as writer.
 * Return 0 on success.
 */
int
xdirdelete(
	struct xmemnode *dir,
	struct xmemnode *xp,
	char *nm,
	enum dr_op op,
	struct cred *cred)
{
	register struct xdirent *tpdp;
	int error;
	size_t namelen;
	struct xmemnode *xptmp;
	timestruc_t now;

	ASSERT(RW_WRITE_HELD(&dir->xn_rwlock));
	ASSERT(RW_WRITE_HELD(&xp->xn_rwlock));
	ASSERT(dir->xn_type == VDIR);

	ASSERT(nm[0] != '\0');

	/*
	 * return error when removing . and ..
	 */
	if (nm[0] == '.') {
		if (nm[1] == '\0')
			return (EINVAL);
		if (nm[1] == '.' && nm[2] == '\0')
			return (EEXIST); /* thus in ufs */
	}

	if (error = xmem_xaccess(dir, VEXEC|VWRITE, cred))
		return (error);

	/*
	 * If the parent directory is "sticky", then the user must
	 * own the parent directory or the file in it, or else must
	 * have permission to write the file. Otherwise it may not
	 * be deleted (except by privileged users). Same as ufs_dirremove.
	 */
	if (error = xmem_sticky_remove_access(dir, xp, cred))
		return (error);

	if (dir->xn_dir == NULL)
		return (ENOENT);

	tpdp = xmemfs_hash_lookup(nm, dir, 0, &xptmp);
	if (tpdp == NULL) {
		/*
		 * If it is gone, some other thread got here first!
		 * Return error ENOENT.
		 */
		return (ENOENT);
	}

	/*
	 * If the xmemnode in the xdirent changed, we were probably
	 * the victim of a concurrent rename operation. The original
	 * is gone, so return that status (same as UFS).
	 */
	if (xp != xptmp)
		return (ENOENT);

	xmemfs_hash_out(tpdp);

	/*
	 * Take tpdp out of the directory list.
	 */
	ASSERT(tpdp->xd_next != tpdp);
	ASSERT(tpdp->xd_prev != tpdp);
	if (tpdp->xd_prev) {
		tpdp->xd_prev->xd_next = tpdp->xd_next;
	}
	if (tpdp->xd_next) {
		tpdp->xd_next->xd_prev = tpdp->xd_prev;
	}

	/*
	 * If the roving slot pointer happens to match tpdp,
	 * point it at the previous dirent.
	 */
	if (dir->xn_dir->xd_prev == tpdp) {
		dir->xn_dir->xd_prev = tpdp->xd_prev;
	}
	ASSERT(tpdp->xd_next != tpdp);
	ASSERT(tpdp->xd_prev != tpdp);

	/*
	 * tpdp points to the correct directory entry
	 */
	namelen = strlen(tpdp->xd_name) + 1;

	xmem_memfree(tpdp, sizeof (struct xdirent) + namelen);
	dir->xn_size -= (sizeof (struct xdirent) + namelen);
	dir->xn_dirents--;

	gethrestime(&now);
	dir->xn_mtime = now;
	dir->xn_ctime = now;
	xp->xn_ctime = now;

	ASSERT(xp->xn_nlink > 0);
	DECR_COUNT(&xp->xn_nlink, &xp->xn_tlock);
	if (op == DR_RMDIR && xp->xn_type == VDIR) {
		/* Remove "." / ".." and drop remaining link counts. */
		xdirtrunc(xp);
		ASSERT(xp->xn_nlink == 0);
	}
	return (0);
}
/*
 * Enter a directory entry for 'name' and 'xp' into directory 'dir'
 *
 * For DE_LINK/DE_RENAME the source link count is bumped up front (and
 * undone at 'out' on failure).  If the name already exists the behavior
 * depends on 'op': create/mkdir return EEXIST (optionally handing back
 * the existing node via *xpp), rename dispatches to xdirrename(), link
 * fails with EEXIST.  Otherwise a new node is made (create/mkdir) and
 * the entry is added; a failed add unwinds the freshly made node.
 *
 * Caller must hold dir->xn_rwlock as writer.
 * Returns 0 on success.
 */
int
xdirenter(
	struct xmount *xm,
	struct xmemnode *dir,		/* target directory to make entry in */
	char *name,			/* name of entry */
	enum de_op op,			/* entry operation */
	struct xmemnode *fromparent,	/* source directory if rename */
	struct xmemnode *xp,		/* source xmemnode, if link/rename */
	struct vattr *va,
	struct xmemnode **xpp,		/* return xmemnode, if create/mkdir */
	struct cred *cred)
{
	struct xdirent *xdp;
	struct xmemnode *found = NULL;
	int error = 0;
	char *s;

	/*
	 * xn_rwlock is held to serialize direnter and dirdeletes
	 */
	ASSERT(RW_WRITE_HELD(&dir->xn_rwlock));
	ASSERT(dir->xn_type == VDIR);

	/*
	 * Don't allow '/' characters in pathname component
	 * (thus in ufs_direnter()).
	 */
	for (s = name; *s; s++)
		if (*s == '/')
			return (EACCES);

	ASSERT(name[0] != '\0');

	/*
	 * For link and rename lock the source entry and check the link count
	 * to see if it has been removed while it was unlocked.
	 */
	if (op == DE_LINK || op == DE_RENAME) {
		mutex_enter(&xp->xn_tlock);
		if (xp->xn_nlink == 0) {
			mutex_exit(&xp->xn_tlock);
			return (ENOENT);
		}
		if (xp->xn_nlink == MAXLINK) {
			mutex_exit(&xp->xn_tlock);
			return (EMLINK);
		}
		xp->xn_nlink++;
		mutex_exit(&xp->xn_tlock);
		gethrestime(&xp->xn_ctime);
	}

	/*
	 * This might be a "dangling detached directory".
	 * it could have been removed, but a reference
	 * to it kept in u_cwd. don't bother searching
	 * it, and with any luck the user will get tired
	 * of dealing with us and cd to some absolute
	 * pathway. *sigh*, thus in ufs, too.
	 */
	if (dir->xn_nlink == 0) {
		error = ENOENT;
		goto out;
	}

	/*
	 * If this is a rename of a directory and the parent is
	 * different (".." must be changed), then the source
	 * directory must not be in the directory hierarchy
	 * above the target, as this would orphan everything
	 * below the source directory.
	 */
	if (op == DE_RENAME) {
		if (xp == dir) {
			error = EINVAL;
			goto out;
		}
		if (xp->xn_type == VDIR) {
			if ((fromparent != dir) &&
			    (error = xdircheckpath(xp, dir, cred))) {
				goto out;
			}
		}
	}

	/*
	 * Search for the entry. Return "found" if it exists.
	 */
	xdp = xmemfs_hash_lookup(name, dir, 1, &found);

	if (xdp) {
		ASSERT(found);
		switch (op) {
		case DE_CREATE:
		case DE_MKDIR:
			if (xpp) {
				*xpp = found;
				error = EEXIST;
			} else {
				xmemnode_rele(found);
			}
			break;

		case DE_RENAME:
			error = xdirrename(fromparent, xp,
			    dir, name, found, xdp, cred);
			xmemnode_rele(found);
			break;

		case DE_LINK:
			/*
			 * Can't link to an existing file.
			 */
			error = EEXIST;
			xmemnode_rele(found);
			break;
		}
	} else {

		/*
		 * The entry does not exist. Check write permission in
		 * directory to see if entry can be created.
		 */
		if (error = xmem_xaccess(dir, VWRITE, cred))
			goto out;
		if (op == DE_CREATE || op == DE_MKDIR) {
			/*
			 * Make new xmemnode and directory entry as required.
			 */
			error = xdirmakexnode(dir, xm, va, op, &xp, cred);
			if (error)
				goto out;
		}
		if (error = xdiraddentry(dir, xp, name, op, fromparent)) {
			if (op == DE_CREATE || op == DE_MKDIR) {
				/*
				 * Unmake the inode we just made.
				 */
				rw_enter(&xp->xn_rwlock, RW_WRITER);
				if ((xp->xn_type) == VDIR) {
					ASSERT(xdp == NULL);
					/*
					 * cleanup allocs made by xdirinit()
					 */
					xdirtrunc(xp);
				}
				mutex_enter(&xp->xn_tlock);
				xp->xn_nlink = 0;
				mutex_exit(&xp->xn_tlock);
				gethrestime(&xp->xn_ctime);
				rw_exit(&xp->xn_rwlock);
				xmemnode_rele(xp);
				xp = NULL;
			}
		} else if (xpp) {
			*xpp = xp;
		} else if (op == DE_CREATE || op == DE_MKDIR) {
			xmemnode_rele(xp);
		}
	}

out:
	if (error && (op == DE_LINK || op == DE_RENAME)) {
		/*
		 * Undo bumped link count.
		 */
		DECR_COUNT(&xp->xn_nlink, &xp->xn_tlock);
		gethrestime(&xp->xn_ctime);
	}
	return (error);
}
/*
 * VOP_LOOKUP for autofs.  Resolves 'nm' in directory 'dvp': handles "",
 * "." and ".." directly; redirects into any filesystem already mounted
 * over dvp; otherwise searches the fnnode tree and, when required,
 * triggers a daemon LOOKUP (name resolution via automountd) or MOUNT
 * (direct-map / indirect-map-entry trigger) and waits for it.
 *
 * Retries from 'top' (EAGAIN paths) when a concurrent operation on the
 * node invalidates the state.  On success *vpp holds a referenced vnode.
 * Returns 0 or an errno.
 */
static int
auto_lookup(
	vnode_t *dvp,
	char *nm,
	vnode_t **vpp,
	pathname_t *pnp,
	int flags,
	vnode_t *rdir,
	cred_t *cred,
	caller_context_t *ct,
	int *direntflags,
	pathname_t *realpnp)
{
	int error = 0;
	vnode_t *newvp = NULL;
	vfs_t *vfsp;
	fninfo_t *dfnip;
	fnnode_t *dfnp = NULL;
	fnnode_t *fnp = NULL;
	char *searchnm;
	int operation;		/* either AUTOFS_LOOKUP or AUTOFS_MOUNT */

	dfnip = vfstofni(dvp->v_vfsp);
	AUTOFS_DPRINT((3, "auto_lookup: dvp=%p (%s) name=%s\n",
	    (void *)dvp, dfnip->fi_map, nm));

	/* Empty name resolves to the directory itself. */
	if (nm[0] == 0) {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}

	if (error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct))
		return (error);

	if (nm[0] == '.' && nm[1] == 0) {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}

	if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) {
		fnnode_t *pdfnp;

		pdfnp = (vntofn(dvp))->fn_parent;
		ASSERT(pdfnp != NULL);

		/*
		 * Since it is legitimate to have the VROOT flag set for the
		 * subdirectories of the indirect map in autofs filesystem,
		 * rootfnnodep is checked against fnnode of dvp instead of
		 * just checking whether VROOT flag is set in dvp
		 */
		if (pdfnp == pdfnp->fn_globals->fng_rootfnnodep) {
			vnode_t *vp;

			/* ".." of the autofs root: go via the covered vnode. */
			vfs_rlock_wait(dvp->v_vfsp);
			if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED) {
				vfs_unlock(dvp->v_vfsp);
				return (EIO);
			}
			vp = dvp->v_vfsp->vfs_vnodecovered;
			VN_HOLD(vp);
			vfs_unlock(dvp->v_vfsp);
			error = VOP_LOOKUP(vp, nm, vpp, pnp, flags, rdir,
			    cred, ct, direntflags, realpnp);
			VN_RELE(vp);
			return (error);
		} else {
			*vpp = fntovn(pdfnp);
			VN_HOLD(*vpp);
			return (0);
		}
	}

top:
	dfnp = vntofn(dvp);
	searchnm = nm;
	operation = 0;

	ASSERT(vn_matchops(dvp, auto_vnodeops));

	AUTOFS_DPRINT((3, "auto_lookup: dvp=%p dfnp=%p\n", (void *)dvp,
	    (void *)dfnp));

	/*
	 * If a lookup or mount of this node is in progress, wait for it
	 * to finish, and return whatever result it got.
	 */
	mutex_enter(&dfnp->fn_lock);
	if (dfnp->fn_flags & (MF_LOOKUP | MF_INPROG)) {
		mutex_exit(&dfnp->fn_lock);
		error = auto_wait4mount(dfnp);
		if (error == AUTOFS_SHUTDOWN)
			error = ENOENT;
		if (error == EAGAIN)
			goto top;
		if (error)
			return (error);
	} else
		mutex_exit(&dfnp->fn_lock);

	/* A filesystem mounted over dvp takes over the lookup. */
	error = vn_vfsrlock_wait(dvp);
	if (error)
		return (error);
	vfsp = vn_mountedvfs(dvp);
	if (vfsp != NULL) {
		error = VFS_ROOT(vfsp, &newvp);
		vn_vfsunlock(dvp);
		if (!error) {
			error = VOP_LOOKUP(newvp, nm, vpp, pnp, flags,
			    rdir, cred, ct, direntflags, realpnp);
			VN_RELE(newvp);
		}
		return (error);
	}
	vn_vfsunlock(dvp);

	rw_enter(&dfnp->fn_rwlock, RW_READER);
	error = auto_search(dfnp, nm, &fnp, cred);
	if (error) {
		if (dfnip->fi_flags & MF_DIRECT) {
			/*
			 * direct map.
			 */
			if (dfnp->fn_dirents) {
				/*
				 * Mount previously triggered.
				 * 'nm' not found
				 */
				error = ENOENT;
			} else {
				/*
				 * I need to contact the daemon to trigger
				 * the mount. 'dfnp' will be the mountpoint.
				 */
				operation = AUTOFS_MOUNT;
				VN_HOLD(fntovn(dfnp));
				fnp = dfnp;
				error = 0;
			}
		} else if (dvp == dfnip->fi_rootvp) {
			/*
			 * 'dfnp' is the root of the indirect AUTOFS.
			 */
			if (rw_tryupgrade(&dfnp->fn_rwlock) == 0) {
				/*
				 * Could not acquire writer lock, release
				 * reader, and wait until available. We
				 * need to search for 'nm' again, since we
				 * had to release the lock before reacquiring
				 * it.
				 */
				rw_exit(&dfnp->fn_rwlock);
				rw_enter(&dfnp->fn_rwlock, RW_WRITER);
				error = auto_search(dfnp, nm, &fnp, cred);
			}

			ASSERT(RW_WRITE_HELD(&dfnp->fn_rwlock));
			if (error) {
				/*
				 * create node being looked-up and request
				 * mount on it.
				 */
				error = auto_enter(dfnp, nm, &fnp, kcred);
				if (!error)
					operation = AUTOFS_LOOKUP;
			}
		} else if ((dfnp->fn_dirents == NULL) &&
		    ((dvp->v_flag & VROOT) == 0) &&
		    ((fntovn(dfnp->fn_parent))->v_flag & VROOT)) {
			/*
			 * dfnp is the actual 'mountpoint' of indirect map,
			 * it is the equivalent of a direct mount,
			 * ie, /home/'user1'
			 */
			operation = AUTOFS_MOUNT;
			VN_HOLD(fntovn(dfnp));
			fnp = dfnp;
			error = 0;
			searchnm = dfnp->fn_name;
		}
	}

	if (error == EAGAIN) {
		rw_exit(&dfnp->fn_rwlock);
		goto top;
	}
	if (error) {
		rw_exit(&dfnp->fn_rwlock);
		return (error);
	}

	/*
	 * We now have the actual fnnode we're interested in.
	 * The 'MF_LOOKUP' indicates another thread is currently
	 * performing a daemon lookup of this node, therefore we
	 * wait for its completion.
	 * The 'MF_INPROG' indicates another thread is currently
	 * performing a daemon mount of this node, we wait for it
	 * to be done if we are performing a MOUNT. We don't
	 * wait for it if we are performing a LOOKUP.
	 * We can release the reader/writer lock as soon as we acquire
	 * the mutex, since the state of the lock can only change by
	 * first acquiring the mutex.
	 */
	mutex_enter(&fnp->fn_lock);
	rw_exit(&dfnp->fn_rwlock);
	if ((fnp->fn_flags & MF_LOOKUP) ||
	    ((operation == AUTOFS_MOUNT) && (fnp->fn_flags & MF_INPROG))) {
		mutex_exit(&fnp->fn_lock);
		error = auto_wait4mount(fnp);
		VN_RELE(fntovn(fnp));
		if (error == AUTOFS_SHUTDOWN)
			error = ENOENT;
		if (error && error != EAGAIN)
			return (error);
		goto top;
	}

	if (operation == 0) {
		/*
		 * got the fnnode, check for any errors
		 * on the previous operation on that node.
		 */
		error = fnp->fn_error;
		if ((error == EINTR) || (error == EAGAIN)) {
			/*
			 * previous operation on this node was
			 * not completed, do a lookup now.
			 */
			operation = AUTOFS_LOOKUP;
		} else {
			/*
			 * previous operation completed. Return
			 * a pointer to the node only if there was
			 * no error.
			 */
			mutex_exit(&fnp->fn_lock);
			if (!error)
				*vpp = fntovn(fnp);
			else
				VN_RELE(fntovn(fnp));
			return (error);
		}
	}

	/*
	 * Since I got to this point, it means I'm the one
	 * responsible for triggering the mount/look-up of this node.
	 */
	switch (operation) {
	case AUTOFS_LOOKUP:
		AUTOFS_BLOCK_OTHERS(fnp, MF_LOOKUP);
		fnp->fn_error = 0;
		mutex_exit(&fnp->fn_lock);
		error = auto_lookup_aux(fnp, searchnm, cred);
		if (!error) {
			/*
			 * Return this vnode
			 */
			*vpp = fntovn(fnp);
		} else {
			/*
			 * release our reference to this vnode
			 * and return error
			 */
			VN_RELE(fntovn(fnp));
		}
		break;
	case AUTOFS_MOUNT:
		AUTOFS_BLOCK_OTHERS(fnp, MF_INPROG);
		fnp->fn_error = 0;
		mutex_exit(&fnp->fn_lock);
		/*
		 * auto_new_mount_thread fires up a new thread which
		 * calls automountd finishing up the work
		 */
		auto_new_mount_thread(fnp, searchnm, cred);

		/*
		 * At this point, we are simply another thread
		 * waiting for the mount to complete
		 */
		error = auto_wait4mount(fnp);
		if (error == AUTOFS_SHUTDOWN)
			error = ENOENT;

		/*
		 * now release our reference to this vnode
		 */
		VN_RELE(fntovn(fnp));
		if (!error)
			goto top;
		break;
	default:
		auto_log(dfnp->fn_globals->fng_verbose,
		    dfnp->fn_globals->fng_zoneid, CE_WARN,
		    "auto_lookup: unknown operation %d", operation);
	}

	AUTOFS_DPRINT((5, "auto_lookup: name=%s *vpp=%p return=%d\n",
	    nm, (void *)*vpp, error));

	return (error);
}
/*
 * Write the prepared File Identifier Descriptor in 'buf' into directory
 * 'dp' at the slot described by 'slotp'.  Three placement cases:
 *
 * 1. When we find a slot that belonged to a file which was deleted
 * and is in the middle of the directory
 * 2. There is not empty slot available. The new entry
 * will be at the end of the directory and fits in the same block.
 * 3. There is no empty slot available. The new
 * entry will not fit the left over directory
 * so we need to allocate a new block. If
 * we cannot allocate a proximity block we need
 * to allocate a new icb, and data block.
 *
 * Caller must hold dp->i_rwlock as writer; slotp->status must be NONE
 * (append at end, possibly growing the directory) or FOUND (reuse).
 * Returns 0 or an errno.
 */
int
ud_dirprepareentry(struct ud_inode *dp,
	struct slot *slotp, uint8_t *buf, struct cred *cr)
{
	struct fbuf *fbp;
	uint16_t old_dtype;
	int32_t error = 0;
	uint32_t entrysize, count, offset, tbno, old_size, off;
	struct file_id *fid;
	int32_t lbsize, lbmask, mask;

	ASSERT(RW_WRITE_HELD(&dp->i_rwlock));

	ASSERT((slotp->status == NONE) || (slotp->status == FOUND));

	ud_printf("ud_dirprepareentry\n");
	lbsize = dp->i_udf->udf_lbsize;
	lbmask = dp->i_udf->udf_lbmask;
	mask = ~lbmask;

	fid = (struct file_id *)buf;
	entrysize = FID_LEN(fid);

	/*
	 * If we didn't find a slot, then indicate that the
	 * new slot belongs at the end of the directory.
	 * If we found a slot, then the new entry can be
	 * put at slotp->offset.
	 */
	if (slotp->status == NONE) {
		/*
		 * We did not find a slot, the next
		 * entry will be in the end of the directory
		 * see if we can fit the new entry inside
		 * the old block. If not allocate a new block.
		 */
		if (entrysize > slotp->size) {
			/*
			 * extend the directory
			 * size by one new block
			 */
			old_dtype = dp->i_desc_type;
			old_size = (uint32_t)dp->i_size;
			error = ud_bmap_write(dp, slotp->offset,
			    blkoff(dp->i_udf, slotp->offset) + entrysize,
			    0, cr);
			if (error != 0) {
				return (error);
			}
			if (old_dtype != dp->i_desc_type) {
				/*
				 * oops we changed the astrat
				 * of the file, we have to
				 * recaliculate tags
				 * fortunately we donot have more
				 * than one lbsize to handle here
				 */
				if ((error = ud_ip_off2bno(dp,
				    0, &tbno)) != 0) {
					return (error);
				}
				if ((error = fbread(ITOV(dp), 0,
				    dp->i_udf->udf_lbsize,
				    S_WRITE, &fbp)) != 0) {
					return (error);
				}
				/* Re-stamp the tag of every existing FID. */
				off = 0;
				while (off < old_size) {
					struct file_id *tfid;

					tfid = (struct file_id *)
					    (fbp->fb_addr + off);
					ud_make_tag(dp->i_udf, &tfid->fid_tag,
					    UD_FILE_ID_DESC, tbno,
					    FID_LEN(tfid));
					off += FID_LEN(tfid);
				}
				if (error = ud_fbwrite(fbp, dp)) {
					return (error);
				}
			}
		} else {
			/* Extend the directory size */
			if (dp->i_desc_type != ICB_FLAG_ONE_AD) {
				ASSERT(dp->i_ext);
				dp->i_ext[dp->i_ext_used - 1].ib_count +=
				    entrysize;
			}
		}
		dp->i_size += entrysize;
		dp->i_flag |= IUPD|ICHG|IATTCHG;
		ITIMES_NOLOCK(dp);
	} else if (slotp->status != FOUND) {
		/* Defensive: unreachable given the ASSERT above. */
		cmn_err(CE_WARN, "status is not NONE/FOUND");
		return (EINVAL);
	}

	if ((error = ud_ip_off2bno(dp, slotp->offset, &tbno)) != 0) {
		return (error);
	}
	ud_make_tag(dp->i_udf, &fid->fid_tag, UD_FILE_ID_DESC,
	    tbno, FID_LEN(fid));

	/*
	 * fbread cannot cross a
	 * MAXBSIZE boundary so handle it here
	 */
	offset = slotp->offset;
	if ((error = fbread(ITOV(dp), offset & mask, lbsize,
	    S_WRITE, &fbp)) != 0) {
		return (error);
	}
	/*
	 * If the entry straddles a block boundary, write only the first
	 * part now ('count' bytes) and the remainder in a second fbread
	 * below.
	 */
	if ((offset & mask) != ((offset + entrysize) & mask)) {
		count = entrysize - ((offset + entrysize) & lbmask);
	} else {
		count = entrysize;
	}
	bcopy((caddr_t)buf, fbp->fb_addr + (offset & lbmask), count);
	if (error = ud_fbwrite(fbp, dp)) {
		return (error);
	}
	if (entrysize > count) {
		if ((error = fbread(ITOV(dp), (offset + entrysize) & mask,
		    lbsize, S_WRITE, &fbp)) != 0) {
			return (error);
		}
		bcopy((caddr_t)(buf + count), fbp->fb_addr,
		    entrysize - count);
		if (error = ud_fbwrite(fbp, dp)) {
			return (error);
		}
	}

	dp->i_flag |= IUPD|ICHG|IATTCHG;
	ITIMES_NOLOCK(dp);

	return (error);
}
/*
 * Incrementally double a fat-zap sidecar table (e.g. the pointer table).
 * Each call copies ONE old block into two new blocks via transfer_func
 * (each half of the old block's entries lands in its own new block);
 * successive calls advance zt_blks_copied until all blocks are copied,
 * at which point the old range is freed and the table header is switched
 * over (zt_blk/zt_numblks/zt_shift updated, copy state reset).
 *
 * Caller must hold zap->zap_rwlock as writer and supply an assigned tx.
 * Returns 0 or an errno from dmu_buf_hold.
 */
static int
zap_table_grow(zap_t *zap, zap_table_phys_t *tbl,
    void (*transfer_func)(const uint64_t *src, uint64_t *dst, int n),
    dmu_tx_t *tx)
{
	uint64_t b, newblk;
	dmu_buf_t *db_old, *db_new;
	int err;
	int bs = FZAP_BLOCK_SHIFT(zap);
	int hepb = 1<<(bs-4);
	/* hepb = half the number of entries in a block */

	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
	ASSERT(tbl->zt_blk != 0);
	ASSERT(tbl->zt_numblks > 0);

	if (tbl->zt_nextblk != 0) {
		/* A grow is already in progress; keep copying into it. */
		newblk = tbl->zt_nextblk;
	} else {
		/* First call: allocate the doubled range, prefetch source. */
		newblk = zap_allocate_blocks(zap, tbl->zt_numblks * 2);
		tbl->zt_nextblk = newblk;
		ASSERT0(tbl->zt_blks_copied);
		dmu_prefetch(zap->zap_objset, zap->zap_object, 0,
		    tbl->zt_blk << bs, tbl->zt_numblks << bs,
		    ZIO_PRIORITY_SYNC_READ);
	}

	/*
	 * Copy the ptrtbl from the old to new location.
	 */

	b = tbl->zt_blks_copied;
	err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
	    (tbl->zt_blk + b) << bs, FTAG, &db_old, DMU_READ_NO_PREFETCH);
	if (err)
		return (err);

	/* first half of entries in old[b] go to new[2*b+0] */
	VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
	    (newblk + 2*b+0) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH));
	dmu_buf_will_dirty(db_new, tx);
	transfer_func(db_old->db_data, db_new->db_data, hepb);
	dmu_buf_rele(db_new, FTAG);

	/* second half of entries in old[b] go to new[2*b+1] */
	VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
	    (newblk + 2*b+1) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH));
	dmu_buf_will_dirty(db_new, tx);
	transfer_func((uint64_t *)db_old->db_data + hepb,
	    db_new->db_data, hepb);
	dmu_buf_rele(db_new, FTAG);

	dmu_buf_rele(db_old, FTAG);

	tbl->zt_blks_copied++;
	dprintf("copied block %llu of %llu\n",
	    tbl->zt_blks_copied, tbl->zt_numblks);

	if (tbl->zt_blks_copied == tbl->zt_numblks) {
		/* Copy complete: free old range and flip the table header. */
		(void) dmu_free_range(zap->zap_objset, zap->zap_object,
		    tbl->zt_blk << bs, tbl->zt_numblks << bs, tx);

		tbl->zt_blk = newblk;
		tbl->zt_numblks *= 2;
		tbl->zt_shift++;
		tbl->zt_nextblk = 0;
		tbl->zt_blks_copied = 0;

		dprintf("finished; numblocks now %llu (%uk entries)\n",
		    tbl->zt_numblks, 1<<(tbl->zt_shift-10));
	}

	return (0);
}
/*
 * Fix the FID_PARENT entry of the child directory so that it points
 * to the new parent directory instead of the old one. Routine
 * assumes that dp is a directory and that all the inodes are on
 * the same file system.
 *
 * Bumps the new parent's link count and writes it out, rewrites the
 * child's on-disk ".." FID (updating the dnlc to match), then drops the
 * old parent's link count.  If the ".." already references npdp the
 * routine is a no-op returning 0.
 *
 * Caller must hold npdp->i_rwlock as writer.  Returns 0 or an errno
 * (ENOTDIR for a malformed first entry, EMLINK at MAXLINK).
 */
int
ud_dirfixdotdot(struct ud_inode *dp,
	struct ud_inode *opdp, struct ud_inode *npdp)
{
	int32_t err = 0;
	struct fbuf *fbp;
	struct file_id *fid;
	uint32_t loc, dummy, tbno;

	ud_printf("ud_dirfixdotdot\n");

	ASSERT(opdp->i_type == VDIR);
	ASSERT(npdp->i_type == VDIR);
	ASSERT(RW_WRITE_HELD(&npdp->i_rwlock));

	err = fbread(ITOV(dp), (offset_t)0,
	    dp->i_udf->udf_lbsize, S_WRITE, &fbp);

	if (err || dp->i_nlink == 0 ||
	    dp->i_size < sizeof (struct file_id)) {
		goto bad;
	}

	if ((err = ud_ip_off2bno(dp, 0, &tbno)) != 0) {
		goto bad;
	}

	/* First entry of a directory must be the FID_PARENT (".."). */
	fid = (struct file_id *)fbp->fb_addr;
	if ((ud_verify_tag_and_desc(&fid->fid_tag, UD_FILE_ID_DESC,
	    tbno, 1, dp->i_udf->udf_lbsize) != 0) ||
	    ((fid->fid_flags & (FID_DIR | FID_PARENT)) !=
	    (FID_DIR | FID_PARENT))) {
		err = ENOTDIR;
		goto bad;
	}

	loc = ud_xlate_to_daddr(dp->i_udf,
	    SWAP_16(fid->fid_icb.lad_ext_prn),
	    SWAP_32(fid->fid_icb.lad_ext_loc), 1, &dummy);
	ASSERT(dummy == 1);
	if (loc == npdp->i_icb_lbano) {
		/* ".." already points at the new parent: nothing to do
		 * (err is 0 here, so this returns success). */
		goto bad;
	}

	/*
	 * Increment the link count in the new parent inode and force it out.
	 */
	if (npdp->i_nlink == MAXLINK) {
		err = EMLINK;
		goto bad;
	}
	npdp->i_nlink++;
	mutex_enter(&npdp->i_tlock);
	npdp->i_flag |= ICHG;
	mutex_exit(&npdp->i_tlock);
	ud_iupdat(npdp, 1);

	/*
	 * Rewrite the child FID_PARENT entry and force it out.
	 */
	dnlc_remove(ITOV(dp), "..");
	fid->fid_icb.lad_ext_loc = SWAP_32(npdp->i_icb_block);
	fid->fid_icb.lad_ext_prn = SWAP_16(npdp->i_icb_prn);
	ud_make_tag(npdp->i_udf, &fid->fid_tag,
	    UD_FILE_ID_DESC, tbno, FID_LEN(fid));
	dnlc_enter(ITOV(dp), "..", ITOV(npdp));

	err = ud_fbwrite(fbp, dp);
	fbp = NULL;
	if (err != 0) {
		goto bad;
	}

	/*
	 * Decrement the link count of the old parent inode and force
	 * it out. If opdp is NULL, then this is a new directory link;
	 * it has no parent, so we need not do anything.
	 */
	if (opdp != NULL) {
		rw_enter(&opdp->i_contents, RW_WRITER);
		if (opdp->i_nlink != 0) {
			opdp->i_nlink--;
			mutex_enter(&opdp->i_tlock);
			opdp->i_flag |= ICHG;
			mutex_exit(&opdp->i_tlock);
			ud_iupdat(opdp, 1);
		}
		rw_exit(&opdp->i_contents);
	}
	return (0);
bad:
	if (fbp) {
		fbrelse(fbp, S_OTHER);
	}
	return (err);
}
/*
 * Reopen zfs_sb_t and release VFS ops.
 *
 * Counterpart of the suspend/teardown path: re-own the objset, rebuild
 * the SA attribute table and superblock state, and re-establish every
 * cached znode (stale ones are unhashed and marked).  The teardown
 * locks, which the caller must hold (z_teardown_lock as RRW writer,
 * z_teardown_inactive_lock as writer), are always released before
 * returning.  On failure to reopen, the filesystem is force-unmounted.
 */
int
zfs_resume_fs(zfs_sb_t *zsb, const char *osname)
{
	int err, err2;

	ASSERT(RRW_WRITE_HELD(&zsb->z_teardown_lock));
	ASSERT(RW_WRITE_HELD(&zsb->z_teardown_inactive_lock));

	err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zsb, &zsb->z_os);
	if (err) {
		zsb->z_os = NULL;
	} else {
		znode_t *zp;
		uint64_t sa_obj = 0;

		err2 = zap_lookup(zsb->z_os, MASTER_NODE_OBJ,
		    ZFS_SA_ATTRS, 8, 1, &sa_obj);

		/*
		 * NOTE(review): err is 0 in this branch, so this bail is
		 * taken on err2 alone and the function then returns 0
		 * without the later force-unmount — confirm that is the
		 * intended handling of a failed ZFS_SA_ATTRS lookup.
		 */
		if ((err || err2) && zsb->z_version >= ZPL_VERSION_SA)
			goto bail;

		if ((err = sa_setup(zsb->z_os, sa_obj,
		    zfs_attr_table,  ZPL_END, &zsb->z_attr_table)) != 0)
			goto bail;

		VERIFY(zfs_sb_setup(zsb, B_FALSE) == 0);
		zsb->z_rollback_time = jiffies;

		/*
		 * Attempt to re-establish all the active inodes with their
		 * dbufs. If a zfs_rezget() fails, then we unhash the inode
		 * and mark it stale. This prevents a collision if a new
		 * inode/object is created which must use the same inode
		 * number. The stale inode will be be released when the
		 * VFS prunes the dentry holding the remaining references
		 * on the stale inode.
		 */
		mutex_enter(&zsb->z_znodes_lock);
		for (zp = list_head(&zsb->z_all_znodes); zp;
		    zp = list_next(&zsb->z_all_znodes, zp)) {
			err2 = zfs_rezget(zp);
			if (err2) {
				remove_inode_hash(ZTOI(zp));
				zp->z_is_stale = B_TRUE;
			}
		}
		mutex_exit(&zsb->z_znodes_lock);
	}

bail:
	/* release the VFS ops */
	rw_exit(&zsb->z_teardown_inactive_lock);
	rrw_exit(&zsb->z_teardown_lock, FTAG);

	if (err) {
		/*
		 * Since we couldn't reopen zfs_sb_t or, setup the
		 * sa framework, force unmount this file system.
		 */
		if (zsb->z_os)
			(void) zfs_umount(zsb->z_sb);
	}
	return (err);
}
/*
 * Make a directory entry 'namep' in directory 'tdp' according to 'op'
 * (create/mkdir/link/rename).  Validates the name, bumps the source
 * link count up front for link/rename (undone at out2 on failure),
 * checks target directory accessibility and rename-loop safety
 * (ud_dircheckpath), then either resolves an existing entry (EEXIST /
 * ud_dirrename / link refusal) or creates inode and entry as needed,
 * unwinding a freshly made inode if the entry add fails.  Rename
 * vnevents fire after the locks are dropped.
 *
 * Caller must hold tdp->i_rwlock as writer (ud_dirlook temporarily
 * drops/retakes it for the "."/".."-with-ipp case).
 * Returns 0 on success or an errno.
 */
int
ud_direnter(
	struct ud_inode *tdp,
	char *namep,
	enum de_op op,
	struct ud_inode *sdp,
	struct ud_inode *sip,
	struct vattr *vap,
	struct ud_inode **ipp,
	struct cred *cr,
	caller_context_t *ctp)
{
	struct udf_vfs *udf_vfsp;
	struct ud_inode *tip;
	struct slot slot;
	int32_t namlen, err;
	char *s;

	uint8_t *buf = NULL;

	ud_printf("ud_direnter\n");

	udf_vfsp = tdp->i_udf;

	/* don't allow '/' characters in pathname component */
	for (s = namep, namlen = 0; *s; s++, namlen++) {
		if (*s == '/') {
			return (EACCES);
		}
	}

	if (namlen == 0) {
		cmn_err(CE_WARN, "name length == 0 in ud_direnter");
		return (EINVAL);
	}

	ASSERT(RW_WRITE_HELD(&tdp->i_rwlock));
	/*
	 * If name is "." or ".." then if this is a create look it up
	 * and return EEXIST.  Rename or link TO "." or ".." is forbidden.
	 */
	if (namep[0] == '.' &&
	    (namlen == 1 || (namlen == 2 && namep[1] == '.'))) {
		if (op == DE_RENAME) {
			return (EINVAL);	/* *SIGH* should be ENOTEMPTY */
		}
		if (ipp) {
			/*
			 * ud_dirlook will acquire the i_rwlock
			 */
			rw_exit(&tdp->i_rwlock);
			if (err = ud_dirlook(tdp, namep, ipp, cr, 0)) {
				rw_enter(&tdp->i_rwlock, RW_WRITER);
				return (err);
			}
			rw_enter(&tdp->i_rwlock, RW_WRITER);
		}
		return (EEXIST);
	}

	tip = NULL;
	slot.status = NONE;
	slot.offset = 0;
	slot.size = 0;
	slot.fbp = NULL;
	slot.ep = NULL;
	slot.endoff = 0;

	/*
	 * For link and rename lock the source entry and check the link count
	 * to see if it has been removed while it was unlocked.  If not, we
	 * increment the link count and force the inode to disk to make sure
	 * that it is there before any directory entry that points to it.
	 */
	if (op == DE_LINK || op == DE_RENAME) {
		rw_enter(&sip->i_contents, RW_WRITER);
		if (sip->i_nlink == 0) {
			rw_exit(&sip->i_contents);
			return (ENOENT);
		}
		if (sip->i_nlink == MAXLINK) {
			rw_exit(&sip->i_contents);
			return (EMLINK);
		}

		sip->i_nlink++;
		mutex_enter(&sip->i_tlock);
		sip->i_flag |= ICHG;
		mutex_exit(&sip->i_tlock);
		ud_iupdat(sip, 1);
		rw_exit(&sip->i_contents);
	}

	/*
	 * If target directory has not been removed, then we can consider
	 * allowing file to be created.
	 */
	if (tdp->i_nlink == 0) {
		err = ENOENT;
		goto out2;
	}

	/*
	 * Check accessibility of directory.
	 */
	if (tdp->i_type != VDIR) {
		err = ENOTDIR;
		goto out2;
	}

	/*
	 * Execute access is required to search the directory.
	 */
	if (err = ud_iaccess(tdp, IEXEC, cr)) {
		goto out2;
	}

	/*
	 * If this is a rename of a directory and the parent is
	 * different (".." must be changed), then the source
	 * directory must not be in the directory hierarchy
	 * above the target, as this would orphan everything
	 * below the source directory.  Also the user must have
	 * write permission in the source so as to be able to
	 * change "..".
	 */
	if (op == DE_RENAME) {
		if (sip == tdp) {
			err = EINVAL;
			goto out2;
		}
		rw_enter(&sip->i_contents, RW_READER);
		if ((sip->i_type == VDIR) && (sdp != tdp)) {
			uint32_t blkno;

			if ((err = ud_iaccess(sip, IWRITE, cr))) {
				rw_exit(&sip->i_contents);
				goto out2;
			}
			blkno = sip->i_icb_lbano;
			rw_exit(&sip->i_contents);
			if ((err = ud_dircheckpath(blkno, tdp, cr))) {
				goto out2;
			}
		} else {
			rw_exit(&sip->i_contents);
		}
	}

	/*
	 * Search for the entry. Return VN_HELD tip if found.
	 */
	buf = kmem_zalloc(udf_vfsp->udf_lbsize, KM_SLEEP);
	rw_enter(&tdp->i_contents, RW_WRITER);
	if (err = ud_dircheckforname(tdp,
	    namep, namlen, &slot, &tip, buf, cr)) {
		goto out;
	}
	if (tip) {
		switch (op) {
		case DE_CREATE :
		case DE_MKDIR :
			if (ipp) {
				*ipp = tip;
				err = EEXIST;
			} else {
				VN_RELE(ITOV(tip));
			}
			break;
		case DE_RENAME :
			err = ud_dirrename(sdp, sip, tdp, tip,
			    namep, buf, &slot, cr);
			/*
			 * We used to VN_RELE() here, but this
			 * was moved down so that we could send
			 * a vnevent after the locks were dropped.
			 */
			break;
		case DE_LINK :
			/*
			 * Can't link to an existing file.
			 */
			VN_RELE(ITOV(tip));
			err = EEXIST;
			break;
		}
	} else {
		/*
		 * The entry does not exist. Check write permission in
		 * directory to see if entry can be created.
		 */
		if (err = ud_iaccess(tdp, IWRITE, cr)) {
			goto out;
		}
		if ((op == DE_CREATE) || (op == DE_MKDIR)) {
			/*
			 * Make new inode and directory entry as required.
			 */
			if (err = ud_dirmakeinode(tdp, &sip, vap, op, cr))
				goto out;
		}
		if (err = ud_diraddentry(tdp, namep, op,
		    namlen, &slot, sip, sdp, cr)) {
			if ((op == DE_CREATE) || (op == DE_MKDIR)) {
				/*
				 * Unmake the inode we just made.
				 */
				rw_enter(&sip->i_contents, RW_WRITER);
				if (sip->i_type == VDIR) {
					/* Undo the ".." link to tdp. */
					tdp->i_nlink--;
				}
				sip->i_nlink = 0;
				mutex_enter(&sip->i_tlock);
				sip->i_flag |= ICHG;
				mutex_exit(&sip->i_tlock);
				rw_exit(&sip->i_contents);
				VN_RELE(ITOV(sip));
				sip = NULL;
			}
		} else if (ipp) {
			*ipp = sip;
		} else if ((op == DE_CREATE) || (op == DE_MKDIR)) {
			VN_RELE(ITOV(sip));
		}
	}
out:
	if (buf != NULL) {
		kmem_free(buf, udf_vfsp->udf_lbsize);
	}
	if (slot.fbp) {
		fbrelse(slot.fbp, S_OTHER);
	}
	rw_exit(&tdp->i_contents);

	if (op == DE_RENAME) {
		/*
		 * If it's all good, send events after locks are dropped
		 * but before vnodes are released.
		 */
		if (err == 0) {
			if (tip) {
				vnevent_rename_dest(ITOV(tip),
				    ITOV(tdp), namep, ctp);
			}

			if (sdp != tdp) {
				vnevent_rename_dest_dir(ITOV(tdp), ctp);
			}
		}

		/*
		 * The following VN_RELE() was moved from the
		 * DE_RENAME case above
		 */
		if (tip) {
			VN_RELE(ITOV(tip));
		}
	}

out2:
	if (err && ((op == DE_LINK) || (op == DE_RENAME))) {
		/*
		 * Undo bumped link count.
		 */
		rw_enter(&sip->i_contents, RW_WRITER);
		sip->i_nlink--;
		rw_exit(&sip->i_contents);

		mutex_enter(&sip->i_tlock);
		sip->i_flag |= ICHG;
		mutex_exit(&sip->i_tlock);
	}
	return (err);
}
/* * wrxmem does the real work of write requests for xmemfs. */ static int wrxmem(struct xmount *xm, struct xmemnode *xp, struct uio *uio, struct cred *cr, struct caller_context *ct) { uint_t blockoffset; /* offset in the block */ uint_t blkwr; /* offset in blocks into xmem file */ uint_t blkcnt; caddr_t base; ssize_t bytes; /* bytes to uiomove */ struct vnode *vp; int error = 0; size_t bsize = xm->xm_bsize; rlim64_t limit = uio->uio_llimit; long oresid = uio->uio_resid; timestruc_t now; offset_t offset; /* * xp->xn_size is incremented before the uiomove * is done on a write. If the move fails (bad user * address) reset xp->xn_size. * The better way would be to increment xp->xn_size * only if the uiomove succeeds. */ long xn_size_changed = 0; offset_t old_xn_size; vp = XNTOV(xp); ASSERT(vp->v_type == VREG); XMEMPRINTF(1, ("wrxmem: vp %p resid %lx off %llx\n", (void *)vp, uio->uio_resid, uio->uio_loffset)); ASSERT(RW_WRITE_HELD(&xp->xn_contents)); ASSERT(RW_WRITE_HELD(&xp->xn_rwlock)); if (MANDLOCK(vp, xp->xn_mode)) { rw_exit(&xp->xn_contents); /* * xmem_getattr ends up being called by chklock */ error = chklock(vp, FWRITE, uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct); rw_enter(&xp->xn_contents, RW_WRITER); if (error != 0) { XMEMPRINTF(8, ("wrxmem: vp %p error %x\n", (void *)vp, error)); return (error); } } if ((offset = uio->uio_loffset) < 0) return (EINVAL); if (offset >= limit) { proc_t *p = ttoproc(curthread); mutex_enter(&p->p_lock); (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls, p, RCA_UNSAFE_SIGINFO); mutex_exit(&p->p_lock); return (EFBIG); } if (uio->uio_resid == 0) { XMEMPRINTF(8, ("wrxmem: vp %p resid %lx\n", (void *)vp, uio->uio_resid)); return (0); } /* * Get the highest blocknumber and allocate page array if needed. * Note that if xm_bsize != PAGESIZE, each ppa[] is pointer to * a page array rather than just a page. 
*/ blkcnt = howmany((offset + uio->uio_resid), bsize); blkwr = offset >> xm->xm_bshift; /* write begins here */ XMEMPRINTF(1, ("wrxmem: vp %p blkcnt %x blkwr %x xn_ppasz %lx\n", (void *)vp, blkcnt, blkwr, xp->xn_ppasz)); /* file size increase */ if (xp->xn_ppasz < blkcnt) { page_t ***ppa; int ppasz; uint_t blksinfile = howmany(xp->xn_size, bsize); /* * check if sufficient blocks available for the given offset. */ if (blkcnt - blksinfile > xm->xm_max - xm->xm_mem) return (ENOSPC); /* * to prevent reallocating every time the file grows by a * single block, double the size of the array. */ if (blkcnt < xp->xn_ppasz * 2) ppasz = xp->xn_ppasz * 2; else ppasz = blkcnt; ppa = kmem_zalloc(ppasz * sizeof (page_t **), KM_SLEEP); ASSERT(ppa); if (xp->xn_ppasz) { bcopy(xp->xn_ppa, ppa, blksinfile * sizeof (*ppa)); kmem_free(xp->xn_ppa, xp->xn_ppasz * sizeof (*ppa)); } xp->xn_ppa = ppa; xp->xn_ppasz = ppasz; /* * fill in the 'hole' if write offset beyond file size. This * helps in creating large files quickly; an application can * lseek to a large offset and perform a single write * operation to create the large file. */ if (blksinfile < blkwr) { old_xn_size = xp->xn_size; xp->xn_size = (offset_t)blkwr * bsize; XMEMPRINTF(4, ("wrxmem: fill vp %p blks %x to %x\n", (void *)vp, blksinfile, blkcnt - 1)); error = xmem_fillpages(xp, vp, (offset_t)blksinfile * bsize, (offset_t)(blkcnt - blksinfile) * bsize, 1); if (error) { /* truncate file back to original size */ (void) xmemnode_trunc(xm, xp, old_xn_size); return (error); } /* * if error on blkwr, this allows truncation of the * filled hole. 
*/ xp->xn_size = old_xn_size; } } do { offset_t pagestart, pageend; page_t **ppp; blockoffset = (uint_t)offset & (bsize - 1); /* * A maximum of xm->xm_bsize bytes of data is transferred * each pass through this loop */ bytes = MIN(bsize - blockoffset, uio->uio_resid); ASSERT(bytes); if (offset + bytes >= limit) { if (offset >= limit) { error = EFBIG; goto out; } bytes = limit - offset; } if (!xp->xn_ppa[blkwr]) { /* zero fill new pages - simplify partial updates */ error = xmem_fillpages(xp, vp, offset, bytes, 1); if (error) return (error); } /* grow the file to the new length */ if (offset + bytes > xp->xn_size) { xn_size_changed = 1; old_xn_size = xp->xn_size; xp->xn_size = offset + bytes; } #ifdef LOCKNEST xmem_getpage(); #endif /* xn_ppa[] is a page_t * if ppb == 1 */ if (xm->xm_ppb == 1) ppp = (page_t **)&xp->xn_ppa[blkwr]; else ppp = &xp->xn_ppa[blkwr][btop(blockoffset)]; pagestart = offset & ~(offset_t)(PAGESIZE - 1); /* * subtract 1 in case (offset + bytes) is mod PAGESIZE * so that pageend is the actual index of last page. */ pageend = (offset + bytes - 1) & ~(offset_t)(PAGESIZE - 1); base = segxmem_getmap(xm->xm_map, vp, pagestart, pageend - pagestart + PAGESIZE, ppp, S_WRITE); rw_exit(&xp->xn_contents); error = uiomove(base + (offset - pagestart), bytes, UIO_WRITE, uio); segxmem_release(xm->xm_map, base, pageend - pagestart + PAGESIZE); /* * Re-acquire contents lock. */ rw_enter(&xp->xn_contents, RW_WRITER); /* * If the uiomove failed, fix up xn_size. */ if (error) { if (xn_size_changed) { /* * The uiomove failed, and we * allocated blocks,so get rid * of them. */ (void) xmemnode_trunc(xm, xp, old_xn_size); } } else { if ((xp->xn_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) && (xp->xn_mode & (S_ISUID | S_ISGID)) && secpolicy_vnode_setid_retain(cr, (xp->xn_mode & S_ISUID) != 0 && xp->xn_uid == 0) != 0) { /* * Clear Set-UID & Set-GID bits on * successful write if not privileged * and at least one of the execute bits * is set. 
If we always clear Set-GID, * mandatory file and record locking is * unuseable. */ xp->xn_mode &= ~(S_ISUID | S_ISGID); } gethrestime(&now); xp->xn_mtime = now; xp->xn_ctime = now; } offset = uio->uio_loffset; /* uiomove sets uio_loffset */ blkwr++; } while (error == 0 && uio->uio_resid > 0 && bytes != 0); out: /* * If we've already done a partial-write, terminate * the write but return no error. */ if (oresid != uio->uio_resid) error = 0; return (error); }
/*
 * Raise the indirection level of a dnode to dn_next_nlevels[txgoff]:
 * hold a new top-level indirect block, copy the dnode's embedded block
 * pointers into it, and re-parent any cached child dbufs of the old top
 * level onto the new indirect buffer.  The old embedded pointers are
 * then zeroed.  Runs with dn_struct_rwlock taken as writer for the
 * duration.
 */
static void
dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
{
	dmu_buf_impl_t *db;
	int txgoff = tx->tx_txg & TXG_MASK;	/* per-txg slot index */
	int nblkptr = dn->dn_phys->dn_nblkptr;
	int old_toplvl = dn->dn_phys->dn_nlevels - 1;
	int new_level = dn->dn_next_nlevels[txgoff];
	int i;

	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);

	/* this dnode can't be paged out because it's dirty */
	ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE);
	ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
	ASSERT(new_level > 1 && dn->dn_phys->dn_nlevels > 0);

	/*
	 * Holding level dn_nlevels at blkid 0 creates the dbuf that will
	 * become the new top-level indirect block.
	 */
	db = dbuf_hold_level(dn, dn->dn_phys->dn_nlevels, 0, FTAG);
	ASSERT(db != NULL);

	dn->dn_phys->dn_nlevels = new_level;

	dprintf("os=%p obj=%llu, increase to %d\n", dn->dn_objset,
	    dn->dn_object, dn->dn_phys->dn_nlevels);

	/* transfer dnode's block pointers to new indirect block */
	(void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT);
	ASSERT(db->db.db_data);
	ASSERT(arc_released(db->db_buf));
	ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size);
	bcopy(dn->dn_phys->dn_blkptr, db->db.db_data,
	    sizeof (blkptr_t) * nblkptr);
	/* re-freeze after the modification above */
	arc_buf_freeze(db->db_buf);

	/*
	 * set dbuf's parent pointers to new indirect buf
	 *
	 * NOTE(review): both branches below do mutex_exit(&child->db_mtx)
	 * with no visible mutex_enter, so dbuf_find() presumably returns
	 * the child with db_mtx held — confirm against dbuf_find()'s
	 * contract.
	 */
	for (i = 0; i < nblkptr; i++) {
		dmu_buf_impl_t *child =
		    dbuf_find(dn->dn_objset, dn->dn_object, old_toplvl, i);

		if (child == NULL)
			continue;
#ifdef DEBUG
		DB_DNODE_ENTER(child);
		ASSERT3P(DB_DNODE(child), ==, dn);
		DB_DNODE_EXIT(child);
#endif	/* DEBUG */
		/* already re-parented (parent is not the dnode buf): skip */
		if (child->db_parent && child->db_parent != dn->dn_dbuf) {
			ASSERT(child->db_parent->db_level == db->db_level);
			ASSERT(child->db_blkptr !=
			    &dn->dn_phys->dn_blkptr[child->db_blkid]);
			mutex_exit(&child->db_mtx);
			continue;
		}
		ASSERT(child->db_parent == NULL ||
		    child->db_parent == dn->dn_dbuf);

		/* point the child at the new indirect block's i'th blkptr */
		child->db_parent = db;
		dbuf_add_ref(db, child);
		if (db->db.db_data)
			child->db_blkptr = (blkptr_t *)db->db.db_data + i;
		else
			child->db_blkptr = NULL;
		dprintf_dbuf_bp(child, child->db_blkptr,
		    "changed db_blkptr to new indirect %s", "");

		mutex_exit(&child->db_mtx);
	}

	/* the embedded pointers now live in the indirect block */
	bzero(dn->dn_phys->dn_blkptr, sizeof (blkptr_t) * nblkptr);

	dbuf_rele(db, FTAG);

	rw_exit(&dn->dn_struct_rwlock);
}