Exemplo n.º 1
0
/*
 * Unhook an attribute directory from a parent file/dir
 * Only do so, if we are the only user of the vnode.
 *
 * ip  - the parent (data) inode; sip - the shadow attr-dir inode.
 * Both inodes' i_contents locks must be held as WRITER on entry
 * (asserted below).  The unhook is skipped on read-only mounts and
 * on (forcibly) unmounted filesystems.
 */
void
ufs_unhook_shadow(struct inode *ip, struct inode *sip)
{
	struct vnode		*datavp = ITOV(ip);
	struct vnode		*dirvp = ITOV(sip);
	int			hno;
	kmutex_t		*ihm;

	ASSERT(RW_WRITE_HELD(&sip->i_contents));
	ASSERT(RW_WRITE_HELD(&ip->i_contents));

	if (vn_is_readonly(ITOV(ip)))
		return;

	/* i_ufsvfs goes NULL when the fs has been (forcibly) unmounted. */
	if (ip->i_ufsvfs == NULL || sip->i_ufsvfs == NULL)
		return;

	/*
	 * Take the inode hash-chain lock first, then both v_locks, so
	 * the v_count checks below cannot race with a new hold being
	 * taken on either vnode.
	 */
	hno = INOHASH(ip->i_number);
	ihm = &ih_lock[hno];
	mutex_enter(ihm);

	mutex_enter(&datavp->v_lock);
	mutex_enter(&dirvp->v_lock);

	/*
	 * NOTE(review): this bails out only when BOTH vnodes carry extra
	 * holds; the header comment ("only user of the vnode") reads as
	 * if `||' were intended -- confirm the intended semantics.
	 */
	if (dirvp->v_count != 1 && datavp->v_count != 1) {
		mutex_exit(&dirvp->v_lock);
		mutex_exit(&datavp->v_lock);
		mutex_exit(ihm);
		return;
	}

	/*
	 * Delete shadow from ip
	 */

	/* Drop the attr dir's "." link and the parent's link to it. */
	sip->i_nlink -= 2;
	ufs_setreclaim(sip);	/* mark shadow for reclaim when idle */
	TRANS_INODE(sip->i_ufsvfs, sip);
	sip->i_flag |= ICHG;
	sip->i_seq++;
	ITIMES_NOLOCK(sip);

	/*
	 * Update src file
	 */
	ip->i_oeftflag = 0;	/* clear parent's reference to the attr dir */
	TRANS_INODE(ip->i_ufsvfs, ip);
	ip->i_flag |= ICHG;
	ip->i_seq++;
	ufs_iupdat(ip, 1);	/* push inode update synchronously */
	mutex_exit(&dirvp->v_lock);
	mutex_exit(&datavp->v_lock);
	mutex_exit(ihm);
}
Exemplo n.º 2
0
/*
 * Mark the file behind `vp' as compressed by setting ICOMPRESS in the
 * inode's on-disk flags, logging the change, and scheduling an inode
 * update when no log transaction is active.
 *
 * Returns EINVAL for anything that is not a regular file, else 0.
 */
int
ufs_mark_compressed(struct vnode *vp)
{
	struct inode	*ip;
	struct ufsvfs	*ufsvfsp;

	/* Only regular files may carry the compression flag. */
	if (vp->v_type != VREG)
		return (EINVAL);

	ip = VTOI(vp);
	ufsvfsp = ip->i_ufsvfs;

	rw_enter(&ip->i_contents, RW_WRITER);
	ip->i_cflags |= ICOMPRESS;
	TRANS_INODE(ufsvfsp, ip);
	ip->i_flag |= (ICHG|ISEQ);
	ip->i_seq++;
	/* No log transaction: push the inode update ourselves (async). */
	if (!TRANS_ISTRANS(ufsvfsp))
		ufs_iupdat(ip, I_ASYNC);
	rw_exit(&ip->i_contents);

	return (0);
}
Exemplo n.º 3
0
/*
 * sam_setattr_ino - apply the attribute changes requested in `vap'
 * (size, mode, uid/gid, access/modify times, ACLs) to SAM-QFS inode
 * `ip'.
 *
 * WORM (write once read many) semantics are woven throughout:
 *  - "read only" WORM files reject most changes (sam_chk_worm),
 *  - a mode change may fire the WORM trigger (sam_worm_trigger),
 *  - setting the access time on a WORM file is reinterpreted as a
 *    request to set/extend the retention period.
 *
 * NOTE(review): the ACL path below temporarily drops and re-acquires
 * ip->inode_rwl as WRITER, so the caller presumably holds that lock
 * on entry -- confirm against callers.
 */
int				/* ERRNO if error, 0 if successful. */
sam_setattr_ino(
	sam_node_t *ip,		/* pointer to inode. */
	vattr_t *vap,		/* vattr pointer. */
	int flags,		/* flags. */
	cred_t *credp)		/* credentials pointer. */
{
	uint_t mask;
	int error = 0;
	vnode_t *vp;
	sam_mode_t oldmode, mode;
	timespec_t  system_time;
	vattr_t oldva;

	/* Snapshot current mode/ownership for the security policy check. */
	oldva.va_mode = ip->di.mode;
	oldva.va_uid = ip->di.uid;
	oldva.va_gid = ip->di.gid;

	vp = SAM_ITOV(ip);
	if (vap->va_mask & AT_NOSET) {
		return (EINVAL);
	}
	mode = vap->va_mode & ~S_IFMT;
	SAM_HRESTIME(&system_time);

	/*
	 * Enforce the "read only" portion of WORM files.
	 */
	if (ip->di.status.b.worm_rdonly && !S_ISDIR(ip->di.mode)) {
		error = sam_chk_worm(mode, vap->va_mask, ip);
		if (error) {
			return (error);
		}
	}

	/*
	 * Generic setattr security policy check.
	 */
	if (error = secpolicy_vnode_setattr(credp, vp, vap,
	    &oldva, flags, sam_access_ino_ul, ip)) {
		return (error);
	}

	mask = vap->va_mask;

	if (mask & AT_SIZE) {		/* -----Change size */
		/* (error is necessarily 0 here; check kept as written) */
		if (error == 0) {
			/* Can only truncate a regular file */
			if (S_ISREQ(ip->di.mode)) {
				error = EINVAL;
				goto out;
			} else if (SAM_PRIVILEGE_INO(ip->di.version,
			    ip->di.id.ino)) {
				error = EPERM;	/* Can't trunc priv'ed inodes */
				goto out;
			}
			if (S_ISSEGI(&ip->di) && (vap->va_size != 0)) {
				/*
				 * If file is segment access and not truncating
				 * to zero--fix.
				 */
				error = EINVAL;
				goto out;
			}
			/*
			 * Might need to do TRANS_ITRUNC here for LQFS....
			 */
			if ((error = sam_clear_ino(ip, (offset_t)vap->va_size,
			    STALE_ARCHIVE, credp))) {
				goto out;
			}
		}
	}

	if (mask & AT_MODE) {				/* -----Change mode */
		/* Cannot change .inodes file */
		if (ip->di.id.ino == SAM_INO_INO) {
			error = EPERM;
			goto out;
		}
		oldmode = ip->di.mode;
		/* Keep the file-type bits, replace the permission bits. */
		ip->di.mode &= S_IFMT;
		ip->di.mode |= vap->va_mode & ~S_IFMT;
		if (ip->di.status.b.worm_rdonly) {
			/* WORM files stay non-writable; preserve SUID. */
			if (!S_ISDIR(ip->di.mode)) {
				ip->di.mode &= ~WMASK;
			}
			if (oldmode & S_ISUID) {
				ip->di.mode |= S_ISUID;
			}
		}

		/*
		 * In 4.6 there are two modes of WORM trigger operation.
		 * One is compatible with the 53xx SUN NAS series.  This
		 * mode uses the SUID bit by itself.  The second mode is
		 * called compatibility mode.  This mode uses the transition
		 * from a writeable mode as the trigger. Note, copying a
		 * read-only file to a WORM capable volume does *NOT*
		 * initiate the WORM trigger in this mode.
		 */
		if (samgt.license.license.lic_u.b.WORM_fs &&
		    (ip->di.version >= SAM_INODE_VERS_2) &&
		    (((vap->va_mode == S_ISUID) &&
		    (ip->mp->mt.fi_config & MT_ALLWORM)) ||
		    ((ip->mp->mt.fi_config & MT_ALLEMUL) &&
		    (((oldmode & RWXALLMASK) == RWXALLMASK) ||
		    ((vap->va_mode != S_ISUID) &&
		    (oldmode & WMASK) && !(ip->di.mode & WMASK)))))) {
			error = sam_worm_trigger(ip, oldmode, system_time);
			if (error) {
				ip->di.mode = oldmode;
				goto out;
			} else if ((ip->mp->mt.fi_config & MT_ALLEMUL) &&
			    ((oldmode & RWXALLMASK) == RWXALLMASK)) {
				ip->di.mode = oldmode;
			} else if ((vap->va_mode == S_ISUID) &&
			    (!S_ISDIR(ip->di.mode))) {
				if (ip->mp->mt.fi_config & MT_ALLEMUL) {
					ip->di.mode = oldmode &
					    (S_IFMT | RMASK);
				} else {
					ip->di.mode = S_ISUID |
					    (oldmode & (S_IFMT | RMASK));
				}
			}
		}
		TRANS_INODE(ip->mp, ip);
		/* Never let a chmod clear the attr-dir file type. */
		if (S_ISATTRDIR(oldmode)) {
			ip->di.mode |= S_IFATTRDIR;
		}
		sam_mark_ino(ip, SAM_CHANGED);
	}

	if (mask & (AT_UID | AT_GID)) {		/* -----Change uid/gid */
		int ouid, ogid;

		/*
		 * NOTE(review): re-applies the requested permission bits
		 * when AT_MODE is also set (the AT_MODE branch above may
		 * have adjusted them for WORM) -- confirm intended.
		 */
		if (vap->va_mask & AT_MODE) {
			ip->di.mode = (ip->di.mode & S_IFMT) |
			    (vap->va_mode & ~S_IFMT);
		}
		/*
		 * To change file ownership, a process must have
		 * privilege if:
		 *
		 * If it is not the owner of the file, or
		 * if doing restricted chown semantics and
		 * either changing the ownership to someone else or
		 * changing the group to a group that we are not
		 * currently in.
		 */
		if (crgetuid(credp) != ip->di.uid ||
		    (rstchown &&
		    (((mask & AT_UID) && vap->va_uid != ip->di.uid) ||
		    ((mask & AT_GID) && !groupmember(vap->va_gid, credp))))) {
			error = secpolicy_vnode_owner(credp, vap->va_uid);
			if (error) {
				goto out;
			}
		}

		/* Transfer quota accounting before changing the ids. */
		ouid = ip->di.uid;
		ogid = ip->di.gid;
		if (error = sam_quota_chown(ip->mp, ip,
		    (mask&AT_UID) ? vap->va_uid : ouid,
		    (mask&AT_GID) ? vap->va_gid : ogid, credp)) {
			goto out;
		}
		if (mask & AT_UID)  ip->di.uid = vap->va_uid;
		if (mask & AT_GID)  ip->di.gid = vap->va_gid;
		/* Ownership changed: archive copy is no longer current. */
		ip->di.status.b.archdone = 0;
		TRANS_INODE(ip->mp, ip);
		sam_mark_ino(ip, SAM_CHANGED);
		/*
		 * Notify arfind and event daemon of setattr.
		 */
		sam_send_to_arfind(ip, AE_change, 0);
		if (ip->mp->ms.m_fsev_buf) {
			sam_send_event(ip->mp, &ip->di, ev_change, 0, 0,
			    ip->di.change_time.tv_sec);
		}
	}

	if (mask & (AT_ATIME | AT_MTIME)) {	/* -----Modify times */
		/*
		 * Synchronously flush pages so dates do not get changed after
		 * utime.  If staging, finish stage and then flush pages.
		 */
		if (ip->flags.b.staging) {
			/*
			 * Might need to do TRANS_ITRUNC or similar here
			 * for LQFS
			 */
			if ((error = sam_clear_file(ip, ip->di.rm.size,
			    MAKE_ONLINE, credp))) {
				goto out;
			}
		}
		sam_flush_pages(ip, 0);
		if (mask & AT_ATIME) {
			/*
			 * The access time field is used by WORM operations to
			 * store the retention timestamp.  This is intercepted
			 * here and stored in the inode's retention period
			 * time fields if either the field hasn't been set or
			 * the provided value exceeds what is currently there
			 * (i.e. we're extending the period).
			 */
			error = sam_check_worm_capable(ip, TRUE);
			if (!error) {
				boolean_t   lite_mode =
				    ((ip->mp->mt.fi_config &
				    MT_LITE_WORM) != 0);
				boolean_t   is_priv =
				    (secpolicy_fs_config(credp,
				    ip->mp->mi.m_vfsp) == 0);

				if (S_ISREG(ip->di.mode) && !WORM(ip)) {
					/*
					 * Regular file in WORM capable
					 * directory.  Set access time per the
					 * request.
					 */
					ip->di.access_time.tv_sec =
					    vap->va_atime.tv_sec;
					ip->di.access_time.tv_nsec =
					    vap->va_atime.tv_nsec;
					ip->flags.b.accessed = 1;
				} else if (WORM(ip) &&
				    (ip->di.version >= SAM_INODE_VERS_2)) {
					boolean_t	extend_period;
					/*
					 * Extend the retention period if so
					 * requested. If lite mode and a
					 * privileged user or a directory allow
					 * the retention period to be shortened.
					 */

					/* rperiod_duration is in minutes. */
					if (vap->va_atime.tv_sec >
					    ip->di2.rperiod_start_time +
					    ip->di2.rperiod_duration * 60) {
						extend_period = 1;
					} else {
						extend_period = 0;
					}

					if (S_ISREG(ip->di.mode) &&
					    extend_period) {
						error = sam_set_rperiod(ip,
						    vap, is_priv &&
						    lite_mode);
					} else if (S_ISDIR(ip->di.mode) ||
					    (S_ISREG(ip->di.mode) &&
					    is_priv && lite_mode)) {
						/*
						 * If the requested time would
						 * result in a non- negative
						 * retention period, set the
						 * period to the difference of
						 * the request and current time.
						 * A negative retention period
						 * is not allowed.
						 */
						if (vap->va_atime.tv_sec >
						    system_time.tv_sec) {


			/* atime == INT_MAX means permanent retention (0). */
			if (vap->va_atime.tv_sec == INT_MAX) {
				ip->di2.rperiod_duration = 0;
			} else {
				ip->di2.rperiod_duration = 1 +
				    (vap->va_atime.tv_sec -
				    system_time.tv_sec)/60;
			}


						} else {
							error = EINVAL;
						}
					}
					if (error) {
						goto out;
					}
				} else {
					/*
					 * Shouldn't get here, invalid request.
					 */
					error = EINVAL;
					goto out;
				}
				TRANS_INODE(ip->mp, ip);
				sam_mark_ino(ip, SAM_CHANGED);
			} else {
				/* Not WORM capable: plain atime update. */
				error = 0;
				ip->di.access_time.tv_sec =
				    vap->va_atime.tv_sec;
				ip->di.access_time.tv_nsec =
				    vap->va_atime.tv_nsec;
				ip->flags.b.accessed = 1;
				TRANS_INODE(ip->mp, ip);
			}
		}
		if (mask & AT_MTIME) {
			if (!ip->di.status.b.worm_rdonly) {
				ip->di.modify_time.tv_sec =
				    vap->va_mtime.tv_sec;
				ip->di.modify_time.tv_nsec =
				    vap->va_mtime.tv_nsec;
				ip->di.change_time.tv_sec = system_time.tv_sec;
				ip->di.change_time.tv_nsec =
				    system_time.tv_nsec;
				ip->flags.b.updated = 1;
				/* Modify time has been set */
				ip->flags.b.dirty = 1;
			}
			TRANS_INODE(ip->mp, ip);
		}
	}

	/*
	 * Check for and apply ACL info, if present.
	 */
	if (ip->di.status.b.acl && !ip->di.status.b.worm_rdonly) {
		if (SAM_IS_SHARED_FS(ip->mp) && SAM_IS_SHARED_SERVER(ip->mp)) {
			/* Drop the inode lock around the client callout. */
			RW_UNLOCK_OS(&ip->inode_rwl, RW_WRITER);
			sam_callout_acl(ip, ip->mp->ms.m_client_ord);
			RW_LOCK_OS(&ip->inode_rwl, RW_WRITER);
		}
		if (error = sam_acl_setattr(ip, vap)) {
			goto out;
		}
	}
	/* Shared-writer mounts push the inode update immediately. */
	if (ip->mp->mt.fi_config & MT_SHARED_WRITER) {
		if ((error == 0) &&
		    (ip->flags.bits & (SAM_ACCESSED|SAM_UPDATED|SAM_CHANGED))) {
			(void) sam_update_inode(ip, SAM_SYNC_ONE, FALSE);
		}
	}
out:

	return (error);
}
Exemplo n.º 4
0
/*
 * sam_worm_trigger - handle a WORM (write once read many) trigger on
 * inode `ip'.  `oldmode' is the mode prior to the chmod that fired the
 * trigger; `system_time' is the current time.
 *
 * On success the inode is marked WORM read-only and its retention
 * period is initialized from (in priority order) the access time, the
 * parent directory's default, or the mount's default.  The first
 * trigger on a volume also records the active WORM option in the
 * superblock so the volume cannot be destroyed (e.g. by sammkfs).
 *
 * NOTE(review): the parent-lookup path below drops and re-acquires
 * ip->inode_rwl as WRITER, so the caller presumably holds that lock.
 *
 * Returns EACCES if the 53xx-NAS-style trigger is rejected because an
 * execute bit was set on a non-directory; 0 otherwise.
 */
static int
sam_worm_trigger(sam_node_t *ip, sam_mode_t oldmode, timespec_t system_time)
{
	struct sam_sbinfo *sblk = &ip->mp->mi.m_sbp->info.sb;
	boolean_t compat_mode = 0;
	vnode_t *vp;
	sam_node_t *pip;
	sam_time_t parent_def_retention = -1;
	sam_mount_t	*mp = ip->mp;

	vp = SAM_ITOV(ip);

	/*
	 * In 4.6 there are two modes of WORM trigger operation.
	 * One is compatible with the 53xx SUN NAS series. This mode
	 * uses the SUID bit by itself.  If the setuid is to be set
	 * and no execute bits were set on a file, then the WORM
	 * bit should be set.  In addition, the new access mode will
	 * be the old access mode OR'ed with any read bits and the
	 * setuid bit.   The second mode is called compatibility mode.
	 * This mode uses the transition from a writeable mode as the
	 * trigger.
	 */
	if (sam_check_worm_capable(ip, TRUE) == 0) {

		ASSERT(ip->di.version >= SAM_INODE_VERS_2);

		compat_mode = ((ip->mp->mt.fi_config & MT_ALLEMUL) != 0);

		/*
		 * If any execute bit is set and compat mode is not set,
		 * (eg 53xx SUN NAS mode) reset the mode bits and
		 * return an error.
		 */
		if (!compat_mode &&
		    (oldmode & XMASK) && (vp->v_type != VDIR)) {
			ip->di.mode = oldmode;
			return (EACCES);
		}

		/*
		 * We don't want to set the SUID bit on directories
		 * (53XX NAS mode).  We do want to set the retention
		 * period on directories.  This period will be used
		 * to set the default retention period on a file in
		 * the directory.
		 */
		if (vp->v_type != VDIR) {
			/*
			 * The flag in the superblock needs to be modified
			 * indicating WORM is active in this volume.  This is
			 * done to protect the volume from being destroyed
			 * (eg sammkfs).  Do this only once unless we're
			 * upgrading from a lite mode.  If a WORM option
			 * was set with a previous trigger, do not update
			 * the superblock again.  If we're upgrading from a
			 * lite mode the volume's superblock needs to be
			 * updated to reflect the stricter mode.  The mount
			 * code verifies and doesn't allow multiple WORM
			 * options to be set.
			 */
			if (SBLK_UPDATE(sblk, ip) &&
			    (sblk->opt_mask_ver == SBLK_OPT_VER1)) {
				boolean_t	update_sblk = 0;

				if (WORM_MT_OPT(ip->mp) && !SBLK_WORM(sblk)) {
					sblk->opt_mask |= SBLK_OPTV1_WORM;
					sblk->opt_mask &= ~SBLK_OPTV1_WORM_LITE;
					update_sblk = 1;
				}

				if (EMUL_MT_OPT(mp) && !SBLK_WORM_EMUL(sblk)) {
					sblk->opt_mask |= SBLK_OPTV1_WORM_EMUL;
					sblk->opt_mask &= ~SBLK_OPTV1_EMUL_LITE;
					update_sblk = 1;
				}

				if (WORM_LITE_MT_OPT(mp) &&
				    !(SBLK_WORM_LITE(sblk) &&
				    WORM_LITE_MT_OPT(mp))) {
					sblk->opt_mask |= SBLK_OPTV1_WORM_LITE;
					update_sblk = 1;
				}

				if (EMUL_LITE_MT_OPT(mp) &&
				    !(SBLK_EMUL_LITE(sblk) &&
				    EMUL_LITE_MT_OPT(mp))) {
					sblk->opt_mask |= SBLK_OPTV1_EMUL_LITE;
					update_sblk = 1;
				}

				if (update_sblk) {
					sblk->opt_mask |=
					    SBLK_OPTV1_CONV_WORMV2;
					(void) sam_update_all_sblks(mp);
				}
			}
		} else {
			/*
			 * Don't set the SUID bit on directories.
			 */
			ip->di.mode &= ~S_ISUID;
			ip->di.mode |= oldmode;
		}

		/*
		 * If this is the first time setting the WORM
		 * bit and the retention time period has not been
		 * specified the default retention period will
		 * be used.  The retention period is stored as a
		 * number of minutes in the rperiod_duration field.
		 * Else, pickup the access time as it contains
		 * the desired retention period.
		 */
		if (!ip->di.status.b.worm_rdonly) {
			if (ip->di.access_time.tv_sec <= system_time.tv_sec) {
				/*
				 * Check to see if the parent directory has a
				 * default retention.  If so, pass it to the
				 * child.
				 */
				RW_UNLOCK_OS(&ip->inode_rwl, RW_WRITER);
				if (sam_get_ino(ip->mp->mi.m_vfsp, IG_EXISTS,
				    &ip->di.parent_id, &pip) == 0) {
					RW_LOCK_OS(&pip->inode_rwl, RW_READER);
					if (pip->di.status.b.worm_rdonly) {
						ip->di2.rperiod_duration =
						    parent_def_retention =
						    pip->di2.rperiod_duration;
					}
					RW_UNLOCK_OS(&pip->inode_rwl,
					    RW_READER);
					/*
					 * Release the hold sam_get_ino()
					 * took on the parent.  This must
					 * only happen on success; on
					 * failure pip is uninitialized.
					 */
					VN_RELE(SAM_ITOV(pip));
				}
				RW_LOCK_OS(&ip->inode_rwl, RW_WRITER);

				/*
				 * If we haven't assigned a retention
				 * set the period to the system default.
				 */
				if (parent_def_retention == -1) {
					ip->di2.rperiod_duration =
					    ip->mp->mt.fi_def_retention;
				}
			} else if (ip->di.access_time.tv_sec == INT_MAX) {
				/*
				 * The access time was set to its largest
				 * value. The user wants permanent retention.
				 */
				ip->di2.rperiod_duration = 0;
			} else {
				/*
				 * The access time was advanced. The user
				 * has set the retention period to some
				 * point in the future.
				 */
				ip->di2.rperiod_duration = 1 +
				    (ip->di.access_time.tv_sec -
				    system_time.tv_sec)/60;
			}
			ip->di2.rperiod_start_time =
			    system_time.tv_sec;
			ip->di2.p2flags |= P2FLAGS_WORM_V2;
			TRANS_INODE(ip->mp, ip);
			sam_mark_ino(ip, SAM_CHANGED);
		}
		ip->di.status.b.worm_rdonly = 1;
	}
	return (0);
}
Exemplo n.º 5
0
/* ARGSUSED7 */
/*
 * sam_create_ino - create file `cp' in directory `pip'.
 *
 * The lookup/create pair runs inside an LQFS (journaling) transaction.
 * If the create fails with an out-of-space error the transaction is
 * ended, sam_wait_space() blocks until space frees up, and the whole
 * sequence retries from lookup_name.  If the entry already exists:
 * exclusive creates fail with EEXIST; non-exclusive creates access
 * check the existing file and, for AT_SIZE==0 requests, truncate it
 * (deferred via truncflag until after the transaction commits when
 * logging is active).
 *
 * On success *vpp receives the held vnode of the created/existing
 * file.
 */
int				/* ERRNO if error, 0 if successful. */
sam_create_ino(
	sam_node_t *pip,	/* pointer to parent directory inode. */
	char *cp,		/* pointer to the component name to create. */
	vattr_t *vap,		/* vattr ptr for type & mode information. */
	vcexcl_t ex,		/* exclusive create flag. */
	int mode,		/* file mode information. */
	vnode_t **vpp,		/* pointer pointer to returned vnode. */
	cred_t *credp,		/* credentials pointer. */
	int filemode)		/* open file mode */
{
	int error = 0;
	sam_node_t *ip;
	struct sam_name name;	/* If no entry, slot info is returned here */
	int trans_size;
	int issync;
	int truncflag = 0;	/* defer truncate until after TRANS_END */
	int terr = 0;		/* error reported by transaction end */
#ifdef LQFS_TODO_LOCKFS
	struct ulockfs *ulp;
#endif /* LQFS_TODO_LOCKFS */

	/*
	 * Cannot set sticky bit unless superuser.
	 */
	if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(credp)) {
		vap->va_mode &= ~VSVTX;
	}

lookup_name:

#ifdef LQFS_TODO_LOCKFS
	error = qfs_lockfs_begin(pip->mp, &ulp, ULOCKFS_CREATE_MASK);
	if (error) {
		return (error);
	}

	if (ulp) {
#endif /* LQFS_TODO_LOCKFS */
		/* Start LQFS create transaction */
		trans_size = (int)TOP_CREATE_SIZE(pip);
		TRANS_BEGIN_CSYNC(pip->mp, issync, TOP_CREATE, trans_size);
#ifdef LQFS_TODO_LOCKFS
	}
#endif /* LQFS_TODO_LOCKFS */

	RW_LOCK_OS(&pip->data_rwl, RW_WRITER);
	name.operation = SAM_CREATE;
	/* ENOENT from lookup means the name is free: create it. */
	if ((error = sam_lookup_name(pip, cp, &ip, &name, credp)) == ENOENT) {
		if (((error = sam_create_name(pip, cp, &ip, &name,
		    vap, credp)) != 0) &&
		    IS_SAM_ENOSPC(error)) {
			RW_UNLOCK_OS(&pip->data_rwl, RW_WRITER);
			/*
			 * Temporarily end LQFS create transaction
			 */
#ifdef LQFS_TODO_LOCKFS
			if (ulp) {
#endif /* LQFS_TODO_LOCKFS */
				TRANS_END_CSYNC(pip->mp, terr, issync,
				    TOP_CREATE, trans_size);
#ifdef LQFS_TODO_LOCKFS
			}
#endif /* LQFS_TODO_LOCKFS */
			/* Wait for space, then retry the whole create. */
			error = sam_wait_space(pip, error);
			if (error == 0) {
				error = terr;
			}
			if (error) {
				return (error);
			}
			goto lookup_name;
		}
		RW_UNLOCK_OS(&pip->data_rwl, RW_WRITER);

	} else if (error == 0) {	/* If entry already exists. */
		RW_UNLOCK_OS(&pip->data_rwl, RW_WRITER);
		error = EEXIST;

		if (ex == NONEXCL) {	/* If non-exclusive create */
			if ((S_ISDIR(ip->di.mode) ||
			    S_ISATTRDIR(ip->di.mode)) &&
			    (mode & S_IWRITE)) {
				/* Cannot create over an existing dir. */
				error = EISDIR;
			} else if (SAM_PRIVILEGE_INO(ip->di.version,
			    ip->di.id.ino)) {
				/* Cannot create over privileged inodes */
				error = EPERM;
			} else if (mode) {	/* Check mode if set */
				error = sam_access_ino(ip, mode, FALSE, credp);
			} else {
				error = 0;
			}
			if ((error == 0) && S_ISREG(ip->di.mode) &&
			    (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) {
				/*
				 * If logging, do the truncate after the
				 * LQFS create transaction is logged.
				 */
				if (TRANS_ISTRANS(ip->mp)) {
					truncflag++;
				} else {
					RW_LOCK_OS(&ip->inode_rwl, RW_WRITER);
					error = sam_clear_file(ip, 0,
					    STALE_ARCHIVE, credp);
					RW_UNLOCK_OS(&ip->inode_rwl, RW_WRITER);
				}
				if (error == 0) {
					VNEVENT_CREATE_OS(SAM_ITOV(ip), NULL);
				}
			}
		}
		/*
		 * Cannot do the following as it caused a stale of
		 * offline copies.
		 */
#if	0
		if ((error == 0) && ((mode & O_CREAT) == 0)) {
			TRANS_INODE(ip->mp, ip);
			sam_mark_ino(ip, SAM_UPDATED|SAM_CHANGED);
		}
#endif
		if (error) {
			VN_RELE(SAM_ITOV(ip));	/* Decrement v_count if error */
		}
	} else {
		/* Lookup failed for a reason other than ENOENT. */
		RW_UNLOCK_OS(&pip->data_rwl, RW_WRITER);
	}
#ifdef LQFS_TODO_LOCKFS
	if (ulp) {
#endif /* LQFS_TODO_LOCKFS */
		TRANS_END_CSYNC(pip->mp, terr, issync, TOP_CREATE, trans_size);
		/*
		 * If we haven't had a more interesting failure
		 * already, then anything that might've happened
		 * here should be reported.
		 */
		if (error == 0) {
			error = terr;
		}
#ifdef LQFS_TODO_LOCKFS
	}
#endif /* LQFS_TODO_LOCKFS */

	/* Deferred truncate now that the create transaction committed. */
	if (!error && truncflag) {
		(void) TRANS_ITRUNC(ip, (u_offset_t)0, STALE_ARCHIVE, credp);
	}

#ifdef LQFS_TODO_LOCKFS
	if (ulp) {
		qfs_lockfs_end(ulp);
	}
#endif /* LQFS_TODO_LOCKFS */

	if (error == 0) {
		*vpp = SAM_ITOV(ip);
		TRACE(T_SAM_CREATE_RET, SAM_ITOV(pip), (sam_tr_t)* vpp,
		    ip->di.id.ino, error);
	}
	return (error);
}
Exemplo n.º 6
0
/*
 * ufs_alloc_data - supports allocating space and reads or writes
 * that involve changes to file length or space allocation.
 *
 * This function is more expensive, because of the UFS log transaction,
 * so ufs_rdwr_data() should be used when space or file length changes
 * will not occur.
 *
 * Inputs:
 * fdb - A null pointer instructs this function to only allocate
 *	space for the specified offset and length.
 *	An actual fdbuffer instructs this function to perform
 *	the read or write operation.
 * flags - defaults (zero value) to synchronous write
 *	B_READ - indicates read operation
 *	B_ASYNC - indicates perform operation asynchronously
 *
 * On return *len is rounded up to a fragment (direct blocks) or a
 * full block boundary.  Returns an errno, or 0 -- including when an
 * async I/O was successfully started (its completion status is
 * reported via fdb_ioerrdone()).
 */
int
ufs_alloc_data(
	vnode_t		*vnodep,
	u_offset_t	offset,
	size_t		*len,
	fdbuffer_t	*fdbp,
	int		flags,
	cred_t		*credp)
{
	struct inode	*ip = VTOI(vnodep);
	size_t		done_len, io_len;
	int		contig;
	u_offset_t	uoff, io_off;
	int		error = 0;		/* No error occurred */
	int		offsetn;		/* Start point this IO */
	int		nbytes;			/* Number bytes in this IO */
	daddr_t		bn;
	struct fs	*fs;
	struct ufsvfs	*ufsvfsp = ip->i_ufsvfs;
	int		i_size_changed = 0;
	u_offset_t	old_i_size;
	struct ulockfs	*ulp;
	int		trans_size;
	int		issync;			/* UFS Log transaction */
						/* synchronous when non-zero */

	int		io_started = 0;		/* No IO started */
	uint_t		protp = PROT_ALL;

	/* "write" here is expressed by the absence of B_READ, not B_WRITE */
	ASSERT((flags & B_WRITE) == 0);

	/*
	 * Obey the lockfs protocol
	 */
	error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap, 0, &protp);
	if (error) {
		if ((fdbp != NULL) && (flags & B_ASYNC)) {
			fdb_ioerrdone(fdbp, error);
		}
		return (error);
	}
	if (ulp) {
		/*
		 * Try to begin a UFS log transaction
		 */
		trans_size = TOP_GETPAGE_SIZE(ip);
		TRANS_TRY_BEGIN_CSYNC(ufsvfsp, issync, TOP_GETPAGE,
		    trans_size, error);
		if (error == EWOULDBLOCK) {
			ufs_lockfs_end(ulp);
			if ((fdbp != NULL) && (flags & B_ASYNC)) {
				fdb_ioerrdone(fdbp, EDEADLK);
			}
			return (EDEADLK);
		}
	}

	uoff = offset;
	io_off = offset;
	io_len = *len;
	done_len = 0;

	DEBUGF((CE_CONT, "?ufs_alloc: off %llx len %lx size %llx fdb: %p\n",
	    uoff, (io_len - done_len), ip->i_size, (void *)fdbp));

	rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER);
	rw_enter(&ip->i_contents, RW_WRITER);

	ASSERT((ip->i_mode & IFMT) == IFREG);

	fs = ip->i_fs;

	/* Work through the request one filesystem block at a time. */
	while (error == 0 && done_len < io_len) {
		uoff = (u_offset_t)(io_off + done_len);
		offsetn = (int)blkoff(fs, uoff);
		nbytes = (int)MIN(fs->fs_bsize - offsetn, io_len - done_len);

		DEBUGF((CE_CONT, "?ufs_alloc_data: offset: %llx len %x\n",
		    uoff, nbytes));

		if (uoff + nbytes > ip->i_size) {
			/*
			 * We are extending the length of the file.
			 * bmap is used so that we are sure that
			 * if we need to allocate new blocks, that it
			 * is done here before we up the file size.
			 */
			DEBUGF((CE_CONT, "?ufs_alloc_data: grow %llx -> %llx\n",
			    ip->i_size, uoff + nbytes));

			error = bmap_write(ip, uoff, (offsetn + nbytes),
			    BI_ALLOC_ONLY, NULL, credp);
			if (ip->i_flag & (ICHG|IUPD))
				ip->i_seq++;
			if (error) {
				DEBUGF((CE_CONT, "?ufs_alloc_data: grow "
				    "failed err: %d\n", error));
				break;
			}
			if (fdbp != NULL) {
				if (uoff >= ip->i_size) {
					/*
					 * Desired offset is past end of bytes
					 * in file, so we have a hole.
					 */
					fdb_add_hole(fdbp, uoff - offset,
					    nbytes);
				} else {
					/*
					 * NOTE(review): this inner `contig'
					 * shadows the function-level one, and
					 * bmap_read's result is overwritten
					 * just below -- bmap_read is called
					 * here only to obtain `bn'.
					 */
					int contig;
					buf_t *bp;

					error = bmap_read(ip, uoff, &bn,
					    &contig);
					if (error) {
						break;
					}

					/* I/O only up to old EOF, rounded. */
					contig = ip->i_size - uoff;
					contig = P2ROUNDUP(contig, DEV_BSIZE);

					bp = fdb_iosetup(fdbp, uoff - offset,
					    contig, vnodep, flags);

					bp->b_edev = ip->i_dev;
					bp->b_dev = cmpdev(ip->i_dev);
					bp->b_blkno = bn;
					bp->b_file = ip->i_vnode;
					bp->b_offset = (offset_t)uoff;

					if (ufsvfsp->vfs_snapshot) {
						fssnap_strategy(
						    &ufsvfsp->vfs_snapshot, bp);
					} else {
						(void) bdev_strategy(bp);
					}
					io_started = 1;

					lwp_stat_update(LWP_STAT_OUBLK, 1);

					if ((flags & B_ASYNC) == 0) {
						error = biowait(bp);
						fdb_iodone(bp);
						if (error) {
							break;
						}
					}
					/* Rounding past EOF becomes a hole. */
					if (contig > (ip->i_size - uoff)) {
						contig -= ip->i_size - uoff;

						fdb_add_hole(fdbp,
						    ip->i_size - offset,
						    contig);
					}
				}
			}

			/* Remember old size so a failure can truncate back. */
			i_size_changed = 1;
			old_i_size = ip->i_size;
			UFS_SET_ISIZE(uoff + nbytes, ip);
			TRANS_INODE(ip->i_ufsvfs, ip);
			/*
			 * file has grown larger than 2GB. Set flag
			 * in superblock to indicate this, if it
			 * is not already set.
			 */
			if ((ip->i_size > MAXOFF32_T) &&
			    !(fs->fs_flags & FSLARGEFILES)) {
				ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES);
				mutex_enter(&ufsvfsp->vfs_lock);
				fs->fs_flags |= FSLARGEFILES;
				ufs_sbwrite(ufsvfsp);
				mutex_exit(&ufsvfsp->vfs_lock);
			}
		} else {
			/*
			 * The file length is not being extended.
			 */
			error = bmap_read(ip, uoff, &bn, &contig);
			if (error) {
				DEBUGF((CE_CONT, "?ufs_alloc_data: "
				    "bmap_read err: %d\n", error));
				break;
			}

			if (bn != UFS_HOLE) {
				/*
				 * Did not map a hole in the file
				 */
				int	contig = P2ROUNDUP(nbytes, DEV_BSIZE);
				buf_t	*bp;

				if (fdbp != NULL) {
					bp = fdb_iosetup(fdbp, uoff - offset,
					    contig, vnodep, flags);

					bp->b_edev = ip->i_dev;
					bp->b_dev = cmpdev(ip->i_dev);
					bp->b_blkno = bn;
					bp->b_file = ip->i_vnode;
					bp->b_offset = (offset_t)uoff;

					if (ufsvfsp->vfs_snapshot) {
						fssnap_strategy(
						    &ufsvfsp->vfs_snapshot, bp);
					} else {
						(void) bdev_strategy(bp);
					}
					io_started = 1;

					lwp_stat_update(LWP_STAT_OUBLK, 1);

					if ((flags & B_ASYNC) == 0) {
						error = biowait(bp);
						fdb_iodone(bp);
						if (error) {
							break;
						}
					}
				}
			} else {
				/*
				 * We read a hole in the file.
				 * We have to allocate blocks for the hole.
				 */
				error = bmap_write(ip, uoff, (offsetn + nbytes),
				    BI_ALLOC_ONLY, NULL, credp);
				if (ip->i_flag & (ICHG|IUPD))
					ip->i_seq++;
				if (error) {
					DEBUGF((CE_CONT, "?ufs_alloc_data: fill"
					    " hole failed error: %d\n", error));
					break;
				}
				if (fdbp != NULL) {
					fdb_add_hole(fdbp, uoff - offset,
					    nbytes);
				}
			}
		}
		done_len += nbytes;
	}

	if (error) {
		if (i_size_changed) {
			/*
			 * Allocation of the blocks for the file failed.
			 * So truncate the file size back to its original size.
			 */
			(void) ufs_itrunc(ip, old_i_size, 0, credp);
		}
	}

	DEBUGF((CE_CONT, "?ufs_alloc: uoff %llx len %lx\n",
	    uoff, (io_len - done_len)));

	/* Direct blocks round to a fragment, the rest to a full block. */
	if ((offset + *len) < (NDADDR * fs->fs_bsize)) {
		*len = (size_t)(roundup(offset + *len, fs->fs_fsize) - offset);
	} else {
		*len = (size_t)(roundup(offset + *len, fs->fs_bsize) - offset);
	}

	/*
	 * Flush cached pages.
	 *
	 * XXX - There should be no pages involved, since the I/O was performed
	 * through the device strategy routine and the page cache was bypassed.
	 * However, testing has demonstrated that this VOP_PUTPAGE is
	 * necessary. Without this, data might not always be read back as it
	 * was written.
	 *
	 */
	(void) VOP_PUTPAGE(vnodep, 0, 0, B_INVAL, credp);

	rw_exit(&ip->i_contents);
	rw_exit(&ip->i_ufsvfs->vfs_dqrwlock);

	if ((fdbp != NULL) && (flags & B_ASYNC)) {
		/*
		 * Show that no more asynchronous IO will be added
		 */
		fdb_ioerrdone(fdbp, error);
	}
	if (ulp) {
		/*
		 * End the UFS Log transaction
		 */
		TRANS_END_CSYNC(ufsvfsp, error, issync, TOP_GETPAGE,
		    trans_size);
		ufs_lockfs_end(ulp);
	}
	/* Async callers learn of errors via fdb_ioerrdone(), not here. */
	if (io_started && (flags & B_ASYNC)) {
		return (0);
	} else {
		return (error);
	}
}