/*
 * Convert the shortform directory to block form.
 */
int						/* error */
xfs_dir2_sf_to_block(
	xfs_da_args_t		*args)		/* operation arguments */
{
	xfs_dir2_db_t		blkno;		/* dir-relative block # (0) */
	xfs_dir2_block_t	*block;		/* block structure */
	xfs_dir2_leaf_entry_t	*blp;		/* block leaf entries */
	xfs_dabuf_t		*bp;		/* block buffer */
	xfs_dir2_block_tail_t	*btp;		/* block tail pointer */
	char			*buf;		/* sf buffer */
	int			buf_len;
	xfs_dir2_data_entry_t	*dep;		/* data entry pointer */
	xfs_inode_t		*dp;		/* incore directory inode */
	int			dummy;		/* trash */
	xfs_dir2_data_unused_t	*dup;		/* unused entry pointer */
	int			endoffset;	/* end of data objects */
	int			error;		/* error return value */
	int			i;		/* index */
	xfs_mount_t		*mp;		/* filesystem mount point */
	int			needlog;	/* need to log block header */
	int			needscan;	/* need to scan block freespc */
	int			newoffset;	/* offset from current entry */
	int			offset;		/* target block offset */
	xfs_dir2_sf_entry_t	*sfep;		/* sf entry pointer */
	xfs_dir2_sf_t		*sfp;		/* shortform structure */
	__be16			*tagp;		/* end of data entry */
	xfs_trans_t		*tp;		/* transaction pointer */
	struct xfs_name		name;

	xfs_dir2_trace_args("sf_to_block", args);
	dp = args->dp;
	tp = args->trans;
	mp = dp->i_mount;
	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
	/*
	 * Bomb out if the shortform directory is way too short.
	 */
	if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		return XFS_ERROR(EIO);
	}
	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
	ASSERT(dp->i_df.if_u1.if_data != NULL);
	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
	/*
	 * Copy the directory into the stack buffer.
	 * Then pitch the incore inode data so we can make extents.
	 */

	buf_len = dp->i_df.if_bytes;
	buf = kmem_alloc(dp->i_df.if_bytes, KM_SLEEP);

	memcpy(buf, sfp, dp->i_df.if_bytes);
	xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK);
	dp->i_d.di_size = 0;
	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
	/*
	 * Reset pointer - old sfp is gone.
	 */
	sfp = (xfs_dir2_sf_t *)buf;
	/*
	 * Add block 0 to the inode.
	 */
	error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno);
	if (error) {
		kmem_free(buf);
		return error;
	}
	/*
	 * Initialize the data block.
	 */
	error = xfs_dir2_data_init(args, blkno, &bp);
	if (error) {
		kmem_free(buf);
		return error;
	}
	block = bp->data;
	block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
	/*
	 * Compute size of block "tail" area.
	 */
	i = (uint)sizeof(*btp) +
	    (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t);
	/*
	 * The whole thing is initialized to free by the init routine.
	 * Say we're using the leaf and tail area.
	 */
	dup = (xfs_dir2_data_unused_t *)block->u;
	needlog = needscan = 0;
	xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog,
		&needscan);
	ASSERT(needscan == 0);
	/*
	 * Fill in the tail.
	 */
	btp = xfs_dir2_block_tail_p(mp, block);
	btp->count = cpu_to_be32(sfp->hdr.count + 2);	/* ., .. */
	btp->stale = 0;
	blp = xfs_dir2_block_leaf_p(btp);
	endoffset = (uint)((char *)blp - (char *)block);
	/*
	 * Remove the freespace, we'll manage it.
	 */
	xfs_dir2_data_use_free(tp, bp, dup,
		(xfs_dir2_data_aoff_t)((char *)dup - (char *)block),
		be16_to_cpu(dup->length), &needlog, &needscan);
	/*
	 * Create entry for .
	 */
	dep = (xfs_dir2_data_entry_t *)
	      ((char *)block + XFS_DIR2_DATA_DOT_OFFSET);
	dep->inumber = cpu_to_be64(dp->i_ino);
	dep->namelen = 1;
	dep->name[0] = '.';
	tagp = xfs_dir2_data_entry_tag_p(dep);
	*tagp = cpu_to_be16((char *)dep - (char *)block);
	xfs_dir2_data_log_entry(tp, bp, dep);
	blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
	blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
				(char *)dep - (char *)block));
	/*
	 * Create entry for ..
	 */
	dep = (xfs_dir2_data_entry_t *)
		((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET);
	dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent));
	dep->namelen = 2;
	dep->name[0] = dep->name[1] = '.';
	tagp = xfs_dir2_data_entry_tag_p(dep);
	*tagp = cpu_to_be16((char *)dep - (char *)block);
	xfs_dir2_data_log_entry(tp, bp, dep);
	blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
	blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
				(char *)dep - (char *)block));
	offset = XFS_DIR2_DATA_FIRST_OFFSET;
	/*
	 * Loop over existing entries, stuff them in.
	 */
	if ((i = 0) == sfp->hdr.count)
		sfep = NULL;
	else
		sfep = xfs_dir2_sf_firstentry(sfp);
	/*
	 * Need to preserve the existing offset values in the sf directory.
	 * Insert holes (unused entries) where necessary.
	 */
	while (offset < endoffset) {
		/*
		 * sfep is null when we reach the end of the list.
		 */
		if (sfep == NULL)
			newoffset = endoffset;
		else
			newoffset = xfs_dir2_sf_get_offset(sfep);
		/*
		 * There should be a hole here, make one.
		 */
		if (offset < newoffset) {
			dup = (xfs_dir2_data_unused_t *)
			      ((char *)block + offset);
			dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
			dup->length = cpu_to_be16(newoffset - offset);
			*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16(
				((char *)dup - (char *)block));
			xfs_dir2_data_log_unused(tp, bp, dup);
			(void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block,
				dup, &dummy);
			offset += be16_to_cpu(dup->length);
			continue;
		}
		/*
		 * Copy a real entry.
		 */
		dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset);
		dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp,
				xfs_dir2_sf_inumberp(sfep)));
		dep->namelen = sfep->namelen;
		memcpy(dep->name, sfep->name, dep->namelen);
		tagp = xfs_dir2_data_entry_tag_p(dep);
		*tagp = cpu_to_be16((char *)dep - (char *)block);
		xfs_dir2_data_log_entry(tp, bp, dep);
		name.name = sfep->name;
		name.len = sfep->namelen;
		blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
							hashname(&name));
		blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
						 (char *)dep - (char *)block));
		offset = (int)((char *)(tagp + 1) - (char *)block);
		if (++i == sfp->hdr.count)
			sfep = NULL;
		else
			sfep = xfs_dir2_sf_nextentry(sfp, sfep);
	}
	/* Done with the temporary buffer */
	kmem_free(buf);
	/*
	 * Sort the leaf entries by hash value.
	 */
	xfs_sort(blp, be32_to_cpu(btp->count), sizeof(*blp), xfs_dir2_block_sort);
	/*
	 * Log the leaf entry area and tail.
	 * Already logged the header in data_init, ignore needlog.
	 */
	ASSERT(needscan == 0);
	xfs_dir2_block_log_leaf(tp, bp, 0, be32_to_cpu(btp->count) - 1);
	xfs_dir2_block_log_tail(tp, bp);
	xfs_dir2_data_check(dp, bp);
	xfs_da_buf_done(bp);
	return 0;
}
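
The kmem_alloc() use above follows a common pattern: snapshot data that is about to be invalidated (xfs_idata_realloc() releases the inline fork) into a temporary buffer, then free that buffer on every exit path. A minimal sketch of the same shape, using only kmem_alloc()/kmem_free() as above; do_conversion() is a hypothetical helper, not part of XFS:

static int
snapshot_then_convert(
	xfs_inode_t	*dp)
{
	int		len = dp->i_df.if_bytes;
	char		*tmp;
	int		error;

	tmp = kmem_alloc(len, KM_SLEEP);	/* KM_SLEEP: may block, never NULL */
	memcpy(tmp, dp->i_df.if_u1.if_data, len);

	error = do_conversion(dp, tmp, len);	/* hypothetical helper */

	kmem_free(tmp);				/* freed on every return path */
	return error;
}
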
Example #2
/*
 * This is called to fill in the vector of log iovecs for the
 * given inode log item.  It fills the first item with an inode
 * log format structure, the second with the on-disk inode structure,
 * and a possible third and/or fourth with the inode data/extents/b-tree
 * root and inode attributes data/extents/b-tree root.
 */
STATIC void
xfs_inode_item_format(
	xfs_inode_log_item_t	*iip,
	xfs_log_iovec_t		*log_vector)
{
	uint			nvecs;
	xfs_log_iovec_t		*vecp;
	xfs_inode_t		*ip;
	size_t			data_bytes;
	xfs_bmbt_rec_t		*ext_buffer;
	int			nrecs;
	xfs_mount_t		*mp;

	ip = iip->ili_inode;
	vecp = log_vector;

	vecp->i_addr = (xfs_caddr_t)&iip->ili_format;
	vecp->i_len  = sizeof(xfs_inode_log_format_t);
	XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IFORMAT);
	vecp++;
	nvecs	     = 1;

	/*
	 * Clear i_update_core if the timestamps (or any other
	 * non-transactional modification) need flushing/logging
	 * and we're about to log them with the rest of the core.
	 *
	 * This is the same logic as xfs_iflush() but this code can't
	 * run at the same time as xfs_iflush because we're in commit
	 * processing here and so we have the inode lock held in
	 * exclusive mode.  It doesn't really matter for the
	 * timestamps if both routines were to grab them; that
	 * would be OK.
	 *
	 * We clear i_update_core before copying out the data.
	 * This is for coordination with our timestamp updates
	 * that don't hold the inode lock. They will always
	 * update the timestamps BEFORE setting i_update_core,
	 * so if we clear i_update_core after they set it we
	 * are guaranteed to see their updates to the timestamps
	 * here.  Likewise, if they set it after we clear it
	 * here, we'll see it either on the next commit of this
	 * inode or the next time the inode gets flushed via
	 * xfs_iflush().  This depends on strongly ordered memory
	 * semantics, but we have that.  We use the SYNCHRONIZE
	 * macro to make sure that the compiler does not reorder
	 * the i_update_core access below the data copy.
	 */
	if (ip->i_update_core)  {
		ip->i_update_core = 0;
		SYNCHRONIZE();
	}

	/*
	 * We don't have to worry about re-ordering here because
	 * the update_size field is protected by the inode lock
	 * and we have that held in exclusive mode.
	 */
	if (ip->i_update_size)
		ip->i_update_size = 0;

	/*
	 * Make sure to get the latest atime from the Linux inode.
	 */
	xfs_synchronize_atime(ip);

	/*
	 * make sure the linux inode is dirty
	 */
	xfs_mark_inode_dirty_sync(ip);

	vecp->i_addr = (xfs_caddr_t)&ip->i_d;
	vecp->i_len  = sizeof(xfs_dinode_core_t);
	XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);
	vecp++;
	nvecs++;
	iip->ili_format.ilf_fields |= XFS_ILOG_CORE;

	/*
	 * If this is really an old format inode, then we need to
	 * log it as such.  This means that we have to copy the link
	 * count from the new field to the old.  We don't have to worry
	 * about the new fields, because nothing trusts them as long as
	 * the old inode version number is there.  If the superblock already
	 * has a new version number, then we don't bother converting back.
	 */
	mp = ip->i_mount;
	ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 ||
	       xfs_sb_version_hasnlink(&mp->m_sb));
	if (ip->i_d.di_version == XFS_DINODE_VERSION_1) {
		if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
			/*
			 * Convert it back.
			 */
			ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
			ip->i_d.di_onlink = ip->i_d.di_nlink;
		} else {
			/*
			 * The superblock version has already been bumped,
			 * so just make the conversion to the new inode
			 * format permanent.
			 */
			ip->i_d.di_version = XFS_DINODE_VERSION_2;
			ip->i_d.di_onlink = 0;
			memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
		}
	}

	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_EXTENTS:
		ASSERT(!(iip->ili_format.ilf_fields &
			 (XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
			  XFS_ILOG_DEV | XFS_ILOG_UUID)));
		if (iip->ili_format.ilf_fields & XFS_ILOG_DEXT) {
			ASSERT(ip->i_df.if_bytes > 0);
			ASSERT(ip->i_df.if_u1.if_extents != NULL);
			ASSERT(ip->i_d.di_nextents > 0);
			ASSERT(iip->ili_extents_buf == NULL);
			nrecs = ip->i_df.if_bytes /
				(uint)sizeof(xfs_bmbt_rec_t);
			ASSERT(nrecs > 0);
#ifdef XFS_NATIVE_HOST
			if (nrecs == ip->i_d.di_nextents) {
				/*
				 * There are no delayed allocation
				 * extents, so just point to the
				 * real extents array.
				 */
				vecp->i_addr =
					(char *)(ip->i_df.if_u1.if_extents);
				vecp->i_len = ip->i_df.if_bytes;
				XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT);
			} else
#endif
			{
				/*
				 * There are delayed allocation extents
				 * in the inode, or we need to convert
				 * the extents to on disk format.
				 * Use xfs_iextents_copy()
				 * to copy only the real extents into
				 * a separate buffer.  We'll free the
				 * buffer in the unlock routine.
				 */
				ext_buffer = kmem_alloc(ip->i_df.if_bytes,
					KM_SLEEP);
				iip->ili_extents_buf = ext_buffer;
				vecp->i_addr = (xfs_caddr_t)ext_buffer;
				vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
						XFS_DATA_FORK);
				XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT);
			}
			ASSERT(vecp->i_len <= ip->i_df.if_bytes);
			iip->ili_format.ilf_dsize = vecp->i_len;
			vecp++;
			nvecs++;
		}
		break;

	case XFS_DINODE_FMT_BTREE:
		ASSERT(!(iip->ili_format.ilf_fields &
			 (XFS_ILOG_DDATA | XFS_ILOG_DEXT |
			  XFS_ILOG_DEV | XFS_ILOG_UUID)));
		if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) {
			ASSERT(ip->i_df.if_broot_bytes > 0);
			ASSERT(ip->i_df.if_broot != NULL);
			vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot;
			vecp->i_len = ip->i_df.if_broot_bytes;
			XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IBROOT);
			vecp++;
			nvecs++;
			iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes;
		}
		break;

	case XFS_DINODE_FMT_LOCAL:
		ASSERT(!(iip->ili_format.ilf_fields &
			 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT |
			  XFS_ILOG_DEV | XFS_ILOG_UUID)));
		if (iip->ili_format.ilf_fields & XFS_ILOG_DDATA) {
			ASSERT(ip->i_df.if_bytes > 0);
			ASSERT(ip->i_df.if_u1.if_data != NULL);
			ASSERT(ip->i_d.di_size > 0);

			vecp->i_addr = (xfs_caddr_t)ip->i_df.if_u1.if_data;
			/*
			 * Round if_bytes up to a word boundary.
			 * The underlying memory is guaranteed
			 * to be there by xfs_idata_realloc().
			 */
			data_bytes = roundup(ip->i_df.if_bytes, 4);
			ASSERT((ip->i_df.if_real_bytes == 0) ||
			       (ip->i_df.if_real_bytes == data_bytes));
			vecp->i_len = (int)data_bytes;
			XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ILOCAL);
			vecp++;
			nvecs++;
			iip->ili_format.ilf_dsize = (unsigned)data_bytes;
		}
		break;

	case XFS_DINODE_FMT_DEV:
		ASSERT(!(iip->ili_format.ilf_fields &
			 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT |
			  XFS_ILOG_DDATA | XFS_ILOG_UUID)));
		if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) {
			iip->ili_format.ilf_u.ilfu_rdev =
				ip->i_df.if_u2.if_rdev;
		}
		break;

	case XFS_DINODE_FMT_UUID:
		ASSERT(!(iip->ili_format.ilf_fields &
			 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT |
			  XFS_ILOG_DDATA | XFS_ILOG_DEV)));
		if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) {
			iip->ili_format.ilf_u.ilfu_uuid =
				ip->i_df.if_u2.if_uuid;
		}
		break;

	default:
		ASSERT(0);
		break;
	}

	/*
	 * If there are no attributes associated with the file,
	 * then we're done.
	 * Assert that no attribute-related log flags are set.
	 */
	if (!XFS_IFORK_Q(ip)) {
		ASSERT(nvecs == iip->ili_item.li_desc->lid_size);
		iip->ili_format.ilf_size = nvecs;
		ASSERT(!(iip->ili_format.ilf_fields &
			 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT)));
		return;
	}

	switch (ip->i_d.di_aformat) {
	case XFS_DINODE_FMT_EXTENTS:
		ASSERT(!(iip->ili_format.ilf_fields &
			 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT)));
		if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) {
			ASSERT(ip->i_afp->if_bytes > 0);
			ASSERT(ip->i_afp->if_u1.if_extents != NULL);
			ASSERT(ip->i_d.di_anextents > 0);
#ifdef DEBUG
			nrecs = ip->i_afp->if_bytes /
				(uint)sizeof(xfs_bmbt_rec_t);
#endif
			ASSERT(nrecs > 0);
			ASSERT(nrecs == ip->i_d.di_anextents);
#ifdef XFS_NATIVE_HOST
			/*
			 * There are no delayed allocation extents
			 * for attributes, so just point at the array.
			 */
			vecp->i_addr = (char *)(ip->i_afp->if_u1.if_extents);
			vecp->i_len = ip->i_afp->if_bytes;
#else
			ASSERT(iip->ili_aextents_buf == NULL);
			/*
			 * Need to endian flip before logging
			 */
			ext_buffer = kmem_alloc(ip->i_afp->if_bytes,
				KM_SLEEP);
			iip->ili_aextents_buf = ext_buffer;
			vecp->i_addr = (xfs_caddr_t)ext_buffer;
			vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
					XFS_ATTR_FORK);
#endif
			XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_EXT);
			iip->ili_format.ilf_asize = vecp->i_len;
			vecp++;
			nvecs++;
		}
		break;

	case XFS_DINODE_FMT_BTREE:
		ASSERT(!(iip->ili_format.ilf_fields &
			 (XFS_ILOG_ADATA | XFS_ILOG_AEXT)));
		if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) {
			ASSERT(ip->i_afp->if_broot_bytes > 0);
			ASSERT(ip->i_afp->if_broot != NULL);
			vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot;
			vecp->i_len = ip->i_afp->if_broot_bytes;
			XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_BROOT);
			vecp++;
			nvecs++;
			iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes;
		}
		break;

	case XFS_DINODE_FMT_LOCAL:
		ASSERT(!(iip->ili_format.ilf_fields &
			 (XFS_ILOG_ABROOT | XFS_ILOG_AEXT)));
		if (iip->ili_format.ilf_fields & XFS_ILOG_ADATA) {
			ASSERT(ip->i_afp->if_bytes > 0);
			ASSERT(ip->i_afp->if_u1.if_data != NULL);

			vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_u1.if_data;
			/*
			 * Round if_bytes up to a word boundary.
			 * The underlying memory is guaranteed
			 * to be there by xfs_idata_realloc().
			 */
			data_bytes = roundup(ip->i_afp->if_bytes, 4);
			ASSERT((ip->i_afp->if_real_bytes == 0) ||
			       (ip->i_afp->if_real_bytes == data_bytes));
			vecp->i_len = (int)data_bytes;
			XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_LOCAL);
			vecp++;
			nvecs++;
			iip->ili_format.ilf_asize = (unsigned)data_bytes;
		}
		break;

	default:
		ASSERT(0);
		break;
	}

	ASSERT(nvecs == iip->ili_item.li_desc->lid_size);
	iip->ili_format.ilf_size = nvecs;
}
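
Every branch above appends a log region with the same five-step idiom. Pulled out on its own for clarity (payload and payload_len are placeholders standing in for the per-case values, not new API):

	vecp->i_addr = (xfs_caddr_t)payload;		/* where the data lives */
	vecp->i_len  = payload_len;			/* how much to log */
	XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);	/* tag the region type */
	vecp++;						/* advance to the next iovec */
	nvecs++;					/* counted into ilf_size at the end */
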
Example #3
/*
 * Lock a directory entry.  A dirlock on <dzp, name> protects that name
 * in dzp's directory zap object.  As long as you hold a dirlock, you can
 * assume two things: (1) dzp cannot be reaped, and (2) no other thread
 * can change the zap entry for (i.e. link or unlink) this name.
 *
 * Input arguments:
 *	dzp	- znode for directory
 *	name	- name of entry to lock
 *	flag	- ZNEW: if the entry already exists, fail with EEXIST.
 *		  ZEXISTS: if the entry does not exist, fail with ENOENT.
 *		  ZSHARED: allow concurrent access with other ZSHARED callers.
 *		  ZXATTR: we want dzp's xattr directory
 *		  ZCILOOK: On a mixed sensitivity file system,
 *			   this lookup should be case-insensitive.
 *		  ZCIEXACT: On a purely case-insensitive file system,
 *			    this lookup should be case-sensitive.
 *		  ZRENAMING: we are locking for renaming, force narrow locks
 *		  ZHAVELOCK: Don't grab the z_name_lock for this call. The
 *			     current thread already holds it.
 *
 * Output arguments:
 *	zpp	- pointer to the znode for the entry (NULL if there isn't one)
 *	dlpp	- pointer to the dirlock for this entry (NULL on error)
 *      direntflags - (case-insensitive lookup only)
 *		flags if multiple case-sensitive matches exist in directory
 *      realpnp     - (case-insensitive lookup only)
 *		actual name matched within the directory
 *
 * Return value: 0 on success or errno on failure.
 *
 * NOTE: Always checks for, and rejects, '.' and '..'.
 * NOTE: For case-insensitive file systems we take wide locks (see below),
 *	 but return znode pointers to a single match.
 */
int
zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
    int flag, int *direntflags, pathname_t *realpnp)
{
	zfs_sb_t	*zsb = ZTOZSB(dzp);
	zfs_dirlock_t	*dl;
	boolean_t	update;
	boolean_t	exact;
	uint64_t	zoid;
#ifdef HAVE_DNLC
	vnode_t		*vp = NULL;
#endif /* HAVE_DNLC */
	int		error = 0;
	int		cmpflags;

	*zpp = NULL;
	*dlpp = NULL;

	/*
	 * Verify that we are not trying to lock '.', '..', or '.zfs'
	 */
	if ((name[0] == '.' &&
	    (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) ||
	    (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0))
		return (SET_ERROR(EEXIST));

	/*
	 * Case sensitivity and normalization preferences are set when
	 * the file system is created.  These are stored in the
	 * zsb->z_case and zsb->z_norm fields.  These choices
	 * affect what vnodes can be cached in the DNLC, how we
	 * perform zap lookups, and the "width" of our dirlocks.
	 *
	 * A normal dirlock locks a single name.  Note that with
	 * normalization a name can be composed multiple ways, but
	 * when normalized, these names all compare equal.  A wide
	 * dirlock locks multiple names.  We need these when the file
	 * system is supporting mixed-mode access.  It is sometimes
	 * necessary to lock all case permutations of a file name at
	 * once so that simultaneous case-insensitive and case-sensitive
	 * access behaves as rationally as possible.
	 */

	/*
	 * Decide if exact matches should be requested when performing
	 * a zap lookup on file systems supporting case-insensitive
	 * access.
	 */
	exact =
	    ((zsb->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) ||
	    ((zsb->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK));

	/*
	 * Only look in or update the DNLC if we are looking for the
	 * name on a file system that does not require normalization
	 * or case folding.  We can also look there if we happen to be
	 * on a non-normalizing, mixed sensitivity file system IF we
	 * are looking for the exact name.
	 *
	 * Maybe can add TO-UPPERed version of name to dnlc in ci-only
	 * case for performance improvement?
	 */
	update = !zsb->z_norm ||
	    ((zsb->z_case == ZFS_CASE_MIXED) &&
	    !(zsb->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK));

	/*
	 * ZRENAMING indicates we are in a situation where we should
	 * take narrow locks regardless of the file system's
	 * preferences for normalizing and case folding.  This will
	 * prevent us deadlocking trying to grab the same wide lock
	 * twice if the two names happen to be case-insensitive
	 * matches.
	 */
	if (flag & ZRENAMING)
		cmpflags = 0;
	else
		cmpflags = zsb->z_norm;

	/*
	 * Wait until there are no locks on this name.
	 *
	 * Don't grab the lock if it is already held. However, we cannot
	 * have both ZSHARED and ZHAVELOCK together.
	 */
	ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK));
	if (!(flag & ZHAVELOCK))
		rw_enter(&dzp->z_name_lock, RW_READER);

	mutex_enter(&dzp->z_lock);
	for (;;) {
		if (dzp->z_unlinked) {
			mutex_exit(&dzp->z_lock);
			if (!(flag & ZHAVELOCK))
				rw_exit(&dzp->z_name_lock);
			return (SET_ERROR(ENOENT));
		}
		for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) {
			if ((u8_strcmp(name, dl->dl_name, 0, cmpflags,
			    U8_UNICODE_LATEST, &error) == 0) || error != 0)
				break;
		}
		if (error != 0) {
			mutex_exit(&dzp->z_lock);
			if (!(flag & ZHAVELOCK))
				rw_exit(&dzp->z_name_lock);
			return (SET_ERROR(ENOENT));
		}
		if (dl == NULL)	{
			/*
			 * Allocate a new dirlock and add it to the list.
			 */
			dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP);
			cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL);
			dl->dl_name = name;
			dl->dl_sharecnt = 0;
			dl->dl_namelock = 0;
			dl->dl_namesize = 0;
			dl->dl_dzp = dzp;
			dl->dl_next = dzp->z_dirlocks;
			dzp->z_dirlocks = dl;
			break;
		}
		if ((flag & ZSHARED) && dl->dl_sharecnt != 0)
			break;
		cv_wait(&dl->dl_cv, &dzp->z_lock);
	}

	/*
	 * If the z_name_lock was NOT taken for this dirlock (the caller
	 * already held it), record that so zfs_dirent_unlock() won't drop it.
	 */
	if (flag & ZHAVELOCK)
		dl->dl_namelock = 1;

	if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) {
		/*
		 * We're the second shared reference to dl.  Make a copy of
		 * dl_name in case the first thread goes away before we do.
		 * Note that we initialize the new name before storing its
		 * pointer into dl_name, because the first thread may load
		 * dl->dl_name at any time.  He'll either see the old value,
		 * which is his, or the new shared copy; either is OK.
		 */
		dl->dl_namesize = strlen(dl->dl_name) + 1;
		name = kmem_alloc(dl->dl_namesize, KM_SLEEP);
		bcopy(dl->dl_name, name, dl->dl_namesize);
		dl->dl_name = name;
	}

	mutex_exit(&dzp->z_lock);

	/*
	 * We have a dirlock on the name.  (Note that it is the dirlock,
	 * not the dzp's z_lock, that protects the name in the zap object.)
	 * See if there's an object by this name; if so, put a hold on it.
	 */
	if (flag & ZXATTR) {
		error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zsb), &zoid,
		    sizeof (zoid));
		if (error == 0)
			error = (zoid == 0 ? SET_ERROR(ENOENT) : 0);
	} else {
#ifdef HAVE_DNLC
		if (update)
			vp = dnlc_lookup(ZTOI(dzp), name);
		if (vp == DNLC_NO_VNODE) {
			iput(vp);
			error = SET_ERROR(ENOENT);
		} else if (vp) {
			if (flag & ZNEW) {
				zfs_dirent_unlock(dl);
				iput(vp);
				return (SET_ERROR(EEXIST));
			}
			*dlpp = dl;
			*zpp = VTOZ(vp);
			return (0);
		} else {
			error = zfs_match_find(zsb, dzp, name, exact,
			    update, direntflags, realpnp, &zoid);
		}
#else
		error = zfs_match_find(zsb, dzp, name, exact,
		    update, direntflags, realpnp, &zoid);
#endif /* HAVE_DNLC */
	}
	if (error) {
		if (error != ENOENT || (flag & ZEXISTS)) {
			zfs_dirent_unlock(dl);
			return (error);
		}
	} else {
		if (flag & ZNEW) {
			zfs_dirent_unlock(dl);
			return (SET_ERROR(EEXIST));
		}
		error = zfs_zget(zsb, zoid, zpp);
		if (error) {
			zfs_dirent_unlock(dl);
			return (error);
		}
#ifdef HAVE_DNLC
		if (!(flag & ZXATTR) && update)
			dnlc_update(ZTOI(dzp), name, ZTOI(*zpp));
#endif /* HAVE_DNLC */
	}

	*dlpp = dl;

	return (0);
}
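
A hedged sketch of how a caller typically pairs zfs_dirent_lock() with zfs_dirent_unlock(); the flag choice and error handling here are illustrative, not taken from a specific call site:

	zfs_dirlock_t	*dl;
	znode_t		*zp;
	int		error;

	/* Look up an existing name under dzp; fails with ENOENT if absent. */
	error = zfs_dirent_lock(&dl, dzp, name, &zp, ZEXISTS, NULL, NULL);
	if (error)
		return (error);

	/* ... operate on zp while the name is protected by the dirlock ... */

	zfs_dirent_unlock(dl);
	iput(ZTOI(zp));
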
Example #4
/*
 * find prom phys pages and alloc space for a tmp copy
 */
static int
i_cpr_find_ppages(void)
{
	struct page *pp;
	struct memlist *pmem;
	pgcnt_t npages, pcnt, scnt, vcnt;
	pfn_t ppn, plast, *dst;
	int mapflag;

	cpr_clear_bitmaps();
	mapflag = REGULAR_BITMAP;

	/*
	 * there should be a page_t for each phys page used by the kernel;
	 * set a bit for each phys page not tracked by a page_t
	 */
	pcnt = 0;
	memlist_read_lock();
	for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
		npages = mmu_btop(pmem->ml_size);
		ppn = mmu_btop(pmem->ml_address);
		for (plast = ppn + npages; ppn < plast; ppn++) {
			if (page_numtopp_nolock(ppn))
				continue;
			(void) cpr_setbit(ppn, mapflag);
			pcnt++;
		}
	}
	memlist_read_unlock();

	/*
	 * clear bits for phys pages in each segment
	 */
	scnt = cpr_count_seg_pages(mapflag, cpr_clrbit);

	/*
	 * set bits for phys pages referenced by the promvp vnode;
	 * these pages are mostly comprised of forthdebug words
	 */
	vcnt = 0;
	for (pp = promvp.v_pages; pp; ) {
		if (cpr_setbit(pp->p_offset, mapflag) == 0)
			vcnt++;
		pp = pp->p_vpnext;
		if (pp == promvp.v_pages)
			break;
	}

	/*
	 * the total number of prom pages is:
	 * (non-page_t pages - seg pages + vnode pages)
	 */
	ppage_count = pcnt - scnt + vcnt;
	CPR_DEBUG(CPR_DEBUG1,
	    "find_ppages: pcnt %ld - scnt %ld + vcnt %ld = %ld\n",
	    pcnt, scnt, vcnt, ppage_count);

	/*
	 * alloc array of pfn_t to store phys page list
	 */
	pphys_list_size = ppage_count * sizeof (pfn_t);
	pphys_list = kmem_alloc(pphys_list_size, KM_NOSLEEP);
	if (pphys_list == NULL) {
		cpr_err(CE_WARN, "cannot alloc pphys_list");
		return (ENOMEM);
	}

	/*
	 * phys pages referenced in the bitmap should be
	 * those used by the prom; scan bitmap and save
	 * a list of prom phys page numbers
	 */
	dst = pphys_list;
	memlist_read_lock();
	for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
		npages = mmu_btop(pmem->ml_size);
		ppn = mmu_btop(pmem->ml_address);
		for (plast = ppn + npages; ppn < plast; ppn++) {
			if (cpr_isset(ppn, mapflag)) {
				ASSERT(dst < (pphys_list + ppage_count));
				*dst++ = ppn;
			}
		}
	}
	memlist_read_unlock();

	/*
	 * allocate space to store prom pages
	 */
	ppage_buf = kmem_alloc(mmu_ptob(ppage_count), KM_NOSLEEP);
	if (ppage_buf == NULL) {
		kmem_free(pphys_list, pphys_list_size);
		pphys_list = NULL;
		cpr_err(CE_WARN, "cannot alloc ppage_buf");
		return (ENOMEM);
	}

	return (0);
}
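
Unlike the KM_SLEEP allocations elsewhere in this listing, the two KM_NOSLEEP allocations above can return NULL, so each failure path must release whatever was already allocated. Reduced to just that shape (same fields as above, error handling only):

	pphys_list = kmem_alloc(pphys_list_size, KM_NOSLEEP);
	if (pphys_list == NULL)
		return (ENOMEM);

	ppage_buf = kmem_alloc(mmu_ptob(ppage_count), KM_NOSLEEP);
	if (ppage_buf == NULL) {
		kmem_free(pphys_list, pphys_list_size);	/* undo the first alloc */
		pphys_list = NULL;
		return (ENOMEM);
	}
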
Example #5
void *
kalloc_canblock(
		vm_size_t	size,
		boolean_t       canblock)
{
	zone_t z;

	if (size < MAX_SIZE_ZDLUT)
		z = get_zone_dlut(size);
	else if (size < kalloc_max_prerounded)
		z = get_zone_search(size, k_zindex_start);
	else {
		/*
		 * If size is too large for a zone, then use kmem_alloc.
		 * (We use kmem_alloc instead of kmem_alloc_kobject so that
		 * krealloc can use kmem_realloc.)
		 */
		vm_map_t alloc_map;
		void *addr;

		/* kmem_alloc could block so we return if noblock */
		if (!canblock) {
			return(NULL);
		}

		if (size >= kalloc_kernmap_size)
		        alloc_map = kernel_map;
		else
			alloc_map = kalloc_map;

		if (kmem_alloc(alloc_map, (vm_offset_t *)&addr, size) != KERN_SUCCESS) {
			if (alloc_map != kernel_map) {
				if (kmem_alloc(kernel_map, (vm_offset_t *)&addr, size) != KERN_SUCCESS)
					addr = NULL;
			}
			else
				addr = NULL;
		}

		if (addr != NULL) {
			kalloc_spin_lock();
			/*
			 * Thread-safe version of the workaround for 4740071
			 * (a double FREE())
			 */
			if (size > kalloc_largest_allocated)
				kalloc_largest_allocated = size;

		        kalloc_large_inuse++;
		        kalloc_large_total += size;
			kalloc_large_sum += size;

			if (kalloc_large_total > kalloc_large_max)
			        kalloc_large_max = kalloc_large_total;

			kalloc_unlock();

			KALLOC_ZINFO_SALLOC(size);
		}
		return(addr);
	}
#ifdef KALLOC_DEBUG
	if (size > z->elem_size)
		panic("%s: z %p (%s) but requested size %lu", __func__,
		    z, z->zone_name, (unsigned long)size);
#endif
	assert(size <= z->elem_size);
	return (zalloc_canblock(z, canblock));
}
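
kalloc_canblock() is normally reached through thin wrappers that fix the canblock argument. They are not shown in this excerpt, but presumably look roughly like this:

void *
kalloc(vm_size_t size)
{
	return (kalloc_canblock(size, TRUE));	/* may block in kmem_alloc/zalloc */
}

void *
kalloc_noblock(vm_size_t size)
{
	return (kalloc_canblock(size, FALSE));	/* returns NULL rather than block */
}
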
Example #6
/*
 * Create a new DMU object to hold a zfs znode.
 *
 *	IN:	dzp	- parent directory for new znode
 *		vap	- file attributes for new znode
 *		tx	- dmu transaction id for zap operations
 *		cr	- credentials of caller
 *		flag	- flags:
 *			  IS_ROOT_NODE	- new object will be root
 *			  IS_XATTR	- new object is an attribute
 *		bonuslen - length of bonus buffer
 *		setaclp  - File/Dir initial ACL
 *		fuidp	 - Tracks fuid allocation.
 *
 *	OUT:	zpp	- allocated znode
 *
 */
void
zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
    uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
{
	uint64_t	crtime[2], atime[2], mtime[2], ctime[2];
	uint64_t	mode, size, links, parent, pflags;
	uint64_t	dzp_pflags = 0;
	uint64_t	rdev = 0;
	zfs_sb_t	*zsb = ZTOZSB(dzp);
	dmu_buf_t	*db;
	timestruc_t	now;
	uint64_t	gen, obj;
	int		bonuslen;
	sa_handle_t	*sa_hdl;
	dmu_object_type_t obj_type;
	sa_bulk_attr_t	*sa_attrs;
	int		cnt = 0;
	zfs_acl_locator_cb_t locate = { 0 };

	if (zsb->z_replay) {
		obj = vap->va_nodeid;
		now = vap->va_ctime;		/* see zfs_replay_create() */
		gen = vap->va_nblocks;		/* ditto */
	} else {
		obj = 0;
		gethrestime(&now);
		gen = dmu_tx_get_txg(tx);
	}

	obj_type = zsb->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
	bonuslen = (obj_type == DMU_OT_SA) ?
	    DN_MAX_BONUSLEN : ZFS_OLD_ZNODE_PHYS_SIZE;

	/*
	 * Create a new DMU object.
	 */
	/*
	 * There's currently no mechanism for pre-reading the blocks that will
	 * be needed to allocate a new object, so we accept the small chance
	 * that there will be an i/o error and we will fail one of the
	 * assertions below.
	 */
	if (S_ISDIR(vap->va_mode)) {
		if (zsb->z_replay) {
			VERIFY0(zap_create_claim_norm(zsb->z_os, obj,
			    zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    obj_type, bonuslen, tx));
		} else {
			obj = zap_create_norm(zsb->z_os,
			    zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    obj_type, bonuslen, tx);
		}
	} else {
		if (zsb->z_replay) {
			VERIFY0(dmu_object_claim(zsb->z_os, obj,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    obj_type, bonuslen, tx));
		} else {
			obj = dmu_object_alloc(zsb->z_os,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    obj_type, bonuslen, tx);
		}
	}

	ZFS_OBJ_HOLD_ENTER(zsb, obj);
	VERIFY(0 == sa_buf_hold(zsb->z_os, obj, NULL, &db));

	/*
	 * If this is the root, fix up the half-initialized parent pointer
	 * to reference the just-allocated physical data area.
	 */
	if (flag & IS_ROOT_NODE) {
		dzp->z_id = obj;
	} else {
		dzp_pflags = dzp->z_pflags;
	}

	/*
	 * If parent is an xattr, so am I.
	 */
	if (dzp_pflags & ZFS_XATTR) {
		flag |= IS_XATTR;
	}

	if (zsb->z_use_fuids)
		pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
	else
		pflags = 0;

	if (S_ISDIR(vap->va_mode)) {
		size = 2;		/* contents ("." and "..") */
		links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
	} else {
		size = links = 0;
	}

	if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))
		rdev = vap->va_rdev;

	parent = dzp->z_id;
	mode = acl_ids->z_mode;
	if (flag & IS_XATTR)
		pflags |= ZFS_XATTR;

	/*
	 * No execs denied will be determined when zfs_mode_compute() is called.
	 */
	pflags |= acl_ids->z_aclp->z_hints &
	    (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
	    ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);

	ZFS_TIME_ENCODE(&now, crtime);
	ZFS_TIME_ENCODE(&now, ctime);

	if (vap->va_mask & ATTR_ATIME) {
		ZFS_TIME_ENCODE(&vap->va_atime, atime);
	} else {
		ZFS_TIME_ENCODE(&now, atime);
	}

	if (vap->va_mask & ATTR_MTIME) {
		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
	} else {
		ZFS_TIME_ENCODE(&now, mtime);
	}

	/* Now add in all of the "SA" attributes */
	VERIFY(0 == sa_handle_get_from_db(zsb->z_os, db, NULL, SA_HDL_SHARED,
	    &sa_hdl));

	/*
	 * Set up the array of attributes to be replaced/set on the new file.
	 *
	 * The order for DMU_OT_ZNODE is critical since it needs to be
	 * constructed in the old znode_phys_t format.  Don't change this
	 * ordering.
	 */
	sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP);

	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb),
		    NULL, &atime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb),
		    NULL, &mtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb),
		    NULL, &ctime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb),
		    NULL, &crtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb),
		    NULL, &gen, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb),
		    NULL, &mode, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb),
		    NULL, &size, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb),
		    NULL, &parent, 8);
	} else {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb),
		    NULL, &mode, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb),
		    NULL, &size, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb),
		    NULL, &gen, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb),
		    NULL, &acl_ids->z_fuid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb),
		    NULL, &acl_ids->z_fgid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb),
		    NULL, &parent, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb),
		    NULL, &pflags, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb),
		    NULL, &atime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb),
		    NULL, &mtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb),
		    NULL, &ctime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb),
		    NULL, &crtime, 16);
	}

	SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zsb), NULL, &links, 8);

	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zsb), NULL,
		    &empty_xattr, 8);
	}
	if (obj_type == DMU_OT_ZNODE ||
	    (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zsb),
		    NULL, &rdev, 8);
	}
	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb),
		    NULL, &pflags, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb), NULL,
		    &acl_ids->z_fuid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb), NULL,
		    &acl_ids->z_fgid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zsb), NULL, pad,
		    sizeof (uint64_t) * 4);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zsb), NULL,
		    &acl_phys, sizeof (zfs_acl_phys_t));
	} else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zsb), NULL,
		    &acl_ids->z_aclp->z_acl_count, 8);
		locate.cb_aclp = acl_ids->z_aclp;
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zsb),
		    zfs_acl_data_locator, &locate,
		    acl_ids->z_aclp->z_acl_bytes);
		mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
		    acl_ids->z_fuid, acl_ids->z_fgid);
	}

	VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);

	if (!(flag & IS_ROOT_NODE)) {
		*zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl,
		    ZTOI(dzp));
		VERIFY(*zpp != NULL);
		VERIFY(dzp != NULL);
	} else {
		/*
		 * If we are creating the root node, the "parent" we
		 * passed in is the znode for the root.
		 */
		*zpp = dzp;

		(*zpp)->z_sa_hdl = sa_hdl;
	}

	(*zpp)->z_pflags = pflags;
	(*zpp)->z_mode = mode;

	if (obj_type == DMU_OT_ZNODE ||
	    acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
		VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
	}
	kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END);
	ZFS_OBJ_HOLD_EXIT(zsb, obj);
}
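
zfs_mknode() also shows the usual lifetime of a bulk SA attribute array: allocate it with kmem_alloc(), fill it with SA_ADD_BULK_ATTR(), commit it with a single sa_replace_all_by_template() call, and free it with the matching size. Stripped down to just that pattern (attribute list abbreviated):

	sa_bulk_attr_t	*bulk;
	int		cnt = 0;

	bulk = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP);
	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(zsb), NULL, &mode, 8);
	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_SIZE(zsb), NULL, &size, 8);
	/* ... remaining attributes ... */
	VERIFY(sa_replace_all_by_template(sa_hdl, bulk, cnt, tx) == 0);
	kmem_free(bulk, sizeof (sa_bulk_attr_t) * ZPL_END);
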
Example #7
File: xpmem_fwd.c  Project: azet/kitten
/* Process an XPMEM_DOMID_REQUEST/RESPONSE/RELEASE command */
static int
xpmem_fwd_process_domid_cmd(struct xpmem_partition_state * part_state,
			    xpmem_link_t		   link,
			    struct xpmem_cmd_ex		 * cmd)
{
    struct xpmem_fwd_state * fwd_state = part_state->fwd_state;

    /* There's no reason not to reuse the input command struct for responses */
    struct xpmem_cmd_ex    * out_cmd  = cmd;
    xpmem_link_t	     out_link = link;

    switch (cmd->type) {
	case XPMEM_DOMID_REQUEST: {
	    /* A domid is requested by someone downstream from us on link
	     * 'link'. If we can't reach the nameserver, just return failure,
	     * because the request should not come through us unless we have a
	     * route already
	     */
	    if (!xpmem_have_ns_link(fwd_state)) {
		return -1;
	    }

	    /* Buffer the request */
	    {
		struct xpmem_domid_req_iter * iter = NULL;
		unsigned long		      flags = 0;

		iter = kmem_alloc(sizeof(struct xpmem_domid_req_iter));
		if (!iter) {
		    return -ENOMEM;
		}

		iter->link = link;

		spin_lock_irqsave(&(fwd_state->lock), flags);
		{
		    list_add_tail(&(iter->node), &(fwd_state->domid_req_list));
		}
		spin_unlock_irqrestore(&(fwd_state->lock), flags);

		/* Forward request up to the nameserver */
		out_link = fwd_state->ns_link;
	    }

	    break;
	}

	case XPMEM_DOMID_RESPONSE: {
	    int ret = 0;
	    /* We've been allocated a domid.
	     *
	     * If our domain has no domid, take it for ourselves.
	     * Otherwise, assign it to a link that has requested a domid from us.
	     */
	     
	    if (part_state->domid <= 0) {
		part_state->domid = cmd->domid_req.domid;

		/* Update the domid map to remember our own domid */
		ret = xpmem_add_domid(part_state, part_state->domid, part_state->local_link);

		if (ret == 0) {
		    XPMEM_ERR("Cannot insert domid %lli into hashtable", part_state->domid);
		    return -EFAULT;
		}

		return 0;
	    } else {
		struct xpmem_domid_req_iter * iter = NULL;
		unsigned long		      flags = 0;

		if (list_empty(&(fwd_state->domid_req_list))) {
		    XPMEM_ERR("We currently do not support the buffering of XPMEM domids");
		    return -1;
		}

		spin_lock_irqsave(&(fwd_state->lock), flags);
		{
		    iter = list_first_entry(&(fwd_state->domid_req_list),
				struct xpmem_domid_req_iter,
				node);
		    list_del(&(iter->node));
		}
		spin_unlock_irqrestore(&(fwd_state->lock), flags);

		/* Forward the domid to this link */
		out_link = iter->link;
		kmem_free(iter);

		/* Update the domid map to remember who has this */
		ret = xpmem_add_domid(part_state, cmd->domid_req.domid, out_link);

		if (ret == 0) {
		    XPMEM_ERR("Cannot insert domid %lli into hashtable", cmd->domid_req.domid);
		    return -EFAULT;
		}
	    }

	    break;
	}

	case XPMEM_DOMID_RELEASE:
	    /* Someone downstream is releasing their domid: simply forward to the
	     * nameserver */
	    out_link = xpmem_search_domid(part_state, out_cmd->dst_dom);

	    if (out_link == 0) {
		XPMEM_ERR("Cannot find domid %lli in hashtable", out_cmd->dst_dom);
		return -EFAULT;
	    }

	    break;

	default: {
	    XPMEM_ERR("Unknown DOMID operation: %s", cmd_to_string(cmd->type));
	    return -EINVAL;
	}
    }

    /* Send the response */
    if (xpmem_send_cmd_link(part_state, out_link, out_cmd)) {
	XPMEM_ERR("Cannot send command on link %lli", out_link);
	return -EFAULT;
    }

    return 0;
}
Example #8
static int
vdev_disk_io_start(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;
	vdev_disk_t *dvd = vd->vdev_tsd;
	vdev_disk_buf_t *vdb;
	struct dk_callback *dkc;
	buf_t *bp;
	int error;

	if (zio->io_type == ZIO_TYPE_IOCTL) {
		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = SET_ERROR(ENXIO);
			return (ZIO_PIPELINE_CONTINUE);
		}

		switch (zio->io_cmd) {

		case DKIOCFLUSHWRITECACHE:

			if (zfs_nocacheflush)
				break;

			if (vd->vdev_nowritecache) {
				zio->io_error = SET_ERROR(ENOTSUP);
				break;
			}

			zio->io_vsd = dkc = kmem_alloc(sizeof (*dkc), KM_SLEEP);
			zio->io_vsd_ops = &vdev_disk_vsd_ops;

			dkc->dkc_callback = vdev_disk_ioctl_done;
			dkc->dkc_flag = FLUSH_VOLATILE;
			dkc->dkc_cookie = zio;

			error = ldi_ioctl(dvd->vd_lh, zio->io_cmd,
			    (uintptr_t)dkc, FKIOCTL, kcred, NULL);

			if (error == 0) {
				/*
				 * The ioctl will be done asynchronously,
				 * and will call vdev_disk_ioctl_done()
				 * upon completion.
				 */
				return (ZIO_PIPELINE_STOP);
			}

			if (error == ENOTSUP || error == ENOTTY) {
				/*
				 * If we get ENOTSUP or ENOTTY, we know that
				 * no future attempts will ever succeed.
				 * In this case we set a persistent bit so
				 * that we don't bother with the ioctl in the
				 * future.
				 */
				vd->vdev_nowritecache = B_TRUE;
			}
			zio->io_error = error;

			break;

		default:
			zio->io_error = SET_ERROR(ENOTSUP);
		}

		return (ZIO_PIPELINE_CONTINUE);
	}

	vdb = kmem_alloc(sizeof (vdev_disk_buf_t), KM_SLEEP);

	vdb->vdb_io = zio;
	bp = &vdb->vdb_buf;

	bioinit(bp);
	bp->b_flags = B_BUSY | B_NOCACHE |
	    (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
	if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
		bp->b_flags |= B_FAILFAST;
	bp->b_bcount = zio->io_size;
	bp->b_un.b_addr = zio->io_data;
	bp->b_lblkno = lbtodb(zio->io_offset);
	bp->b_bufsize = zio->io_size;
	bp->b_iodone = (int (*)())vdev_disk_io_intr;

	/* ldi_strategy() will return non-zero only on programming errors */
	VERIFY(ldi_strategy(dvd->vd_lh, bp) == 0);

	return (ZIO_PIPELINE_STOP);
}
Example #9
/*
 * Given the root disk device devid or pathname, read the label from
 * the device, and construct a configuration nvlist.
 */
int
vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config)
{
	ldi_handle_t vd_lh;
	vdev_label_t *label;
	uint64_t s, size;
	int l;
	ddi_devid_t tmpdevid;
	int error = -1;
	char *minor_name;

	/*
	 * Read the device label and build the nvlist.
	 */
	if (devid != NULL && ddi_devid_str_decode(devid, &tmpdevid,
	    &minor_name) == 0) {
		error = ldi_open_by_devid(tmpdevid, minor_name,
		    FREAD, kcred, &vd_lh, zfs_li);
		ddi_devid_free(tmpdevid);
		ddi_devid_str_free(minor_name);
	}

	if (error && (error = ldi_open_by_name(devpath, FREAD, kcred, &vd_lh,
	    zfs_li)))
		return (error);

	if (ldi_get_size(vd_lh, &s)) {
		(void) ldi_close(vd_lh, FREAD, kcred);
		return (SET_ERROR(EIO));
	}

	size = P2ALIGN_TYPED(s, sizeof (vdev_label_t), uint64_t);
	label = kmem_alloc(sizeof (vdev_label_t), KM_SLEEP);

	*config = NULL;
	for (l = 0; l < VDEV_LABELS; l++) {
		uint64_t offset, state, txg = 0;

		/* read vdev label */
		offset = vdev_label_offset(size, l, 0);
		if (vdev_disk_physio(vd_lh, (caddr_t)label,
		    VDEV_SKIP_SIZE + VDEV_PHYS_SIZE, offset, B_READ) != 0)
			continue;

		if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
		    sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) {
			*config = NULL;
			continue;
		}

		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state >= POOL_STATE_DESTROYED) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
		    &txg) != 0 || txg == 0) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		break;
	}

	kmem_free(label, sizeof (vdev_label_t));
	(void) ldi_close(vd_lh, FREAD, kcred);
	if (*config == NULL)
		error = SET_ERROR(EIDRM);

	return (error);
}
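
On success the caller owns the unpacked nvlist and is responsible for freeing it. A minimal hedged usage sketch (devpath and devid stand for whatever the caller already has in hand):

	nvlist_t	*config = NULL;
	int		error;

	error = vdev_disk_read_rootlabel(devpath, devid, &config);
	if (error == 0) {
		/* ... read ZPOOL_CONFIG_* values out of config ... */
		nvlist_free(config);
	}
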
Example #10
int
s3c2440_i2s_alloc(void *handle,
		  int direction, size_t size, int flags,
		  s3c2440_i2s_buf_t *out)
{
	int kalloc_flags = KM_SLEEP;
	int dma_flags = BUS_DMA_WAITOK;
	int retval = 0;
	struct s3c2xx0_softc *sc = s3c2xx0_softc; /* Shortcut */
	s3c2440_i2s_buf_t buf;

	DPRINTF(("%s\n", __func__));

	if (flags & M_NOWAIT) {
		kalloc_flags = KM_NOSLEEP;
		dma_flags = BUS_DMA_NOWAIT;
	}

	*out = kmem_alloc(sizeof(struct s3c2440_i2s_buf), kalloc_flags);
	if (*out == NULL) {
		DPRINTF(("Failed to allocate memory\n"));
		return ENOMEM;
	}

	buf = *out;
	buf->i2b_parent = handle;
	buf->i2b_size = size;
	buf->i2b_nsegs = S3C2440_I2S_BUF_MAX_SEGS;
	buf->i2b_xfer = NULL;
	buf->i2b_cb = NULL;
	buf->i2b_cb_cookie = NULL;

	/* We first allocate some DMA-friendly memory for the buffer... */
	retval = bus_dmamem_alloc(sc->sc_dmat, buf->i2b_size, NBPG, 0,
				  buf->i2b_segs, buf->i2b_nsegs, &buf->i2b_nsegs,
				  dma_flags);
	if (retval != 0) {
		printf("%s: Failed to allocate DMA memory\n", __func__);
		goto cleanup_dealloc;
	}

	DPRINTF(("%s: Using %d DMA segments\n", __func__, buf->i2b_nsegs));

	retval = bus_dmamem_map(sc->sc_dmat, buf->i2b_segs, buf->i2b_nsegs,
				buf->i2b_size, &buf->i2b_addr, dma_flags);

	if (retval != 0) {
		printf("%s: Failed to map DMA memory\n", __func__);
		goto cleanup_dealloc_dma;
	}

	DPRINTF(("%s: Playback DMA buffer mapped at %p\n", __func__,
		 buf->i2b_addr));

	/* XXX: Not sure if nsegments is really 1...*/
	retval = bus_dmamap_create(sc->sc_dmat, buf->i2b_size, 1,
				   buf->i2b_size, 0, dma_flags,
				   &buf->i2b_dmamap);
	if (retval != 0) {
		printf("%s: Failed to create DMA map\n", __func__);
		goto cleanup_unmap_dma;
	}

	DPRINTF(("%s: DMA map created successfully\n", __func__));

	buf->i2b_xfer = s3c2440_dmac_allocate_xfer(M_NOWAIT);
	if (buf->i2b_xfer == NULL) {
		retval = ENOMEM;
		goto cleanup_destroy_dmamap;
	}

	return 0;
cleanup_destroy_dmamap:
	bus_dmamap_destroy(sc->sc_dmat, buf->i2b_dmamap);
 cleanup_unmap_dma:
	bus_dmamem_unmap(sc->sc_dmat, &buf->i2b_addr, buf->i2b_size);
 cleanup_dealloc_dma:
	bus_dmamem_free(sc->sc_dmat, buf->i2b_segs, buf->i2b_nsegs);
 cleanup_dealloc:
	kmem_free(*out, sizeof(struct s3c2440_i2s_buf));
	return retval;
}
Example #11
static int
vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *ashift)
{
	spa_t *spa = vd->vdev_spa;
	vdev_disk_t *dvd;
	struct dk_minfo_ext dkmext;
	int error;
	dev_t dev;
	int otyp;
	boolean_t validate_devid = B_FALSE;
	ddi_devid_t devid;

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Reopen the device if it's not currently open. Otherwise,
	 * just update the physical size of the device.
	 */
	if (vd->vdev_tsd != NULL) {
		ASSERT(vd->vdev_reopening);
		dvd = vd->vdev_tsd;
		goto skip_open;
	}

	dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);

	/*
	 * When opening a disk device, we want to preserve the user's original
	 * intent.  We always want to open the device by the path the user gave
	 * us, even if it is one of multiple paths to the same device.  But we
	 * also want to be able to survive disks being removed/recabled.
	 * Therefore the sequence of opening devices is:
	 *
	 * 1. Try opening the device by path.  For legacy pools without the
	 *    'whole_disk' property, attempt to fix the path by appending 's0'.
	 *
	 * 2. If the devid of the device matches the stored value, return
	 *    success.
	 *
	 * 3. Otherwise, the device may have moved.  Try opening the device
	 *    by the devid instead.
	 */
	if (vd->vdev_devid != NULL) {
		if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid,
		    &dvd->vd_minor) != 0) {
			vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
			return (SET_ERROR(EINVAL));
		}
	}

	error = EINVAL;		/* presume failure */

	if (vd->vdev_path != NULL) {

		if (vd->vdev_wholedisk == -1ULL) {
			size_t len = strlen(vd->vdev_path) + 3;
			char *buf = kmem_alloc(len, KM_SLEEP);
			ldi_handle_t lh;

			(void) snprintf(buf, len, "%ss0", vd->vdev_path);

			if (ldi_open_by_name(buf, spa_mode(spa), kcred,
			    &lh, zfs_li) == 0) {
				spa_strfree(vd->vdev_path);
				vd->vdev_path = buf;
				vd->vdev_wholedisk = 1ULL;
				(void) ldi_close(lh, spa_mode(spa), kcred);
			} else {
				kmem_free(buf, len);
			}
		}

		error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), kcred,
		    &dvd->vd_lh, zfs_li);

		/*
		 * Compare the devid to the stored value.
		 */
		if (error == 0 && vd->vdev_devid != NULL &&
		    ldi_get_devid(dvd->vd_lh, &devid) == 0) {
			if (ddi_devid_compare(devid, dvd->vd_devid) != 0) {
				error = SET_ERROR(EINVAL);
				(void) ldi_close(dvd->vd_lh, spa_mode(spa),
				    kcred);
				dvd->vd_lh = NULL;
			}
			ddi_devid_free(devid);
		}

		/*
		 * If we succeeded in opening the device, but 'vdev_wholedisk'
		 * is not yet set, then this must be a slice.
		 */
		if (error == 0 && vd->vdev_wholedisk == -1ULL)
			vd->vdev_wholedisk = 0;
	}

	/*
	 * If we were unable to open by path, or the devid check fails, open by
	 * devid instead.
	 */
	if (error != 0 && vd->vdev_devid != NULL) {
		error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor,
		    spa_mode(spa), kcred, &dvd->vd_lh, zfs_li);
	}

	/*
	 * If all else fails, then try opening by physical path (if available)
	 * or the logical path (if we failed due to the devid check).  While not
	 * as reliable as the devid, this will give us something, and the higher
	 * level vdev validation will prevent us from opening the wrong device.
	 */
	if (error) {
		if (vd->vdev_devid != NULL)
			validate_devid = B_TRUE;

		if (vd->vdev_physpath != NULL &&
		    (dev = ddi_pathname_to_dev_t(vd->vdev_physpath)) != NODEV)
			error = ldi_open_by_dev(&dev, OTYP_BLK, spa_mode(spa),
			    kcred, &dvd->vd_lh, zfs_li);

		/*
		 * Note that we don't support the legacy auto-wholedisk support
		 * as above.  This hasn't been used in a very long time and we
		 * don't need to propagate its oddities to this edge condition.
		 */
		if (error && vd->vdev_path != NULL)
			error = ldi_open_by_name(vd->vdev_path, spa_mode(spa),
			    kcred, &dvd->vd_lh, zfs_li);
	}

	if (error) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}

	/*
	 * Now that the device has been successfully opened, update the devid
	 * if necessary.
	 */
	if (validate_devid && spa_writeable(spa) &&
	    ldi_get_devid(dvd->vd_lh, &devid) == 0) {
		if (ddi_devid_compare(devid, dvd->vd_devid) != 0) {
			char *vd_devid;

			vd_devid = ddi_devid_str_encode(devid, dvd->vd_minor);
			zfs_dbgmsg("vdev %s: update devid from %s, "
			    "to %s", vd->vdev_path, vd->vdev_devid, vd_devid);
			spa_strfree(vd->vdev_devid);
			vd->vdev_devid = spa_strdup(vd_devid);
			ddi_devid_str_free(vd_devid);
		}
		ddi_devid_free(devid);
	}

	/*
	 * Once a device is opened, verify that the physical device path (if
	 * available) is up to date.
	 */
	if (ldi_get_dev(dvd->vd_lh, &dev) == 0 &&
	    ldi_get_otyp(dvd->vd_lh, &otyp) == 0) {
		char *physpath, *minorname;

		physpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		minorname = NULL;
		if (ddi_dev_pathname(dev, otyp, physpath) == 0 &&
		    ldi_get_minor_name(dvd->vd_lh, &minorname) == 0 &&
		    (vd->vdev_physpath == NULL ||
		    strcmp(vd->vdev_physpath, physpath) != 0)) {
			if (vd->vdev_physpath)
				spa_strfree(vd->vdev_physpath);
			(void) strlcat(physpath, ":", MAXPATHLEN);
			(void) strlcat(physpath, minorname, MAXPATHLEN);
			vd->vdev_physpath = spa_strdup(physpath);
		}
		if (minorname)
			kmem_free(minorname, strlen(minorname) + 1);
		kmem_free(physpath, MAXPATHLEN);
	}

skip_open:
	/*
	 * Determine the actual size of the device.
	 */
	if (ldi_get_size(dvd->vd_lh, psize) != 0) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Determine the device's minimum transfer size.
	 * If the ioctl isn't supported, assume DEV_BSIZE.
	 */
	if (ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFOEXT, (intptr_t)&dkmext,
	    FKIOCTL, kcred, NULL) != 0)
		dkmext.dki_pbsize = DEV_BSIZE;

	*ashift = highbit(MAX(dkmext.dki_pbsize, SPA_MINBLOCKSIZE)) - 1;

	if (vd->vdev_wholedisk == 1) {
		uint64_t capacity = dkmext.dki_capacity - 1;
		uint64_t blksz = dkmext.dki_lbsize;
		int wce = 1;

		/*
		 * If we own the whole disk, try to enable disk write caching.
		 * We ignore errors because it's OK if we can't do it.
		 */
		(void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce,
		    FKIOCTL, kcred, NULL);

		*max_psize = *psize + vdev_disk_get_space(vd, capacity, blksz);
		zfs_dbgmsg("capacity change: vdev %s, psize %llu, "
		    "max_psize %llu", vd->vdev_path, *psize, *max_psize);
	} else {
		*max_psize = *psize;
	}

	/*
	 * Clear the nowritecache bit, so that on a vdev_reopen() we will
	 * try again.
	 */
	vd->vdev_nowritecache = B_FALSE;

	return (0);
}
Example #12
/*
 * This is called to add the given busy item to the transaction's
 * list of busy items.  It must find a free busy item descriptor
 * or allocate a new one and add the item to that descriptor.
 * The function returns a pointer to the busy descriptor used to point
 * to the new busy entry.  The log busy entry will now point to its new
 * descriptor with its ???? field.
 */
xfs_log_busy_slot_t *
xfs_trans_add_busy(xfs_trans_t *tp, xfs_agnumber_t ag, xfs_extlen_t idx)
{
	xfs_log_busy_chunk_t	*lbcp;
	xfs_log_busy_slot_t	*lbsp;
	int			i=0;

	/*
	 * If there are no free descriptors, allocate a new chunk
	 * of them and put it at the front of the chunk list.
	 */
	if (tp->t_busy_free == 0) {
		lbcp = (xfs_log_busy_chunk_t*)
		       kmem_alloc(sizeof(xfs_log_busy_chunk_t), KM_SLEEP);
		ASSERT(lbcp != NULL);
		/*
		 * Initialize the chunk, and then
		 * claim the first slot in the newly allocated chunk.
		 */
		XFS_LBC_INIT(lbcp);
		XFS_LBC_CLAIM(lbcp, 0);
		lbcp->lbc_unused = 1;
		lbsp = XFS_LBC_SLOT(lbcp, 0);

		/*
		 * Link in the new chunk and update the free count.
		 */
		lbcp->lbc_next = tp->t_busy.lbc_next;
		tp->t_busy.lbc_next = lbcp;
		tp->t_busy_free = XFS_LIC_NUM_SLOTS - 1;

		/*
		 * Initialize the descriptor and the generic portion
		 * of the log item.
		 *
		 * Point the new slot at this item and return it.
		 * Also point the log item at its currently active
		 * descriptor and set the item's mount pointer.
		 */
		lbsp->lbc_ag = ag;
		lbsp->lbc_idx = idx;
		return lbsp;
	}

	/*
	 * Find the free descriptor. It is somewhere in the chunklist
	 * of descriptors.
	 */
	lbcp = &tp->t_busy;
	while (lbcp != NULL) {
		if (XFS_LBC_VACANCY(lbcp)) {
			if (lbcp->lbc_unused <= XFS_LBC_MAX_SLOT) {
				i = lbcp->lbc_unused;
				break;
			} else {
				/* out-of-order vacancy */
				cmn_err(CE_DEBUG, "OOO vacancy lbcp 0x%p\n", lbcp);
				ASSERT(0);
			}
		}
		lbcp = lbcp->lbc_next;
	}
	ASSERT(lbcp != NULL);
	/*
	 * If we find a free descriptor, claim it,
	 * initialize it, and return it.
	 */
	XFS_LBC_CLAIM(lbcp, i);
	if (lbcp->lbc_unused <= i) {
		lbcp->lbc_unused = i + 1;
	}
	lbsp = XFS_LBC_SLOT(lbcp, i);
	tp->t_busy_free--;
	lbsp->lbc_ag = ag;
	lbsp->lbc_idx = idx;
	return lbsp;
}
Example #13
/*
 * This is called to add the given log item to the transaction's
 * list of log items.  It must find a free log item descriptor
 * or allocate a new one and add the item to that descriptor.
 * The function returns a pointer to the item descriptor used to point
 * to the new item.  The log item will now point to its new descriptor
 * with its li_desc field.
 */
xfs_log_item_desc_t *
xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
{
	xfs_log_item_desc_t	*lidp;
	xfs_log_item_chunk_t	*licp;
	int			i=0;

	/*
	 * If there are no free descriptors, allocate a new chunk
	 * of them and put it at the front of the chunk list.
	 */
	if (tp->t_items_free == 0) {
		licp = (xfs_log_item_chunk_t*)
		       kmem_alloc(sizeof(xfs_log_item_chunk_t), KM_SLEEP);
		ASSERT(licp != NULL);
		/*
		 * Initialize the chunk, and then
		 * claim the first slot in the newly allocated chunk.
		 */
		XFS_LIC_INIT(licp);
		XFS_LIC_CLAIM(licp, 0);
		licp->lic_unused = 1;
		XFS_LIC_INIT_SLOT(licp, 0);
		lidp = XFS_LIC_SLOT(licp, 0);

		/*
		 * Link in the new chunk and update the free count.
		 */
		licp->lic_next = tp->t_items.lic_next;
		tp->t_items.lic_next = licp;
		tp->t_items_free = XFS_LIC_NUM_SLOTS - 1;

		/*
		 * Initialize the descriptor and the generic portion
		 * of the log item.
		 *
		 * Point the new slot at this item and return it.
		 * Also point the log item at its currently active
		 * descriptor and set the item's mount pointer.
		 */
		lidp->lid_item = lip;
		lidp->lid_flags = 0;
		lidp->lid_size = 0;
		lip->li_desc = lidp;
		lip->li_mountp = tp->t_mountp;
		return lidp;
	}

	/*
	 * Find the free descriptor. It is somewhere in the chunklist
	 * of descriptors.
	 */
	licp = &tp->t_items;
	while (licp != NULL) {
		if (XFS_LIC_VACANCY(licp)) {
			if (licp->lic_unused <= XFS_LIC_MAX_SLOT) {
				i = licp->lic_unused;
				ASSERT(XFS_LIC_ISFREE(licp, i));
				break;
			}
			for (i = 0; i <= XFS_LIC_MAX_SLOT; i++) {
				if (XFS_LIC_ISFREE(licp, i))
					break;
			}
			ASSERT(i <= XFS_LIC_MAX_SLOT);
			break;
		}
		licp = licp->lic_next;
	}
	ASSERT(licp != NULL);
	/*
	 * If we find a free descriptor, claim it,
	 * initialize it, and return it.
	 */
	XFS_LIC_CLAIM(licp, i);
	if (licp->lic_unused <= i) {
		licp->lic_unused = i + 1;
		XFS_LIC_INIT_SLOT(licp, i);
	}
	lidp = XFS_LIC_SLOT(licp, i);
	tp->t_items_free--;
	lidp->lid_item = lip;
	lidp->lid_flags = 0;
	lidp->lid_size = 0;
	lip->li_desc = lidp;
	lip->li_mountp = tp->t_mountp;
	return lidp;
}
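
Both routines above follow the same chunked-descriptor pattern: descriptors live in fixed-size chunks linked off the transaction, each chunk tracks its free slots in a bitmask, and claiming a slot means clearing its bit. The sketch below is a minimal userspace rendering of that pattern, using illustrative names (struct chunk, claim_slot, NUM_SLOTS) rather than the real XFS_LIC_*/XFS_LBC_* macros, and simplifying the lic_unused fast path to a plain scan.

#include <stdlib.h>

#define NUM_SLOTS	15	/* slots per chunk (illustrative) */

struct slot {
	void		*item;	/* what this descriptor points at */
};

struct chunk {
	struct chunk	*next;		/* chunk list, like lic_next */
	unsigned int	free_mask;	/* bit i set => slot i is free */
	int		unused;		/* first never-used slot index */
	struct slot	slots[NUM_SLOTS];
};

#define CHUNK_INIT(c)		((c)->free_mask = (1u << NUM_SLOTS) - 1, \
				 (c)->unused = 0)
#define CHUNK_VACANCY(c)	((c)->free_mask != 0)
#define CHUNK_ISFREE(c, i)	((c)->free_mask & (1u << (i)))
#define CHUNK_CLAIM(c, i)	((c)->free_mask &= ~(1u << (i)))

/* Claim the first free slot in the chunk list, growing the list if needed. */
static struct slot *
claim_slot(struct chunk **head, void *item)
{
	struct chunk	*c;
	int		i;

	for (c = *head; c != NULL; c = c->next)
		if (CHUNK_VACANCY(c))
			break;
	if (c == NULL) {
		c = malloc(sizeof(*c));
		if (c == NULL)
			return NULL;
		CHUNK_INIT(c);
		c->next = *head;
		*head = c;
	}
	for (i = 0; i < NUM_SLOTS; i++)
		if (CHUNK_ISFREE(c, i))
			break;
	CHUNK_CLAIM(c, i);		/* mark slot i in use */
	if (c->unused <= i)
		c->unused = i + 1;
	c->slots[i].item = item;
	return &c->slots[i];
}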
Example #14
/*
 * Scan the AGs starting at startag looking for an AG that isn't in use and has
 * at least minlen blocks free.
 */
static int
xfs_filestream_pick_ag(
	struct xfs_inode	*ip,
	xfs_agnumber_t		startag,
	xfs_agnumber_t		*agp,
	int			flags,
	xfs_extlen_t		minlen)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_fstrm_item	*item;
	struct xfs_perag	*pag;
	xfs_extlen_t		longest, free = 0, minfree, maxfree = 0;
	xfs_agnumber_t		ag, max_ag = NULLAGNUMBER;
	int			err, trylock, nscan;

	ASSERT(S_ISDIR(ip->i_d.di_mode));

	/* 2% of an AG's blocks must be free for it to be chosen. */
	minfree = mp->m_sb.sb_agblocks / 50;

	ag = startag;
	*agp = NULLAGNUMBER;

	/* For the first pass, don't sleep trying to init the per-AG. */
	trylock = XFS_ALLOC_FLAG_TRYLOCK;

	for (nscan = 0; 1; nscan++) {
		trace_xfs_filestream_scan(ip, ag);

		pag = xfs_perag_get(mp, ag);

		if (!pag->pagf_init) {
			err = xfs_alloc_pagf_init(mp, NULL, ag, trylock);
			if (err && !trylock) {
				xfs_perag_put(pag);
				return err;
			}
		}

		/* Might fail sometimes during the 1st pass with trylock set. */
		if (!pag->pagf_init)
			goto next_ag;

		/* Keep track of the AG with the most free blocks. */
		if (pag->pagf_freeblks > maxfree) {
			maxfree = pag->pagf_freeblks;
			max_ag = ag;
		}

		/*
		 * The AG reference count does two things: it enforces mutual
		 * exclusion when examining the suitability of an AG in this
		 * loop, and it guards against two filestreams being established
		 * in the same AG as each other.
		 */
		if (xfs_filestream_get_ag(mp, ag) > 1) {
			xfs_filestream_put_ag(mp, ag);
			goto next_ag;
		}

		longest = xfs_alloc_longest_free_extent(mp, pag);
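		/*
		 * Usable if the AG has a long-enough contiguous free extent
		 * (when minlen is set) or enough overall free space, and it
		 * is not a metadata-preferred AG unless we are not placing
		 * user data here or are already in the low-space pass.
		 */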
		if (((minlen && longest >= minlen) ||
		     (!minlen && pag->pagf_freeblks >= minfree)) &&
		    (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) ||
		     (flags & XFS_PICK_LOWSPACE))) {

			/* Break out, retaining the reference on the AG. */
			free = pag->pagf_freeblks;
			xfs_perag_put(pag);
			*agp = ag;
			break;
		}

		/* Drop the reference on this AG, it's not usable. */
		xfs_filestream_put_ag(mp, ag);
next_ag:
		xfs_perag_put(pag);
		/* Move to the next AG, wrapping to AG 0 if necessary. */
		if (++ag >= mp->m_sb.sb_agcount)
			ag = 0;

		/* If a full pass of the AGs hasn't been done yet, continue. */
		if (ag != startag)
			continue;

		/* Allow sleeping in xfs_alloc_pagf_init() on the 2nd pass. */
		if (trylock != 0) {
			trylock = 0;
			continue;
		}

		/* Finally, if lowspace wasn't set, set it for the 3rd pass. */
		if (!(flags & XFS_PICK_LOWSPACE)) {
			flags |= XFS_PICK_LOWSPACE;
			continue;
		}

		/*
		 * Take the AG with the most free space, regardless of whether
		 * it's already in use by another filestream.
		 */
		if (max_ag != NULLAGNUMBER) {
			xfs_filestream_get_ag(mp, max_ag);
			free = maxfree;
			*agp = max_ag;
			break;
		}

		/* take AG 0 if none matched */
		trace_xfs_filestream_pick(ip, *agp, free, nscan);
		*agp = 0;
		return 0;
	}

	trace_xfs_filestream_pick(ip, *agp, free, nscan);

	if (*agp == NULLAGNUMBER)
		return 0;

	err = ENOMEM;
	item = kmem_alloc(sizeof(*item), KM_MAYFAIL);
	if (!item)
		goto out_put_ag;

	item->ag = *agp;
	item->ip = ip;

	err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru);
	if (err) {
		if (err == EEXIST)
			err = 0;
		goto out_free_item;
	}

	return 0;

out_free_item:
	kmem_free(item);
out_put_ag:
	xfs_filestream_put_ag(mp, *agp);
	return err;
}
Example #15
File: i686.c Project: via/akaris-ng
void
i686_kmain(unsigned long magic, multiboot_info_t *info) {

  bootvideo_cls();

  parse_cmdline(info->cmdline);

  if (use_serial)
    i686_tty_init(0, 9600);

  i686_kernel.debug = i686_debug;

  if (magic != MULTIBOOT_BOOTLOADER_MAGIC) {
    i686_debug("Not booted from multiboot loader!\n");
    while (1);
  }
 
  i686_debug("mods_addr: %x\nmod_start: %x\n", info->mods_addr,
      0);

  i686_kernel.mutex = &i686_mutex;
  i686_kernel.bsp = (struct cpu *)i686_cpu_alloc();
  i686_kernel.bsp->kvirt = i686_virtmem_init(&i686_kernel);
  i686_kernel.phys = i686_physmem_alloc(&i686_kernel, info);

  kmem_init(i686_kernel.bsp->allocator);
  i686_kernel.bsp->v.init(i686_kernel.bsp);

  i686_debug("Location GDT entry: %x\n", ((struct i686_cpu *)i686_kernel.bsp)->gdt);

  virtaddr_t a;
  physaddr_t p;
  virtmem_error_t e1 = virtmem_kernel_alloc(i686_kernel.bsp->kvirt, &a, 1);
  assert(e1 == VIRTMEM_SUCCESS);
  physmem_error_t e2 = physmem_page_alloc(i686_kernel.bsp->localmem, 0, &p);
  assert(e2 == PHYSMEM_SUCCESS);
  virtmem_kernel_map_virt_to_phys(i686_kernel.bsp->kvirt, p, a);
  i686_debug("Allocated address: %x(->%x)\n", a, p);

  char *s = (char *)a;

  strcpy(s, "This shows the validity of this memory");
  i686_debug("%x contains: %s\n", a, s);

  struct kmem_cache *s1 = kmem_alloc(i686_kernel.bsp->allocator);
  kmem_cache_init(i686_kernel.bsp->allocator,
      s1, i686_kernel.bsp, "test", 128, NULL, NULL);

  char *t1 = kmem_cache_alloc(s1);
  i686_debug("cache at %x provided us with %x\n", s1, t1);
  strcpy(t1, "This shows the validity of the slab allocation");
  i686_debug("%x contains: %s\n", t1, t1);

  i686_address_space_init();
  struct address_space *as;
  struct memory_region *mr;
  address_space_alloc(&as);
  memory_region_alloc(&mr);

  e1 = virtmem_kernel_alloc(i686_kernel.bsp->kvirt, &a, 1);
  virtmem_kernel_map_virt_to_phys(i686_kernel.bsp->kvirt, (physaddr_t)as->pd, a);
  
  address_space_init_region(as, mr, (virtaddr_t)0x1000000, 0x2000);
  memory_region_set_flags(mr, 1, 1);
  memory_region_map(as, mr, NULL);

  const char *teststr = "This is a test string to be copied to userspace.";
  char testcpybuf[128];
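  /*
   * 0xEB 0xFE is the x86 encoding of "jmp $" (jump to self), i.e. an
   * infinite loop; these two bytes serve as a trivial user-mode program
   * that is copied into the new address space below.
   */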
  char opcodes[] = {0xeb, 0xfe};
  virtmem_copy_kernel_to_user(i686_kernel.bsp->kvirt, as->pd, (void *)0x1000ffc, 
      (const void *)teststr, strlen(teststr) + 1);
  virtmem_copy_user_to_kernel(i686_kernel.bsp->kvirt, (void *)&testcpybuf, 
      as->pd, (const void *)0x1000ffc, strlen(teststr) + 1);
  i686_debug("testcpybuf contains '%s'\n", testcpybuf);
  virtmem_copy_kernel_to_user(i686_kernel.bsp->kvirt, as->pd, (void *)0x1000000, 
      (const void *)opcodes, 2);


  struct thread *thr1;
  scheduler_thread_alloc(cpu()->sched, &thr1);
  thread_init(thr1, as);
  thr1->state = THREAD_RUNNABLE;
  scheduler_thread_add(cpu()->sched, thr1);
  scheduler_reschedule(cpu()->sched);
  virtmem_user_setup_kernelspace(i686_kernel.bsp->kvirt, as->pd);
  virtmem_set_context(i686_kernel.bsp->kvirt, as->pd);
  scheduler_resume(cpu()->sched);
  while (1);
}
Example #16
void
reset_stripe(ms_unit_t *un, minor_t mnum, int removing)
{
	ms_comp_t	*mdcomp;
	struct ms_row	*mdr;
	int		i, c;
	int		row;
	int		nsv;
	int		isv;
	sv_dev_t	*sv;
	mddb_recid_t	*recids;
	mddb_recid_t	vtoc_id;
	int		rid = 0;

	md_destroy_unit_incore(mnum, &stripe_md_ops);

	md_nblocks_set(mnum, -1ULL);
	MD_UNIT(mnum) = NULL;

	/*
	 * Attempt release of its minor node
	 */
	md_remove_minor_node(mnum);

	if (!removing)
		return;

	nsv = 0;
	/* Count the number of devices */
	for (row = 0; row < un->un_nrows; row++) {
		mdr = &un->un_row[row];
		nsv += mdr->un_ncomp;
	}
	sv = (sv_dev_t *)kmem_alloc(sizeof (sv_dev_t) * nsv, KM_SLEEP);

	/*
	 * allocate recids array.  since we may have to commit
	 * underlying soft partition records, we need an array
	 * of size: total number of components in stripe + 3
	 * (one for the stripe itself, one for the hotspare, one
	 * for the end marker).
	 */
	recids = kmem_alloc(sizeof (mddb_recid_t) * (nsv + 3), KM_SLEEP);

	/*
	 * Save the md_dev64_t's and driver nm indexes.
	 * Because after the mddb_deleterec() we will
	 * not be able to access the unit structure.
	 *
	 * NOTE: Deleting the names before deleting the
	 *	 unit structure would cause problems if
	 *	 the machine crashed in between the two.
	 */
	isv = 0;
	mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]);

	for (row = 0; row < un->un_nrows; row++) {
		mdr = &un->un_row[row];
		for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) {
			struct ms_comp	*mdc;
			md_dev64_t	child_dev;
			md_unit_t	*child_un;

			mdc = &mdcomp[c++];
			if (mdc->un_mirror.ms_hs_id != 0) {
				mdkey_t		hs_key;

				hs_key = mdc->un_mirror.ms_hs_key;

				mdc->un_dev = mdc->un_mirror.ms_orig_dev;
				mdc->un_start_block =
				    mdc->un_mirror.ms_orig_blk;
				mdc->un_mirror.ms_hs_id = 0;
				mdc->un_mirror.ms_hs_key = 0;
				mdc->un_mirror.ms_orig_dev = 0;
				recids[0] = 0;
				recids[1] = 0;	/* recids[1] filled in below */
				recids[2] = 0;
				(void) md_hot_spare_ifc(HS_FREE, un->un_hsp_id,
				    0, 0, &recids[0], &hs_key, NULL, NULL);
				mddb_commitrecs_wrapper(recids);
			}

			/*
			 * check if we've got metadevice below us and
			 * deparent it if we do.
			 * NOTE: currently soft partitions are the
			 * only metadevices stripes can be
			 * built on top of.
			 */
			child_dev = mdc->un_dev;
			if (md_getmajor(child_dev) == md_major) {
				child_un = MD_UNIT(md_getminor(child_dev));
				md_reset_parent(child_dev);
				recids[rid++] = MD_RECID(child_un);
			}

			sv[isv].setno = MD_MIN2SET(mnum);
			sv[isv++].key = mdc->un_key;
		}
	}

	recids[rid++] = un->c.un_record_id;
	recids[rid] = 0;	/* filled in below */

	/*
	 * Decrement the HSP reference count and
	 * remove the knowledge of the HSP from the unit struct.
	 * This is done atomically to remove a window.
	 */
	if (un->un_hsp_id != -1) {
		(void) md_hot_spare_ifc(HSP_DECREF, un->un_hsp_id, 0, 0,
		    &recids[rid++], NULL, NULL, NULL);
		un->un_hsp_id = -1;
	}

	/* set end marker and commit records */
	recids[rid] = 0;
	mddb_commitrecs_wrapper(recids);

	vtoc_id = un->c.un_vtoc_id;

	/*
	 * Remove self from the namespace
	 */
	if (un->c.un_revision & MD_FN_META_DEV) {
		(void) md_rem_selfname(un->c.un_self_id);
	}

	/* Remove the unit structure */
	mddb_deleterec_wrapper(un->c.un_record_id);

	/* Remove the vtoc, if present */
	if (vtoc_id)
		mddb_deleterec_wrapper(vtoc_id);

	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_METADEVICE,
	    MD_MIN2SET(mnum), MD_MIN2UNIT(mnum));
	md_rem_names(sv, nsv);
	kmem_free(sv, sizeof (sv_dev_t) * nsv);
	kmem_free(recids, sizeof (mddb_recid_t) * (nsv + 3));
}
Example #17
int
zpl_set_acl(struct inode *ip, int type, struct posix_acl *acl)
{
	struct super_block *sb = ITOZSB(ip)->z_sb;
	char *name, *value = NULL;
	int error = 0;
	size_t size = 0;

	if (S_ISLNK(ip->i_mode))
		return (-EOPNOTSUPP);

	switch (type) {
	case ACL_TYPE_ACCESS:
		name = POSIX_ACL_XATTR_ACCESS;
		if (acl) {
			zpl_equivmode_t mode = ip->i_mode;
			error = posix_acl_equiv_mode(acl, &mode);
			if (error < 0) {
				return (error);
			} else {
				/*
				 * The mode bits will have been set by
				 * ->zfs_setattr()->zfs_acl_chmod_setattr()
				 * using the ZFS ACL conversion.  If they
				 * differ from the Posix ACL conversion dirty
				 * the inode to write the Posix mode bits.
				 */
				if (ip->i_mode != mode) {
					ip->i_mode = mode;
					ip->i_ctime = current_fs_time(sb);
					zfs_mark_inode_dirty(ip);
				}

				if (error == 0)
					acl = NULL;
			}
		}
		break;

	case ACL_TYPE_DEFAULT:
		name = POSIX_ACL_XATTR_DEFAULT;
		if (!S_ISDIR(ip->i_mode))
			return (acl ? -EACCES : 0);
		break;

	default:
		return (-EINVAL);
	}

	if (acl) {
		size = posix_acl_xattr_size(acl->a_count);
		value = kmem_alloc(size, KM_SLEEP);

		error = zpl_acl_to_xattr(acl, value, size);
		if (error < 0) {
			kmem_free(value, size);
			return (error);
		}
	}

	error = zpl_xattr_set(ip, name, value, size, 0);
	if (value)
		kmem_free(value, size);

	if (!error) {
		if (acl)
			zpl_set_cached_acl(ip, type, acl);
		else
			zpl_forget_cached_acl(ip, type);
	}

	return (error);
}
Example #18
File: mach_debug.c Project: chedr/xnu
kern_return_t
mach_port_space_info(
    ipc_space_t			space,
    ipc_info_space_t		*infop,
    ipc_info_name_array_t		*tablep,
    mach_msg_type_number_t 		*tableCntp,
    __unused ipc_info_tree_name_array_t	*treep,
    __unused mach_msg_type_number_t         *treeCntp)
{
    ipc_info_name_t *table_info;
    vm_offset_t table_addr;
    vm_size_t table_size, table_size_needed;
    ipc_entry_t table;
    ipc_entry_num_t tsize;
    mach_port_index_t index;
    kern_return_t kr;
    vm_map_copy_t copy;


    if (space == IS_NULL)
        return KERN_INVALID_TASK;

#if !(DEVELOPMENT | DEBUG)
    const boolean_t dbg_ok = (mac_task_check_expose_task(kernel_task) == 0);
#else
    const boolean_t dbg_ok = TRUE;
#endif

    /* start with in-line memory */

    table_size = 0;
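    /*
     * The space can grow while the lock is dropped to allocate memory, so
     * loop: size the snapshot under the read lock, unlock, (re)allocate if
     * the buffer is now too small, and retry until it still fits.
     */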

    for (;;) {
        is_read_lock(space);
        if (!is_active(space)) {
            is_read_unlock(space);
            if (table_size != 0)
                kmem_free(ipc_kernel_map,
                          table_addr, table_size);
            return KERN_INVALID_TASK;
        }

        table_size_needed =
            vm_map_round_page((space->is_table_size
                               * sizeof(ipc_info_name_t)),
                              VM_MAP_PAGE_MASK(ipc_kernel_map));

        if (table_size_needed == table_size)
            break;

        is_read_unlock(space);

        if (table_size != table_size_needed) {
            if (table_size != 0)
                kmem_free(ipc_kernel_map, table_addr, table_size);
            kr = kmem_alloc(ipc_kernel_map,	&table_addr, table_size_needed, VM_KERN_MEMORY_IPC);
            if (kr != KERN_SUCCESS) {
                return KERN_RESOURCE_SHORTAGE;
            }
            table_size = table_size_needed;
        }

    }
    /* space is read-locked and active; we have enough wired memory */

    /* get the overall space info */
    infop->iis_genno_mask = MACH_PORT_NGEN(MACH_PORT_DEAD);
    infop->iis_table_size = space->is_table_size;
    infop->iis_table_next = space->is_table_next->its_size;

    /* walk the table for this space */
    table = space->is_table;
    tsize = space->is_table_size;
    table_info = (ipc_info_name_array_t)table_addr;
    for (index = 0; index < tsize; index++) {
        ipc_info_name_t *iin = &table_info[index];
        ipc_entry_t entry = &table[index];
        ipc_entry_bits_t bits;

        bits = entry->ie_bits;
        iin->iin_name = MACH_PORT_MAKE(index, IE_BITS_GEN(bits));
        iin->iin_collision = 0;
        iin->iin_type = IE_BITS_TYPE(bits);
        if ((entry->ie_bits & MACH_PORT_TYPE_PORT_RIGHTS) != MACH_PORT_TYPE_NONE &&
                entry->ie_request != IE_REQ_NONE) {
            __IGNORE_WCASTALIGN(ipc_port_t port = (ipc_port_t) entry->ie_object);

            assert(IP_VALID(port));
            ip_lock(port);
            iin->iin_type |= ipc_port_request_type(port, iin->iin_name, entry->ie_request);
            ip_unlock(port);
        }

        iin->iin_urefs = IE_BITS_UREFS(bits);
        iin->iin_object = (dbg_ok) ? (natural_t)VM_KERNEL_ADDRPERM((uintptr_t)entry->ie_object) : 0;
        iin->iin_next = entry->ie_next;
        iin->iin_hash = entry->ie_index;
    }

    is_read_unlock(space);

    /* prepare the table out-of-line data for return */
    if (table_size > 0) {
        if (table_size > infop->iis_table_size * sizeof(ipc_info_name_t))
            bzero((char *)&table_info[infop->iis_table_size],
                  table_size - infop->iis_table_size * sizeof(ipc_info_name_t));

        kr = vm_map_unwire(
                 ipc_kernel_map,
                 vm_map_trunc_page(table_addr,
                                   VM_MAP_PAGE_MASK(ipc_kernel_map)),
                 vm_map_round_page(table_addr + table_size,
                                   VM_MAP_PAGE_MASK(ipc_kernel_map)),
                 FALSE);
        assert(kr == KERN_SUCCESS);
        kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)table_addr,
                           (vm_map_size_t)table_size, TRUE, &copy);
        assert(kr == KERN_SUCCESS);
        *tablep = (ipc_info_name_t *)copy;
        *tableCntp = infop->iis_table_size;
    } else {
        *tablep = (ipc_info_name_t *)0;
        *tableCntp = 0;
    }

    /* splay tree is obsolete, no work to do... */
    *treep = (ipc_info_tree_name_t *)0;
    *treeCntp = 0;
    return KERN_SUCCESS;
}
Example #19
File: spa_config.c Project: 64116278/zfs
/*
 * Called when the module is first loaded, this routine loads the configuration
 * file into the SPA namespace.  It does not actually open or load the pools; it
 * only populates the namespace.
 */
void
spa_config_load(void)
{
	void *buf = NULL;
	nvlist_t *nvlist, *child;
	nvpair_t *nvpair;
	char *pathname;
	struct _buf *file;
	uint64_t fsize;

#ifdef _KERNEL
	if (zfs_autoimport_disable)
		return;
#endif

	/*
	 * Open the configuration file.
	 */
	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

	(void) snprintf(pathname, MAXPATHLEN, "%s%s",
	    (rootdir != NULL) ? "./" : "", spa_config_path);

	file = kobj_open_file(pathname);

	kmem_free(pathname, MAXPATHLEN);

	if (file == (struct _buf *)-1)
		return;

	if (kobj_get_filesize(file, &fsize) != 0)
		goto out;

	buf = kmem_alloc(fsize, KM_SLEEP);

	/*
	 * Read the nvlist from the file.
	 */
	if (kobj_read_file(file, buf, fsize, 0) < 0)
		goto out;

	/*
	 * Unpack the nvlist.
	 */
	if (nvlist_unpack(buf, fsize, &nvlist, KM_SLEEP) != 0)
		goto out;

	/*
	 * Iterate over all elements in the nvlist, creating a new spa_t for
	 * each one with the specified configuration.
	 */
	mutex_enter(&spa_namespace_lock);
	nvpair = NULL;
	while ((nvpair = nvlist_next_nvpair(nvlist, nvpair)) != NULL) {
		if (nvpair_type(nvpair) != DATA_TYPE_NVLIST)
			continue;

		child = fnvpair_value_nvlist(nvpair);

		if (spa_lookup(nvpair_name(nvpair)) != NULL)
			continue;
		(void) spa_add(nvpair_name(nvpair), child, NULL);
	}
	mutex_exit(&spa_namespace_lock);

	nvlist_free(nvlist);

out:
	if (buf != NULL)
		kmem_free(buf, fsize);

	kobj_close_file(file);
}
Example #20
/*
 * Construct a stack for init containing the arguments to it, then
 * pass control to exec_common.
 */
int
exec_init(const char *initpath, const char *args)
{
	caddr32_t ucp;
	caddr32_t *uap;
	caddr32_t *argv;
	caddr32_t exec_fnamep;
	char *scratchargs;
	int i, sarg;
	size_t argvlen, alen;
	boolean_t in_arg;
	int argc = 0;
	int error = 0, count = 0;
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);
	int brand_action;

	if (args == NULL)
		args = "";

	alen = strlen(initpath) + 1 + strlen(args) + 1;
	scratchargs = kmem_alloc(alen, KM_SLEEP);
	(void) snprintf(scratchargs, alen, "%s %s", initpath, args);

	/*
	 * We do a quick two state parse of the string to sort out how big
	 * argc should be.
	 */
	in_arg = B_FALSE;
	for (i = 0; i < strlen(scratchargs); i++) {
		if (scratchargs[i] == ' ' || scratchargs[i] == '\0') {
			if (in_arg) {
				in_arg = B_FALSE;
				argc++;
			}
		} else {
			in_arg = B_TRUE;
		}
	}
	argvlen = sizeof (caddr32_t) * (argc + 1);
	argv = kmem_zalloc(argvlen, KM_SLEEP);

	/*
	 * We pull off a bit of a hack here.  We work our way through the
	 * args string, putting nulls at the ends of space delimited tokens
	 * (boot args don't support quoting at this time).  Then we just
	 * copy the whole mess to userland in one go.  In other words, we
	 * transform this: "init -s -r\0" into this on the stack:
	 *
	 *	-0x00 \0
	 *	-0x01 r
	 *	-0x02 -  <--------.
	 *	-0x03 \0	  |
	 *	-0x04 s		  |
	 *	-0x05 -  <------. |
	 *	-0x06 \0	| |
	 *	-0x07 t		| |
	 *	-0x08 i 	| |
	 *	-0x09 n		| |
	 *	-0x0a i  <---.  | |
	 *	-0x10 NULL   |  | |	(argv[3])
	 *	-0x14   -----|--|-'	(argv[2])
	 *	-0x18  ------|--'	(argv[1])
	 *	-0x1c -------'		(argv[0])
	 *
	 * Since we know the value of ucp at the beginning of this process,
	 * we can trivially compute the argv[] array which we also need to
	 * place in userland: argv[i] = ucp - sarg(i), where ucp is the
	 * stack ptr, and sarg is the string index of the start of the
	 * argument.
	 */
	ucp = (caddr32_t)(uintptr_t)p->p_usrstack;

	argc = 0;
	in_arg = B_FALSE;
	sarg = 0;

	for (i = 0; i < alen; i++) {
		if (scratchargs[i] == ' ' || scratchargs[i] == '\0') {
			if (in_arg == B_TRUE) {
				in_arg = B_FALSE;
				scratchargs[i] = '\0';
				argv[argc++] = ucp - (alen - sarg);
			}
		} else if (in_arg == B_FALSE) {
			in_arg = B_TRUE;
			sarg = i;
		}
	}
	ucp -= alen;
	error |= copyout(scratchargs, (caddr_t)(uintptr_t)ucp, alen);

	uap = (caddr32_t *)P2ALIGN((uintptr_t)ucp, sizeof (caddr32_t));
	uap--;	/* advance to be below the word we're in */
	uap -= (argc + 1);	/* advance argc words down, plus one for NULL */
	error |= copyout(argv, uap, argvlen);

	if (error != 0) {
		zcmn_err(p->p_zone->zone_id, CE_WARN,
		    "Could not construct stack for init.\n");
		kmem_free(argv, argvlen);
		kmem_free(scratchargs, alen);
		return (EFAULT);
	}

	exec_fnamep = argv[0];
	kmem_free(argv, argvlen);
	kmem_free(scratchargs, alen);

	/*
	 * Point at the arguments.
	 */
	lwp->lwp_ap = lwp->lwp_arg;
	lwp->lwp_arg[0] = (uintptr_t)exec_fnamep;
	lwp->lwp_arg[1] = (uintptr_t)uap;
	lwp->lwp_arg[2] = NULL;
	curthread->t_post_sys = 1;
	curthread->t_sysnum = SYS_execve;

	/*
	 * If we are executing init from zsched, we may have inherited its
	 * parent process's signal mask.  Clear it now so that we behave in
	 * the same way as when started from the global zone.
	 */
	sigemptyset(&curthread->t_hold);

	brand_action = ZONE_IS_BRANDED(p->p_zone) ? EBA_BRAND : EBA_NONE;
again:
	error = exec_common((const char *)(uintptr_t)exec_fnamep,
	    (const char **)(uintptr_t)uap, NULL, brand_action);

	/*
	 * Normally we would just set lwp_argsaved and t_post_sys and
	 * let post_syscall reset lwp_ap for us.  Unfortunately,
	 * exec_init isn't always called from a system call.  Instead
	 * of making a mess of trap_cleanup, we just reset the args
	 * pointer here.
	 */
	reset_syscall_args();

	switch (error) {
	case 0:
		return (0);

	case ENOENT:
		zcmn_err(p->p_zone->zone_id, CE_WARN,
		    "exec(%s) failed (file not found).\n", initpath);
		return (ENOENT);

	case EAGAIN:
	case EINTR:
		++count;
		if (count < 5) {
			zcmn_err(p->p_zone->zone_id, CE_WARN,
			    "exec(%s) failed with errno %d.  Retrying...\n",
			    initpath, error);
			goto again;
		}
	}

	zcmn_err(p->p_zone->zone_id, CE_WARN,
	    "exec(%s) failed with errno %d.", initpath, error);
	return (error);
}
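
The argv arithmetic described in the block comment above can be exercised on its own. The following is a self-contained userspace analogue (the buffer contents and the fake stack address are made up for illustration): it runs the same two passes, NUL-terminating each space-delimited token in place and recording argv[i] = ucp - (alen - sarg), then drops ucp by alen to where the string block would be copied.

#include <stdbool.h>
#include <stdio.h>

int
main(void)
{
	char scratch[] = "init -s -r";
	size_t alen = sizeof(scratch);		/* includes the trailing NUL */
	unsigned long ucp = 0x8048000UL;	/* pretend user stack pointer */
	unsigned long argv[8];
	size_t sarg = 0;
	bool in_arg = false;
	int argc = 0;

	for (size_t i = 0; i < alen; i++) {
		if (scratch[i] == ' ' || scratch[i] == '\0') {
			if (in_arg) {
				in_arg = false;
				scratch[i] = '\0';
				argv[argc++] = ucp - (alen - sarg);
			}
		} else if (!in_arg) {
			in_arg = true;
			sarg = i;
		}
	}
	ucp -= alen;	/* the string block is copied to [ucp, ucp + alen) */

	for (int i = 0; i < argc; i++)
		printf("argv[%d] = %#lx -> \"%s\"\n", i, argv[i],
		    scratch + (argv[i] - ucp));
	return 0;
}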
Example #21
/*ARGSUSED*/
int
zfsctl_snapdir_rename(struct inode *sdip, char *sname,
                      struct inode *tdip, char *tname, cred_t *cr, int flags)
{
    zfs_sb_t *zsb = ITOZSB(sdip);
    zfs_snapentry_t search, *sep;
    avl_index_t where;
    char *to, *from, *real;
    int error;

    ZFS_ENTER(zsb);

    to = kmem_alloc(MAXNAMELEN, KM_SLEEP);
    from = kmem_alloc(MAXNAMELEN, KM_SLEEP);
    real = kmem_alloc(MAXNAMELEN, KM_SLEEP);

    if (zsb->z_case == ZFS_CASE_INSENSITIVE) {
        error = dmu_snapshot_realname(zsb->z_os, sname, real,
                                      MAXNAMELEN, NULL);
        if (error == 0) {
            sname = real;
        } else if (error != ENOTSUP) {
            goto out;
        }
    }

    error = zfsctl_snapshot_zname(sdip, sname, MAXNAMELEN, from);
    if (!error)
        error = zfsctl_snapshot_zname(tdip, tname, MAXNAMELEN, to);
    if (!error)
        error = zfs_secpolicy_rename_perms(from, to, cr);
    if (error)
        goto out;

    /*
     * Cannot move snapshots out of the snapdir.
     */
    if (sdip != tdip) {
        error = EINVAL;
        goto out;
    }

    /*
     * No-op when names are identical.
     */
    if (strcmp(sname, tname) == 0) {
        error = 0;
        goto out;
    }

    mutex_enter(&zsb->z_ctldir_lock);

    error = dmu_objset_rename(from, to, B_FALSE);
    if (error)
        goto out_unlock;

    search.se_name = (char *)sname;
    sep = avl_find(&zsb->z_ctldir_snaps, &search, &where);
    if (sep)
        zfsctl_rename_snap(zsb, sep, tname);

out_unlock:
    mutex_exit(&zsb->z_ctldir_lock);
out:
    kmem_free(from, MAXNAMELEN);
    kmem_free(to, MAXNAMELEN);
    kmem_free(real, MAXNAMELEN);

    ZFS_EXIT(zsb);

    return (error);
}
Example #22
/* ARGSUSED */
static int
sha2_create_ctx_template(crypto_provider_handle_t provider,
    crypto_mechanism_t *mechanism, crypto_key_t *key,
    crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size,
    crypto_req_handle_t req)
{
	sha2_hmac_ctx_t *sha2_hmac_ctx_tmpl;
	uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
	uint32_t sha_digest_len, sha_hmac_block_size;

	/*
	 * Set the digest length and block size to values appropriate to the
	 * mechanism
	 */
	switch (mechanism->cm_type) {
	case SHA256_HMAC_MECH_INFO_TYPE:
	case SHA256_HMAC_GEN_MECH_INFO_TYPE:
		sha_digest_len = SHA256_DIGEST_LENGTH;
		sha_hmac_block_size = SHA256_HMAC_BLOCK_SIZE;
		break;
	case SHA384_HMAC_MECH_INFO_TYPE:
	case SHA384_HMAC_GEN_MECH_INFO_TYPE:
	case SHA512_HMAC_MECH_INFO_TYPE:
	case SHA512_HMAC_GEN_MECH_INFO_TYPE:
		sha_digest_len = SHA512_DIGEST_LENGTH;
		sha_hmac_block_size = SHA512_HMAC_BLOCK_SIZE;
		break;
	default:
		return (CRYPTO_MECHANISM_INVALID);
	}

	/* Add support for key by attributes (RFE 4706552) */
	if (key->ck_format != CRYPTO_KEY_RAW)
		return (CRYPTO_ARGUMENTS_BAD);

	/*
	 * Allocate and initialize SHA2 context.
	 */
	sha2_hmac_ctx_tmpl = kmem_alloc(sizeof (sha2_hmac_ctx_t),
	    crypto_kmflag(req));
	if (sha2_hmac_ctx_tmpl == NULL)
		return (CRYPTO_HOST_MEMORY);

	sha2_hmac_ctx_tmpl->hc_mech_type = mechanism->cm_type;

	if (keylen_in_bytes > sha_hmac_block_size) {
		uchar_t digested_key[SHA512_DIGEST_LENGTH];

		/*
		 * Hash the passed-in key to get a smaller key.
		 * The inner context is used since it hasn't been
		 * initialized yet.
		 */
		PROV_SHA2_DIGEST_KEY(mechanism->cm_type / 3,
		    &sha2_hmac_ctx_tmpl->hc_icontext,
		    key->ck_data, keylen_in_bytes, digested_key);
		sha2_mac_init_ctx(sha2_hmac_ctx_tmpl, digested_key,
		    sha_digest_len);
	} else {
		sha2_mac_init_ctx(sha2_hmac_ctx_tmpl, key->ck_data,
		    keylen_in_bytes);
	}

	*ctx_template = (crypto_spi_ctx_template_t)sha2_hmac_ctx_tmpl;
	*ctx_template_size = sizeof (sha2_hmac_ctx_t);

	return (CRYPTO_SUCCESS);
}
Example #23
int
ELFNAME2(linux,copyargs)(struct lwp *l, struct exec_package *pack,
	struct ps_strings *arginfo, char **stackp, void *argp)
{
	struct linux_extra_stack_data64 *esdp, esd;
	struct elf_args *ap;
	struct vattr *vap;
	Elf_Ehdr *eh;
	Elf_Phdr *ph;
	u_long phsize;
	Elf_Addr phdr = 0;
	int error;
	int i;

	if ((error = copyargs(l, pack, arginfo, stackp, argp)) != 0)
		return error;

	/*
	 * Push extra arguments on the stack needed by dynamically
	 * linked binaries and static binaries as well.
	 */
	memset(&esd, 0, sizeof(esd));
	esdp = (struct linux_extra_stack_data64 *)(*stackp);
	ap = (struct elf_args *)pack->ep_emul_arg;
	vap = pack->ep_vap;
	eh = (Elf_Ehdr *)pack->ep_hdr;

	/*
	 * We forgot this, so we need to reload it now. XXX keep track of it?
	 */
	if (ap == NULL) {
		phsize = eh->e_phnum * sizeof(Elf_Phdr);
		ph = (Elf_Phdr *)kmem_alloc(phsize, KM_SLEEP);
		error = exec_read_from(l, pack->ep_vp, eh->e_phoff, ph, phsize);
		if (error == 0) {
			/* Only scan the program headers we actually read. */
			for (i = 0; i < eh->e_phnum; i++) {
				if (ph[i].p_type == PT_PHDR) {
					phdr = ph[i].p_vaddr;
					break;
				}
			}
		}
		kmem_free(ph, phsize);
	}


	/*
	 * The exec_package doesn't have a proc pointer and it's not
	 * exactly trivial to add one since the credentials are
	 * changing. XXX Linux uses curlwp's credentials.
	 * Why can't we use them too?
	 */

	i = 0;
	esd.ai[i].a_type = LINUX_AT_HWCAP;
	esd.ai[i++].a_v = rcr4();

	esd.ai[i].a_type = AT_PAGESZ;
	esd.ai[i++].a_v = PAGE_SIZE;

	esd.ai[i].a_type = LINUX_AT_CLKTCK;
	esd.ai[i++].a_v = hz;

	esd.ai[i].a_type = AT_PHDR;
	esd.ai[i++].a_v = (ap ? ap->arg_phaddr: phdr);

	esd.ai[i].a_type = AT_PHENT;
	esd.ai[i++].a_v = (ap ? ap->arg_phentsize : eh->e_phentsize);

	esd.ai[i].a_type = AT_PHNUM;
	esd.ai[i++].a_v = (ap ? ap->arg_phnum : eh->e_phnum);

	esd.ai[i].a_type = AT_BASE;
	esd.ai[i++].a_v = (ap ? ap->arg_interp : 0);

	esd.ai[i].a_type = AT_FLAGS;
	esd.ai[i++].a_v = 0;

	esd.ai[i].a_type = AT_ENTRY;
	esd.ai[i++].a_v = (ap ? ap->arg_entry : eh->e_entry);

	esd.ai[i].a_type = LINUX_AT_EGID;
	esd.ai[i++].a_v = ((vap->va_mode & S_ISGID) ?
	    vap->va_gid : kauth_cred_getegid(l->l_cred));

	esd.ai[i].a_type = LINUX_AT_GID;
	esd.ai[i++].a_v = kauth_cred_getgid(l->l_cred);

	esd.ai[i].a_type = LINUX_AT_EUID;
	esd.ai[i++].a_v = ((vap->va_mode & S_ISUID) ? 
	    vap->va_uid : kauth_cred_geteuid(l->l_cred));

	esd.ai[i].a_type = LINUX_AT_UID;
	esd.ai[i++].a_v = kauth_cred_getuid(l->l_cred);

	esd.ai[i].a_type = LINUX_AT_SECURE;
	esd.ai[i++].a_v = 0;

	esd.ai[i].a_type = LINUX_AT_PLATFORM;
	esd.ai[i++].a_v = (Elf_Addr)&esdp->hw_platform[0];

	esd.ai[i].a_type = LINUX_AT_RANDOM;
	esd.ai[i++].a_v = (Elf_Addr)&esdp->randbytes[0];
	esd.randbytes[0] = cprng_strong32();
	esd.randbytes[1] = cprng_strong32();
	esd.randbytes[2] = cprng_strong32();
	esd.randbytes[3] = cprng_strong32();

	esd.ai[i].a_type = AT_NULL;
	esd.ai[i++].a_v = 0;

	KASSERT(i == LINUX_ELF_AUX_ENTRIES);

	strcpy(esd.hw_platform, LINUX_PLATFORM); 

	exec_free_emul_arg(pack);

	/*
	 * Copy out the ELF auxiliary table and hw platform name
	 */
	if ((error = copyout(&esd, esdp, sizeof(esd))) != 0)
		return error;
	*stackp += sizeof(esd);

	return 0;
}
Example #24
static int
sha2_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
    crypto_key_t *key, crypto_spi_ctx_template_t ctx_template,
    crypto_req_handle_t req)
{
	int ret = CRYPTO_SUCCESS;
	uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
	uint_t sha_digest_len, sha_hmac_block_size;

	/*
	 * Set the digest length and block size to values appropriate to the
	 * mechanism
	 */
	switch (mechanism->cm_type) {
	case SHA256_HMAC_MECH_INFO_TYPE:
	case SHA256_HMAC_GEN_MECH_INFO_TYPE:
		sha_digest_len = SHA256_DIGEST_LENGTH;
		sha_hmac_block_size = SHA256_HMAC_BLOCK_SIZE;
		break;
	case SHA384_HMAC_MECH_INFO_TYPE:
	case SHA384_HMAC_GEN_MECH_INFO_TYPE:
	case SHA512_HMAC_MECH_INFO_TYPE:
	case SHA512_HMAC_GEN_MECH_INFO_TYPE:
		sha_digest_len = SHA512_DIGEST_LENGTH;
		sha_hmac_block_size = SHA512_HMAC_BLOCK_SIZE;
		break;
	default:
		return (CRYPTO_MECHANISM_INVALID);
	}

	if (key->ck_format != CRYPTO_KEY_RAW)
		return (CRYPTO_ARGUMENTS_BAD);

	ctx->cc_provider_private = kmem_alloc(sizeof (sha2_hmac_ctx_t),
	    crypto_kmflag(req));
	if (ctx->cc_provider_private == NULL)
		return (CRYPTO_HOST_MEMORY);

	PROV_SHA2_HMAC_CTX(ctx)->hc_mech_type = mechanism->cm_type;
	if (ctx_template != NULL) {
		/* reuse context template */
		bcopy(ctx_template, PROV_SHA2_HMAC_CTX(ctx),
		    sizeof (sha2_hmac_ctx_t));
	} else {
		/* no context template, compute context */
		if (keylen_in_bytes > sha_hmac_block_size) {
			uchar_t digested_key[SHA512_DIGEST_LENGTH];
			sha2_hmac_ctx_t *hmac_ctx = ctx->cc_provider_private;

			/*
			 * Hash the passed-in key to get a smaller key.
			 * The inner context is used since it hasn't been
			 * initialized yet.
			 */
			PROV_SHA2_DIGEST_KEY(mechanism->cm_type / 3,
			    &hmac_ctx->hc_icontext,
			    key->ck_data, keylen_in_bytes, digested_key);
			sha2_mac_init_ctx(PROV_SHA2_HMAC_CTX(ctx),
			    digested_key, sha_digest_len);
		} else {
			sha2_mac_init_ctx(PROV_SHA2_HMAC_CTX(ctx),
			    key->ck_data, keylen_in_bytes);
		}
	}

	/*
	 * Get the mechanism parameters, if applicable.
	 */
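	/*
	 * In this provider's mechanism numbering, cm_type % 3 == 2 picks out
	 * the *_HMAC_GEN_* variants, which carry an explicit digest-length
	 * parameter.
	 */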
	if (mechanism->cm_type % 3 == 2) {
		if (mechanism->cm_param == NULL ||
		    mechanism->cm_param_len != sizeof (ulong_t))
			ret = CRYPTO_MECHANISM_PARAM_INVALID;
		PROV_SHA2_GET_DIGEST_LEN(mechanism,
		    PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len);
		if (PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len > sha_digest_len)
			ret = CRYPTO_MECHANISM_PARAM_INVALID;
	}

	if (ret != CRYPTO_SUCCESS) {
		bzero(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t));
		kmem_free(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t));
		ctx->cc_provider_private = NULL;
	}

	return (ret);
}
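
The SHA-2 HMAC paths above (and the SHA-1 variant later in this list) both apply the standard RFC 2104 key rule: a key longer than the hash block size is first digested down to digest length before the inner and outer contexts are keyed. Below is a compact sketch of just that rule; hash_fn and hmac_normalize_key are hypothetical names standing in for the provider's digest routines.

#include <stddef.h>
#include <string.h>

typedef void (*hash_fn)(const unsigned char *data, size_t len,
			unsigned char *digest);

/*
 * Normalize an HMAC key: a key longer than the block size is replaced by
 * its digest (K' = H(K)); a shorter key is used as-is and later padded
 * with zeros to the block size when the ipad/opad blocks are built.
 * Returns the length of the normalized key written to out[].
 */
static size_t
hmac_normalize_key(const unsigned char *key, size_t keylen,
		   size_t block_size, size_t digest_len, hash_fn hash,
		   unsigned char *out /* at least block_size bytes */)
{
	if (keylen > block_size) {
		hash(key, keylen, out);
		return digest_len;
	}
	memcpy(out, key, keylen);
	return keylen;
}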
Example #25
/* Construct ECGroup from hex parameters and name, if any. Called by
 * ECGroup_fromHex and ECGroup_fromName. */
ECGroup *
ecgroup_fromNameAndHex(const ECCurveName name,
				   const ECCurveParams * params, int kmflag)
{
	mp_int irr, curvea, curveb, genx, geny, order;
	int bits;
	ECGroup *group = NULL;
	mp_err res = MP_OKAY;

	/* initialize values */
	MP_DIGITS(&irr) = 0;
	MP_DIGITS(&curvea) = 0;
	MP_DIGITS(&curveb) = 0;
	MP_DIGITS(&genx) = 0;
	MP_DIGITS(&geny) = 0;
	MP_DIGITS(&order) = 0;
	MP_CHECKOK(mp_init(&irr, kmflag));
	MP_CHECKOK(mp_init(&curvea, kmflag));
	MP_CHECKOK(mp_init(&curveb, kmflag));
	MP_CHECKOK(mp_init(&genx, kmflag));
	MP_CHECKOK(mp_init(&geny, kmflag));
	MP_CHECKOK(mp_init(&order, kmflag));
	MP_CHECKOK(mp_read_radix(&irr, params->irr, 16));
	MP_CHECKOK(mp_read_radix(&curvea, params->curvea, 16));
	MP_CHECKOK(mp_read_radix(&curveb, params->curveb, 16));
	MP_CHECKOK(mp_read_radix(&genx, params->genx, 16));
	MP_CHECKOK(mp_read_radix(&geny, params->geny, 16));
	MP_CHECKOK(mp_read_radix(&order, params->order, 16));

	/* determine number of bits */
	bits = mpl_significant_bits(&irr) - 1;
	if (bits < MP_OKAY) {
		res = bits;
		goto CLEANUP;
	}

	/* determine which optimizations (if any) to use */
	if (params->field == ECField_GFp) {
#ifdef NSS_ECC_MORE_THAN_SUITE_B
	    switch (name) {
#ifdef ECL_USE_FP
		case ECCurve_SECG_PRIME_160R1:
			group =
				ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny,
								&order, params->cofactor);
			if (group == NULL) { res = MP_UNDEF; goto CLEANUP; }
			MP_CHECKOK(ec_group_set_secp160r1_fp(group));
			break;
#endif
		case ECCurve_SECG_PRIME_192R1:
#ifdef ECL_USE_FP
			group =
				ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny,
								&order, params->cofactor);
			if (group == NULL) { res = MP_UNDEF; goto CLEANUP; }
			MP_CHECKOK(ec_group_set_nistp192_fp(group));
#else
			group =
				ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny,
								&order, params->cofactor);
			if (group == NULL) { res = MP_UNDEF; goto CLEANUP; }
			MP_CHECKOK(ec_group_set_gfp192(group, name));
#endif
			break;
		case ECCurve_SECG_PRIME_224R1:
#ifdef ECL_USE_FP
			group =
				ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny,
								&order, params->cofactor);
			if (group == NULL) { res = MP_UNDEF; goto CLEANUP; }
			MP_CHECKOK(ec_group_set_nistp224_fp(group));
#else
			group =
				ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny,
								&order, params->cofactor);
			if (group == NULL) { res = MP_UNDEF; goto CLEANUP; }
			MP_CHECKOK(ec_group_set_gfp224(group, name));
#endif
			break;
		case ECCurve_SECG_PRIME_256R1:
			group =
				ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny,
								&order, params->cofactor);
			if (group == NULL) { res = MP_UNDEF; goto CLEANUP; }
			MP_CHECKOK(ec_group_set_gfp256(group, name));
			break;
		case ECCurve_SECG_PRIME_521R1:
			group =
				ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny,
								&order, params->cofactor);
			if (group == NULL) { res = MP_UNDEF; goto CLEANUP; }
			MP_CHECKOK(ec_group_set_gfp521(group, name));
			break;
		default:
			/* use generic arithmetic */
#endif
			group =
				ECGroup_consGFp_mont(&irr, &curvea, &curveb, &genx, &geny,
									 &order, params->cofactor);
			if (group == NULL) { res = MP_UNDEF; goto CLEANUP; }
#ifdef NSS_ECC_MORE_THAN_SUITE_B
		}
	} else if (params->field == ECField_GF2m) {
		group = ECGroup_consGF2m(&irr, NULL, &curvea, &curveb, &genx, &geny, &order, params->cofactor);
		if (group == NULL) { res = MP_UNDEF; goto CLEANUP; }
		if ((name == ECCurve_NIST_K163) ||
		    (name == ECCurve_NIST_B163) ||
		    (name == ECCurve_SECG_CHAR2_163R1)) {
			MP_CHECKOK(ec_group_set_gf2m163(group, name));
		} else if ((name == ECCurve_SECG_CHAR2_193R1) ||
		           (name == ECCurve_SECG_CHAR2_193R2)) {
			MP_CHECKOK(ec_group_set_gf2m193(group, name));
		} else if ((name == ECCurve_NIST_K233) ||
		           (name == ECCurve_NIST_B233)) {
			MP_CHECKOK(ec_group_set_gf2m233(group, name));
		}
#endif
	} else {
		res = MP_UNDEF;
		goto CLEANUP;
	}

	/* set name, if any */
	if ((group != NULL) && (params->text != NULL)) {
#ifdef _KERNEL
		int n = strlen(params->text) + 1;

		group->text = kmem_alloc(n, kmflag);
		if (group->text == NULL) {
			res = MP_MEM;
			goto CLEANUP;
		}
		bcopy(params->text, group->text, n);
		group->text_len = n;
#else
		group->text = strdup(params->text);
		if (group->text == NULL) {
			res = MP_MEM;
		}
#endif
	}

  CLEANUP:
	mp_clear(&irr);
	mp_clear(&curvea);
	mp_clear(&curveb);
	mp_clear(&genx);
	mp_clear(&geny);
	mp_clear(&order);
	if (res != MP_OKAY) {
		ECGroup_free(group);
		return NULL;
	}
	return group;
}
Example #26
/*
 * rfs4_find_dr:
 *
 * Search for an entry in the duplicate request cache by
 * calculating the hash index based on the XID, and examining
 * the entries in the hash bucket. If we find a match, return.
 * Once we have searched the bucket we call rfs4_alloc_dr() to
 * allocate a new entry, or reuse one that is available.
 */
int
rfs4_find_dr(struct svc_req *req, rfs4_drc_t *drc, rfs4_dupreq_t **dup)
{

	uint32_t	the_xid;
	list_t		*dr_bkt;
	rfs4_dupreq_t	*drp;
	int		bktdex;

	/*
	 * Get the XID, calculate the bucket and search to
	 * see if we need to replay from the cache.
	 */
	the_xid = req->rq_xprt->xp_xid;
	bktdex = the_xid % drc->dr_hash;

	dr_bkt = (list_t *)
	    &(drc->dr_buckets[(the_xid % drc->dr_hash)]);

	DTRACE_PROBE3(nfss__i__drc_bktdex,
	    int, bktdex,
	    uint32_t, the_xid,
	    list_t *, dr_bkt);

	*dup = NULL;

	mutex_enter(&drc->lock);
	/*
	 * Search the bucket for a matching xid and address.
	 */
	for (drp = list_head(dr_bkt); drp != NULL;
	    drp = list_next(dr_bkt, drp)) {

		if (drp->dr_xid == the_xid &&
		    drp->dr_addr.len == req->rq_xprt->xp_rtaddr.len &&
		    bcmp((caddr_t)drp->dr_addr.buf,
		    (caddr_t)req->rq_xprt->xp_rtaddr.buf,
		    drp->dr_addr.len) == 0) {

			/*
			 * Found a match so REPLAY the Reply
			 */
			if (drp->dr_state == NFS4_DUP_REPLAY) {
				rfs4_dr_chstate(drp, NFS4_DUP_INUSE);
				mutex_exit(&drc->lock);
				*dup = drp;
				DTRACE_PROBE1(nfss__i__drc_replay,
				    rfs4_dupreq_t *, drp);
				return (NFS4_DUP_REPLAY);
			}

			/*
			 * This entry must be in transition, so return
			 * the 'pending' status.
			 */
			mutex_exit(&drc->lock);
			return (NFS4_DUP_PENDING);
		}
	}

	drp = rfs4_alloc_dr(drc);
	mutex_exit(&drc->lock);

	/*
	 * The DRC is full and all entries are in use. The upper function
	 * should error out this request and force the client to
	 * retransmit -- effectively this is a resource issue: either NFSD
	 * threads are tied up in the native file system, or the cache
	 * size is too small for the server load.
	 */
	if (drp == NULL)
		return (NFS4_DUP_ERROR);

	/*
	 * Init the state to NEW.
	 */
	drp->dr_state = NFS4_DUP_NEW;

	/*
	 * If needed, resize the address buffer
	 */
	if (drp->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) {
		if (drp->dr_addr.buf != NULL)
			kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen);
		drp->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len;
		drp->dr_addr.buf = kmem_alloc(drp->dr_addr.maxlen, KM_NOSLEEP);
		if (drp->dr_addr.buf == NULL) {
			/*
			 * If the malloc fails, mark the entry
			 * as free and put on the tail.
			 */
			drp->dr_addr.maxlen = 0;
			drp->dr_state = NFS4_DUP_FREE;
			mutex_enter(&drc->lock);
			list_insert_tail(&(drc->dr_cache), drp);
			mutex_exit(&drc->lock);
			return (NFS4_DUP_ERROR);
		}
	}


	/*
	 * Copy the address.
	 */
	drp->dr_addr.len = req->rq_xprt->xp_rtaddr.len;

	bcopy((caddr_t)req->rq_xprt->xp_rtaddr.buf,
	    (caddr_t)drp->dr_addr.buf,
	    drp->dr_addr.len);

	drp->dr_xid = the_xid;
	drp->dr_bkt = dr_bkt;

	/*
	 * Insert at the head of the bucket and
	 * the drc lists.
	 */
	mutex_enter(&drc->lock);
	list_insert_head(&drc->dr_cache, drp);
	list_insert_head(dr_bkt, drp);
	mutex_exit(&drc->lock);

	*dup = drp;

	return (NFS4_DUP_NEW);
}
Example #27
/*
 * Shared implementation to inject a packet to or from an interface
 * Return value:
 *   0: successful
 *  -1: memory allocation failed
 *   1: other errors
 */
static int
ip_inject_impl(inject_t style, net_inject_t *packet, boolean_t isv6,
    ip_stack_t *ipst)
{
	ddi_taskq_t *tq = NULL;
	void (* func)(void *);
	injection_t *inject;
	mblk_t *mp;

	ASSERT(packet != NULL);
	ASSERT(packet->ni_packet != NULL);
	ASSERT(packet->ni_packet->b_datap->db_type == M_DATA);

	switch (style) {
	case NI_QUEUE_IN:
		inject = kmem_alloc(sizeof (*inject), KM_NOSLEEP);
		if (inject == NULL)
			return (-1);
		inject->inj_data = *packet;
		inject->inj_isv6 = isv6;
		/*
		 * deliver up into the kernel, imitating its reception by a
		 * network interface, add to list and schedule timeout
		 */
		func = ip_ni_queue_in_func;
		tq = eventq_queue_in;
		break;

	case NI_QUEUE_OUT:
		inject = kmem_alloc(sizeof (*inject), KM_NOSLEEP);
		if (inject == NULL)
			return (-1);
		inject->inj_data = *packet;
		inject->inj_isv6 = isv6;
		/*
		 * deliver out of the kernel, as if it were being sent via a
		 * raw socket so that IPFilter will see it again, add to list
		 * and schedule timeout
		 */
		func = ip_ni_queue_out_func;
		tq = eventq_queue_out;
		break;

	case NI_DIRECT_OUT: {
		struct sockaddr *sock;

		mp = packet->ni_packet;

		sock = (struct sockaddr *)&packet->ni_addr;
		/*
		 * ipfil_sendpkt was provided by surya to ease the
		 * problems associated with sending out a packet.
		 */
		switch (ipfil_sendpkt(sock, mp, packet->ni_physical,
		    netstackid_to_zoneid(
		    ipst->ips_netstack->netstack_stackid))) {
		case 0 :
		case EINPROGRESS:
			return (0);
		case ECOMM :
		case ENONET :
			return (1);
		default :
			return (1);
		}
		/* NOTREACHED */
	}
	default:
		freemsg(packet->ni_packet);
		return (1);
	}

	ASSERT(tq != NULL);

	inject->inj_ptr = ipst;
	if (ddi_taskq_dispatch(tq, func, (void *)inject,
	    DDI_SLEEP) == DDI_FAILURE) {
		ip2dbg(("ip_inject:  ddi_taskq_dispatch failed\n"));
		freemsg(packet->ni_packet);
		return (1);
	}
	return (0);
}
Example #28
static int
sha1_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
    crypto_key_t *key, crypto_spi_ctx_template_t ctx_template,
    crypto_req_handle_t req)
{
	int ret = CRYPTO_SUCCESS;
	uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);

	if (mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE &&
	    mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE)
		return (CRYPTO_MECHANISM_INVALID);

	/* Add support for key by attributes (RFE 4706552) */
	if (key->ck_format != CRYPTO_KEY_RAW)
		return (CRYPTO_ARGUMENTS_BAD);

	ctx->cc_provider_private = kmem_alloc(sizeof (sha1_hmac_ctx_t),
	    crypto_kmflag(req));
	if (ctx->cc_provider_private == NULL)
		return (CRYPTO_HOST_MEMORY);

	if (ctx_template != NULL) {
		/* reuse context template */
		bcopy(ctx_template, PROV_SHA1_HMAC_CTX(ctx),
		    sizeof (sha1_hmac_ctx_t));
	} else {
		/* no context template, compute context */
		if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) {
			uchar_t digested_key[SHA1_DIGEST_LENGTH];
			sha1_hmac_ctx_t *hmac_ctx = ctx->cc_provider_private;

			/*
			 * Hash the passed-in key to get a smaller key.
			 * The inner context is used since it hasn't been
			 * initialized yet.
			 */
			PROV_SHA1_DIGEST_KEY(&hmac_ctx->hc_icontext,
			    key->ck_data, keylen_in_bytes, digested_key);
			sha1_mac_init_ctx(PROV_SHA1_HMAC_CTX(ctx),
			    digested_key, SHA1_DIGEST_LENGTH);
		} else {
			sha1_mac_init_ctx(PROV_SHA1_HMAC_CTX(ctx),
			    key->ck_data, keylen_in_bytes);
		}
	}

	/*
	 * Get the mechanism parameters, if applicable.
	 */
	PROV_SHA1_HMAC_CTX(ctx)->hc_mech_type = mechanism->cm_type;
	if (mechanism->cm_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) {
		if (mechanism->cm_param == NULL ||
		    mechanism->cm_param_len != sizeof (ulong_t))
			ret = CRYPTO_MECHANISM_PARAM_INVALID;
		PROV_SHA1_GET_DIGEST_LEN(mechanism,
		    PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len);
		if (PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len >
		    SHA1_DIGEST_LENGTH)
			ret = CRYPTO_MECHANISM_PARAM_INVALID;
	}

	if (ret != CRYPTO_SUCCESS) {
		bzero(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t));
		kmem_free(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t));
		ctx->cc_provider_private = NULL;
	}

	return (ret);
}
Example #29
/* ARGSUSED */
int
mfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
{
	struct lwp *l = curlwp;
	struct vnode *devvp;
	struct mfs_args *args = data;
	struct ufsmount *ump;
	struct fs *fs;
	struct mfsnode *mfsp;
	struct proc *p;
	int flags, error = 0;

	if (*data_len < sizeof *args)
		return EINVAL;

	p = l->l_proc;
	if (mp->mnt_flag & MNT_GETARGS) {
		struct vnode *vp;

		ump = VFSTOUFS(mp);
		if (ump == NULL)
			return EIO;

		vp = ump->um_devvp;
		if (vp == NULL)
			return EIO;

		mfsp = VTOMFS(vp);
		if (mfsp == NULL)
			return EIO;

		args->fspec = NULL;
		args->base = mfsp->mfs_baseoff;
		args->size = mfsp->mfs_size;
		*data_len = sizeof *args;
		return 0;
	}
	/*
	 * XXX turn off async to avoid hangs when writing lots of data.
	 * the problem is that MFS needs to allocate pages to clean pages,
	 * so if we wait until the last minute to clean pages then there
	 * may not be any pages available to do the cleaning.
	 * ... and since the default partially-synchronous mode turns out
	 * to not be sufficient under heavy load, make it full synchronous.
	 */
	mp->mnt_flag &= ~MNT_ASYNC;
	mp->mnt_flag |= MNT_SYNCHRONOUS;

	/*
	 * If updating, check whether changing from read-only to
	 * read/write; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			error = ffs_flushfiles(mp, flags, l);
			if (error)
				return (error);
		}
		if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR))
			fs->fs_ronly = 0;
		if (args->fspec == NULL)
			return EINVAL;
		return (0);
	}
	error = getnewvnode(VT_MFS, NULL, mfs_vnodeop_p, NULL, &devvp);
	if (error)
		return (error);
	devvp->v_vflag |= VV_MPSAFE;
	devvp->v_type = VBLK;
	spec_node_init(devvp, makedev(255, mfs_minor));
	mfs_minor++;
	mfsp = kmem_alloc(sizeof(*mfsp), KM_SLEEP);
	devvp->v_data = mfsp;
	mfsp->mfs_baseoff = args->base;
	mfsp->mfs_size = args->size;
	mfsp->mfs_vnode = devvp;
	mfsp->mfs_proc = p;
	mfsp->mfs_shutdown = 0;
	cv_init(&mfsp->mfs_cv, "mfsidl");
	mfsp->mfs_refcnt = 1;
	bufq_alloc(&mfsp->mfs_buflist, "fcfs", 0);
	if ((error = ffs_mountfs(devvp, mp, l)) != 0) {
		mfsp->mfs_shutdown = 1;
		vrele(devvp);
		return (error);
	}
	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	error = set_statvfs_info(path, UIO_USERSPACE, args->fspec,
	    UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);
	if (error)
		return error;
	(void)strncpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname,
		sizeof(fs->fs_fsmnt));
	fs->fs_fsmnt[sizeof(fs->fs_fsmnt) - 1] = '\0';
	/* XXX: cleanup on error */
	return 0;
}
Example #30
File: gen_drv.c Project: andreiw/polaris
static int
gen_create_minor_nodes(dev_info_t *devi, struct dstate *dstatep)
{
	int rval = DDI_SUCCESS;
	char *node_name;

	node_name = ddi_node_name(devi);

	if (strcmp(node_name, "disk_chan") == 0) {
		rval = gen_create_mn_disk_chan(devi);
	} else if (strcmp(node_name, "disk_wwn") == 0) {
		rval = gen_create_mn_disk_wwn(devi);
	} else if (strcmp(node_name, "disk_cdrom") == 0) {
		rval = gen_create_mn_disk_cdrom(devi);
	} else if (strcmp(node_name, "disk_fd") == 0) {
		rval = gen_create_mn_disk_fd(devi);
	} else if (strcmp(node_name, "cgtwenty") == 0) {
		rval = gen_create_display(devi);
	} else if (strcmp(node_name, "genzs") == 0) {
		rval = gen_create_serial(devi);
	} else if (strcmp(node_name, "net") == 0) {
		rval = gen_create_net(devi);
	} else {
		int instance = ddi_get_instance(devi);
		char *node_type;

		/*
		 * Solaris may directly hang the node_type off the minor node
		 * (without making a copy).  Since we free the node_type
		 * property below we need to make a private copy to pass
		 * to ddi_create_minor_node to avoid devinfo snapshot panics.
		 * We store a pointer to our copy in dstate and free it in
		 * gen_detach after the minor nodes have been deleted by
		 * ddi_remove_minor_node.
		 */
		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi,
		    DDI_PROP_DONTPASS, "node-type", &node_type) != 0) {
			cmn_err(CE_WARN, "couldn't get node-type\n");
			return (DDI_FAILURE);
		}
		if (node_type) {
			dstatep->node_type = kmem_alloc(
			    strlen(node_type) + 1, KM_SLEEP);
			(void) strcpy(dstatep->node_type, node_type);
		}
		ddi_prop_free(node_type);

		/* the minor name is the same as the node name */
		if (ddi_create_minor_node(devi, node_name, S_IFCHR,
		    (INST_TO_MINOR(instance)), dstatep->node_type, NULL) !=
		    DDI_SUCCESS) {
			if (dstatep->node_type) {
				kmem_free(dstatep->node_type,
				    strlen(dstatep->node_type) + 1);
				dstatep->node_type = NULL;
			}
			return (DDI_FAILURE);
		}
		return (DDI_SUCCESS);
	}

	if (rval != DDI_SUCCESS) {
		ddi_prop_remove_all(devi);
		ddi_remove_minor_node(devi, NULL);
	}

	return (rval);
}