/*
 * Convert the shortform directory to block form.
 */
int						/* error */
xfs_dir2_sf_to_block(
	xfs_da_args_t		*args)		/* operation arguments */
{
	xfs_dir2_db_t		blkno;		/* dir-relative block # (0) */
	xfs_dir2_block_t	*block;		/* block structure */
	xfs_dir2_leaf_entry_t	*blp;		/* block leaf entries */
	xfs_dabuf_t		*bp;		/* block buffer */
	xfs_dir2_block_tail_t	*btp;		/* block tail pointer */
	char			*buf;		/* sf buffer */
	int			buf_len;
	xfs_dir2_data_entry_t	*dep;		/* data entry pointer */
	xfs_inode_t		*dp;		/* incore directory inode */
	int			dummy;		/* trash */
	xfs_dir2_data_unused_t	*dup;		/* unused entry pointer */
	int			endoffset;	/* end of data objects */
	int			error;		/* error return value */
	int			i;		/* index */
	xfs_mount_t		*mp;		/* filesystem mount point */
	int			needlog;	/* need to log block header */
	int			needscan;	/* need to scan block freespc */
	int			newoffset;	/* offset from current entry */
	int			offset;		/* target block offset */
	xfs_dir2_sf_entry_t	*sfep;		/* sf entry pointer */
	xfs_dir2_sf_t		*sfp;		/* shortform structure */
	__be16			*tagp;		/* end of data entry */
	xfs_trans_t		*tp;		/* transaction pointer */
	struct xfs_name		name;

	xfs_dir2_trace_args("sf_to_block", args);
	dp = args->dp;
	tp = args->trans;
	mp = dp->i_mount;
	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
	/*
	 * Bomb out if the shortform directory is way too short.
	 */
	if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		return XFS_ERROR(EIO);
	}
	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
	ASSERT(dp->i_df.if_u1.if_data != NULL);
	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
	/*
	 * Copy the directory into a temporary heap buffer.
	 * Then pitch the incore inode data so we can make extents.
	 */
	buf_len = dp->i_df.if_bytes;
	buf = kmem_alloc(dp->i_df.if_bytes, KM_SLEEP);
	memcpy(buf, sfp, dp->i_df.if_bytes);
	xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK);
	dp->i_d.di_size = 0;
	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
	/*
	 * Reset pointer - old sfp is gone.
	 */
	sfp = (xfs_dir2_sf_t *)buf;
	/*
	 * Add block 0 to the inode.
	 */
	error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno);
	if (error) {
		kmem_free(buf);
		return error;
	}
	/*
	 * Initialize the data block.
	 */
	error = xfs_dir2_data_init(args, blkno, &bp);
	if (error) {
		kmem_free(buf);
		return error;
	}
	block = bp->data;
	block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
	/*
	 * Compute size of block "tail" area.
	 */
	i = (uint)sizeof(*btp) +
	    (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t);
	/*
	 * The whole thing is initialized to free by the init routine.
	 * Say we're using the leaf and tail area.
	 */
	dup = (xfs_dir2_data_unused_t *)block->u;
	needlog = needscan = 0;
	xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i,
		&needlog, &needscan);
	ASSERT(needscan == 0);
	/*
	 * Fill in the tail.
	 */
	btp = xfs_dir2_block_tail_p(mp, block);
	btp->count = cpu_to_be32(sfp->hdr.count + 2);	/* ., .. */
	btp->stale = 0;
	blp = xfs_dir2_block_leaf_p(btp);
	endoffset = (uint)((char *)blp - (char *)block);
	/*
	 * Remove the freespace, we'll manage it.
	 */
	xfs_dir2_data_use_free(tp, bp, dup,
		(xfs_dir2_data_aoff_t)((char *)dup - (char *)block),
		be16_to_cpu(dup->length), &needlog, &needscan);
	/*
	 * Create entry for .
	 */
	dep = (xfs_dir2_data_entry_t *)
	      ((char *)block + XFS_DIR2_DATA_DOT_OFFSET);
	dep->inumber = cpu_to_be64(dp->i_ino);
	dep->namelen = 1;
	dep->name[0] = '.';
	tagp = xfs_dir2_data_entry_tag_p(dep);
	*tagp = cpu_to_be16((char *)dep - (char *)block);
	xfs_dir2_data_log_entry(tp, bp, dep);
	blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
	blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
				(char *)dep - (char *)block));
	/*
	 * Create entry for ..
	 */
	dep = (xfs_dir2_data_entry_t *)
		((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET);
	dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent));
	dep->namelen = 2;
	dep->name[0] = dep->name[1] = '.';
	tagp = xfs_dir2_data_entry_tag_p(dep);
	*tagp = cpu_to_be16((char *)dep - (char *)block);
	xfs_dir2_data_log_entry(tp, bp, dep);
	blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
	blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
				(char *)dep - (char *)block));
	offset = XFS_DIR2_DATA_FIRST_OFFSET;
	/*
	 * Loop over existing entries, stuff them in.
	 */
	i = 0;
	if (sfp->hdr.count == 0)
		sfep = NULL;
	else
		sfep = xfs_dir2_sf_firstentry(sfp);
	/*
	 * Need to preserve the existing offset values in the sf directory.
	 * Insert holes (unused entries) where necessary.
	 */
	while (offset < endoffset) {
		/*
		 * sfep is null when we reach the end of the list.
		 */
		if (sfep == NULL)
			newoffset = endoffset;
		else
			newoffset = xfs_dir2_sf_get_offset(sfep);
		/*
		 * There should be a hole here, make one.
		 */
		if (offset < newoffset) {
			dup = (xfs_dir2_data_unused_t *)((char *)block + offset);
			dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
			dup->length = cpu_to_be16(newoffset - offset);
			*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16(
				((char *)dup - (char *)block));
			xfs_dir2_data_log_unused(tp, bp, dup);
			(void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block,
				dup, &dummy);
			offset += be16_to_cpu(dup->length);
			continue;
		}
		/*
		 * Copy a real entry.
		 */
		dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset);
		dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp,
				xfs_dir2_sf_inumberp(sfep)));
		dep->namelen = sfep->namelen;
		memcpy(dep->name, sfep->name, dep->namelen);
		tagp = xfs_dir2_data_entry_tag_p(dep);
		*tagp = cpu_to_be16((char *)dep - (char *)block);
		xfs_dir2_data_log_entry(tp, bp, dep);
		name.name = sfep->name;
		name.len = sfep->namelen;
		blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
							hashname(&name));
		blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
						 (char *)dep - (char *)block));
		offset = (int)((char *)(tagp + 1) - (char *)block);
		if (++i == sfp->hdr.count)
			sfep = NULL;
		else
			sfep = xfs_dir2_sf_nextentry(sfp, sfep);
	}
	/* Done with the temporary buffer */
	kmem_free(buf);
	/*
	 * Sort the leaf entries by hash value.
	 */
	xfs_sort(blp, be32_to_cpu(btp->count), sizeof(*blp), xfs_dir2_block_sort);
	/*
	 * Log the leaf entry area and tail.
	 * Already logged the header in data_init, ignore needlog.
	 */
	ASSERT(needscan == 0);
	xfs_dir2_block_log_leaf(tp, bp, 0, be32_to_cpu(btp->count) - 1);
	xfs_dir2_block_log_tail(tp, bp);
	xfs_dir2_data_check(dp, bp);
	xfs_da_buf_done(bp);
	return 0;
}
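/*
 * A minimal standalone sketch (not from the source above; all names are
 * hypothetical stand-ins for the XFS types): the conversion relies on each
 * data entry carrying a trailing big-endian 16-bit "tag" that records the
 * entry's byte offset within the block, which is what tagp points at above.
 */
#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include <assert.h>

struct demo_entry {			/* stand-in for xfs_dir2_data_entry_t */
	uint64_t	inumber;
	uint8_t		namelen;
	uint8_t		name[];		/* name[namelen], then a 16-bit tag */
};

/* stand-in for xfs_dir2_data_entry_tag_p(): the tag sits after the name */
static uint16_t *
demo_entry_tag_p(struct demo_entry *dep)
{
	return (uint16_t *)((char *)dep +
	    offsetof(struct demo_entry, name) + dep->namelen);
}

int
main(void)
{
	static uint64_t blockmem[8];		/* 8-byte aligned "block" */
	char *block = (char *)blockmem;
	struct demo_entry *dep = (struct demo_entry *)(block + 16);

	dep->inumber = 42;
	dep->namelen = 3;
	memcpy(dep->name, "foo", 3);
	*demo_entry_tag_p(dep) = (uint16_t)((char *)dep - block);
	assert(*demo_entry_tag_p(dep) == 16);	/* tag = entry's block offset */
	return 0;
}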
/*
 * This is called to fill in the vector of log iovecs for the
 * given inode log item.  It fills the first item with an inode
 * log format structure, the second with the on-disk inode structure,
 * and a possible third and/or fourth with the inode data/extents/b-tree
 * root and inode attributes data/extents/b-tree root.
 */
STATIC void
xfs_inode_item_format(
	xfs_inode_log_item_t	*iip,
	xfs_log_iovec_t		*log_vector)
{
	uint			nvecs;
	xfs_log_iovec_t		*vecp;
	xfs_inode_t		*ip;
	size_t			data_bytes;
	xfs_bmbt_rec_t		*ext_buffer;
	int			nrecs;
	xfs_mount_t		*mp;

	ip = iip->ili_inode;
	vecp = log_vector;

	vecp->i_addr = (xfs_caddr_t)&iip->ili_format;
	vecp->i_len = sizeof(xfs_inode_log_format_t);
	XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IFORMAT);
	vecp++;
	nvecs = 1;

	/*
	 * Clear i_update_core if the timestamps (or any other
	 * non-transactional modification) need flushing/logging
	 * and we're about to log them with the rest of the core.
	 *
	 * This is the same logic as xfs_iflush() but this code can't
	 * run at the same time as xfs_iflush because we're in commit
	 * processing here and so we have the inode lock held in
	 * exclusive mode.  It doesn't really matter whether both
	 * routines end up grabbing the timestamps; either way is OK.
	 *
	 * We clear i_update_core before copying out the data.
	 * This is for coordination with our timestamp updates
	 * that don't hold the inode lock.  They will always
	 * update the timestamps BEFORE setting i_update_core,
	 * so if we clear i_update_core after they set it we
	 * are guaranteed to see their updates to the timestamps
	 * here.  Likewise, if they set it after we clear it
	 * here, we'll see it either on the next commit of this
	 * inode or the next time the inode gets flushed via
	 * xfs_iflush().  This depends on strongly ordered memory
	 * semantics, but we have that.  We use the SYNCHRONIZE
	 * macro to make sure that the compiler does not reorder
	 * the i_update_core access below the data copy below.
	 */
	if (ip->i_update_core) {
		ip->i_update_core = 0;
		SYNCHRONIZE();
	}

	/*
	 * We don't have to worry about re-ordering here because
	 * the update_size field is protected by the inode lock
	 * and we have that held in exclusive mode.
	 */
	if (ip->i_update_size)
		ip->i_update_size = 0;

	/*
	 * Make sure to get the latest atime from the Linux inode.
	 */
	xfs_synchronize_atime(ip);

	/*
	 * Make sure the linux inode is dirty.
	 */
	xfs_mark_inode_dirty_sync(ip);

	vecp->i_addr = (xfs_caddr_t)&ip->i_d;
	vecp->i_len = sizeof(xfs_dinode_core_t);
	XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);
	vecp++;
	nvecs++;
	iip->ili_format.ilf_fields |= XFS_ILOG_CORE;

	/*
	 * If this is really an old format inode, then we need to
	 * log it as such.  This means that we have to copy the link
	 * count from the new field to the old.  We don't have to worry
	 * about the new fields, because nothing trusts them as long as
	 * the old inode version number is there.  If the superblock already
	 * has a new version number, then we don't bother converting back.
	 */
	mp = ip->i_mount;
	ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 ||
	       xfs_sb_version_hasnlink(&mp->m_sb));
	if (ip->i_d.di_version == XFS_DINODE_VERSION_1) {
		if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
			/*
			 * Convert it back.
			 */
			ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
			ip->i_d.di_onlink = ip->i_d.di_nlink;
		} else {
			/*
			 * The superblock version has already been bumped,
			 * so just make the conversion to the new inode
			 * format permanent.
			 */
			ip->i_d.di_version = XFS_DINODE_VERSION_2;
			ip->i_d.di_onlink = 0;
			memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
		}
	}

	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_EXTENTS:
		ASSERT(!(iip->ili_format.ilf_fields &
			 (XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
			  XFS_ILOG_DEV | XFS_ILOG_UUID)));
		if (iip->ili_format.ilf_fields & XFS_ILOG_DEXT) {
			ASSERT(ip->i_df.if_bytes > 0);
			ASSERT(ip->i_df.if_u1.if_extents != NULL);
			ASSERT(ip->i_d.di_nextents > 0);
			ASSERT(iip->ili_extents_buf == NULL);
			nrecs = ip->i_df.if_bytes /
				(uint)sizeof(xfs_bmbt_rec_t);
			ASSERT(nrecs > 0);
#ifdef XFS_NATIVE_HOST
			if (nrecs == ip->i_d.di_nextents) {
				/*
				 * There are no delayed allocation
				 * extents, so just point to the
				 * real extents array.
				 */
				vecp->i_addr =
					(char *)(ip->i_df.if_u1.if_extents);
				vecp->i_len = ip->i_df.if_bytes;
				XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT);
			} else
#endif
			{
				/*
				 * There are delayed allocation extents
				 * in the inode, or we need to convert
				 * the extents to on disk format.
				 * Use xfs_iextents_copy()
				 * to copy only the real extents into
				 * a separate buffer.  We'll free the
				 * buffer in the unlock routine.
				 */
				ext_buffer = kmem_alloc(ip->i_df.if_bytes,
					KM_SLEEP);
				iip->ili_extents_buf = ext_buffer;
				vecp->i_addr = (xfs_caddr_t)ext_buffer;
				vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
						XFS_DATA_FORK);
				XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT);
			}
			ASSERT(vecp->i_len <= ip->i_df.if_bytes);
			iip->ili_format.ilf_dsize = vecp->i_len;
			vecp++;
			nvecs++;
		}
		break;

	case XFS_DINODE_FMT_BTREE:
		ASSERT(!(iip->ili_format.ilf_fields &
			 (XFS_ILOG_DDATA | XFS_ILOG_DEXT |
			  XFS_ILOG_DEV | XFS_ILOG_UUID)));
		if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) {
			ASSERT(ip->i_df.if_broot_bytes > 0);
			ASSERT(ip->i_df.if_broot != NULL);
			vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot;
			vecp->i_len = ip->i_df.if_broot_bytes;
			XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IBROOT);
			vecp++;
			nvecs++;
			iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes;
		}
		break;

	case XFS_DINODE_FMT_LOCAL:
		ASSERT(!(iip->ili_format.ilf_fields &
			 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT |
			  XFS_ILOG_DEV | XFS_ILOG_UUID)));
		if (iip->ili_format.ilf_fields & XFS_ILOG_DDATA) {
			ASSERT(ip->i_df.if_bytes > 0);
			ASSERT(ip->i_df.if_u1.if_data != NULL);
			ASSERT(ip->i_d.di_size > 0);

			vecp->i_addr = (xfs_caddr_t)ip->i_df.if_u1.if_data;
			/*
			 * Round i_bytes up to a word boundary.
			 * The underlying memory is guaranteed
			 * to be there by xfs_idata_realloc().
			 */
			data_bytes = roundup(ip->i_df.if_bytes, 4);
			ASSERT((ip->i_df.if_real_bytes == 0) ||
			       (ip->i_df.if_real_bytes == data_bytes));
			vecp->i_len = (int)data_bytes;
			XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ILOCAL);
			vecp++;
			nvecs++;
			iip->ili_format.ilf_dsize = (unsigned)data_bytes;
		}
		break;

	case XFS_DINODE_FMT_DEV:
		ASSERT(!(iip->ili_format.ilf_fields &
			 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT |
			  XFS_ILOG_DDATA | XFS_ILOG_UUID)));
		if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) {
			iip->ili_format.ilf_u.ilfu_rdev =
				ip->i_df.if_u2.if_rdev;
		}
		break;

	case XFS_DINODE_FMT_UUID:
		ASSERT(!(iip->ili_format.ilf_fields &
			 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT |
			  XFS_ILOG_DDATA | XFS_ILOG_DEV)));
		if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) {
			iip->ili_format.ilf_u.ilfu_uuid =
				ip->i_df.if_u2.if_uuid;
		}
		break;

	default:
		ASSERT(0);
		break;
	}

	/*
	 * If there are no attributes associated with the file,
	 * then we're done.
	 * Assert that no attribute-related log flags are set.
	 */
	if (!XFS_IFORK_Q(ip)) {
		ASSERT(nvecs == iip->ili_item.li_desc->lid_size);
		iip->ili_format.ilf_size = nvecs;
		ASSERT(!(iip->ili_format.ilf_fields &
			 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT)));
		return;
	}

	switch (ip->i_d.di_aformat) {
	case XFS_DINODE_FMT_EXTENTS:
		ASSERT(!(iip->ili_format.ilf_fields &
			 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT)));
		if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) {
			ASSERT(ip->i_afp->if_bytes > 0);
			ASSERT(ip->i_afp->if_u1.if_extents != NULL);
			ASSERT(ip->i_d.di_anextents > 0);
#ifdef DEBUG
			nrecs = ip->i_afp->if_bytes /
				(uint)sizeof(xfs_bmbt_rec_t);
#endif
			ASSERT(nrecs > 0);
			ASSERT(nrecs == ip->i_d.di_anextents);
#ifdef XFS_NATIVE_HOST
			/*
			 * There are not delayed allocation extents
			 * for attributes, so just point at the array.
			 */
			vecp->i_addr = (char *)(ip->i_afp->if_u1.if_extents);
			vecp->i_len = ip->i_afp->if_bytes;
#else
			ASSERT(iip->ili_aextents_buf == NULL);
			/*
			 * Need to endian flip before logging
			 */
			ext_buffer = kmem_alloc(ip->i_afp->if_bytes,
				KM_SLEEP);
			iip->ili_aextents_buf = ext_buffer;
			vecp->i_addr = (xfs_caddr_t)ext_buffer;
			vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
					XFS_ATTR_FORK);
#endif
			XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_EXT);
			iip->ili_format.ilf_asize = vecp->i_len;
			vecp++;
			nvecs++;
		}
		break;

	case XFS_DINODE_FMT_BTREE:
		ASSERT(!(iip->ili_format.ilf_fields &
			 (XFS_ILOG_ADATA | XFS_ILOG_AEXT)));
		if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) {
			ASSERT(ip->i_afp->if_broot_bytes > 0);
			ASSERT(ip->i_afp->if_broot != NULL);
			vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot;
			vecp->i_len = ip->i_afp->if_broot_bytes;
			XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_BROOT);
			vecp++;
			nvecs++;
			iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes;
		}
		break;

	case XFS_DINODE_FMT_LOCAL:
		ASSERT(!(iip->ili_format.ilf_fields &
			 (XFS_ILOG_ABROOT | XFS_ILOG_AEXT)));
		if (iip->ili_format.ilf_fields & XFS_ILOG_ADATA) {
			ASSERT(ip->i_afp->if_bytes > 0);
			ASSERT(ip->i_afp->if_u1.if_data != NULL);

			vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_u1.if_data;
			/*
			 * Round i_bytes up to a word boundary.
			 * The underlying memory is guaranteed
			 * to be there by xfs_idata_realloc().
			 */
			data_bytes = roundup(ip->i_afp->if_bytes, 4);
			ASSERT((ip->i_afp->if_real_bytes == 0) ||
			       (ip->i_afp->if_real_bytes == data_bytes));
			vecp->i_len = (int)data_bytes;
			XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_LOCAL);
			vecp++;
			nvecs++;
			iip->ili_format.ilf_asize = (unsigned)data_bytes;
		}
		break;

	default:
		ASSERT(0);
		break;
	}

	ASSERT(nvecs == iip->ili_item.li_desc->lid_size);
	iip->ili_format.ilf_size = nvecs;
}
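/*
 * A minimal standalone sketch (not from the source above; names are
 * hypothetical): xfs_inode_item_format() is essentially filling a
 * scatter/gather vector, where each region to be logged contributes one
 * (address, length) pair and the final count is stored in the format
 * structure.  The bare pattern looks like this:
 */
#include <stddef.h>
#include <stdio.h>

struct demo_iovec {		/* stand-in for xfs_log_iovec_t */
	void	*i_addr;
	size_t	 i_len;
};

/* Append one region to the vector; returns the new vector count. */
static unsigned
demo_add_region(struct demo_iovec *vec, unsigned nvecs,
    void *addr, size_t len)
{
	vec[nvecs].i_addr = addr;
	vec[nvecs].i_len = len;
	return nvecs + 1;
}

int
main(void)
{
	struct demo_iovec vec[4];
	int header = 1, core = 2;
	unsigned nvecs = 0;

	/* First the "format" header, then the core, as the code above does. */
	nvecs = demo_add_region(vec, nvecs, &header, sizeof(header));
	nvecs = demo_add_region(vec, nvecs, &core, sizeof(core));
	printf("logging %u regions, first is %zu bytes\n", nvecs, vec[0].i_len);
	return 0;
}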
/*
 * Lock a directory entry.  A dirlock on <dzp, name> protects that name
 * in dzp's directory zap object.  As long as you hold a dirlock, you can
 * assume two things: (1) dzp cannot be reaped, and (2) no other thread
 * can change the zap entry for (i.e. link or unlink) this name.
 *
 * Input arguments:
 *	dzp	- znode for directory
 *	name	- name of entry to lock
 *	flag	- ZNEW: if the entry already exists, fail with EEXIST.
 *		  ZEXISTS: if the entry does not exist, fail with ENOENT.
 *		  ZSHARED: allow concurrent access with other ZSHARED callers.
 *		  ZXATTR: we want dzp's xattr directory
 *		  ZCILOOK: On a mixed sensitivity file system,
 *			   this lookup should be case-insensitive.
 *		  ZCIEXACT: On a purely case-insensitive file system,
 *			    this lookup should be case-sensitive.
 *		  ZRENAMING: we are locking for renaming, force narrow locks
 *		  ZHAVELOCK: Don't grab the z_name_lock for this call. The
 *			     current thread already holds it.
 *
 * Output arguments:
 *	zpp	- pointer to the znode for the entry (NULL if there isn't one)
 *	dlpp	- pointer to the dirlock for this entry (NULL on error)
 *	direntflags - (case-insensitive lookup only)
 *		      flags if multiple case-sensitive matches exist in directory
 *	realpnp     - (case-insensitive lookup only)
 *		      actual name matched within the directory
 *
 * Return value: 0 on success or errno on failure.
 *
 * NOTE: Always checks for, and rejects, '.' and '..'.
 * NOTE: For case-insensitive file systems we take wide locks (see below),
 *	 but return znode pointers to a single match.
 */
int
zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name,
    znode_t **zpp, int flag, int *direntflags, pathname_t *realpnp)
{
	zfs_sb_t	*zsb = ZTOZSB(dzp);
	zfs_dirlock_t	*dl;
	boolean_t	update;
	boolean_t	exact;
	uint64_t	zoid;
#ifdef HAVE_DNLC
	vnode_t		*vp = NULL;
#endif /* HAVE_DNLC */
	int		error = 0;
	int		cmpflags;

	*zpp = NULL;
	*dlpp = NULL;

	/*
	 * Verify that we are not trying to lock '.', '..', or '.zfs'
	 */
	if ((name[0] == '.' &&
	    (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) ||
	    (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0))
		return (SET_ERROR(EEXIST));

	/*
	 * Case sensitivity and normalization preferences are set when
	 * the file system is created.  These are stored in the
	 * zsb->z_case and zsb->z_norm fields.  These choices
	 * affect what vnodes can be cached in the DNLC, how we
	 * perform zap lookups, and the "width" of our dirlocks.
	 *
	 * A normal dirlock locks a single name.  Note that with
	 * normalization a name can be composed multiple ways, but
	 * when normalized, these names all compare equal.  A wide
	 * dirlock locks multiple names.  We need these when the file
	 * system is supporting mixed-mode access.  It is sometimes
	 * necessary to lock all case permutations of file name at
	 * once so that simultaneous case-insensitive/case-sensitive
	 * behavior is as rational as possible.
	 */

	/*
	 * Decide if exact matches should be requested when performing
	 * a zap lookup on file systems supporting case-insensitive
	 * access.
	 */
	exact =
	    ((zsb->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) ||
	    ((zsb->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK));

	/*
	 * Only look in or update the DNLC if we are looking for the
	 * name on a file system that does not require normalization
	 * or case folding.  We can also look there if we happen to be
	 * on a non-normalizing, mixed sensitivity file system IF we
	 * are looking for the exact name.
	 *
	 * Maybe can add TO-UPPERed version of name to dnlc in ci-only
	 * case for performance improvement?
	 */
	update = !zsb->z_norm ||
	    ((zsb->z_case == ZFS_CASE_MIXED) &&
	    !(zsb->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK));

	/*
	 * ZRENAMING indicates we are in a situation where we should
	 * take narrow locks regardless of the file system's
	 * preferences for normalizing and case folding.  This will
	 * prevent us deadlocking trying to grab the same wide lock
	 * twice if the two names happen to be case-insensitive
	 * matches.
	 */
	if (flag & ZRENAMING)
		cmpflags = 0;
	else
		cmpflags = zsb->z_norm;

	/*
	 * Wait until there are no locks on this name.
	 *
	 * Don't grab the lock if it is already held.  However, we cannot
	 * have both ZSHARED and ZHAVELOCK together.
	 */
	ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK));
	if (!(flag & ZHAVELOCK))
		rw_enter(&dzp->z_name_lock, RW_READER);

	mutex_enter(&dzp->z_lock);
	for (;;) {
		if (dzp->z_unlinked) {
			mutex_exit(&dzp->z_lock);
			if (!(flag & ZHAVELOCK))
				rw_exit(&dzp->z_name_lock);
			return (SET_ERROR(ENOENT));
		}
		for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) {
			if ((u8_strcmp(name, dl->dl_name, 0, cmpflags,
			    U8_UNICODE_LATEST, &error) == 0) || error != 0)
				break;
		}
		if (error != 0) {
			mutex_exit(&dzp->z_lock);
			if (!(flag & ZHAVELOCK))
				rw_exit(&dzp->z_name_lock);
			return (SET_ERROR(ENOENT));
		}
		if (dl == NULL) {
			/*
			 * Allocate a new dirlock and add it to the list.
			 */
			dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP);
			cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL);
			dl->dl_name = name;
			dl->dl_sharecnt = 0;
			dl->dl_namelock = 0;
			dl->dl_namesize = 0;
			dl->dl_dzp = dzp;
			dl->dl_next = dzp->z_dirlocks;
			dzp->z_dirlocks = dl;
			break;
		}
		if ((flag & ZSHARED) && dl->dl_sharecnt != 0)
			break;
		cv_wait(&dl->dl_cv, &dzp->z_lock);
	}

	/*
	 * If the caller already held the z_name_lock (ZHAVELOCK), record
	 * that in the dirlock so that the unlock path won't drop it.
	 */
	if (flag & ZHAVELOCK)
		dl->dl_namelock = 1;

	if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) {
		/*
		 * We're the second shared reference to dl.  Make a copy of
		 * dl_name in case the first thread goes away before we do.
		 * Note that we initialize the new name before storing its
		 * pointer into dl_name, because the first thread may load
		 * dl->dl_name at any time.  It will either see the old value,
		 * which is its own, or the new shared copy; either is OK.
		 */
		dl->dl_namesize = strlen(dl->dl_name) + 1;
		name = kmem_alloc(dl->dl_namesize, KM_SLEEP);
		bcopy(dl->dl_name, name, dl->dl_namesize);
		dl->dl_name = name;
	}

	mutex_exit(&dzp->z_lock);

	/*
	 * We have a dirlock on the name.  (Note that it is the dirlock,
	 * not the dzp's z_lock, that protects the name in the zap object.)
	 * See if there's an object by this name; if so, put a hold on it.
	 */
	if (flag & ZXATTR) {
		error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zsb), &zoid,
		    sizeof (zoid));
		if (error == 0)
			error = (zoid == 0 ? SET_ERROR(ENOENT) : 0);
	} else {
#ifdef HAVE_DNLC
		if (update)
			vp = dnlc_lookup(ZTOI(dzp), name);
		if (vp == DNLC_NO_VNODE) {
			iput(vp);
			error = SET_ERROR(ENOENT);
		} else if (vp) {
			if (flag & ZNEW) {
				zfs_dirent_unlock(dl);
				iput(vp);
				return (SET_ERROR(EEXIST));
			}
			*dlpp = dl;
			*zpp = VTOZ(vp);
			return (0);
		} else {
			error = zfs_match_find(zsb, dzp, name, exact,
			    update, direntflags, realpnp, &zoid);
		}
#else
		error = zfs_match_find(zsb, dzp, name, exact,
		    update, direntflags, realpnp, &zoid);
#endif /* HAVE_DNLC */
	}
	if (error) {
		if (error != ENOENT || (flag & ZEXISTS)) {
			zfs_dirent_unlock(dl);
			return (error);
		}
	} else {
		if (flag & ZNEW) {
			zfs_dirent_unlock(dl);
			return (SET_ERROR(EEXIST));
		}
		error = zfs_zget(zsb, zoid, zpp);
		if (error) {
			zfs_dirent_unlock(dl);
			return (error);
		}
#ifdef HAVE_DNLC
		if (!(flag & ZXATTR) && update)
			dnlc_update(ZTOI(dzp), name, ZTOI(*zpp));
#endif /* HAVE_DNLC */
	}

	*dlpp = dl;

	return (0);
}
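/*
 * A compressed user-space sketch (not from the source above; all names are
 * hypothetical) of the wait loop zfs_dirent_lock() uses: search a list of
 * held names under a mutex, sleep on a condition variable while the name is
 * taken, and insert a record once it is free.  Rendered with pthreads:
 */
#include <pthread.h>
#include <string.h>

struct name_lock {
	const char		*name;
	struct name_lock	*next;
};

static pthread_mutex_t	nl_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	nl_cv = PTHREAD_COND_INITIALIZER;
static struct name_lock	*nl_held;	/* list of currently held names */

void
name_lock_enter(struct name_lock *nl, const char *name)
{
	struct name_lock *p;

	pthread_mutex_lock(&nl_mutex);
	for (;;) {
		for (p = nl_held; p != NULL; p = p->next)
			if (strcmp(p->name, name) == 0)
				break;
		if (p == NULL)
			break;			/* name is free: claim it */
		pthread_cond_wait(&nl_cv, &nl_mutex);
	}
	nl->name = name;
	nl->next = nl_held;
	nl_held = nl;
	pthread_mutex_unlock(&nl_mutex);
}

void
name_lock_exit(struct name_lock *nl)
{
	struct name_lock **pp;

	pthread_mutex_lock(&nl_mutex);
	for (pp = &nl_held; *pp != nl; pp = &(*pp)->next)
		;
	*pp = nl->next;
	pthread_cond_broadcast(&nl_cv);	/* wake any waiter on any name */
	pthread_mutex_unlock(&nl_mutex);
}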
/* * find prom phys pages and alloc space for a tmp copy */ static int i_cpr_find_ppages(void) { struct page *pp; struct memlist *pmem; pgcnt_t npages, pcnt, scnt, vcnt; pfn_t ppn, plast, *dst; int mapflag; cpr_clear_bitmaps(); mapflag = REGULAR_BITMAP; /* * there should be a page_t for each phys page used by the kernel; * set a bit for each phys page not tracked by a page_t */ pcnt = 0; memlist_read_lock(); for (pmem = phys_install; pmem; pmem = pmem->ml_next) { npages = mmu_btop(pmem->ml_size); ppn = mmu_btop(pmem->ml_address); for (plast = ppn + npages; ppn < plast; ppn++) { if (page_numtopp_nolock(ppn)) continue; (void) cpr_setbit(ppn, mapflag); pcnt++; } } memlist_read_unlock(); /* * clear bits for phys pages in each segment */ scnt = cpr_count_seg_pages(mapflag, cpr_clrbit); /* * set bits for phys pages referenced by the promvp vnode; * these pages are mostly comprised of forthdebug words */ vcnt = 0; for (pp = promvp.v_pages; pp; ) { if (cpr_setbit(pp->p_offset, mapflag) == 0) vcnt++; pp = pp->p_vpnext; if (pp == promvp.v_pages) break; } /* * total number of prom pages are: * (non-page_t pages - seg pages + vnode pages) */ ppage_count = pcnt - scnt + vcnt; CPR_DEBUG(CPR_DEBUG1, "find_ppages: pcnt %ld - scnt %ld + vcnt %ld = %ld\n", pcnt, scnt, vcnt, ppage_count); /* * alloc array of pfn_t to store phys page list */ pphys_list_size = ppage_count * sizeof (pfn_t); pphys_list = kmem_alloc(pphys_list_size, KM_NOSLEEP); if (pphys_list == NULL) { cpr_err(CE_WARN, "cannot alloc pphys_list"); return (ENOMEM); } /* * phys pages referenced in the bitmap should be * those used by the prom; scan bitmap and save * a list of prom phys page numbers */ dst = pphys_list; memlist_read_lock(); for (pmem = phys_install; pmem; pmem = pmem->ml_next) { npages = mmu_btop(pmem->ml_size); ppn = mmu_btop(pmem->ml_address); for (plast = ppn + npages; ppn < plast; ppn++) { if (cpr_isset(ppn, mapflag)) { ASSERT(dst < (pphys_list + ppage_count)); *dst++ = ppn; } } } memlist_read_unlock(); /* * allocate space to store prom pages */ ppage_buf = kmem_alloc(mmu_ptob(ppage_count), KM_NOSLEEP); if (ppage_buf == NULL) { kmem_free(pphys_list, pphys_list_size); pphys_list = NULL; cpr_err(CE_WARN, "cannot alloc ppage_buf"); return (ENOMEM); } return (0); }
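/*
 * A minimal standalone sketch (not from the source above; names and sizes
 * are hypothetical): i_cpr_find_ppages() is doing set-arithmetic with a
 * page bitmap, setting bits for untracked pages, clearing bits for pages a
 * segment scan accounts for, then harvesting the survivors.  The same
 * mark-then-harvest idea with a plain word-array bitmap:
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_NPAGES	128
static uint64_t demo_map[DEMO_NPAGES / 64];

static void demo_setbit(unsigned n) { demo_map[n / 64] |= 1ULL << (n % 64); }
static void demo_clrbit(unsigned n) { demo_map[n / 64] &= ~(1ULL << (n % 64)); }
static int  demo_isset(unsigned n)  { return (demo_map[n / 64] >> (n % 64)) & 1; }

int
main(void)
{
	unsigned n, count = 0;

	for (n = 0; n < 16; n++)		/* mark candidate pages */
		demo_setbit(n);
	for (n = 4; n < 8; n++)			/* clear accounted-for pages */
		demo_clrbit(n);
	for (n = 0; n < DEMO_NPAGES; n++)	/* harvest the remainder */
		if (demo_isset(n))
			count++;
	printf("%u pages left marked\n", count);	/* prints 12 */
	return 0;
}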
void * kalloc_canblock( vm_size_t size, boolean_t canblock) { zone_t z; if (size < MAX_SIZE_ZDLUT) z = get_zone_dlut(size); else if (size < kalloc_max_prerounded) z = get_zone_search(size, k_zindex_start); else { /* * If size is too large for a zone, then use kmem_alloc. * (We use kmem_alloc instead of kmem_alloc_kobject so that * krealloc can use kmem_realloc.) */ vm_map_t alloc_map; void *addr; /* kmem_alloc could block so we return if noblock */ if (!canblock) { return(NULL); } if (size >= kalloc_kernmap_size) alloc_map = kernel_map; else alloc_map = kalloc_map; if (kmem_alloc(alloc_map, (vm_offset_t *)&addr, size) != KERN_SUCCESS) { if (alloc_map != kernel_map) { if (kmem_alloc(kernel_map, (vm_offset_t *)&addr, size) != KERN_SUCCESS) addr = NULL; } else addr = NULL; } if (addr != NULL) { kalloc_spin_lock(); /* * Thread-safe version of the workaround for 4740071 * (a double FREE()) */ if (size > kalloc_largest_allocated) kalloc_largest_allocated = size; kalloc_large_inuse++; kalloc_large_total += size; kalloc_large_sum += size; if (kalloc_large_total > kalloc_large_max) kalloc_large_max = kalloc_large_total; kalloc_unlock(); KALLOC_ZINFO_SALLOC(size); } return(addr); } #ifdef KALLOC_DEBUG if (size > z->elem_size) panic("%s: z %p (%s) but requested size %lu", __func__, z, z->zone_name, (unsigned long)size); #endif assert(size <= z->elem_size); return (zalloc_canblock(z, canblock)); }
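/*
 * A toy standalone sketch (not from the source above; names, sizes, and the
 * table contents are hypothetical): kalloc's fast path is a size-to-zone
 * dispatch, where small sizes index a direct lookup table in O(1) and larger
 * ones fall through to a coarser search or to the page allocator:
 */
#include <stddef.h>
#include <stdio.h>

#define DEMO_DLUT_MAX	256	/* direct table covers sizes < 256 */
#define DEMO_GRANULE	16	/* table granularity in bytes */

/* size class (bytes) for each 16-byte granule; built once at init time */
static const int demo_dlut[DEMO_DLUT_MAX / DEMO_GRANULE] = {
	16, 32, 48, 64, 80, 96, 112, 128,
	144, 160, 176, 192, 208, 224, 240, 256
};

static int
demo_zone_for(size_t size)
{
	if (size < DEMO_DLUT_MAX)
		return demo_dlut[size / DEMO_GRANULE];	/* O(1) lookup */
	return -1;	/* too big for a zone: caller uses the page allocator */
}

int
main(void)
{
	printf("37 bytes -> %d-byte zone\n", demo_zone_for(37));	/* 48 */
	printf("1000 bytes -> %d (page allocator)\n", demo_zone_for(1000));
	return 0;
}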
/*
 * Create a new DMU object to hold a zfs znode.
 *
 *	IN:	dzp	- parent directory for new znode
 *		vap	- file attributes for new znode
 *		tx	- dmu transaction id for zap operations
 *		cr	- credentials of caller
 *		flag	- flags:
 *			  IS_ROOT_NODE	- new object will be root
 *			  IS_XATTR	- new object is an attribute
 *		bonuslen - length of bonus buffer
 *		setaclp  - File/Dir initial ACL
 *		fuidp	 - Tracks fuid allocation.
 *
 *	OUT:	zpp	- allocated znode
 *
 */
void
zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
    uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
{
	uint64_t	crtime[2], atime[2], mtime[2], ctime[2];
	uint64_t	mode, size, links, parent, pflags;
	uint64_t	dzp_pflags = 0;
	uint64_t	rdev = 0;
	zfs_sb_t	*zsb = ZTOZSB(dzp);
	dmu_buf_t	*db;
	timestruc_t	now;
	uint64_t	gen, obj;
	int		bonuslen;
	sa_handle_t	*sa_hdl;
	dmu_object_type_t obj_type;
	sa_bulk_attr_t	*sa_attrs;
	int		cnt = 0;
	zfs_acl_locator_cb_t locate = { 0 };

	if (zsb->z_replay) {
		obj = vap->va_nodeid;
		now = vap->va_ctime;		/* see zfs_replay_create() */
		gen = vap->va_nblocks;		/* ditto */
	} else {
		obj = 0;
		gethrestime(&now);
		gen = dmu_tx_get_txg(tx);
	}

	obj_type = zsb->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
	bonuslen = (obj_type == DMU_OT_SA) ?
	    DN_MAX_BONUSLEN : ZFS_OLD_ZNODE_PHYS_SIZE;

	/*
	 * Create a new DMU object.
	 *
	 * There's currently no mechanism for pre-reading the blocks that will
	 * be needed to allocate a new object, so we accept the small chance
	 * that there will be an i/o error and we will fail one of the
	 * assertions below.
	 */
	if (S_ISDIR(vap->va_mode)) {
		if (zsb->z_replay) {
			VERIFY0(zap_create_claim_norm(zsb->z_os, obj,
			    zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    obj_type, bonuslen, tx));
		} else {
			obj = zap_create_norm(zsb->z_os, zsb->z_norm,
			    DMU_OT_DIRECTORY_CONTENTS,
			    obj_type, bonuslen, tx);
		}
	} else {
		if (zsb->z_replay) {
			VERIFY0(dmu_object_claim(zsb->z_os, obj,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    obj_type, bonuslen, tx));
		} else {
			obj = dmu_object_alloc(zsb->z_os,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    obj_type, bonuslen, tx);
		}
	}

	ZFS_OBJ_HOLD_ENTER(zsb, obj);
	VERIFY(0 == sa_buf_hold(zsb->z_os, obj, NULL, &db));

	/*
	 * If this is the root, fix up the half-initialized parent pointer
	 * to reference the just-allocated physical data area.
	 */
	if (flag & IS_ROOT_NODE) {
		dzp->z_id = obj;
	} else {
		dzp_pflags = dzp->z_pflags;
	}

	/*
	 * If parent is an xattr, so am I.
	 */
	if (dzp_pflags & ZFS_XATTR) {
		flag |= IS_XATTR;
	}

	if (zsb->z_use_fuids)
		pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
	else
		pflags = 0;

	if (S_ISDIR(vap->va_mode)) {
		size = 2;		/* contents ("." and "..") */
		links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
	} else {
		size = links = 0;
	}

	if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))
		rdev = vap->va_rdev;

	parent = dzp->z_id;
	mode = acl_ids->z_mode;
	if (flag & IS_XATTR)
		pflags |= ZFS_XATTR;

	/*
	 * No execs denied will be determined when zfs_mode_compute() is called.
*/ pflags |= acl_ids->z_aclp->z_hints & (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT| ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED); ZFS_TIME_ENCODE(&now, crtime); ZFS_TIME_ENCODE(&now, ctime); if (vap->va_mask & ATTR_ATIME) { ZFS_TIME_ENCODE(&vap->va_atime, atime); } else { ZFS_TIME_ENCODE(&now, atime); } if (vap->va_mask & ATTR_MTIME) { ZFS_TIME_ENCODE(&vap->va_mtime, mtime); } else { ZFS_TIME_ENCODE(&now, mtime); } /* Now add in all of the "SA" attributes */ VERIFY(0 == sa_handle_get_from_db(zsb->z_os, db, NULL, SA_HDL_SHARED, &sa_hdl)); /* * Setup the array of attributes to be replaced/set on the new file * * order for DMU_OT_ZNODE is critical since it needs to be constructed * in the old znode_phys_t format. Don't change this ordering */ sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP); if (obj_type == DMU_OT_ZNODE) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb), NULL, &atime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb), NULL, &crtime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb), NULL, &gen, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb), NULL, &mode, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb), NULL, &size, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb), NULL, &parent, 8); } else { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb), NULL, &mode, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb), NULL, &size, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb), NULL, &gen, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb), NULL, &acl_ids->z_fuid, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb), NULL, &acl_ids->z_fgid, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb), NULL, &parent, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb), NULL, &pflags, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb), NULL, &atime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb), NULL, &crtime, 16); } SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zsb), NULL, &links, 8); if (obj_type == DMU_OT_ZNODE) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zsb), NULL, &empty_xattr, 8); } if (obj_type == DMU_OT_ZNODE || (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zsb), NULL, &rdev, 8); } if (obj_type == DMU_OT_ZNODE) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb), NULL, &pflags, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb), NULL, &acl_ids->z_fuid, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb), NULL, &acl_ids->z_fgid, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zsb), NULL, pad, sizeof (uint64_t) * 4); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zsb), NULL, &acl_phys, sizeof (zfs_acl_phys_t)); } else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zsb), NULL, &acl_ids->z_aclp->z_acl_count, 8); locate.cb_aclp = acl_ids->z_aclp; SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zsb), zfs_acl_data_locator, &locate, acl_ids->z_aclp->z_acl_bytes); mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags, acl_ids->z_fuid, acl_ids->z_fgid); } VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0); if (!(flag & IS_ROOT_NODE)) { *zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl, ZTOI(dzp)); VERIFY(*zpp != 
NULL); VERIFY(dzp != NULL); } else { /* * If we are creating the root node, the "parent" we * passed in is the znode for the root. */ *zpp = dzp; (*zpp)->z_sa_hdl = sa_hdl; } (*zpp)->z_pflags = pflags; (*zpp)->z_mode = mode; if (obj_type == DMU_OT_ZNODE || acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) { VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx)); } kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END); ZFS_OBJ_HOLD_EXIT(zsb, obj); }
/* Process an XPMEM_DOMID_REQUEST/RESPONSE/RELEASE command */
static int
xpmem_fwd_process_domid_cmd(struct xpmem_partition_state * part_state,
			    xpmem_link_t			link,
			    struct xpmem_cmd_ex	      * cmd)
{
	struct xpmem_fwd_state * fwd_state = part_state->fwd_state;

	/* There's no reason not to reuse the input command struct for responses */
	struct xpmem_cmd_ex * out_cmd  = cmd;
	xpmem_link_t	      out_link = link;

	switch (cmd->type) {
	case XPMEM_DOMID_REQUEST: {
		/* A domid is requested by someone downstream from us on link
		 * 'link'. If we can't reach the nameserver, just return failure,
		 * because the request should not come through us unless we have a
		 * route already
		 */
		if (!xpmem_have_ns_link(fwd_state)) {
			return -1;
		}

		/* Buffer the request */
		{
			struct xpmem_domid_req_iter * iter  = NULL;
			unsigned long		      flags = 0;

			iter = kmem_alloc(sizeof(struct xpmem_domid_req_iter));
			if (!iter) {
				return -ENOMEM;
			}

			iter->link = link;

			spin_lock_irqsave(&(fwd_state->lock), flags);
			{
				list_add_tail(&(iter->node),
					      &(fwd_state->domid_req_list));
			}
			spin_unlock_irqrestore(&(fwd_state->lock), flags);

			/* Forward request up to the nameserver */
			out_link = fwd_state->ns_link;
		}

		break;
	}

	case XPMEM_DOMID_RESPONSE: {
		int ret = 0;

		/* We've been allocated a domid.
		 *
		 * If our domain has no domid, take it for ourselves.
		 * Otherwise, assign it to a link that has requested a domid
		 * from us.
		 */
		if (part_state->domid <= 0) {
			part_state->domid = cmd->domid_req.domid;

			/* Update the domid map to remember our own domid */
			ret = xpmem_add_domid(part_state, part_state->domid,
					      part_state->local_link);
			if (ret == 0) {
				XPMEM_ERR("Cannot insert domid %lli into hashtable",
					  part_state->domid);
				return -EFAULT;
			}

			return 0;
		} else {
			struct xpmem_domid_req_iter * iter  = NULL;
			unsigned long		      flags = 0;

			if (list_empty(&(fwd_state->domid_req_list))) {
				XPMEM_ERR("We currently do not support the buffering of XPMEM domids");
				return -1;
			}

			spin_lock_irqsave(&(fwd_state->lock), flags);
			{
				iter = list_first_entry(&(fwd_state->domid_req_list),
							struct xpmem_domid_req_iter,
							node);
				list_del(&(iter->node));
			}
			spin_unlock_irqrestore(&(fwd_state->lock), flags);

			/* Forward the domid to this link */
			out_link = iter->link;
			kmem_free(iter);

			/* Update the domid map to remember who has this */
			ret = xpmem_add_domid(part_state, cmd->domid_req.domid,
					      out_link);
			if (ret == 0) {
				XPMEM_ERR("Cannot insert domid %lli into hashtable",
					  cmd->domid_req.domid);
				return -EFAULT;
			}
		}

		break;
	}

	case XPMEM_DOMID_RELEASE:
		/* Someone downstream is releasing their domid: simply forward
		 * to the nameserver
		 */
		out_link = xpmem_search_domid(part_state, out_cmd->dst_dom);
		if (out_link == 0) {
			XPMEM_ERR("Cannot find domid %lli in hashtable",
				  out_cmd->dst_dom);
			return -EFAULT;
		}

		break;

	default: {
		XPMEM_ERR("Unknown DOMID operation: %s", cmd_to_string(cmd->type));
		return -EINVAL;
	}
	}

	/* Send the response */
	if (xpmem_send_cmd_link(part_state, out_link, out_cmd)) {
		XPMEM_ERR("Cannot send command on link %lli", out_link);
		return -EFAULT;
	}

	return 0;
}
static int vdev_disk_io_start(zio_t *zio) { vdev_t *vd = zio->io_vd; vdev_disk_t *dvd = vd->vdev_tsd; vdev_disk_buf_t *vdb; struct dk_callback *dkc; buf_t *bp; int error; if (zio->io_type == ZIO_TYPE_IOCTL) { /* XXPOLICY */ if (!vdev_readable(vd)) { zio->io_error = SET_ERROR(ENXIO); return (ZIO_PIPELINE_CONTINUE); } switch (zio->io_cmd) { case DKIOCFLUSHWRITECACHE: if (zfs_nocacheflush) break; if (vd->vdev_nowritecache) { zio->io_error = SET_ERROR(ENOTSUP); break; } zio->io_vsd = dkc = kmem_alloc(sizeof (*dkc), KM_SLEEP); zio->io_vsd_ops = &vdev_disk_vsd_ops; dkc->dkc_callback = vdev_disk_ioctl_done; dkc->dkc_flag = FLUSH_VOLATILE; dkc->dkc_cookie = zio; error = ldi_ioctl(dvd->vd_lh, zio->io_cmd, (uintptr_t)dkc, FKIOCTL, kcred, NULL); if (error == 0) { /* * The ioctl will be done asychronously, * and will call vdev_disk_ioctl_done() * upon completion. */ return (ZIO_PIPELINE_STOP); } if (error == ENOTSUP || error == ENOTTY) { /* * If we get ENOTSUP or ENOTTY, we know that * no future attempts will ever succeed. * In this case we set a persistent bit so * that we don't bother with the ioctl in the * future. */ vd->vdev_nowritecache = B_TRUE; } zio->io_error = error; break; default: zio->io_error = SET_ERROR(ENOTSUP); } return (ZIO_PIPELINE_CONTINUE); } vdb = kmem_alloc(sizeof (vdev_disk_buf_t), KM_SLEEP); vdb->vdb_io = zio; bp = &vdb->vdb_buf; bioinit(bp); bp->b_flags = B_BUSY | B_NOCACHE | (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE); if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD))) bp->b_flags |= B_FAILFAST; bp->b_bcount = zio->io_size; bp->b_un.b_addr = zio->io_data; bp->b_lblkno = lbtodb(zio->io_offset); bp->b_bufsize = zio->io_size; bp->b_iodone = (int (*)())vdev_disk_io_intr; /* ldi_strategy() will return non-zero only on programming errors */ VERIFY(ldi_strategy(dvd->vd_lh, bp) == 0); return (ZIO_PIPELINE_STOP); }
/* * Given the root disk device devid or pathname, read the label from * the device, and construct a configuration nvlist. */ int vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config) { ldi_handle_t vd_lh; vdev_label_t *label; uint64_t s, size; int l; ddi_devid_t tmpdevid; int error = -1; char *minor_name; /* * Read the device label and build the nvlist. */ if (devid != NULL && ddi_devid_str_decode(devid, &tmpdevid, &minor_name) == 0) { error = ldi_open_by_devid(tmpdevid, minor_name, FREAD, kcred, &vd_lh, zfs_li); ddi_devid_free(tmpdevid); ddi_devid_str_free(minor_name); } if (error && (error = ldi_open_by_name(devpath, FREAD, kcred, &vd_lh, zfs_li))) return (error); if (ldi_get_size(vd_lh, &s)) { (void) ldi_close(vd_lh, FREAD, kcred); return (SET_ERROR(EIO)); } size = P2ALIGN_TYPED(s, sizeof (vdev_label_t), uint64_t); label = kmem_alloc(sizeof (vdev_label_t), KM_SLEEP); *config = NULL; for (l = 0; l < VDEV_LABELS; l++) { uint64_t offset, state, txg = 0; /* read vdev label */ offset = vdev_label_offset(size, l, 0); if (vdev_disk_physio(vd_lh, (caddr_t)label, VDEV_SKIP_SIZE + VDEV_PHYS_SIZE, offset, B_READ) != 0) continue; if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist, sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) { *config = NULL; continue; } if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, &state) != 0 || state >= POOL_STATE_DESTROYED) { nvlist_free(*config); *config = NULL; continue; } if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, &txg) != 0 || txg == 0) { nvlist_free(*config); *config = NULL; continue; } break; } kmem_free(label, sizeof (vdev_label_t)); (void) ldi_close(vd_lh, FREAD, kcred); if (*config == NULL) error = SET_ERROR(EIDRM); return (error); }
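/*
 * A minimal standalone sketch (not from the source above; the constants are
 * hypothetical stand-ins): ZFS keeps four copies of the vdev label, two at
 * the front of the device and two at the back, so the loop above probes all
 * four offsets before giving up.  The offset math, reduced to one function:
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_LABELS	4
#define DEMO_LABEL_SIZE	(256 * 1024)	/* stand-in for sizeof (vdev_label_t) */

/* Offset of label l on a device whose aligned size is psize. */
static uint64_t
demo_label_offset(uint64_t psize, int l)
{
	return (uint64_t)l * DEMO_LABEL_SIZE +
	    (l < DEMO_LABELS / 2 ? 0 :
	    psize - DEMO_LABELS * DEMO_LABEL_SIZE);
}

int
main(void)
{
	uint64_t psize = 100ULL * 1024 * 1024;	/* 100 MiB device */
	int l;

	/* labels 0 and 1 land at the front, 2 and 3 at the back */
	for (l = 0; l < DEMO_LABELS; l++)
		printf("label %d at offset %llu\n", l,
		    (unsigned long long)demo_label_offset(psize, l));
	return 0;
}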
int s3c2440_i2s_alloc(void *handle, int direction, size_t size, int flags, s3c2440_i2s_buf_t *out) { int kalloc_flags = KM_SLEEP; int dma_flags = BUS_DMA_WAITOK; int retval = 0; struct s3c2xx0_softc *sc = s3c2xx0_softc; /* Shortcut */ s3c2440_i2s_buf_t buf; DPRINTF(("%s\n", __func__)); if (flags & M_NOWAIT) { kalloc_flags = KM_NOSLEEP; dma_flags = BUS_DMA_NOWAIT; } *out = kmem_alloc(sizeof(struct s3c2440_i2s_buf), kalloc_flags); if (*out == NULL) { DPRINTF(("Failed to allocate memory\n")); return ENOMEM; } buf = *out; buf->i2b_parent = handle; buf->i2b_size = size; buf->i2b_nsegs = S3C2440_I2S_BUF_MAX_SEGS; buf->i2b_xfer = NULL; buf->i2b_cb = NULL; buf->i2b_cb_cookie = NULL; /* We first allocate some DMA-friendly memory for the buffer... */ retval = bus_dmamem_alloc(sc->sc_dmat, buf->i2b_size, NBPG, 0, buf->i2b_segs, buf->i2b_nsegs, &buf->i2b_nsegs, dma_flags); if (retval != 0) { printf("%s: Failed to allocate DMA memory\n", __func__); goto cleanup_dealloc; } DPRINTF(("%s: Using %d DMA segments\n", __func__, buf->i2b_nsegs)); retval = bus_dmamem_map(sc->sc_dmat, buf->i2b_segs, buf->i2b_nsegs, buf->i2b_size, &buf->i2b_addr, dma_flags); if (retval != 0) { printf("%s: Failed to map DMA memory\n", __func__); goto cleanup_dealloc_dma; } DPRINTF(("%s: Playback DMA buffer mapped at %p\n", __func__, buf->i2b_addr)); /* XXX: Not sure if nsegments is really 1...*/ retval = bus_dmamap_create(sc->sc_dmat, buf->i2b_size, 1, buf->i2b_size, 0, dma_flags, &buf->i2b_dmamap); if (retval != 0) { printf("%s: Failed to create DMA map\n", __func__); goto cleanup_unmap_dma; } DPRINTF(("%s: DMA map created successfully\n", __func__)); buf->i2b_xfer = s3c2440_dmac_allocate_xfer(M_NOWAIT); if (buf->i2b_xfer == NULL) { retval = ENOMEM; goto cleanup_destroy_dmamap; } return 0; cleanup_destroy_dmamap: bus_dmamap_destroy(sc->sc_dmat, buf->i2b_dmamap); cleanup_unmap_dma: bus_dmamem_unmap(sc->sc_dmat, &buf->i2b_addr, buf->i2b_size); cleanup_dealloc_dma: bus_dmamem_free(sc->sc_dmat, buf->i2b_segs, buf->i2b_nsegs); cleanup_dealloc: kmem_free(*out, sizeof(struct s3c2440_i2s_buf)); return retval; }
static int
vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *ashift)
{
	spa_t *spa = vd->vdev_spa;
	vdev_disk_t *dvd;
	struct dk_minfo_ext dkmext;
	int error;
	dev_t dev;
	int otyp;
	boolean_t validate_devid = B_FALSE;
	ddi_devid_t devid;

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Reopen the device if it's not currently open. Otherwise,
	 * just update the physical size of the device.
	 */
	if (vd->vdev_tsd != NULL) {
		ASSERT(vd->vdev_reopening);
		dvd = vd->vdev_tsd;
		goto skip_open;
	}

	dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);

	/*
	 * When opening a disk device, we want to preserve the user's original
	 * intent.  We always want to open the device by the path the user gave
	 * us, even if it is one of multiple paths to the same device.  But we
	 * also want to be able to survive disks being removed/recabled.
	 * Therefore the sequence of opening devices is:
	 *
	 * 1. Try opening the device by path.  For legacy pools without the
	 *    'whole_disk' property, attempt to fix the path by appending 's0'.
	 *
	 * 2. If the devid of the device matches the stored value, return
	 *    success.
	 *
	 * 3. Otherwise, the device may have moved.  Try opening the device
	 *    by the devid instead.
	 */
	if (vd->vdev_devid != NULL) {
		if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid,
		    &dvd->vd_minor) != 0) {
			vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
			return (SET_ERROR(EINVAL));
		}
	}

	error = EINVAL;		/* presume failure */

	if (vd->vdev_path != NULL) {

		if (vd->vdev_wholedisk == -1ULL) {
			size_t len = strlen(vd->vdev_path) + 3;
			char *buf = kmem_alloc(len, KM_SLEEP);
			ldi_handle_t lh;

			(void) snprintf(buf, len, "%ss0", vd->vdev_path);

			if (ldi_open_by_name(buf, spa_mode(spa), kcred,
			    &lh, zfs_li) == 0) {
				spa_strfree(vd->vdev_path);
				vd->vdev_path = buf;
				vd->vdev_wholedisk = 1ULL;
				(void) ldi_close(lh, spa_mode(spa), kcred);
			} else {
				kmem_free(buf, len);
			}
		}

		error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), kcred,
		    &dvd->vd_lh, zfs_li);

		/*
		 * Compare the devid to the stored value.
		 */
		if (error == 0 && vd->vdev_devid != NULL &&
		    ldi_get_devid(dvd->vd_lh, &devid) == 0) {
			if (ddi_devid_compare(devid, dvd->vd_devid) != 0) {
				error = SET_ERROR(EINVAL);
				(void) ldi_close(dvd->vd_lh, spa_mode(spa),
				    kcred);
				dvd->vd_lh = NULL;
			}
			ddi_devid_free(devid);
		}

		/*
		 * If we succeeded in opening the device, but 'vdev_wholedisk'
		 * is not yet set, then this must be a slice.
		 */
		if (error == 0 && vd->vdev_wholedisk == -1ULL)
			vd->vdev_wholedisk = 0;
	}

	/*
	 * If we were unable to open by path, or the devid check fails, open by
	 * devid instead.
	 */
	if (error != 0 && vd->vdev_devid != NULL) {
		error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor,
		    spa_mode(spa), kcred, &dvd->vd_lh, zfs_li);
	}

	/*
	 * If all else fails, then try opening by physical path (if available)
	 * or the logical path (if we failed due to the devid check).  While not
	 * as reliable as the devid, this will give us something, and the higher
	 * level vdev validation will prevent us from opening the wrong device.
	 */
	if (error) {
		if (vd->vdev_devid != NULL)
			validate_devid = B_TRUE;

		if (vd->vdev_physpath != NULL &&
		    (dev = ddi_pathname_to_dev_t(vd->vdev_physpath)) != NODEV)
			error = ldi_open_by_dev(&dev, OTYP_BLK, spa_mode(spa),
			    kcred, &dvd->vd_lh, zfs_li);

		/*
		 * Note that we don't support the legacy auto-wholedisk support
		 * as above.  This hasn't been used in a very long time and we
		 * don't need to propagate its oddities to this edge condition.
		 */
		if (error && vd->vdev_path != NULL)
			error = ldi_open_by_name(vd->vdev_path, spa_mode(spa),
			    kcred, &dvd->vd_lh, zfs_li);
	}

	if (error) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}

	/*
	 * Now that the device has been successfully opened, update the devid
	 * if necessary.
	 */
	if (validate_devid && spa_writeable(spa) &&
	    ldi_get_devid(dvd->vd_lh, &devid) == 0) {
		if (ddi_devid_compare(devid, dvd->vd_devid) != 0) {
			char *vd_devid;

			vd_devid = ddi_devid_str_encode(devid, dvd->vd_minor);
			zfs_dbgmsg("vdev %s: update devid from %s, "
			    "to %s", vd->vdev_path, vd->vdev_devid, vd_devid);
			spa_strfree(vd->vdev_devid);
			vd->vdev_devid = spa_strdup(vd_devid);
			ddi_devid_str_free(vd_devid);
		}
		ddi_devid_free(devid);
	}

	/*
	 * Once a device is opened, verify that the physical device path (if
	 * available) is up to date.
	 */
	if (ldi_get_dev(dvd->vd_lh, &dev) == 0 &&
	    ldi_get_otyp(dvd->vd_lh, &otyp) == 0) {
		char *physpath, *minorname;

		physpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		minorname = NULL;
		if (ddi_dev_pathname(dev, otyp, physpath) == 0 &&
		    ldi_get_minor_name(dvd->vd_lh, &minorname) == 0 &&
		    (vd->vdev_physpath == NULL ||
		    strcmp(vd->vdev_physpath, physpath) != 0)) {
			if (vd->vdev_physpath)
				spa_strfree(vd->vdev_physpath);
			(void) strlcat(physpath, ":", MAXPATHLEN);
			(void) strlcat(physpath, minorname, MAXPATHLEN);
			vd->vdev_physpath = spa_strdup(physpath);
		}
		if (minorname)
			kmem_free(minorname, strlen(minorname) + 1);
		kmem_free(physpath, MAXPATHLEN);
	}

skip_open:
	/*
	 * Determine the actual size of the device.
	 */
	if (ldi_get_size(dvd->vd_lh, psize) != 0) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Determine the device's minimum transfer size.
	 * If the ioctl isn't supported, assume DEV_BSIZE.
	 */
	if (ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFOEXT, (intptr_t)&dkmext,
	    FKIOCTL, kcred, NULL) != 0)
		dkmext.dki_pbsize = DEV_BSIZE;

	*ashift = highbit(MAX(dkmext.dki_pbsize, SPA_MINBLOCKSIZE)) - 1;

	if (vd->vdev_wholedisk == 1) {
		uint64_t capacity = dkmext.dki_capacity - 1;
		uint64_t blksz = dkmext.dki_lbsize;
		int wce = 1;

		/*
		 * If we own the whole disk, try to enable disk write caching.
		 * We ignore errors because it's OK if we can't do it.
		 */
		(void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce,
		    FKIOCTL, kcred, NULL);

		*max_psize = *psize + vdev_disk_get_space(vd, capacity, blksz);
		zfs_dbgmsg("capacity change: vdev %s, psize %llu, "
		    "max_psize %llu", vd->vdev_path, *psize, *max_psize);
	} else {
		*max_psize = *psize;
	}

	/*
	 * Clear the nowritecache bit, so that on a vdev_reopen() we will
	 * try again.
	 */
	vd->vdev_nowritecache = B_FALSE;

	return (0);
}
/*
 * This is called to add the given busy item to the transaction's
 * list of busy items.  It must find a free busy item descriptor
 * or allocate a new one and add the item to that descriptor.
 * The function returns a pointer to the busy descriptor used to
 * record the new busy entry (the AG number and extent index).
 */
xfs_log_busy_slot_t *
xfs_trans_add_busy(xfs_trans_t *tp, xfs_agnumber_t ag, xfs_extlen_t idx)
{
	xfs_log_busy_chunk_t	*lbcp;
	xfs_log_busy_slot_t	*lbsp;
	int			i = 0;

	/*
	 * If there are no free descriptors, allocate a new chunk
	 * of them and put it at the front of the chunk list.
	 */
	if (tp->t_busy_free == 0) {
		lbcp = (xfs_log_busy_chunk_t*)
		       kmem_alloc(sizeof(xfs_log_busy_chunk_t), KM_SLEEP);
		ASSERT(lbcp != NULL);
		/*
		 * Initialize the chunk, and then
		 * claim the first slot in the newly allocated chunk.
		 */
		XFS_LBC_INIT(lbcp);
		XFS_LBC_CLAIM(lbcp, 0);
		lbcp->lbc_unused = 1;
		lbsp = XFS_LBC_SLOT(lbcp, 0);

		/*
		 * Link in the new chunk and update the free count.
		 */
		lbcp->lbc_next = tp->t_busy.lbc_next;
		tp->t_busy.lbc_next = lbcp;
		tp->t_busy_free = XFS_LIC_NUM_SLOTS - 1;

		/*
		 * Initialize the new slot with the AG number and
		 * extent index, and return it.
		 */
		lbsp->lbc_ag = ag;
		lbsp->lbc_idx = idx;
		return lbsp;
	}

	/*
	 * Find the free descriptor. It is somewhere in the chunklist
	 * of descriptors.
	 */
	lbcp = &tp->t_busy;
	while (lbcp != NULL) {
		if (XFS_LBC_VACANCY(lbcp)) {
			if (lbcp->lbc_unused <= XFS_LBC_MAX_SLOT) {
				i = lbcp->lbc_unused;
				break;
			} else {
				/* out-of-order vacancy */
				cmn_err(CE_DEBUG, "OOO vacancy lbcp 0x%p\n",
					lbcp);
				ASSERT(0);
			}
		}
		lbcp = lbcp->lbc_next;
	}
	ASSERT(lbcp != NULL);
	/*
	 * If we find a free descriptor, claim it,
	 * initialize it, and return it.
	 */
	XFS_LBC_CLAIM(lbcp, i);
	if (lbcp->lbc_unused <= i) {
		lbcp->lbc_unused = i + 1;
	}

	lbsp = XFS_LBC_SLOT(lbcp, i);
	tp->t_busy_free--;
	lbsp->lbc_ag = ag;
	lbsp->lbc_idx = idx;
	return lbsp;
}
/* * This is called to add the given log item to the transaction's * list of log items. It must find a free log item descriptor * or allocate a new one and add the item to that descriptor. * The function returns a pointer to item descriptor used to point * to the new item. The log item will now point to its new descriptor * with its li_desc field. */ xfs_log_item_desc_t * xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip) { xfs_log_item_desc_t *lidp; xfs_log_item_chunk_t *licp; int i=0; /* * If there are no free descriptors, allocate a new chunk * of them and put it at the front of the chunk list. */ if (tp->t_items_free == 0) { licp = (xfs_log_item_chunk_t*) kmem_alloc(sizeof(xfs_log_item_chunk_t), KM_SLEEP); ASSERT(licp != NULL); /* * Initialize the chunk, and then * claim the first slot in the newly allocated chunk. */ XFS_LIC_INIT(licp); XFS_LIC_CLAIM(licp, 0); licp->lic_unused = 1; XFS_LIC_INIT_SLOT(licp, 0); lidp = XFS_LIC_SLOT(licp, 0); /* * Link in the new chunk and update the free count. */ licp->lic_next = tp->t_items.lic_next; tp->t_items.lic_next = licp; tp->t_items_free = XFS_LIC_NUM_SLOTS - 1; /* * Initialize the descriptor and the generic portion * of the log item. * * Point the new slot at this item and return it. * Also point the log item at its currently active * descriptor and set the item's mount pointer. */ lidp->lid_item = lip; lidp->lid_flags = 0; lidp->lid_size = 0; lip->li_desc = lidp; lip->li_mountp = tp->t_mountp; return lidp; } /* * Find the free descriptor. It is somewhere in the chunklist * of descriptors. */ licp = &tp->t_items; while (licp != NULL) { if (XFS_LIC_VACANCY(licp)) { if (licp->lic_unused <= XFS_LIC_MAX_SLOT) { i = licp->lic_unused; ASSERT(XFS_LIC_ISFREE(licp, i)); break; } for (i = 0; i <= XFS_LIC_MAX_SLOT; i++) { if (XFS_LIC_ISFREE(licp, i)) break; } ASSERT(i <= XFS_LIC_MAX_SLOT); break; } licp = licp->lic_next; } ASSERT(licp != NULL); /* * If we find a free descriptor, claim it, * initialize it, and return it. */ XFS_LIC_CLAIM(licp, i); if (licp->lic_unused <= i) { licp->lic_unused = i + 1; XFS_LIC_INIT_SLOT(licp, i); } lidp = XFS_LIC_SLOT(licp, i); tp->t_items_free--; lidp->lid_item = lip; lidp->lid_flags = 0; lidp->lid_size = 0; lip->li_desc = lidp; lip->li_mountp = tp->t_mountp; return lidp; }
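/*
 * A standalone sketch (not from the source above; names, the mask layout,
 * and the claim policy are hypothetical simplifications): both chunk
 * allocators above, busy items and log items, use the same trick, where a
 * chunk holds a small array of slots plus a free-slot bitmask, and the
 * lbc_unused/lic_unused field caches the first never-used index so claims
 * stay O(1) until the chunk fills:
 */
#include <stdio.h>

#define DEMO_NUM_SLOTS	15

struct demo_chunk {
	unsigned	free_mask;	/* bit n set => slot n is free */
	int		unused;		/* first never-claimed slot */
	int		slots[DEMO_NUM_SLOTS];
};

static void
demo_chunk_init(struct demo_chunk *c)
{
	c->free_mask = (1u << DEMO_NUM_SLOTS) - 1;
	c->unused = 0;
}

/* Claim a slot: prefer the never-used tail, else scan the mask. */
static int
demo_chunk_claim(struct demo_chunk *c)
{
	int i;

	if (c->free_mask == 0)
		return -1;			/* chunk is full */
	if (c->unused < DEMO_NUM_SLOTS) {
		i = c->unused++;
	} else {
		for (i = 0; i < DEMO_NUM_SLOTS; i++)
			if (c->free_mask & (1u << i))
				break;
	}
	c->free_mask &= ~(1u << i);
	return i;
}

int
main(void)
{
	struct demo_chunk c;
	int a, b;

	demo_chunk_init(&c);
	a = demo_chunk_claim(&c);
	b = demo_chunk_claim(&c);
	printf("claimed slot %d, then slot %d\n", a, b);	/* 0, then 1 */
	return 0;
}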
/* * Scan the AGs starting at startag looking for an AG that isn't in use and has * at least minlen blocks free. */ static int xfs_filestream_pick_ag( struct xfs_inode *ip, xfs_agnumber_t startag, xfs_agnumber_t *agp, int flags, xfs_extlen_t minlen) { struct xfs_mount *mp = ip->i_mount; struct xfs_fstrm_item *item; struct xfs_perag *pag; xfs_extlen_t longest, free = 0, minfree, maxfree = 0; xfs_agnumber_t ag, max_ag = NULLAGNUMBER; int err, trylock, nscan; ASSERT(S_ISDIR(ip->i_d.di_mode)); /* 2% of an AG's blocks must be free for it to be chosen. */ minfree = mp->m_sb.sb_agblocks / 50; ag = startag; *agp = NULLAGNUMBER; /* For the first pass, don't sleep trying to init the per-AG. */ trylock = XFS_ALLOC_FLAG_TRYLOCK; for (nscan = 0; 1; nscan++) { trace_xfs_filestream_scan(ip, ag); pag = xfs_perag_get(mp, ag); if (!pag->pagf_init) { err = xfs_alloc_pagf_init(mp, NULL, ag, trylock); if (err && !trylock) { xfs_perag_put(pag); return err; } } /* Might fail sometimes during the 1st pass with trylock set. */ if (!pag->pagf_init) goto next_ag; /* Keep track of the AG with the most free blocks. */ if (pag->pagf_freeblks > maxfree) { maxfree = pag->pagf_freeblks; max_ag = ag; } /* * The AG reference count does two things: it enforces mutual * exclusion when examining the suitability of an AG in this * loop, and it guards against two filestreams being established * in the same AG as each other. */ if (xfs_filestream_get_ag(mp, ag) > 1) { xfs_filestream_put_ag(mp, ag); goto next_ag; } longest = xfs_alloc_longest_free_extent(mp, pag); if (((minlen && longest >= minlen) || (!minlen && pag->pagf_freeblks >= minfree)) && (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) || (flags & XFS_PICK_LOWSPACE))) { /* Break out, retaining the reference on the AG. */ free = pag->pagf_freeblks; xfs_perag_put(pag); *agp = ag; break; } /* Drop the reference on this AG, it's not usable. */ xfs_filestream_put_ag(mp, ag); next_ag: xfs_perag_put(pag); /* Move to the next AG, wrapping to AG 0 if necessary. */ if (++ag >= mp->m_sb.sb_agcount) ag = 0; /* If a full pass of the AGs hasn't been done yet, continue. */ if (ag != startag) continue; /* Allow sleeping in xfs_alloc_pagf_init() on the 2nd pass. */ if (trylock != 0) { trylock = 0; continue; } /* Finally, if lowspace wasn't set, set it for the 3rd pass. */ if (!(flags & XFS_PICK_LOWSPACE)) { flags |= XFS_PICK_LOWSPACE; continue; } /* * Take the AG with the most free space, regardless of whether * it's already in use by another filestream. */ if (max_ag != NULLAGNUMBER) { xfs_filestream_get_ag(mp, max_ag); free = maxfree; *agp = max_ag; break; } /* take AG 0 if none matched */ trace_xfs_filestream_pick(ip, *agp, free, nscan); *agp = 0; return 0; } trace_xfs_filestream_pick(ip, *agp, free, nscan); if (*agp == NULLAGNUMBER) return 0; err = ENOMEM; item = kmem_alloc(sizeof(*item), KM_MAYFAIL); if (!item) goto out_put_ag; item->ag = *agp; item->ip = ip; err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru); if (err) { if (err == EEXIST) err = 0; goto out_free_item; } return 0; out_free_item: kmem_free(item); out_put_ag: xfs_filestream_put_ag(mp, *agp); return err; }
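/*
 * A standalone sketch (not from the source above; the predicate and the
 * constants are arbitrary stand-ins): the AG picker above is a wrap-around
 * scan that relaxes its criteria each time a full pass comes up empty,
 * first a trylock-only pass, then a blocking pass, then "low space" mode.
 * The control flow, boiled down:
 */
#include <stdio.h>

#define DEMO_NGROUPS	8

/* Accept group g under the given strictness level (0 = strictest). */
static int
demo_acceptable(int g, int level)
{
	return (g % (3 - level)) == 0;	/* arbitrary stand-in predicate */
}

static int
demo_pick(int start)
{
	int g = start, level = 0;

	for (;;) {
		if (demo_acceptable(g, level))
			return g;
		if (++g >= DEMO_NGROUPS)	/* wrap to group 0 */
			g = 0;
		if (g != start)			/* pass not finished yet */
			continue;
		if (++level > 2)		/* out of relaxations: give up */
			return -1;
	}
}

int
main(void)
{
	printf("picked group %d\n", demo_pick(5));
	return 0;
}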
void i686_kmain(unsigned long magic, multiboot_info_t *info) { bootvideo_cls(); parse_cmdline(info->cmdline); if (use_serial) i686_tty_init(0, 9600); i686_kernel.debug = i686_debug; if (magic != MULTIBOOT_BOOTLOADER_MAGIC) { i686_debug("Not booted from multiboot loader!\n"); while (1); } i686_debug("mods_addr: %x\nmod_start: %x\n", info->mods_addr, 0); i686_kernel.mutex = &i686_mutex; i686_kernel.bsp = (struct cpu *)i686_cpu_alloc(); i686_kernel.bsp->kvirt = i686_virtmem_init(&i686_kernel); i686_kernel.phys = i686_physmem_alloc(&i686_kernel, info); kmem_init(i686_kernel.bsp->allocator); i686_kernel.bsp->v.init(i686_kernel.bsp); i686_debug("Location GDT entry: %x\n", ((struct i686_cpu *)i686_kernel.bsp)->gdt); virtaddr_t a; physaddr_t p; virtmem_error_t e1 = virtmem_kernel_alloc(i686_kernel.bsp->kvirt, &a, 1); assert(e1 == VIRTMEM_SUCCESS); physmem_error_t e2 = physmem_page_alloc(i686_kernel.bsp->localmem, 0, &p); assert(e2 == PHYSMEM_SUCCESS); virtmem_kernel_map_virt_to_phys(i686_kernel.bsp->kvirt, p, a); i686_debug("Allocated address: %x(->%x)\n", a, p); char *s = (char *)a; strcpy(s, "This shows the validity of this memory"); i686_debug("%x contains: %s\n", a, s); struct kmem_cache *s1 = kmem_alloc(i686_kernel.bsp->allocator); kmem_cache_init(i686_kernel.bsp->allocator, s1, i686_kernel.bsp, "test", 128, NULL, NULL); char *t1 = kmem_cache_alloc(s1); i686_debug("cache at %x provided us with %x\n", s1, t1); strcpy(t1, "This shows the validity of the slab allocation"); i686_debug("%x contains: %s\n", t1, t1); i686_address_space_init(); struct address_space *as; struct memory_region *mr; address_space_alloc(&as); memory_region_alloc(&mr); e1 = virtmem_kernel_alloc(i686_kernel.bsp->kvirt, &a, 1); virtmem_kernel_map_virt_to_phys(i686_kernel.bsp->kvirt, (physaddr_t)as->pd, a); address_space_init_region(as, mr, (virtaddr_t)0x1000000, 0x2000); memory_region_set_flags(mr, 1, 1); memory_region_map(as, mr, NULL); const char *teststr = "This is a test string to be copied to userspace."; char testcpybuf[128]; char opcodes[] = {0xeb, 0xfe}; virtmem_copy_kernel_to_user(i686_kernel.bsp->kvirt, as->pd, (void *)0x1000ffc, (const void *)teststr, strlen(teststr) + 1); virtmem_copy_user_to_kernel(i686_kernel.bsp->kvirt, (void *)&testcpybuf, as->pd, (const void *)0x1000ffc, strlen(teststr) + 1); i686_debug("testcpybuf contains '%s'\n", testcpybuf); virtmem_copy_kernel_to_user(i686_kernel.bsp->kvirt, as->pd, (void *)0x1000000, (const void *)opcodes, 2); struct thread *thr1; scheduler_thread_alloc(cpu()->sched, &thr1); thread_init(thr1, as); thr1->state = THREAD_RUNNABLE; scheduler_thread_add(cpu()->sched, thr1); scheduler_reschedule(cpu()->sched); virtmem_user_setup_kernelspace(i686_kernel.bsp->kvirt, as->pd); virtmem_set_context(i686_kernel.bsp->kvirt, as->pd); scheduler_resume(cpu()->sched); while (1); }
void
reset_stripe(ms_unit_t *un, minor_t mnum, int removing)
{
    ms_comp_t       *mdcomp;
    struct ms_row   *mdr;
    int             i, c;
    int             row;
    int             nsv;
    int             isv;
    sv_dev_t        *sv;
    mddb_recid_t    *recids;
    mddb_recid_t    vtoc_id;
    int             rid = 0;

    md_destroy_unit_incore(mnum, &stripe_md_ops);

    md_nblocks_set(mnum, -1ULL);
    MD_UNIT(mnum) = NULL;

    /*
     * Attempt release of its minor node
     */
    md_remove_minor_node(mnum);

    if (!removing)
        return;

    nsv = 0;
    /* Count the number of devices */
    for (row = 0; row < un->un_nrows; row++) {
        mdr = &un->un_row[row];
        nsv += mdr->un_ncomp;
    }
    sv = (sv_dev_t *)kmem_alloc(sizeof (sv_dev_t) * nsv, KM_SLEEP);

    /*
     * allocate recids array.  since we may have to commit
     * underlying soft partition records, we need an array
     * of size: total number of components in stripe + 3
     * (one for the stripe itself, one for the hotspare, one
     * for the end marker).
     */
    recids = kmem_alloc(sizeof (mddb_recid_t) * (nsv + 3), KM_SLEEP);

    /*
     * Save the md_dev64_t's and driver nm indexes.
     * Because after the mddb_deleterec() we will
     * not be able to access the unit structure.
     *
     * NOTE: Deleting the names before deleting the
     *       unit structure would cause problems if
     *       the machine crashed in between the two.
     */
    isv = 0;
    mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]);

    for (row = 0; row < un->un_nrows; row++) {
        mdr = &un->un_row[row];
        for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) {
            struct ms_comp  *mdc;
            md_dev64_t      child_dev;
            md_unit_t       *child_un;

            mdc = &mdcomp[c++];
            if (mdc->un_mirror.ms_hs_id != 0) {
                mdkey_t hs_key;

                hs_key = mdc->un_mirror.ms_hs_key;

                mdc->un_dev = mdc->un_mirror.ms_orig_dev;
                mdc->un_start_block =
                    mdc->un_mirror.ms_orig_blk;
                mdc->un_mirror.ms_hs_id = 0;
                mdc->un_mirror.ms_hs_key = 0;
                mdc->un_mirror.ms_orig_dev = 0;
                recids[0] = 0;
                recids[1] = 0;  /* recids[1] filled in below */
                recids[2] = 0;

                (void) md_hot_spare_ifc(HS_FREE, un->un_hsp_id,
                    0, 0, &recids[0], &hs_key, NULL, NULL);
                mddb_commitrecs_wrapper(recids);
            }

            /*
             * check if we've got metadevice below us and
             * deparent it if we do.
             * NOTE: currently soft partitions are the
             *       only metadevices stripes can be
             *       built on top of.
             */
            child_dev = mdc->un_dev;
            if (md_getmajor(child_dev) == md_major) {
                child_un = MD_UNIT(md_getminor(child_dev));
                md_reset_parent(child_dev);
                recids[rid++] = MD_RECID(child_un);
            }

            sv[isv].setno = MD_MIN2SET(mnum);
            sv[isv++].key = mdc->un_key;
        }
    }

    recids[rid++] = un->c.un_record_id;
    recids[rid] = 0;    /* filled in below */

    /*
     * Decrement the HSP reference count and
     * remove the knowledge of the HSP from the unit struct.
     * This is done atomically to remove a window.
     */
    if (un->un_hsp_id != -1) {
        (void) md_hot_spare_ifc(HSP_DECREF, un->un_hsp_id, 0, 0,
            &recids[rid++], NULL, NULL, NULL);
        un->un_hsp_id = -1;
    }

    /* set end marker and commit records */
    recids[rid] = 0;
    mddb_commitrecs_wrapper(recids);

    vtoc_id = un->c.un_vtoc_id;

    /*
     * Remove self from the namespace
     */
    if (un->c.un_revision & MD_FN_META_DEV) {
        (void) md_rem_selfname(un->c.un_self_id);
    }

    /* Remove the unit structure */
    mddb_deleterec_wrapper(un->c.un_record_id);

    /* Remove the vtoc, if present */
    if (vtoc_id)
        mddb_deleterec_wrapper(vtoc_id);

    SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_METADEVICE,
        MD_MIN2SET(mnum), MD_MIN2UNIT(mnum));

    md_rem_names(sv, nsv);
    kmem_free(sv, sizeof (sv_dev_t) * nsv);
    kmem_free(recids, sizeof (mddb_recid_t) * (nsv + 3));
}
int
zpl_set_acl(struct inode *ip, int type, struct posix_acl *acl)
{
    struct super_block *sb = ITOZSB(ip)->z_sb;
    char *name, *value = NULL;
    int error = 0;
    size_t size = 0;

    if (S_ISLNK(ip->i_mode))
        return (-EOPNOTSUPP);

    switch (type) {
    case ACL_TYPE_ACCESS:
        name = POSIX_ACL_XATTR_ACCESS;
        if (acl) {
            zpl_equivmode_t mode = ip->i_mode;
            error = posix_acl_equiv_mode(acl, &mode);
            if (error < 0) {
                return (error);
            } else {
                /*
                 * The mode bits will have been set by
                 * ->zfs_setattr()->zfs_acl_chmod_setattr()
                 * using the ZFS ACL conversion.  If they
                 * differ from the Posix ACL conversion dirty
                 * the inode to write the Posix mode bits.
                 */
                if (ip->i_mode != mode) {
                    ip->i_mode = mode;
                    ip->i_ctime = current_fs_time(sb);
                    zfs_mark_inode_dirty(ip);
                }

                if (error == 0)
                    acl = NULL;
            }
        }
        break;

    case ACL_TYPE_DEFAULT:
        name = POSIX_ACL_XATTR_DEFAULT;
        if (!S_ISDIR(ip->i_mode))
            return (acl ? -EACCES : 0);
        break;

    default:
        return (-EINVAL);
    }

    if (acl) {
        size = posix_acl_xattr_size(acl->a_count);
        value = kmem_alloc(size, KM_SLEEP);

        error = zpl_acl_to_xattr(acl, value, size);
        if (error < 0) {
            kmem_free(value, size);
            return (error);
        }
    }

    error = zpl_xattr_set(ip, name, value, size, 0);
    if (value)
        kmem_free(value, size);

    if (!error) {
        if (acl)
            zpl_set_cached_acl(ip, type, acl);
        else
            zpl_forget_cached_acl(ip, type);
    }

    return (error);
}
kern_return_t
mach_port_space_info(
    ipc_space_t                 space,
    ipc_info_space_t            *infop,
    ipc_info_name_array_t       *tablep,
    mach_msg_type_number_t      *tableCntp,
    __unused ipc_info_tree_name_array_t *treep,
    __unused mach_msg_type_number_t     *treeCntp)
{
    ipc_info_name_t *table_info;
    vm_offset_t table_addr;
    vm_size_t table_size, table_size_needed;
    ipc_entry_t table;
    ipc_entry_num_t tsize;
    mach_port_index_t index;
    kern_return_t kr;
    vm_map_copy_t copy;

    if (space == IS_NULL)
        return KERN_INVALID_TASK;

#if !(DEVELOPMENT | DEBUG)
    const boolean_t dbg_ok = (mac_task_check_expose_task(kernel_task) == 0);
#else
    const boolean_t dbg_ok = TRUE;
#endif

    /* start with in-line memory */

    table_size = 0;

    for (;;) {
        is_read_lock(space);
        if (!is_active(space)) {
            is_read_unlock(space);
            if (table_size != 0)
                kmem_free(ipc_kernel_map, table_addr, table_size);
            return KERN_INVALID_TASK;
        }

        table_size_needed =
            vm_map_round_page((space->is_table_size
                * sizeof(ipc_info_name_t)),
                VM_MAP_PAGE_MASK(ipc_kernel_map));

        if (table_size_needed == table_size)
            break;

        is_read_unlock(space);

        if (table_size != table_size_needed) {
            if (table_size != 0)
                kmem_free(ipc_kernel_map, table_addr, table_size);
            kr = kmem_alloc(ipc_kernel_map, &table_addr,
                table_size_needed, VM_KERN_MEMORY_IPC);
            if (kr != KERN_SUCCESS) {
                return KERN_RESOURCE_SHORTAGE;
            }
            table_size = table_size_needed;
        }
    }
    /* space is read-locked and active; we have enough wired memory */

    /* get the overall space info */
    infop->iis_genno_mask = MACH_PORT_NGEN(MACH_PORT_DEAD);
    infop->iis_table_size = space->is_table_size;
    infop->iis_table_next = space->is_table_next->its_size;

    /* walk the table for this space */
    table = space->is_table;
    tsize = space->is_table_size;
    table_info = (ipc_info_name_array_t)table_addr;
    for (index = 0; index < tsize; index++) {
        ipc_info_name_t *iin = &table_info[index];
        ipc_entry_t entry = &table[index];
        ipc_entry_bits_t bits;

        bits = entry->ie_bits;
        iin->iin_name = MACH_PORT_MAKE(index, IE_BITS_GEN(bits));
        iin->iin_collision = 0;
        iin->iin_type = IE_BITS_TYPE(bits);
        if ((entry->ie_bits & MACH_PORT_TYPE_PORT_RIGHTS) !=
            MACH_PORT_TYPE_NONE &&
            entry->ie_request != IE_REQ_NONE) {
            __IGNORE_WCASTALIGN(ipc_port_t port =
                (ipc_port_t)entry->ie_object);

            assert(IP_VALID(port));
            ip_lock(port);
            iin->iin_type |= ipc_port_request_type(port,
                iin->iin_name, entry->ie_request);
            ip_unlock(port);
        }

        iin->iin_urefs = IE_BITS_UREFS(bits);
        iin->iin_object = (dbg_ok) ?
            (natural_t)VM_KERNEL_ADDRPERM((uintptr_t)entry->ie_object) : 0;
        iin->iin_next = entry->ie_next;
        iin->iin_hash = entry->ie_index;
    }

    is_read_unlock(space);

    /* prepare the table out-of-line data for return */
    if (table_size > 0) {
        if (table_size > infop->iis_table_size * sizeof(ipc_info_name_t))
            bzero((char *)&table_info[infop->iis_table_size],
                table_size -
                infop->iis_table_size * sizeof(ipc_info_name_t));

        kr = vm_map_unwire(
            ipc_kernel_map,
            vm_map_trunc_page(table_addr,
                VM_MAP_PAGE_MASK(ipc_kernel_map)),
            vm_map_round_page(table_addr + table_size,
                VM_MAP_PAGE_MASK(ipc_kernel_map)),
            FALSE);
        assert(kr == KERN_SUCCESS);
        kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)table_addr,
            (vm_map_size_t)table_size, TRUE, &copy);
        assert(kr == KERN_SUCCESS);
        *tablep = (ipc_info_name_t *)copy;
        *tableCntp = infop->iis_table_size;
    } else {
        *tablep = (ipc_info_name_t *)0;
        *tableCntp = 0;
    }

    /* splay tree is obsolete, no work to do... */
    *treep = (ipc_info_tree_name_t *)0;
    *treeCntp = 0;
    return KERN_SUCCESS;
}
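/*
 * Illustrative sketch (not part of the original source): the allocation loop
 * above is the classic "size under lock, allocate unlocked, re-check"
 * pattern -- the table can grow while the lock is dropped, so the snapshot
 * buffer is reallocated until its size matches what the locked reader needs.
 * A generic, self-contained model; needed() is a caller-supplied stand-in
 * for the locked size query, and the lock itself is shown only as comments.
 */
#include <stdlib.h>

static void *
snapshot_alloc(size_t (*needed)(void), size_t *sizep)
{
    void *buf = NULL;
    size_t size = 0, want;

    for (;;) {
        /* ... take the read lock here ... */
        want = needed();        /* may differ from the last iteration */
        if (want == size)
            break;              /* sized right: keep the lock, copy out */
        /* ... drop the lock before (re)allocating ... */
        free(buf);
        buf = malloc(want);
        if (buf == NULL)
            return NULL;
        size = want;
    }
    *sizep = size;
    return buf;                 /* caller copies, then unlocks */
}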
/*
 * Called when the module is first loaded, this routine loads the configuration
 * file into the SPA namespace.  It does not actually open or load the pools;
 * it only populates the namespace.
 */
void
spa_config_load(void)
{
    void *buf = NULL;
    nvlist_t *nvlist, *child;
    nvpair_t *nvpair;
    char *pathname;
    struct _buf *file;
    uint64_t fsize;

#ifdef _KERNEL
    if (zfs_autoimport_disable)
        return;
#endif

    /*
     * Open the configuration file.
     */
    pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

    (void) snprintf(pathname, MAXPATHLEN, "%s%s",
        (rootdir != NULL) ? "./" : "", spa_config_path);

    file = kobj_open_file(pathname);

    kmem_free(pathname, MAXPATHLEN);

    if (file == (struct _buf *)-1)
        return;

    if (kobj_get_filesize(file, &fsize) != 0)
        goto out;

    buf = kmem_alloc(fsize, KM_SLEEP);

    /*
     * Read the nvlist from the file.
     */
    if (kobj_read_file(file, buf, fsize, 0) < 0)
        goto out;

    /*
     * Unpack the nvlist.
     */
    if (nvlist_unpack(buf, fsize, &nvlist, KM_SLEEP) != 0)
        goto out;

    /*
     * Iterate over all elements in the nvlist, creating a new spa_t for
     * each one with the specified configuration.
     */
    mutex_enter(&spa_namespace_lock);
    nvpair = NULL;
    while ((nvpair = nvlist_next_nvpair(nvlist, nvpair)) != NULL) {
        if (nvpair_type(nvpair) != DATA_TYPE_NVLIST)
            continue;

        child = fnvpair_value_nvlist(nvpair);

        if (spa_lookup(nvpair_name(nvpair)) != NULL)
            continue;
        (void) spa_add(nvpair_name(nvpair), child, NULL);
    }
    mutex_exit(&spa_namespace_lock);

    nvlist_free(nvlist);

out:
    if (buf != NULL)
        kmem_free(buf, fsize);

    kobj_close_file(file);
}
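/*
 * Illustrative sketch (not part of the original source): the loop above walks
 * an nvlist with the standard cursor idiom -- pass NULL to get the first
 * pair, then feed each pair back in to advance.  A minimal userland example
 * of the same idiom, assuming libnvpair is available to link against.
 */
#include <libnvpair.h>
#include <stdio.h>

static void
dump_nvlist_names(nvlist_t *nvl)
{
    nvpair_t *pair = NULL;

    while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
        /* Only nested nvlists are of interest, as above. */
        if (nvpair_type(pair) != DATA_TYPE_NVLIST)
            continue;
        (void) printf("config: %s\n", nvpair_name(pair));
    }
}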
/*
 * Construct a stack for init containing the arguments to it, then
 * pass control to exec_common.
 */
int
exec_init(const char *initpath, const char *args)
{
    caddr32_t ucp;
    caddr32_t *uap;
    caddr32_t *argv;
    caddr32_t exec_fnamep;
    char *scratchargs;
    int i, sarg;
    size_t argvlen, alen;
    boolean_t in_arg;
    int argc = 0;
    int error = 0, count = 0;
    proc_t *p = ttoproc(curthread);
    klwp_t *lwp = ttolwp(curthread);
    int brand_action;

    if (args == NULL)
        args = "";

    alen = strlen(initpath) + 1 + strlen(args) + 1;
    scratchargs = kmem_alloc(alen, KM_SLEEP);
    (void) snprintf(scratchargs, alen, "%s %s", initpath, args);

    /*
     * We do a quick two state parse of the string to sort out how big
     * argc should be.
     */
    in_arg = B_FALSE;
    for (i = 0; i < strlen(scratchargs); i++) {
        if (scratchargs[i] == ' ' || scratchargs[i] == '\0') {
            if (in_arg) {
                in_arg = B_FALSE;
                argc++;
            }
        } else {
            in_arg = B_TRUE;
        }
    }
    argvlen = sizeof (caddr32_t) * (argc + 1);
    argv = kmem_zalloc(argvlen, KM_SLEEP);

    /*
     * We pull off a bit of a hack here.  We work our way through the
     * args string, putting nulls at the ends of space delimited tokens
     * (boot args don't support quoting at this time).  Then we just
     * copy the whole mess to userland in one go.  In other words, we
     * transform this: "init -s -r\0" into this on the stack:
     *
     *  -0x00 \0
     *  -0x01 r
     *  -0x02 -  <--------.
     *  -0x03 \0          |
     *  -0x04 s           |
     *  -0x05 -  <------. |
     *  -0x06 \0        | |
     *  -0x07 t         | |
     *  -0x08 i         | |
     *  -0x09 n         | |
     *  -0x0a i  <---.  | |
     *  -0x10 NULL  |  | |    (argv[3])
     *  -0x14  -----|--|-'    (argv[2])
     *  -0x18  -----|--'      (argv[1])
     *  -0x1c  -----'         (argv[0])
     *
     * Since we know the value of ucp at the beginning of this process,
     * we can trivially compute the argv[] array which we also need to
     * place in userland: argv[i] = ucp - sarg(i), where ucp is the
     * stack ptr, and sarg is the string index of the start of the
     * argument.
     */
    ucp = (caddr32_t)(uintptr_t)p->p_usrstack;

    argc = 0;
    in_arg = B_FALSE;
    sarg = 0;

    for (i = 0; i < alen; i++) {
        if (scratchargs[i] == ' ' || scratchargs[i] == '\0') {
            if (in_arg == B_TRUE) {
                in_arg = B_FALSE;
                scratchargs[i] = '\0';
                argv[argc++] = ucp - (alen - sarg);
            }
        } else if (in_arg == B_FALSE) {
            in_arg = B_TRUE;
            sarg = i;
        }
    }
    ucp -= alen;
    error |= copyout(scratchargs, (caddr_t)(uintptr_t)ucp, alen);

    uap = (caddr32_t *)P2ALIGN((uintptr_t)ucp, sizeof (caddr32_t));
    uap--;              /* advance to be below the word we're in */
    uap -= (argc + 1);  /* advance argc words down, plus one for NULL */
    error |= copyout(argv, uap, argvlen);

    if (error != 0) {
        zcmn_err(p->p_zone->zone_id, CE_WARN,
            "Could not construct stack for init.\n");
        kmem_free(argv, argvlen);
        kmem_free(scratchargs, alen);
        return (EFAULT);
    }

    exec_fnamep = argv[0];
    kmem_free(argv, argvlen);
    kmem_free(scratchargs, alen);

    /*
     * Point at the arguments.
     */
    lwp->lwp_ap = lwp->lwp_arg;
    lwp->lwp_arg[0] = (uintptr_t)exec_fnamep;
    lwp->lwp_arg[1] = (uintptr_t)uap;
    lwp->lwp_arg[2] = NULL;

    curthread->t_post_sys = 1;
    curthread->t_sysnum = SYS_execve;

    /*
     * If we are executing init from zsched, we may have inherited its
     * parent process's signal mask.  Clear it now so that we behave in
     * the same way as when started from the global zone.
     */
    sigemptyset(&curthread->t_hold);

    brand_action = ZONE_IS_BRANDED(p->p_zone) ? EBA_BRAND : EBA_NONE;
again:
    error = exec_common((const char *)(uintptr_t)exec_fnamep,
        (const char **)(uintptr_t)uap, NULL, brand_action);

    /*
     * Normally we would just set lwp_argsaved and t_post_sys and
     * let post_syscall reset lwp_ap for us.  Unfortunately,
     * exec_init isn't always called from a system call.  Instead
     * of making a mess of trap_cleanup, we just reset the args
     * pointer here.
     */
    reset_syscall_args();

    switch (error) {
    case 0:
        return (0);

    case ENOENT:
        zcmn_err(p->p_zone->zone_id, CE_WARN,
            "exec(%s) failed (file not found).\n", initpath);
        return (ENOENT);

    case EAGAIN:
    case EINTR:
        ++count;
        if (count < 5) {
            zcmn_err(p->p_zone->zone_id, CE_WARN,
                "exec(%s) failed with errno %d.  Retrying...\n",
                initpath, error);
            goto again;
        }
    }

    zcmn_err(p->p_zone->zone_id, CE_WARN,
        "exec(%s) failed with errno %d.", initpath, error);
    return (error);
}
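/*
 * Illustrative sketch (not part of the original source): the two-state parse
 * above just counts transitions from "outside an argument" to "inside one",
 * treating the terminating '\0' as a delimiter.  The same scanner in
 * isolation, as a self-contained helper:
 */
#include <string.h>

static int
count_args(const char *s)
{
    int argc = 0, in_arg = 0;
    size_t i, n = strlen(s);

    for (i = 0; i <= n; i++) {      /* include the trailing '\0' */
        if (s[i] == ' ' || s[i] == '\0') {
            if (in_arg) {           /* leaving an argument */
                in_arg = 0;
                argc++;
            }
        } else {
            in_arg = 1;
        }
    }
    return argc;    /* count_args("init -s -r") == 3 */
}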
/*ARGSUSED*/
int
zfsctl_snapdir_rename(struct inode *sdip, char *sname,
    struct inode *tdip, char *tname, cred_t *cr, int flags)
{
    zfs_sb_t *zsb = ITOZSB(sdip);
    zfs_snapentry_t search, *sep;
    avl_index_t where;
    char *to, *from, *real;
    int error;

    ZFS_ENTER(zsb);

    to = kmem_alloc(MAXNAMELEN, KM_SLEEP);
    from = kmem_alloc(MAXNAMELEN, KM_SLEEP);
    real = kmem_alloc(MAXNAMELEN, KM_SLEEP);

    if (zsb->z_case == ZFS_CASE_INSENSITIVE) {
        error = dmu_snapshot_realname(zsb->z_os, sname, real,
            MAXNAMELEN, NULL);
        if (error == 0) {
            sname = real;
        } else if (error != ENOTSUP) {
            goto out;
        }
    }

    error = zfsctl_snapshot_zname(sdip, sname, MAXNAMELEN, from);
    if (!error)
        error = zfsctl_snapshot_zname(tdip, tname, MAXNAMELEN, to);
    if (!error)
        error = zfs_secpolicy_rename_perms(from, to, cr);
    if (error)
        goto out;

    /*
     * Cannot move snapshots out of the snapdir.
     */
    if (sdip != tdip) {
        error = EINVAL;
        goto out;
    }

    /*
     * No-op when names are identical.
     */
    if (strcmp(sname, tname) == 0) {
        error = 0;
        goto out;
    }

    mutex_enter(&zsb->z_ctldir_lock);

    error = dmu_objset_rename(from, to, B_FALSE);
    if (error)
        goto out_unlock;

    search.se_name = (char *)sname;
    sep = avl_find(&zsb->z_ctldir_snaps, &search, &where);
    if (sep)
        zfsctl_rename_snap(zsb, sep, tname);

out_unlock:
    mutex_exit(&zsb->z_ctldir_lock);
out:
    kmem_free(from, MAXNAMELEN);
    kmem_free(to, MAXNAMELEN);
    kmem_free(real, MAXNAMELEN);

    ZFS_EXIT(zsb);

    return (error);
}
/* ARGSUSED */
static int
sha2_create_ctx_template(crypto_provider_handle_t provider,
    crypto_mechanism_t *mechanism, crypto_key_t *key,
    crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size,
    crypto_req_handle_t req)
{
    sha2_hmac_ctx_t *sha2_hmac_ctx_tmpl;
    uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
    uint32_t sha_digest_len, sha_hmac_block_size;

    /*
     * Set the digest length and block size to values appropriate to the
     * mechanism
     */
    switch (mechanism->cm_type) {
    case SHA256_HMAC_MECH_INFO_TYPE:
    case SHA256_HMAC_GEN_MECH_INFO_TYPE:
        sha_digest_len = SHA256_DIGEST_LENGTH;
        sha_hmac_block_size = SHA256_HMAC_BLOCK_SIZE;
        break;
    case SHA384_HMAC_MECH_INFO_TYPE:
    case SHA384_HMAC_GEN_MECH_INFO_TYPE:
    case SHA512_HMAC_MECH_INFO_TYPE:
    case SHA512_HMAC_GEN_MECH_INFO_TYPE:
        sha_digest_len = SHA512_DIGEST_LENGTH;
        sha_hmac_block_size = SHA512_HMAC_BLOCK_SIZE;
        break;
    default:
        return (CRYPTO_MECHANISM_INVALID);
    }

    /* Add support for key by attributes (RFE 4706552) */
    if (key->ck_format != CRYPTO_KEY_RAW)
        return (CRYPTO_ARGUMENTS_BAD);

    /*
     * Allocate and initialize SHA2 context.
     */
    sha2_hmac_ctx_tmpl = kmem_alloc(sizeof (sha2_hmac_ctx_t),
        crypto_kmflag(req));
    if (sha2_hmac_ctx_tmpl == NULL)
        return (CRYPTO_HOST_MEMORY);

    sha2_hmac_ctx_tmpl->hc_mech_type = mechanism->cm_type;

    if (keylen_in_bytes > sha_hmac_block_size) {
        uchar_t digested_key[SHA512_DIGEST_LENGTH];

        /*
         * Hash the passed-in key to get a smaller key.
         * The inner context is used since it hasn't been
         * initialized yet.
         */
        PROV_SHA2_DIGEST_KEY(mechanism->cm_type / 3,
            &sha2_hmac_ctx_tmpl->hc_icontext,
            key->ck_data, keylen_in_bytes, digested_key);
        sha2_mac_init_ctx(sha2_hmac_ctx_tmpl, digested_key,
            sha_digest_len);
    } else {
        sha2_mac_init_ctx(sha2_hmac_ctx_tmpl, key->ck_data,
            keylen_in_bytes);
    }

    *ctx_template = (crypto_spi_ctx_template_t)sha2_hmac_ctx_tmpl;
    *ctx_template_size = sizeof (sha2_hmac_ctx_t);

    return (CRYPTO_SUCCESS);
}
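/*
 * Illustrative sketch (not part of the original source): the template path
 * above and the corresponding sha2_mac_init()/sha1_mac_init() paths all apply
 * the standard HMAC key rule from RFC 2104 -- a key longer than the hash
 * block size is first digested down to digest length, while shorter keys are
 * used as-is (the init routine zero-pads them).  In outline, with a
 * hypothetical one-shot digest helper standing in for PROV_SHA2_DIGEST_KEY:
 */
#include <stddef.h>
#include <string.h>

#define BLOCK_SIZE  64  /* e.g. the SHA-256 HMAC block size */
#define DIGEST_LEN  32  /* e.g. the SHA-256 digest length */

extern void one_shot_digest(const void *msg, size_t len,
    unsigned char out[DIGEST_LEN]);     /* hypothetical helper */

static void
hmac_effective_key(const void *key, size_t keylen,
    unsigned char out[BLOCK_SIZE], size_t *outlen)
{
    if (keylen > BLOCK_SIZE) {
        /* RFC 2104: use K' = H(K) when the key exceeds the block. */
        one_shot_digest(key, keylen, out);
        *outlen = DIGEST_LEN;
    } else {
        memcpy(out, key, keylen);
        *outlen = keylen;
    }
}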
int
ELFNAME2(linux,copyargs)(struct lwp *l, struct exec_package *pack,
    struct ps_strings *arginfo, char **stackp, void *argp)
{
    struct linux_extra_stack_data64 *esdp, esd;
    struct elf_args *ap;
    struct vattr *vap;
    Elf_Ehdr *eh;
    Elf_Phdr *ph;
    u_long phsize;
    Elf_Addr phdr = 0;
    int error;
    int i;

    if ((error = copyargs(l, pack, arginfo, stackp, argp)) != 0)
        return error;

    /*
     * Push extra arguments on the stack needed by dynamically
     * linked binaries and static binaries as well.
     */
    memset(&esd, 0, sizeof(esd));
    esdp = (struct linux_extra_stack_data64 *)(*stackp);
    ap = (struct elf_args *)pack->ep_emul_arg;
    vap = pack->ep_vap;
    eh = (Elf_Ehdr *)pack->ep_hdr;

    /*
     * We forgot this, so we need to reload it now. XXX keep track of it?
     */
    if (ap == NULL) {
        phsize = eh->e_phnum * sizeof(Elf_Phdr);
        ph = (Elf_Phdr *)kmem_alloc(phsize, KM_SLEEP);
        error = exec_read_from(l, pack->ep_vp, eh->e_phoff, ph, phsize);
        if (error == 0) {   /* scan the headers only if the read succeeded */
            for (i = 0; i < eh->e_phnum; i++) {
                if (ph[i].p_type == PT_PHDR) {
                    phdr = ph[i].p_vaddr;
                    break;
                }
            }
        }
        kmem_free(ph, phsize);
    }

    /*
     * The exec_package doesn't have a proc pointer and it's not
     * exactly trivial to add one since the credentials are
     * changing. XXX Linux uses curlwp's credentials.
     * Why can't we use them too?
     */

    i = 0;
    esd.ai[i].a_type = LINUX_AT_HWCAP;
    esd.ai[i++].a_v = rcr4();

    esd.ai[i].a_type = AT_PAGESZ;
    esd.ai[i++].a_v = PAGE_SIZE;

    esd.ai[i].a_type = LINUX_AT_CLKTCK;
    esd.ai[i++].a_v = hz;

    esd.ai[i].a_type = AT_PHDR;
    esd.ai[i++].a_v = (ap ? ap->arg_phaddr : phdr);

    esd.ai[i].a_type = AT_PHENT;
    esd.ai[i++].a_v = (ap ? ap->arg_phentsize : eh->e_phentsize);

    esd.ai[i].a_type = AT_PHNUM;
    esd.ai[i++].a_v = (ap ? ap->arg_phnum : eh->e_phnum);

    esd.ai[i].a_type = AT_BASE;
    esd.ai[i++].a_v = (ap ? ap->arg_interp : 0);

    esd.ai[i].a_type = AT_FLAGS;
    esd.ai[i++].a_v = 0;

    esd.ai[i].a_type = AT_ENTRY;
    esd.ai[i++].a_v = (ap ? ap->arg_entry : eh->e_entry);

    esd.ai[i].a_type = LINUX_AT_EGID;
    esd.ai[i++].a_v = ((vap->va_mode & S_ISGID) ?
        vap->va_gid : kauth_cred_getegid(l->l_cred));

    esd.ai[i].a_type = LINUX_AT_GID;
    esd.ai[i++].a_v = kauth_cred_getgid(l->l_cred);

    esd.ai[i].a_type = LINUX_AT_EUID;
    esd.ai[i++].a_v = ((vap->va_mode & S_ISUID) ?
        vap->va_uid : kauth_cred_geteuid(l->l_cred));

    esd.ai[i].a_type = LINUX_AT_UID;
    esd.ai[i++].a_v = kauth_cred_getuid(l->l_cred);

    esd.ai[i].a_type = LINUX_AT_SECURE;
    esd.ai[i++].a_v = 0;

    esd.ai[i].a_type = LINUX_AT_PLATFORM;
    esd.ai[i++].a_v = (Elf_Addr)&esdp->hw_platform[0];

    esd.ai[i].a_type = LINUX_AT_RANDOM;
    esd.ai[i++].a_v = (Elf_Addr)&esdp->randbytes[0];
    esd.randbytes[0] = cprng_strong32();
    esd.randbytes[1] = cprng_strong32();
    esd.randbytes[2] = cprng_strong32();
    esd.randbytes[3] = cprng_strong32();

    esd.ai[i].a_type = AT_NULL;
    esd.ai[i++].a_v = 0;

    KASSERT(i == LINUX_ELF_AUX_ENTRIES);

    strcpy(esd.hw_platform, LINUX_PLATFORM);

    exec_free_emul_arg(pack);

    /*
     * Copy out the ELF auxiliary table and hw platform name
     */
    if ((error = copyout(&esd, esdp, sizeof(esd))) != 0)
        return error;
    *stackp += sizeof(esd);

    return 0;
}
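/*
 * Illustrative sketch (not part of the original source): the esd.ai[] array
 * built above is an ELF auxiliary vector -- a flat array of {type, value}
 * pairs terminated by AT_NULL that the dynamic linker walks at startup.  A
 * reduced, hypothetical layout and the lookup a consumer would perform;
 * aux_entry_t is a simplification, not the kernel's actual type:
 */
#include <stdint.h>

typedef struct {
    uint64_t a_type;    /* AT_PHDR, AT_ENTRY, AT_NULL, ... */
    uint64_t a_v;       /* the corresponding value */
} aux_entry_t;

/* Walk the vector until the AT_NULL (type 0) terminator. */
static uint64_t
auxv_lookup(const aux_entry_t *av, uint64_t type)
{
    for (; av->a_type != 0; av++) {
        if (av->a_type == type)
            return av->a_v;
    }
    return 0;
}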
static int
sha2_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
    crypto_key_t *key, crypto_spi_ctx_template_t ctx_template,
    crypto_req_handle_t req)
{
    int ret = CRYPTO_SUCCESS;
    uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
    uint_t sha_digest_len, sha_hmac_block_size;

    /*
     * Set the digest length and block size to values appropriate to the
     * mechanism
     */
    switch (mechanism->cm_type) {
    case SHA256_HMAC_MECH_INFO_TYPE:
    case SHA256_HMAC_GEN_MECH_INFO_TYPE:
        sha_digest_len = SHA256_DIGEST_LENGTH;
        sha_hmac_block_size = SHA256_HMAC_BLOCK_SIZE;
        break;
    case SHA384_HMAC_MECH_INFO_TYPE:
    case SHA384_HMAC_GEN_MECH_INFO_TYPE:
    case SHA512_HMAC_MECH_INFO_TYPE:
    case SHA512_HMAC_GEN_MECH_INFO_TYPE:
        sha_digest_len = SHA512_DIGEST_LENGTH;
        sha_hmac_block_size = SHA512_HMAC_BLOCK_SIZE;
        break;
    default:
        return (CRYPTO_MECHANISM_INVALID);
    }

    if (key->ck_format != CRYPTO_KEY_RAW)
        return (CRYPTO_ARGUMENTS_BAD);

    ctx->cc_provider_private = kmem_alloc(sizeof (sha2_hmac_ctx_t),
        crypto_kmflag(req));
    if (ctx->cc_provider_private == NULL)
        return (CRYPTO_HOST_MEMORY);

    PROV_SHA2_HMAC_CTX(ctx)->hc_mech_type = mechanism->cm_type;
    if (ctx_template != NULL) {
        /* reuse context template */
        bcopy(ctx_template, PROV_SHA2_HMAC_CTX(ctx),
            sizeof (sha2_hmac_ctx_t));
    } else {
        /* no context template, compute context */
        if (keylen_in_bytes > sha_hmac_block_size) {
            uchar_t digested_key[SHA512_DIGEST_LENGTH];
            sha2_hmac_ctx_t *hmac_ctx = ctx->cc_provider_private;

            /*
             * Hash the passed-in key to get a smaller key.
             * The inner context is used since it hasn't been
             * initialized yet.
             */
            PROV_SHA2_DIGEST_KEY(mechanism->cm_type / 3,
                &hmac_ctx->hc_icontext,
                key->ck_data, keylen_in_bytes, digested_key);
            sha2_mac_init_ctx(PROV_SHA2_HMAC_CTX(ctx),
                digested_key, sha_digest_len);
        } else {
            sha2_mac_init_ctx(PROV_SHA2_HMAC_CTX(ctx),
                key->ck_data, keylen_in_bytes);
        }
    }

    /*
     * Get the mechanism parameters, if applicable.
     */
    if (mechanism->cm_type % 3 == 2) {
        if (mechanism->cm_param == NULL ||
            mechanism->cm_param_len != sizeof (ulong_t))
            ret = CRYPTO_MECHANISM_PARAM_INVALID;
        PROV_SHA2_GET_DIGEST_LEN(mechanism,
            PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len);
        if (PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len > sha_digest_len)
            ret = CRYPTO_MECHANISM_PARAM_INVALID;
    }

    if (ret != CRYPTO_SUCCESS) {
        bzero(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t));
        kmem_free(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t));
        ctx->cc_provider_private = NULL;
    }

    return (ret);
}
/* Construct ECGroup from hex parameters and name, if any. Called by
 * ECGroup_fromHex and ECGroup_fromName. */
ECGroup *
ecgroup_fromNameAndHex(const ECCurveName name,
    const ECCurveParams *params, int kmflag)
{
    mp_int irr, curvea, curveb, genx, geny, order;
    int bits;
    ECGroup *group = NULL;
    mp_err res = MP_OKAY;

    /* initialize values */
    MP_DIGITS(&irr) = 0;
    MP_DIGITS(&curvea) = 0;
    MP_DIGITS(&curveb) = 0;
    MP_DIGITS(&genx) = 0;
    MP_DIGITS(&geny) = 0;
    MP_DIGITS(&order) = 0;
    MP_CHECKOK(mp_init(&irr, kmflag));
    MP_CHECKOK(mp_init(&curvea, kmflag));
    MP_CHECKOK(mp_init(&curveb, kmflag));
    MP_CHECKOK(mp_init(&genx, kmflag));
    MP_CHECKOK(mp_init(&geny, kmflag));
    MP_CHECKOK(mp_init(&order, kmflag));
    MP_CHECKOK(mp_read_radix(&irr, params->irr, 16));
    MP_CHECKOK(mp_read_radix(&curvea, params->curvea, 16));
    MP_CHECKOK(mp_read_radix(&curveb, params->curveb, 16));
    MP_CHECKOK(mp_read_radix(&genx, params->genx, 16));
    MP_CHECKOK(mp_read_radix(&geny, params->geny, 16));
    MP_CHECKOK(mp_read_radix(&order, params->order, 16));

    /* determine number of bits */
    bits = mpl_significant_bits(&irr) - 1;
    if (bits < MP_OKAY) {
        res = bits;
        goto CLEANUP;
    }

    /* determine which optimizations (if any) to use */
    if (params->field == ECField_GFp) {
#ifdef NSS_ECC_MORE_THAN_SUITE_B
        switch (name) {
#ifdef ECL_USE_FP
        case ECCurve_SECG_PRIME_160R1:
            group = ECGroup_consGFp(&irr, &curvea, &curveb,
                &genx, &geny, &order, params->cofactor);
            if (group == NULL) {
                res = MP_UNDEF;
                goto CLEANUP;
            }
            MP_CHECKOK(ec_group_set_secp160r1_fp(group));
            break;
#endif
        case ECCurve_SECG_PRIME_192R1:
#ifdef ECL_USE_FP
            group = ECGroup_consGFp(&irr, &curvea, &curveb,
                &genx, &geny, &order, params->cofactor);
            if (group == NULL) {
                res = MP_UNDEF;
                goto CLEANUP;
            }
            MP_CHECKOK(ec_group_set_nistp192_fp(group));
#else
            group = ECGroup_consGFp(&irr, &curvea, &curveb,
                &genx, &geny, &order, params->cofactor);
            if (group == NULL) {
                res = MP_UNDEF;
                goto CLEANUP;
            }
            MP_CHECKOK(ec_group_set_gfp192(group, name));
#endif
            break;
        case ECCurve_SECG_PRIME_224R1:
#ifdef ECL_USE_FP
            group = ECGroup_consGFp(&irr, &curvea, &curveb,
                &genx, &geny, &order, params->cofactor);
            if (group == NULL) {
                res = MP_UNDEF;
                goto CLEANUP;
            }
            MP_CHECKOK(ec_group_set_nistp224_fp(group));
#else
            group = ECGroup_consGFp(&irr, &curvea, &curveb,
                &genx, &geny, &order, params->cofactor);
            if (group == NULL) {
                res = MP_UNDEF;
                goto CLEANUP;
            }
            MP_CHECKOK(ec_group_set_gfp224(group, name));
#endif
            break;
        case ECCurve_SECG_PRIME_256R1:
            group = ECGroup_consGFp(&irr, &curvea, &curveb,
                &genx, &geny, &order, params->cofactor);
            if (group == NULL) {
                res = MP_UNDEF;
                goto CLEANUP;
            }
            MP_CHECKOK(ec_group_set_gfp256(group, name));
            break;
        case ECCurve_SECG_PRIME_521R1:
            group = ECGroup_consGFp(&irr, &curvea, &curveb,
                &genx, &geny, &order, params->cofactor);
            if (group == NULL) {
                res = MP_UNDEF;
                goto CLEANUP;
            }
            MP_CHECKOK(ec_group_set_gfp521(group, name));
            break;
        default:
            /* use generic arithmetic */
#endif
            group = ECGroup_consGFp_mont(&irr, &curvea, &curveb,
                &genx, &geny, &order, params->cofactor);
            if (group == NULL) {
                res = MP_UNDEF;
                goto CLEANUP;
            }
#ifdef NSS_ECC_MORE_THAN_SUITE_B
        }
    } else if (params->field == ECField_GF2m) {
        group = ECGroup_consGF2m(&irr, NULL, &curvea, &curveb,
            &genx, &geny, &order, params->cofactor);
        if (group == NULL) {
            res = MP_UNDEF;
            goto CLEANUP;
        }
        if ((name == ECCurve_NIST_K163) ||
            (name == ECCurve_NIST_B163) ||
            (name == ECCurve_SECG_CHAR2_163R1)) {
            MP_CHECKOK(ec_group_set_gf2m163(group, name));
        } else if ((name == ECCurve_SECG_CHAR2_193R1) ||
            (name == ECCurve_SECG_CHAR2_193R2)) {
            MP_CHECKOK(ec_group_set_gf2m193(group, name));
        } else if ((name == ECCurve_NIST_K233) ||
            (name == ECCurve_NIST_B233)) {
            MP_CHECKOK(ec_group_set_gf2m233(group, name));
        }
#endif
    } else {
        res = MP_UNDEF;
        goto CLEANUP;
    }

    /* set name, if any */
    if ((group != NULL) && (params->text != NULL)) {
#ifdef _KERNEL
        int n = strlen(params->text) + 1;

        group->text = kmem_alloc(n, kmflag);
        if (group->text == NULL) {
            res = MP_MEM;
            goto CLEANUP;
        }
        bcopy(params->text, group->text, n);
        group->text_len = n;
#else
        group->text = strdup(params->text);
        if (group->text == NULL) {
            res = MP_MEM;
        }
#endif
    }

CLEANUP:
    mp_clear(&irr);
    mp_clear(&curvea);
    mp_clear(&curveb);
    mp_clear(&genx);
    mp_clear(&geny);
    mp_clear(&order);
    if (res != MP_OKAY) {
        ECGroup_free(group);
        return NULL;
    }
    return group;
}
/*
 * rfs4_find_dr:
 *
 * Search for an entry in the duplicate request cache by
 * calculating the hash index based on the XID, and examining
 * the entries in the hash bucket. If we find a match, return.
 * Once we have searched the bucket we call rfs4_alloc_dr() to
 * allocate a new entry, or reuse one that is available.
 */
int
rfs4_find_dr(struct svc_req *req, rfs4_drc_t *drc, rfs4_dupreq_t **dup)
{
    uint32_t        the_xid;
    list_t          *dr_bkt;
    rfs4_dupreq_t   *drp;
    int             bktdex;

    /*
     * Get the XID, calculate the bucket and search to
     * see if we need to replay from the cache.
     */
    the_xid = req->rq_xprt->xp_xid;
    bktdex = the_xid % drc->dr_hash;

    dr_bkt = (list_t *)
        &(drc->dr_buckets[(the_xid % drc->dr_hash)]);

    DTRACE_PROBE3(nfss__i__drc_bktdex,
        int, bktdex,
        uint32_t, the_xid,
        list_t *, dr_bkt);

    *dup = NULL;

    mutex_enter(&drc->lock);
    /*
     * Search the bucket for a matching xid and address.
     */
    for (drp = list_head(dr_bkt); drp != NULL;
        drp = list_next(dr_bkt, drp)) {

        if (drp->dr_xid == the_xid &&
            drp->dr_addr.len == req->rq_xprt->xp_rtaddr.len &&
            bcmp((caddr_t)drp->dr_addr.buf,
            (caddr_t)req->rq_xprt->xp_rtaddr.buf,
            drp->dr_addr.len) == 0) {

            /*
             * Found a match so REPLAY the Reply
             */
            if (drp->dr_state == NFS4_DUP_REPLAY) {
                rfs4_dr_chstate(drp, NFS4_DUP_INUSE);
                mutex_exit(&drc->lock);
                *dup = drp;
                DTRACE_PROBE1(nfss__i__drc_replay,
                    rfs4_dupreq_t *, drp);
                return (NFS4_DUP_REPLAY);
            }

            /*
             * This entry must be in transition, so return
             * the 'pending' status.
             */
            mutex_exit(&drc->lock);
            return (NFS4_DUP_PENDING);
        }
    }

    drp = rfs4_alloc_dr(drc);
    mutex_exit(&drc->lock);

    /*
     * The DRC is full and all entries are in use. Upper function
     * should error out this request and force the client to
     * retransmit -- effectively this is a resource issue. NFSD
     * threads tied up with native File System, or the cache size
     * is too small for the server load.
     */
    if (drp == NULL)
        return (NFS4_DUP_ERROR);

    /*
     * Init the state to NEW.
     */
    drp->dr_state = NFS4_DUP_NEW;

    /*
     * If needed, resize the address buffer
     */
    if (drp->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) {
        if (drp->dr_addr.buf != NULL)
            kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen);
        drp->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len;
        drp->dr_addr.buf = kmem_alloc(drp->dr_addr.maxlen, KM_NOSLEEP);
        if (drp->dr_addr.buf == NULL) {
            /*
             * If the malloc fails, mark the entry
             * as free and put on the tail.
             */
            drp->dr_addr.maxlen = 0;
            drp->dr_state = NFS4_DUP_FREE;
            mutex_enter(&drc->lock);
            list_insert_tail(&(drc->dr_cache), drp);
            mutex_exit(&drc->lock);
            return (NFS4_DUP_ERROR);
        }
    }

    /*
     * Copy the address.
     */
    drp->dr_addr.len = req->rq_xprt->xp_rtaddr.len;

    bcopy((caddr_t)req->rq_xprt->xp_rtaddr.buf,
        (caddr_t)drp->dr_addr.buf,
        drp->dr_addr.len);

    drp->dr_xid = the_xid;
    drp->dr_bkt = dr_bkt;

    /*
     * Insert at the head of the bucket and
     * the drc lists..
     */
    mutex_enter(&drc->lock);
    list_insert_head(&drc->dr_cache, drp);
    list_insert_head(dr_bkt, drp);
    mutex_exit(&drc->lock);

    *dup = drp;

    return (NFS4_DUP_NEW);
}
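/*
 * Illustrative sketch (not part of the original source): the duplicate
 * request check above keys the cache on the RPC XID plus the caller's
 * transport address, since XIDs are only unique per client.  The match test
 * in isolation, with a hypothetical key type standing in for the fields of
 * rfs4_dupreq_t and the svc_req:
 */
#include <string.h>

struct dr_key {                 /* hypothetical */
    unsigned int    xid;        /* RPC transaction id */
    unsigned int    addrlen;    /* transport address length */
    const void      *addr;      /* transport address bytes */
};

static int
dr_match(const struct dr_key *cached, const struct dr_key *req)
{
    return cached->xid == req->xid &&
        cached->addrlen == req->addrlen &&
        memcmp(cached->addr, req->addr, req->addrlen) == 0;
}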
/*
 * Shared implementation to inject a packet to or from an interface
 * Return value:
 *    0: successful
 *   -1: memory allocation failed
 *    1: other errors
 */
static int
ip_inject_impl(inject_t style, net_inject_t *packet, boolean_t isv6,
    ip_stack_t *ipst)
{
    ddi_taskq_t *tq = NULL;
    void (*func)(void *);
    injection_t *inject;
    mblk_t *mp;

    ASSERT(packet != NULL);
    ASSERT(packet->ni_packet != NULL);
    ASSERT(packet->ni_packet->b_datap->db_type == M_DATA);

    switch (style) {
    case NI_QUEUE_IN:
        inject = kmem_alloc(sizeof (*inject), KM_NOSLEEP);
        if (inject == NULL)
            return (-1);
        inject->inj_data = *packet;
        inject->inj_isv6 = isv6;
        /*
         * deliver up into the kernel, imitating its reception by a
         * network interface, add to list and schedule timeout
         */
        func = ip_ni_queue_in_func;
        tq = eventq_queue_in;
        break;

    case NI_QUEUE_OUT:
        inject = kmem_alloc(sizeof (*inject), KM_NOSLEEP);
        if (inject == NULL)
            return (-1);
        inject->inj_data = *packet;
        inject->inj_isv6 = isv6;
        /*
         * deliver out of the kernel, as if it were being sent via a
         * raw socket so that IPFilter will see it again, add to list
         * and schedule timeout
         */
        func = ip_ni_queue_out_func;
        tq = eventq_queue_out;
        break;

    case NI_DIRECT_OUT: {
        struct sockaddr *sock;

        mp = packet->ni_packet;
        sock = (struct sockaddr *)&packet->ni_addr;
        /*
         * ipfil_sendpkt was provided by surya to ease the
         * problems associated with sending out a packet.
         */
        switch (ipfil_sendpkt(sock, mp, packet->ni_physical,
            netstackid_to_zoneid(
            ipst->ips_netstack->netstack_stackid))) {
        case 0:
        case EINPROGRESS:
            return (0);
        case ECOMM:
        case ENONET:
            return (1);
        default:
            return (1);
        }
        /* NOTREACHED */
    }
    default:
        freemsg(packet->ni_packet);
        return (1);
    }

    ASSERT(tq != NULL);

    inject->inj_ptr = ipst;
    if (ddi_taskq_dispatch(tq, func, (void *)inject,
        DDI_SLEEP) == DDI_FAILURE) {
        ip2dbg(("ip_inject: ddi_taskq_dispatch failed\n"));
        freemsg(packet->ni_packet);
        return (1);
    }
    return (0);
}
static int
sha1_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
    crypto_key_t *key, crypto_spi_ctx_template_t ctx_template,
    crypto_req_handle_t req)
{
    int ret = CRYPTO_SUCCESS;
    uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);

    if (mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE &&
        mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE)
        return (CRYPTO_MECHANISM_INVALID);

    /* Add support for key by attributes (RFE 4706552) */
    if (key->ck_format != CRYPTO_KEY_RAW)
        return (CRYPTO_ARGUMENTS_BAD);

    ctx->cc_provider_private = kmem_alloc(sizeof (sha1_hmac_ctx_t),
        crypto_kmflag(req));
    if (ctx->cc_provider_private == NULL)
        return (CRYPTO_HOST_MEMORY);

    if (ctx_template != NULL) {
        /* reuse context template */
        bcopy(ctx_template, PROV_SHA1_HMAC_CTX(ctx),
            sizeof (sha1_hmac_ctx_t));
    } else {
        /* no context template, compute context */
        if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) {
            uchar_t digested_key[SHA1_DIGEST_LENGTH];
            sha1_hmac_ctx_t *hmac_ctx = ctx->cc_provider_private;

            /*
             * Hash the passed-in key to get a smaller key.
             * The inner context is used since it hasn't been
             * initialized yet.
             */
            PROV_SHA1_DIGEST_KEY(&hmac_ctx->hc_icontext,
                key->ck_data, keylen_in_bytes, digested_key);
            sha1_mac_init_ctx(PROV_SHA1_HMAC_CTX(ctx),
                digested_key, SHA1_DIGEST_LENGTH);
        } else {
            sha1_mac_init_ctx(PROV_SHA1_HMAC_CTX(ctx),
                key->ck_data, keylen_in_bytes);
        }
    }

    /*
     * Get the mechanism parameters, if applicable.
     */
    PROV_SHA1_HMAC_CTX(ctx)->hc_mech_type = mechanism->cm_type;
    if (mechanism->cm_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) {
        if (mechanism->cm_param == NULL ||
            mechanism->cm_param_len != sizeof (ulong_t))
            ret = CRYPTO_MECHANISM_PARAM_INVALID;
        PROV_SHA1_GET_DIGEST_LEN(mechanism,
            PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len);
        if (PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len >
            SHA1_DIGEST_LENGTH)
            ret = CRYPTO_MECHANISM_PARAM_INVALID;
    }

    if (ret != CRYPTO_SUCCESS) {
        bzero(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t));
        kmem_free(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t));
        ctx->cc_provider_private = NULL;
    }

    return (ret);
}
/* ARGSUSED */
int
mfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
{
    struct lwp *l = curlwp;
    struct vnode *devvp;
    struct mfs_args *args = data;
    struct ufsmount *ump;
    struct fs *fs;
    struct mfsnode *mfsp;
    struct proc *p;
    int flags, error = 0;

    if (*data_len < sizeof *args)
        return EINVAL;

    p = l->l_proc;
    if (mp->mnt_flag & MNT_GETARGS) {
        struct vnode *vp;

        ump = VFSTOUFS(mp);
        if (ump == NULL)
            return EIO;

        vp = ump->um_devvp;
        if (vp == NULL)
            return EIO;

        mfsp = VTOMFS(vp);
        if (mfsp == NULL)
            return EIO;

        args->fspec = NULL;
        args->base = mfsp->mfs_baseoff;
        args->size = mfsp->mfs_size;
        *data_len = sizeof *args;
        return 0;
    }

    /*
     * XXX turn off async to avoid hangs when writing lots of data.
     * the problem is that MFS needs to allocate pages to clean pages,
     * so if we wait until the last minute to clean pages then there
     * may not be any pages available to do the cleaning.
     * ... and since the default partially-synchronous mode turns out
     * to not be sufficient under heavy load, make it full synchronous.
     */
    mp->mnt_flag &= ~MNT_ASYNC;
    mp->mnt_flag |= MNT_SYNCHRONOUS;

    /*
     * If updating, check whether changing from read-only to
     * read/write; if there is no device name, that's all we do.
     */
    if (mp->mnt_flag & MNT_UPDATE) {
        ump = VFSTOUFS(mp);
        fs = ump->um_fs;
        if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
            flags = WRITECLOSE;
            if (mp->mnt_flag & MNT_FORCE)
                flags |= FORCECLOSE;
            error = ffs_flushfiles(mp, flags, l);
            if (error)
                return (error);
        }
        if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR))
            fs->fs_ronly = 0;
        if (args->fspec == NULL)
            return EINVAL;
        return (0);
    }
    error = getnewvnode(VT_MFS, NULL, mfs_vnodeop_p, NULL, &devvp);
    if (error)
        return (error);
    devvp->v_vflag |= VV_MPSAFE;
    devvp->v_type = VBLK;
    spec_node_init(devvp, makedev(255, mfs_minor));
    mfs_minor++;
    mfsp = kmem_alloc(sizeof(*mfsp), KM_SLEEP);
    devvp->v_data = mfsp;
    mfsp->mfs_baseoff = args->base;
    mfsp->mfs_size = args->size;
    mfsp->mfs_vnode = devvp;
    mfsp->mfs_proc = p;
    mfsp->mfs_shutdown = 0;
    cv_init(&mfsp->mfs_cv, "mfsidl");
    mfsp->mfs_refcnt = 1;
    bufq_alloc(&mfsp->mfs_buflist, "fcfs", 0);
    if ((error = ffs_mountfs(devvp, mp, l)) != 0) {
        mfsp->mfs_shutdown = 1;
        vrele(devvp);
        return (error);
    }
    ump = VFSTOUFS(mp);
    fs = ump->um_fs;
    error = set_statvfs_info(path, UIO_USERSPACE, args->fspec,
        UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);
    if (error)
        return error;
    (void)strncpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname,
        sizeof(fs->fs_fsmnt));
    fs->fs_fsmnt[sizeof(fs->fs_fsmnt) - 1] = '\0';
    /* XXX: cleanup on error */
    return 0;
}
static int
gen_create_minor_nodes(dev_info_t *devi, struct dstate *dstatep)
{
    int rval = DDI_SUCCESS;
    char *node_name;

    node_name = ddi_node_name(devi);

    if (strcmp(node_name, "disk_chan") == 0) {
        rval = gen_create_mn_disk_chan(devi);
    } else if (strcmp(node_name, "disk_wwn") == 0) {
        rval = gen_create_mn_disk_wwn(devi);
    } else if (strcmp(node_name, "disk_cdrom") == 0) {
        rval = gen_create_mn_disk_cdrom(devi);
    } else if (strcmp(node_name, "disk_fd") == 0) {
        rval = gen_create_mn_disk_fd(devi);
    } else if (strcmp(node_name, "cgtwenty") == 0) {
        rval = gen_create_display(devi);
    } else if (strcmp(node_name, "genzs") == 0) {
        rval = gen_create_serial(devi);
    } else if (strcmp(node_name, "net") == 0) {
        rval = gen_create_net(devi);
    } else {
        int instance = ddi_get_instance(devi);
        char *node_type;

        /*
         * Solaris may directly hang the node_type off the minor node
         * (without making a copy).  Since we free the node_type
         * property below we need to make a private copy to pass
         * to ddi_create_minor_node to avoid devinfo snapshot panics.
         * We store a pointer to our copy in dstate and free it in
         * gen_detach after the minor nodes have been deleted by
         * ddi_remove_minor_node.
         */
        if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi,
            DDI_PROP_DONTPASS, "node-type", &node_type) != 0) {
            cmn_err(CE_WARN, "couldn't get node-type\n");
            return (DDI_FAILURE);
        }
        if (node_type) {
            dstatep->node_type = kmem_alloc(
                strlen(node_type) + 1, KM_SLEEP);
            (void) strcpy(dstatep->node_type, node_type);
        }
        ddi_prop_free(node_type);

        /* the minor name is the same as the node name */
        if (ddi_create_minor_node(devi, node_name, S_IFCHR,
            (INST_TO_MINOR(instance)), dstatep->node_type, NULL) !=
            DDI_SUCCESS) {
            if (dstatep->node_type) {
                kmem_free(dstatep->node_type,
                    strlen(dstatep->node_type) + 1);
                dstatep->node_type = NULL;
            }
            return (DDI_FAILURE);
        }
        return (DDI_SUCCESS);
    }

    if (rval != DDI_SUCCESS) {
        ddi_prop_remove_all(devi);
        ddi_remove_minor_node(devi, NULL);
    }

    return (rval);
}