/*ARGSUSED*/
int
size_pse_array(pgcnt_t npg, int ncpu)
{
    size_t size;
    pgcnt_t pp_per_mb = (1024 * 1024) / PAGESIZE;

    size = MAX(128, MIN(npg / pp_per_mb, 2 * ncpu * ncpu));
    size += (1 << (highbit(size) - 1)) - 1;
    return (highbit(size) - 1);
}
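All of the kernel snippets collected here rely on the illumos/Solaris-style highbit(), which returns the 1-based position of the most significant set bit and 0 when no bit is set. A minimal portable sketch of that contract, under the assumption that the native ddi routine is not available (my_highbit is a hypothetical name used only for illustration):

/*
 * Hedged sketch, not the native kernel implementation: returns the
 * 1-based index of the highest set bit (my_highbit(1) == 1), or 0 when
 * the argument is 0, so 1 << (my_highbit(x) - 1) isolates the largest
 * power of two that is <= x for any x > 0.
 */
static int
my_highbit(unsigned long x)
{
    int bit = 0;

    while (x != 0) {
        bit++;
        x >>= 1;
    }
    return (bit);
}

Under that contract, size_pse_array() above effectively returns the ceiling of log2 of the clamped array size: adding (1 << (highbit(size) - 1)) - 1 pushes any non-power-of-two value past the next power of two before the final highbit().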
int
bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object)
{
    dmu_object_info_t doi;
    int err;

    err = dmu_object_info(mos, object, &doi);
    if (err)
        return (err);

    mutex_enter(&bpl->bpl_lock);

    ASSERT(bpl->bpl_dbuf == NULL);
    ASSERT(bpl->bpl_phys == NULL);
    ASSERT(bpl->bpl_cached_dbuf == NULL);
    ASSERT(bpl->bpl_queue == NULL);
    ASSERT(object != 0);
    ASSERT3U(doi.doi_type, ==, DMU_OT_BPLIST);
    ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPLIST_HDR);

    bpl->bpl_mos = mos;
    bpl->bpl_object = object;
    bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1);
    bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT;
    bpl->bpl_havecomp = (doi.doi_bonus_size == sizeof (bplist_phys_t));

    mutex_exit(&bpl->bpl_lock);
    return (0);
}
template <typename T>
std::ostringstream &AttributeBitmap::bin(T &value, std::ostringstream &o)
{
    /* Emit one character per bit, starting from the highest set bit of value. */
    for (T bit = highbit(value); bit; bit >>= 1) {
        o << ((value & bit) ? '1' : '0');
    }
    return o;
}
/*
 * Initialise the TPI support routines. Called from strinit().
 */
void
tpi_init()
{
    mutex_init(&tpi_provinfo_lock, NULL, MUTEX_DEFAULT, NULL);

    /*
     * Calculate the right shift for hashing a tpi_provinfo_t.
     */
    tpi_hashshift = highbit(sizeof (tpi_provinfo_t));
}
void
fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags)
{
    dmu_buf_t *db;
    zap_leaf_t *l;
    int i;
    zap_phys_t *zp;

    ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
    zap->zap_ismicro = FALSE;

    (void) dmu_buf_update_user(zap->zap_dbuf, zap, zap,
        &zap->zap_f.zap_phys, zap_evict);

    mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
    zap->zap_f.zap_block_shift = highbit(zap->zap_dbuf->db_size) - 1;

    zp = zap->zap_f.zap_phys;
    /*
     * explicitly zero it since it might be coming from an
     * initialized microzap
     */
    bzero(zap->zap_dbuf->db_data, zap->zap_dbuf->db_size);
    zp->zap_block_type = ZBT_HEADER;
    zp->zap_magic = ZAP_MAGIC;

    zp->zap_ptrtbl.zt_shift = ZAP_EMBEDDED_PTRTBL_SHIFT(zap);

    zp->zap_freeblk = 2;        /* block 1 will be the first leaf */
    zp->zap_num_leafs = 1;
    zp->zap_num_entries = 0;
    zp->zap_salt = zap->zap_salt;
    zp->zap_normflags = zap->zap_normflags;
    zp->zap_flags = flags;

    /* block 1 will be the first leaf */
    for (i = 0; i < (1<<zp->zap_ptrtbl.zt_shift); i++)
        ZAP_EMBEDDED_PTRTBL_ENT(zap, i) = 1;

    /*
     * set up block 1 - the first leaf
     */
    VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
        1<<FZAP_BLOCK_SHIFT(zap), FTAG, &db, DMU_READ_NO_PREFETCH));
    dmu_buf_will_dirty(db, tx);

    l = kmem_zalloc(sizeof (zap_leaf_t), KM_PUSHPAGE);
    l->l_dbuf = db;
    l->l_phys = db->db_data;

    zap_leaf_init(l, zp->zap_normflags != 0);

    kmem_free(l, sizeof (zap_leaf_t));
    dmu_buf_rele(db, FTAG);
}
void
zap_leaf_byteswap(zap_leaf_phys_t *buf, int size)
{
    int i;
    zap_leaf_t l;

    l.l_bs = highbit(size) - 1;
    l.l_phys = buf;

    buf->l_hdr.lh_block_type = BSWAP_64(buf->l_hdr.lh_block_type);
    buf->l_hdr.lh_prefix = BSWAP_64(buf->l_hdr.lh_prefix);
    buf->l_hdr.lh_magic = BSWAP_32(buf->l_hdr.lh_magic);
    buf->l_hdr.lh_nfree = BSWAP_16(buf->l_hdr.lh_nfree);
    buf->l_hdr.lh_nentries = BSWAP_16(buf->l_hdr.lh_nentries);
    buf->l_hdr.lh_prefix_len = BSWAP_16(buf->l_hdr.lh_prefix_len);
    buf->l_hdr.lh_freelist = BSWAP_16(buf->l_hdr.lh_freelist);

    for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++)
        buf->l_hash[i] = BSWAP_16(buf->l_hash[i]);

    for (i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) {
        zap_leaf_chunk_t *lc = &ZAP_LEAF_CHUNK(&l, i);
        struct zap_leaf_entry *le;

        switch (lc->l_free.lf_type) {
        case ZAP_CHUNK_ENTRY:
            le = &lc->l_entry;

            le->le_type = BSWAP_8(le->le_type);
            le->le_value_intlen = BSWAP_8(le->le_value_intlen);
            le->le_next = BSWAP_16(le->le_next);
            le->le_name_chunk = BSWAP_16(le->le_name_chunk);
            le->le_name_numints = BSWAP_16(le->le_name_numints);
            le->le_value_chunk = BSWAP_16(le->le_value_chunk);
            le->le_value_numints = BSWAP_16(le->le_value_numints);
            le->le_cd = BSWAP_32(le->le_cd);
            le->le_hash = BSWAP_64(le->le_hash);
            break;
        case ZAP_CHUNK_FREE:
            lc->l_free.lf_type = BSWAP_8(lc->l_free.lf_type);
            lc->l_free.lf_next = BSWAP_16(lc->l_free.lf_next);
            break;
        case ZAP_CHUNK_ARRAY:
            lc->l_array.la_type = BSWAP_8(lc->l_array.la_type);
            lc->l_array.la_next = BSWAP_16(lc->l_array.la_next);
            /* la_array doesn't need swapping */
            break;
        default:
            ASSERT(!"bad leaf type");
        }
    }
}
int xskin_getcolor( Display *d, int r, int g, int b ) {
  int r0,g0,b0;

  sc = DefaultScreen( d );
  cmap = DefaultColormap( d, sc );

  rshift = 15-highbit(xskin_vis->red_mask);
  gshift = 15-highbit(xskin_vis->green_mask);
  bshift = 15-highbit(xskin_vis->blue_mask);

  if ( iscolorinited==0 ) {
    iscolorinited=1;
    for ( r0=0 ; r0<8 ; r0++ ) {
      for ( g0=0 ; g0<8 ; g0++ ) {
        for ( b0=0 ; b0<8 ; b0++ ) {
          cols[r0][g0][b0]=-1;
        }
      }
    }
  }

  return GetColor( d, r, g, b );
}
void
zap_leaf_init(zap_leaf_t *l)
{
    int i;

    l->l_bs = highbit(l->l_dbuf->db_size) - 1;
    zap_memset(&l->l_phys->l_hdr, 0, sizeof (struct zap_leaf_header));
    zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l));
    for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
        ZAP_LEAF_CHUNK(l, i).l_free.lf_type = ZAP_CHUNK_FREE;
        ZAP_LEAF_CHUNK(l, i).l_free.lf_next = i+1;
    }
    ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)-1).l_free.lf_next = CHAIN_END;
    l->l_phys->l_hdr.lh_block_type = ZBT_LEAF;
    l->l_phys->l_hdr.lh_magic = ZAP_LEAF_MAGIC;
    l->l_phys->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l);
}
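Both ZAP routines above take the base-2 log of a buffer size that is already a power of two, so highbit(db_size) - 1 is simply the block shift. A small standalone sanity check of that identity, reusing the hypothetical my_highbit() sketch from earlier (illustration only, not ZFS code):

#include <assert.h>

/*
 * For power-of-two sizes, the shift recovered as my_highbit(size) - 1
 * reproduces the original size when shifted back, which is all that
 * zap_leaf_init() and fzap_upgrade() rely on.
 */
static void
check_block_shift(void)
{
    unsigned long size;

    for (size = 512; size <= 131072; size <<= 1) {
        int shift = my_highbit(size) - 1;

        assert((1UL << shift) == size);
    }
}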
/*
 * hermon_cfg_wqe_sizes()
 *    Context: Only called from attach() path context
 */
static void
hermon_cfg_wqe_sizes(hermon_state_t *state, hermon_cfg_profile_t *cp)
{
    uint_t max_size, log2;
    uint_t max_sgl, real_max_sgl;

    /*
     * Get the requested maximum number of SGL per WQE from the Hermon
     * patchable variable
     */
    max_sgl = hermon_wqe_max_sgl;

    /*
     * Use requested maximum number of SGL to calculate the max descriptor
     * size (while guaranteeing that the descriptor size is a power-of-2
     * cachelines).  We have to use the calculation for QP1 MLX transport
     * because the possibility that we might need to inline a GRH, along
     * with all the other headers and alignment restrictions, sets the
     * maximum for the number of SGLs that we can advertise support for.
     */
    max_size = (HERMON_QP_WQE_MLX_QP1_HDRS + (max_sgl << 4));
    log2 = highbit(max_size);
    if (ISP2(max_size)) {
        log2 = log2 - 1;
    }
    max_size = (1 << log2);

    max_size = min(max_size, state->hs_devlim.max_desc_sz_sq);

    /*
     * Then use the calculated max descriptor size to determine the "real"
     * maximum SGL (the number beyond which we would roll over to the next
     * power-of-2).
     */
    real_max_sgl = (max_size - HERMON_QP_WQE_MLX_QP1_HDRS) >> 4;

    /* Then save away this configuration information */
    cp->cp_wqe_max_sgl = max_sgl;
    cp->cp_wqe_real_max_sgl = real_max_sgl;

    /* SRQ SGL gets set to its own patchable variable value */
    cp->cp_srq_max_sgl = hermon_srq_max_sgl;
}
/*
 * tavor_srq_sgl_to_logwqesz()
 *    Context: Can be called from interrupt or base context.
 */
static void
tavor_srq_sgl_to_logwqesz(tavor_state_t *state, uint_t num_sgl,
    tavor_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl)
{
    uint_t max_size, log2, actual_sgl;

    TAVOR_TNF_ENTER(tavor_srq_sgl_to_logwqesz);

    switch (wq_type) {
    case TAVOR_QP_WQ_TYPE_RECVQ:
        /*
         * Use requested maximum SGL to calculate max descriptor size
         * (while guaranteeing that the descriptor size is a
         * power-of-2 cachelines).
         */
        max_size = (TAVOR_QP_WQE_MLX_RCV_HDRS + (num_sgl << 4));
        log2 = highbit(max_size);
        if ((max_size & (max_size - 1)) == 0) {
            log2 = log2 - 1;
        }

        /* Make sure descriptor is at least the minimum size */
        log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM);

        /* Calculate actual number of SGL (given WQE size) */
        actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_RCV_HDRS) >> 4;
        break;

    default:
        TAVOR_WARNING(state, "unexpected work queue type");
        TNF_PROBE_0(tavor_srq_sgl_to_logwqesz_inv_wqtype_fail,
            TAVOR_TNF_ERROR, "");
        break;
    }

    /* Fill in the return values */
    *logwqesz = log2;
    *max_sgl = min(state->ts_cfg_profile->cp_srq_max_sgl, actual_sgl);

    TAVOR_TNF_EXIT(tavor_qp_sgl_to_logwqesz);
}
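The Hermon and Tavor SGL-to-WQE-size routines above share one idiom: compute the raw descriptor size (fixed headers plus 16 bytes per SGL entry), then round it up to a power of two by taking highbit() and decrementing only when the size already is a power of two. A hedged standalone illustration of just that rounding step; the 64-byte header below is an assumed placeholder, not the real TAVOR_QP_WQE_MLX_RCV_HDRS value:

/*
 * Illustration only: round a descriptor made of a fixed header plus
 * num_sgl 16-byte entries up to a power-of-two size in bytes, using
 * the same highbit()-then-adjust pattern as the driver code above.
 */
static unsigned int
example_log_wqesz(unsigned int num_sgl)
{
    const unsigned int hdrs = 64;    /* assumed placeholder header size */
    unsigned int max_size = hdrs + (num_sgl << 4);
    unsigned int log2 = my_highbit(max_size);

    if ((max_size & (max_size - 1)) == 0)
        log2 = log2 - 1;             /* already a power of two */
    return (log2);                   /* WQE size is 1 << log2 bytes */
}

With those assumed numbers, four SGL entries give 64 + 64 = 128 bytes and stay at 128 (log2 = 7), while five entries give 144 bytes and round up to 256 (log2 = 8).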
static int
vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *ashift)
{
    struct vdev_disk *dvd;
    int error;

    /*
     * We must have a pathname, and it must be absolute.
     */
    if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
        vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
        return (EINVAL);
    }

    /*
     * Open the device if it's not currently open, otherwise just update
     * the physical size of the device.
     */
    if (vd->vdev_tsd == NULL) {
        dvd = vd->vdev_tsd = kmem_zalloc(sizeof (struct vdev_disk),
            KM_SLEEP);

        error = device_open(vd->vdev_path + 5, DO_RDWR, &dvd->device);
        if (error) {
            vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
            return (error);
        }
    } else {
        ASSERT(vd->vdev_reopening);
        dvd = vd->vdev_tsd;
    }

    /*
     * Determine the actual size of the device.
     */
    *max_psize = *psize = dvd->device->size;

    *ashift = highbit(MAX(DEV_BSIZE, SPA_MINBLOCKSIZE)) - 1;

    return (0);
}
int
nfs4_rnode_init(void)
{
    ulong_t nrnode4_max;
    int i;

    /*
     * Compute the size of the rnode4 hash table
     */
    if (nrnode <= 0)
        nrnode = ncsize;
    nrnode4_max =
        (ulong_t)((kmem_maxavail() >> 2) / sizeof (struct rnode4));
    if (nrnode > nrnode4_max || (nrnode == 0 && ncsize == 0)) {
        zcmn_err(GLOBAL_ZONEID, CE_NOTE,
            "setting nrnode to max value of %ld", nrnode4_max);
        nrnode = nrnode4_max;
    }
    rtable4size = 1 << highbit(nrnode / rnode4_hashlen);
    rtable4mask = rtable4size - 1;

    /*
     * Allocate and initialize the hash buckets
     */
    rtable4 = kmem_alloc(rtable4size * sizeof (*rtable4), KM_SLEEP);
    for (i = 0; i < rtable4size; i++) {
        rtable4[i].r_hashf = (rnode4_t *)(&rtable4[i]);
        rtable4[i].r_hashb = (rnode4_t *)(&rtable4[i]);
        rw_init(&rtable4[i].r_lock, NULL, RW_DEFAULT, NULL);
    }

    rnode4_cache = kmem_cache_create("rnode4_cache", sizeof (rnode4_t),
        0, NULL, NULL, nfs4_reclaim, NULL, NULL, 0);

    return (0);
}
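nfs4_rnode_init() sizes its hash table as 1 << highbit(nrnode / rnode4_hashlen), which always yields a power of two larger than the quotient so the mask rtable4size - 1 can stand in for a modulo when hashing. The same sizing arithmetic pulled out as a hedged sketch (hypothetical helper, not the NFS code):

/*
 * Illustration only: derive a power-of-two hash table size and mask
 * from an expected entry count and a target average chain length.
 */
static void
example_hash_sizing(unsigned long nentries, unsigned long chainlen,
    unsigned long *tblsize, unsigned long *tblmask)
{
    *tblsize = 1UL << my_highbit(nentries / chainlen);
    *tblmask = *tblsize - 1;    /* bucket = hash & *tblmask */
}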
static int vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, uint64_t *ashift) { spa_t *spa = vd->vdev_spa; vdev_disk_t *dvd = vd->vdev_tsd; vnode_t *devvp = NULLVP; vfs_context_t context = NULL; uint64_t blkcnt; uint32_t blksize; int fmode = 0; int error = 0; int isssd; /* * We must have a pathname, and it must be absolute. */ if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; return (SET_ERROR(EINVAL)); } /* * Reopen the device if it's not currently open. Otherwise, * just update the physical size of the device. */ if (dvd != NULL) { if (dvd->vd_offline) { /* * If we are opening a device in its offline notify * context, the LDI handle was just closed. Clean * up the LDI event callbacks and free vd->vdev_tsd. */ vdev_disk_free(vd); } else { ASSERT(vd->vdev_reopening); devvp = dvd->vd_devvp; goto skip_open; } } /* * Create vd->vdev_tsd. */ vdev_disk_alloc(vd); dvd = vd->vdev_tsd; /* * When opening a disk device, we want to preserve the user's original * intent. We always want to open the device by the path the user gave * us, even if it is one of multiple paths to the same device. But we * also want to be able to survive disks being removed/recabled. * Therefore the sequence of opening devices is: * * 1. Try opening the device by path. For legacy pools without the * 'whole_disk' property, attempt to fix the path by appending 's0'. * * 2. If the devid of the device matches the stored value, return * success. * * 3. Otherwise, the device may have moved. Try opening the device * by the devid instead. */ /* ### APPLE TODO ### */ #ifdef illumos if (vd->vdev_devid != NULL) { if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid, &dvd->vd_minor) != 0) { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; return (SET_ERROR(EINVAL)); } } #endif error = EINVAL; /* presume failure */ if (vd->vdev_path != NULL) { context = vfs_context_create( spl_vfs_context_kernel() ); /* Obtain an opened/referenced vnode for the device. */ if ((error = vnode_open(vd->vdev_path, spa_mode(spa), 0, 0, &devvp, context))) { goto out; } if (!vnode_isblk(devvp)) { error = ENOTBLK; goto out; } /* * ### APPLE TODO ### * vnode_authorize devvp for KAUTH_VNODE_READ_DATA and * KAUTH_VNODE_WRITE_DATA */ /* * Disallow opening of a device that is currently in use. * Flush out any old buffers remaining from a previous use. */ if ((error = vfs_mountedon(devvp))) { goto out; } if (VNOP_FSYNC(devvp, MNT_WAIT, context) != 0) { error = ENOTBLK; goto out; } if ((error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0))) { goto out; } } else { goto out; } int len = MAXPATHLEN; if (vn_getpath(devvp, dvd->vd_readlinkname, &len) == 0) { dprintf("ZFS: '%s' resolved name is '%s'\n", vd->vdev_path, dvd->vd_readlinkname); } else { dvd->vd_readlinkname[0] = 0; } skip_open: /* * Determine the actual size of the device. */ if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&blksize, 0, context) != 0 || VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, context) != 0) { error = EINVAL; goto out; } *psize = blkcnt * (uint64_t)blksize; *max_psize = *psize; dvd->vd_ashift = highbit(blksize) - 1; dprintf("vdev_disk: Device %p ashift set to %d\n", devvp, dvd->vd_ashift); *ashift = highbit(MAX(blksize, SPA_MINBLOCKSIZE)) - 1; /* * ### APPLE TODO ### */ #ifdef illumos if (vd->vdev_wholedisk == 1) { int wce = 1; if (error == 0) { /* * If we have the capability to expand, we'd have * found out via success from DKIOCGMEDIAINFO{,EXT}. 
* Adjust max_psize upward accordingly since we know * we own the whole disk now. */ *max_psize = capacity * blksz; } /* * Since we own the whole disk, try to enable disk write * caching. We ignore errors because it's OK if we can't do it. */ (void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce, FKIOCTL, kcred, NULL); } #endif /* * Clear the nowritecache bit, so that on a vdev_reopen() we will * try again. */ vd->vdev_nowritecache = B_FALSE; /* Inform the ZIO pipeline that we are non-rotational */ vd->vdev_nonrot = B_FALSE; if (VNOP_IOCTL(devvp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, context) == 0) { if (isssd) vd->vdev_nonrot = B_TRUE; } dprintf("ZFS: vdev_disk(%s) isSSD %d\n", vd->vdev_path ? vd->vdev_path : "", isssd); dvd->vd_devvp = devvp; out: if (error) { if (devvp) { vnode_close(devvp, fmode, context); dvd->vd_devvp = NULL; } vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; } if (context) (void) vfs_context_rele(context); if (error) printf("ZFS: vdev_disk_open('%s') failed error %d\n", vd->vdev_path ? vd->vdev_path : "", error); return (error); }
/* * tavor_srq_alloc() * Context: Can be called only from user or kernel context. */ int tavor_srq_alloc(tavor_state_t *state, tavor_srq_info_t *srqinfo, uint_t sleepflag, tavor_srq_options_t *op) { ibt_srq_hdl_t ibt_srqhdl; tavor_pdhdl_t pd; ibt_srq_sizes_t *sizes; ibt_srq_sizes_t *real_sizes; tavor_srqhdl_t *srqhdl; ibt_srq_flags_t flags; tavor_rsrc_t *srqc, *rsrc; tavor_hw_srqc_t srqc_entry; uint32_t *buf; tavor_srqhdl_t srq; tavor_umap_db_entry_t *umapdb; ibt_mr_attr_t mr_attr; tavor_mr_options_t mr_op; tavor_mrhdl_t mr; uint64_t addr; uint64_t value, srq_desc_off; uint32_t lkey; uint32_t log_srq_size; uint32_t uarpg; uint_t wq_location, dma_xfer_mode, srq_is_umap; int flag, status; char *errormsg; uint_t max_sgl; uint_t wqesz; _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sizes)) TAVOR_TNF_ENTER(tavor_srq_alloc); /* * Check the "options" flag. Currently this flag tells the driver * whether or not the SRQ's work queues should be come from normal * system memory or whether they should be allocated from DDR memory. */ if (op == NULL) { wq_location = TAVOR_QUEUE_LOCATION_NORMAL; } else { wq_location = op->srqo_wq_loc; } /* * Extract the necessary info from the tavor_srq_info_t structure */ real_sizes = srqinfo->srqi_real_sizes; sizes = srqinfo->srqi_sizes; pd = srqinfo->srqi_pd; ibt_srqhdl = srqinfo->srqi_ibt_srqhdl; flags = srqinfo->srqi_flags; srqhdl = srqinfo->srqi_srqhdl; /* * Determine whether SRQ is being allocated for userland access or * whether it is being allocated for kernel access. If the SRQ is * being allocated for userland access, then lookup the UAR doorbell * page number for the current process. Note: If this is not found * (e.g. if the process has not previously open()'d the Tavor driver), * then an error is returned. */ srq_is_umap = (flags & IBT_SRQ_USER_MAP) ? 1 : 0; if (srq_is_umap) { status = tavor_umap_db_find(state->ts_instance, ddi_get_pid(), MLNX_UMAP_UARPG_RSRC, &value, 0, NULL); if (status != DDI_SUCCESS) { /* Set "status" and "errormsg" and goto failure */ TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "failed UAR page"); goto srqalloc_fail3; } uarpg = ((tavor_rsrc_t *)(uintptr_t)value)->tr_indx; } /* Increase PD refcnt */ tavor_pd_refcnt_inc(pd); /* Allocate an SRQ context entry */ status = tavor_rsrc_alloc(state, TAVOR_SRQC, 1, sleepflag, &srqc); if (status != DDI_SUCCESS) { /* Set "status" and "errormsg" and goto failure */ TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed SRQ context"); goto srqalloc_fail1; } /* Allocate the SRQ Handle entry */ status = tavor_rsrc_alloc(state, TAVOR_SRQHDL, 1, sleepflag, &rsrc); if (status != DDI_SUCCESS) { /* Set "status" and "errormsg" and goto failure */ TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed SRQ handle"); goto srqalloc_fail2; } srq = (tavor_srqhdl_t)rsrc->tr_addr; _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq)) /* Calculate the SRQ number */ tavor_srq_numcalc(state, srqc->tr_indx, &srq->srq_srqnum); /* * If this will be a user-mappable SRQ, then allocate an entry for * the "userland resources database". This will later be added to * the database (after all further SRQ operations are successful). * If we fail here, we must undo the reference counts and the * previous resource allocation. */ if (srq_is_umap) { umapdb = tavor_umap_db_alloc(state->ts_instance, srq->srq_srqnum, MLNX_UMAP_SRQMEM_RSRC, (uint64_t)(uintptr_t)rsrc); if (umapdb == NULL) { /* Set "status" and "errormsg" and goto failure */ TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add"); goto srqalloc_fail3; } } /* * Calculate the appropriate size for the SRQ. 
* Note: All Tavor SRQs must be a power-of-2 in size. Also * they may not be any smaller than TAVOR_SRQ_MIN_SIZE. This step * is to round the requested size up to the next highest power-of-2 */ sizes->srq_wr_sz = max(sizes->srq_wr_sz, TAVOR_SRQ_MIN_SIZE); log_srq_size = highbit(sizes->srq_wr_sz); if ((sizes->srq_wr_sz & (sizes->srq_wr_sz - 1)) == 0) { log_srq_size = log_srq_size - 1; } /* * Next we verify that the rounded-up size is valid (i.e. consistent * with the device limits and/or software-configured limits). If not, * then obviously we have a lot of cleanup to do before returning. */ if (log_srq_size > state->ts_cfg_profile->cp_log_max_srq_sz) { /* Set "status" and "errormsg" and goto failure */ TAVOR_TNF_FAIL(IBT_HCA_WR_EXCEEDED, "max SRQ size"); goto srqalloc_fail4; } /* * Next we verify that the requested number of SGL is valid (i.e. * consistent with the device limits and/or software-configured * limits). If not, then obviously the same cleanup needs to be done. */ max_sgl = state->ts_cfg_profile->cp_srq_max_sgl; if (sizes->srq_sgl_sz > max_sgl) { /* Set "status" and "errormsg" and goto failure */ TAVOR_TNF_FAIL(IBT_HCA_SGL_EXCEEDED, "max SRQ SGL"); goto srqalloc_fail4; } /* * Determine the SRQ's WQE sizes. This depends on the requested * number of SGLs. Note: This also has the side-effect of * calculating the real number of SGLs (for the calculated WQE size) */ tavor_srq_sgl_to_logwqesz(state, sizes->srq_sgl_sz, TAVOR_QP_WQ_TYPE_RECVQ, &srq->srq_wq_log_wqesz, &srq->srq_wq_sgl); /* * Allocate the memory for SRQ work queues. Note: The location from * which we will allocate these work queues has been passed in through * the tavor_qp_options_t structure. Since Tavor work queues are not * allowed to cross a 32-bit (4GB) boundary, the alignment of the work * queue memory is very important. We used to allocate work queues * (the combined receive and send queues) so that they would be aligned * on their combined size. That alignment guaranteed that they would * never cross the 4GB boundary (Tavor work queues are on the order of * MBs at maximum). Now we are able to relax this alignment constraint * by ensuring that the IB address assigned to the queue memory (as a * result of the tavor_mr_register() call) is offset from zero. * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to * guarantee the alignment, but when attempting to use IOMMU bypass * mode we found that we were not allowed to specify any alignment that * was more restrictive than the system page size. So we avoided this * constraint by passing two alignment values, one for the memory * allocation itself and the other for the DMA handle (for later bind). * This used to cause more memory than necessary to be allocated (in * order to guarantee the more restrictive alignment contraint). But * be guaranteeing the zero-based IB virtual address for the queue, we * are able to conserve this memory. * * Note: If SRQ is not user-mappable, then it may come from either * kernel system memory or from HCA-attached local DDR memory. * * Note2: We align this queue on a pagesize boundary. This is required * to make sure that all the resulting IB addresses will start at 0, for * a zero-based queue. By making sure we are aligned on at least a * page, any offset we use into our queue will be the same as when we * perform tavor_srq_modify() operations later. 
*/ wqesz = (1 << srq->srq_wq_log_wqesz); srq->srq_wqinfo.qa_size = (1 << log_srq_size) * wqesz; srq->srq_wqinfo.qa_alloc_align = PAGESIZE; srq->srq_wqinfo.qa_bind_align = PAGESIZE; if (srq_is_umap) { srq->srq_wqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND; } else { srq->srq_wqinfo.qa_location = wq_location; } status = tavor_queue_alloc(state, &srq->srq_wqinfo, sleepflag); if (status != DDI_SUCCESS) { /* Set "status" and "errormsg" and goto failure */ TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed srq"); goto srqalloc_fail4; } buf = (uint32_t *)srq->srq_wqinfo.qa_buf_aligned; _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf)) /* * Register the memory for the SRQ work queues. The memory for the SRQ * must be registered in the Tavor TPT tables. This gives us the LKey * to specify in the SRQ context later. Note: If the work queue is to * be allocated from DDR memory, then only a "bypass" mapping is * appropriate. And if the SRQ memory is user-mappable, then we force * DDI_DMA_CONSISTENT mapping. Also, in order to meet the alignment * restriction, we pass the "mro_bind_override_addr" flag in the call * to tavor_mr_register(). This guarantees that the resulting IB vaddr * will be zero-based (modulo the offset into the first page). If we * fail here, we still have the bunch of resource and reference count * cleanup to do. */ flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP; mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf; mr_attr.mr_len = srq->srq_wqinfo.qa_size; mr_attr.mr_as = NULL; mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE; if (srq_is_umap) { mr_op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass; } else { if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) { mr_op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass; dma_xfer_mode = state->ts_cfg_profile->cp_streaming_consistent; if (dma_xfer_mode == DDI_DMA_STREAMING) { mr_attr.mr_flags |= IBT_MR_NONCOHERENT; } } else { mr_op.mro_bind_type = TAVOR_BINDMEM_BYPASS; } } mr_op.mro_bind_dmahdl = srq->srq_wqinfo.qa_dmahdl; mr_op.mro_bind_override_addr = 1; status = tavor_mr_register(state, pd, &mr_attr, &mr, &mr_op); if (status != DDI_SUCCESS) { /* Set "status" and "errormsg" and goto failure */ TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr"); goto srqalloc_fail5; } _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) addr = mr->mr_bindinfo.bi_addr; lkey = mr->mr_lkey; /* * Calculate the offset between the kernel virtual address space * and the IB virtual address space. This will be used when * posting work requests to properly initialize each WQE. */ srq_desc_off = (uint64_t)(uintptr_t)srq->srq_wqinfo.qa_buf_aligned - (uint64_t)mr->mr_bindinfo.bi_addr; /* * Create WQL and Wridlist for use by this SRQ */ srq->srq_wrid_wql = tavor_wrid_wql_create(state); if (srq->srq_wrid_wql == NULL) { /* Set "status" and "errormsg" and goto failure */ TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed wql create"); goto srqalloc_fail6; } _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(srq->srq_wrid_wql))) srq->srq_wridlist = tavor_wrid_get_list(1 << log_srq_size); if (srq->srq_wridlist == NULL) { /* Set "status" and "errormsg" and goto failure */ TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed wridlist create"); goto srqalloc_fail7; } _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(srq->srq_wridlist))) srq->srq_wridlist->wl_srq_en = 1; srq->srq_wridlist->wl_free_list_indx = -1; /* * Fill in all the return arguments (if necessary). This includes * real queue size and real SGLs. 
*/ if (real_sizes != NULL) { real_sizes->srq_wr_sz = (1 << log_srq_size); real_sizes->srq_sgl_sz = srq->srq_wq_sgl; } /* * Fill in the SRQC entry. This is the final step before passing * ownership of the SRQC entry to the Tavor hardware. We use all of * the information collected/calculated above to fill in the * requisite portions of the SRQC. Note: If this SRQ is going to be * used for userland access, then we need to set the UAR page number * appropriately (otherwise it's a "don't care") */ bzero(&srqc_entry, sizeof (tavor_hw_srqc_t)); srqc_entry.wqe_addr_h = (addr >> 32); srqc_entry.next_wqe_addr_l = 0; srqc_entry.ds = (wqesz >> 4); srqc_entry.state = TAVOR_SRQ_STATE_HW_OWNER; srqc_entry.pd = pd->pd_pdnum; srqc_entry.lkey = lkey; srqc_entry.wqe_cnt = 0; if (srq_is_umap) { srqc_entry.uar = uarpg; } else { srqc_entry.uar = 0; } /* * Write the SRQC entry to hardware. Lastly, we pass ownership of * the entry to the hardware (using the Tavor SW2HW_SRQ firmware * command). Note: In general, this operation shouldn't fail. But * if it does, we have to undo everything we've done above before * returning error. */ status = tavor_cmn_ownership_cmd_post(state, SW2HW_SRQ, &srqc_entry, sizeof (tavor_hw_srqc_t), srq->srq_srqnum, sleepflag); if (status != TAVOR_CMD_SUCCESS) { cmn_err(CE_CONT, "Tavor: SW2HW_SRQ command failed: %08x\n", status); TNF_PROBE_1(tavor_srq_alloc_sw2hw_srq_cmd_fail, TAVOR_TNF_ERROR, "", tnf_uint, status, status); /* Set "status" and "errormsg" and goto failure */ TAVOR_TNF_FAIL(IBT_FAILURE, "tavor SW2HW_SRQ command"); goto srqalloc_fail8; } /* * Fill in the rest of the Tavor SRQ handle. We can update * the following fields for use in further operations on the SRQ. */ srq->srq_srqcrsrcp = srqc; srq->srq_rsrcp = rsrc; srq->srq_mrhdl = mr; srq->srq_refcnt = 0; srq->srq_is_umap = srq_is_umap; srq->srq_uarpg = (srq->srq_is_umap) ? uarpg : 0; srq->srq_umap_dhp = (devmap_cookie_t)NULL; srq->srq_pdhdl = pd; srq->srq_wq_lastwqeindx = -1; srq->srq_wq_bufsz = (1 << log_srq_size); srq->srq_wq_buf = buf; srq->srq_desc_off = srq_desc_off; srq->srq_hdlrarg = (void *)ibt_srqhdl; srq->srq_state = 0; srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size); srq->srq_real_sizes.srq_sgl_sz = srq->srq_wq_sgl; /* Determine if later ddi_dma_sync will be necessary */ srq->srq_sync = TAVOR_SRQ_IS_SYNC_REQ(state, srq->srq_wqinfo); /* * Put SRQ handle in Tavor SRQNum-to-SRQhdl list. Then fill in the * "srqhdl" and return success */ ASSERT(state->ts_srqhdl[srqc->tr_indx] == NULL); state->ts_srqhdl[srqc->tr_indx] = srq; /* * If this is a user-mappable SRQ, then we need to insert the * previously allocated entry into the "userland resources database". * This will allow for later lookup during devmap() (i.e. mmap()) * calls. 
*/ if (srq->srq_is_umap) { tavor_umap_db_add(umapdb); } else { mutex_enter(&srq->srq_wrid_wql->wql_lock); tavor_wrid_list_srq_init(srq->srq_wridlist, srq, 0); mutex_exit(&srq->srq_wrid_wql->wql_lock); } *srqhdl = srq; TAVOR_TNF_EXIT(tavor_srq_alloc); return (status); /* * The following is cleanup for all possible failure cases in this routine */ srqalloc_fail8: kmem_free(srq->srq_wridlist->wl_wre, srq->srq_wridlist->wl_size * sizeof (tavor_wrid_entry_t)); kmem_free(srq->srq_wridlist, sizeof (tavor_wrid_list_hdr_t)); srqalloc_fail7: tavor_wql_refcnt_dec(srq->srq_wrid_wql); srqalloc_fail6: if (tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL, TAVOR_SLEEPFLAG_FOR_CONTEXT()) != DDI_SUCCESS) { TAVOR_WARNING(state, "failed to deregister SRQ memory"); } srqalloc_fail5: tavor_queue_free(state, &srq->srq_wqinfo); srqalloc_fail4: if (srq_is_umap) { tavor_umap_db_free(umapdb); } srqalloc_fail3: tavor_rsrc_free(state, &rsrc); srqalloc_fail2: tavor_rsrc_free(state, &srqc); srqalloc_fail1: tavor_pd_refcnt_dec(pd); srqalloc_fail: TNF_PROBE_1(tavor_srq_alloc_fail, TAVOR_TNF_ERROR, "", tnf_string, msg, errormsg); TAVOR_TNF_EXIT(tavor_srq_alloc); return (status); }
/* * hermon_cfg_profile_init_phase2() * Context: Only called from attach() path context */ int hermon_cfg_profile_init_phase2(hermon_state_t *state) { hermon_cfg_profile_t *cp; hermon_hw_querydevlim_t *devlim; hermon_hw_query_port_t *port; uint32_t num, size; int i; /* Read in the device limits */ devlim = &state->hs_devlim; /* and the port information */ port = &state->hs_queryport; /* Read the configuration profile */ cp = state->hs_cfg_profile; /* * We configure all Hermon HCAs with the same profile, which * is based upon the default value assignments above. If we want to * add additional profiles in the future, they can be added here. * Note the reference to "Memfree" is a holdover from Arbel/Sinai */ if (state->hs_cfg_profile_setting != HERMON_CFG_MEMFREE) { return (DDI_FAILURE); } /* * Note for most configuration parameters, we use the lesser of our * desired configuration value or the device-defined maximum value. */ cp->cp_log_num_mtt = min(hermon_log_num_mtt, devlim->log_max_mtt); cp->cp_log_num_dmpt = min(hermon_log_num_dmpt, devlim->log_max_dmpt); cp->cp_log_num_cmpt = HERMON_LOG_CMPT_PER_TYPE + 2; /* times 4, */ /* per PRM */ cp->cp_log_max_mrw_sz = min(hermon_log_max_mrw_sz, devlim->log_max_mrw_sz); cp->cp_log_num_pd = min(hermon_log_num_pd, devlim->log_max_pd); cp->cp_log_num_qp = min(hermon_log_num_qp, devlim->log_max_qp); cp->cp_log_num_cq = min(hermon_log_num_cq, devlim->log_max_cq); cp->cp_log_num_srq = min(hermon_log_num_srq, devlim->log_max_srq); cp->cp_log_num_eq = min(hermon_log_num_eq, devlim->log_max_eq); cp->cp_log_eq_sz = min(hermon_log_eq_sz, devlim->log_max_eq_sz); cp->cp_log_num_rdb = cp->cp_log_num_qp + min(hermon_log_num_rdb_per_qp, devlim->log_max_ra_req_qp); cp->cp_hca_max_rdma_in_qp = cp->cp_hca_max_rdma_out_qp = 1 << min(hermon_log_num_rdb_per_qp, devlim->log_max_ra_req_qp); cp->cp_num_qp_per_mcg = max(hermon_num_qp_per_mcg, HERMON_NUM_QP_PER_MCG_MIN); cp->cp_num_qp_per_mcg = min(cp->cp_num_qp_per_mcg, (1 << devlim->log_max_qp_mcg) - 8); cp->cp_num_qp_per_mcg = (1 << highbit(cp->cp_num_qp_per_mcg + 7)) - 8; cp->cp_log_num_mcg = min(hermon_log_num_mcg, devlim->log_max_mcg); cp->cp_log_num_mcg_hash = hermon_log_num_mcg_hash; /* until srq_resize is debugged, disable it */ cp->cp_srq_resize_enabled = 0; /* cp->cp_log_num_uar = hermon_log_num_uar; */ /* * now, we HAVE to calculate the number of UAR pages, so that we can * get the blueflame stuff correct as well */ size = devlim->log_max_uar_sz; /* 1MB (2^^20) times size (2^^size) / sparc_pg (2^^13) */ num = (20 + size) - 13; /* XXX - consider using PAGESHIFT */ if (devlim->blu_flm) num -= 1; /* if blueflame, only half the size for UARs */ cp->cp_log_num_uar = min(hermon_log_num_uar, num); /* while we're at it, calculate the index of the kernel uar page */ /* either the reserved uar's or 128, whichever is smaller */ state->hs_kernel_uar_index = (devlim->num_rsvd_uar > 128) ? 
devlim->num_rsvd_uar : 128; cp->cp_log_max_pkeytbl = port->log_max_pkey; cp->cp_log_max_qp_sz = devlim->log_max_qp_sz; cp->cp_log_max_cq_sz = devlim->log_max_cq_sz; cp->cp_log_max_srq_sz = devlim->log_max_srq_sz; cp->cp_log_max_gidtbl = port->log_max_gid; cp->cp_max_mtu = port->ib_mtu; /* XXX now from query_port */ cp->cp_max_port_width = port->ib_port_wid; /* now from query_port */ cp->cp_max_vlcap = port->max_vl; cp->cp_log_num_ah = hermon_log_num_ah; /* Paranoia, ensure no arrays indexed by port_num are out of bounds */ cp->cp_num_ports = devlim->num_ports; if (cp->cp_num_ports > HERMON_MAX_PORTS) { cmn_err(CE_CONT, "device has more ports (%d) than are " "supported; Using %d ports\n", cp->cp_num_ports, HERMON_MAX_PORTS); cp->cp_num_ports = HERMON_MAX_PORTS; }; /* allocate variable sized arrays */ for (i = 0; i < HERMON_MAX_PORTS; i++) { state->hs_pkey[i] = kmem_zalloc((1 << cp->cp_log_max_pkeytbl) * sizeof (ib_pkey_t), KM_SLEEP); state->hs_guid[i] = kmem_zalloc((1 << cp->cp_log_max_gidtbl) * sizeof (ib_guid_t), KM_SLEEP); } /* Determine WQE sizes from requested max SGLs */ hermon_cfg_wqe_sizes(state, cp); /* Set whether to use MSIs or not */ cp->cp_use_msi_if_avail = hermon_use_msi_if_avail; #if !defined(_ELF64) /* * Need to reduce the hermon kernel virtual memory footprint * on 32-bit kernels. */ cp->cp_log_num_mtt -= 6; cp->cp_log_num_dmpt -= 6; cp->cp_log_num_pd -= 6; cp->cp_log_num_qp -= 6; cp->cp_log_num_cq -= 6; cp->cp_log_num_srq -= 6; cp->cp_log_num_rdb = cp->cp_log_num_qp + min(hermon_log_num_rdb_per_qp, devlim->log_max_ra_req_qp); cp->cp_hca_max_rdma_in_qp = cp->cp_hca_max_rdma_out_qp = 1 << min(hermon_log_num_rdb_per_qp, devlim->log_max_ra_req_qp); #endif return (DDI_SUCCESS); }
void vpm_init() { long npages; struct vpmap *vpm; struct vpmfree *vpmflp; int i, ndx; extern void prefetch_smap_w(void *); if (!vpm_cache_enable) { return; } /* * Set the size of the cache. */ vpm_cache_size = mmu_ptob((physmem * vpm_cache_percent)/100); if (vpm_cache_size < VPMAP_MINCACHE) { vpm_cache_size = VPMAP_MINCACHE; } /* * Number of freelists. */ if (vpm_nfreelist == 0) { vpm_nfreelist = max_ncpus; } else if (vpm_nfreelist < 0 || vpm_nfreelist > 2 * max_ncpus) { cmn_err(CE_WARN, "vpmap create : number of freelist " "vpm_nfreelist %d using %d", vpm_nfreelist, max_ncpus); vpm_nfreelist = 2 * max_ncpus; } /* * Round it up to the next power of 2 */ if (vpm_nfreelist & (vpm_nfreelist - 1)) { vpm_nfreelist = 1 << (highbit(vpm_nfreelist)); } vpmd_freemsk = vpm_nfreelist - 1; /* * Use a per cpu rotor index to spread the allocations evenly * across the available vpm freelists. */ vpmd_cpu = kmem_zalloc(sizeof (union vpm_cpu) * max_ncpus, KM_SLEEP); ndx = 0; for (i = 0; i < max_ncpus; i++) { vpmd_cpu[i].vfree_ndx = ndx; ndx = (ndx + 1) & vpmd_freemsk; } /* * Allocate and initialize the freelist. */ vpmd_free = kmem_zalloc(vpm_nfreelist * sizeof (struct vpmfree), KM_SLEEP); for (i = 0; i < vpm_nfreelist; i++) { vpmflp = &vpmd_free[i]; /* * Set up initial queue pointers. They will get flipped * back and forth. */ vpmflp->vpm_allocq = &vpmflp->vpm_freeq[VPMALLOCQ]; vpmflp->vpm_releq = &vpmflp->vpm_freeq[VPMRELEQ]; } npages = mmu_btop(vpm_cache_size); /* * Allocate and initialize the vpmap structs. */ vpmd_vpmap = kmem_zalloc(sizeof (struct vpmap) * npages, KM_SLEEP); for (vpm = vpmd_vpmap; vpm <= &vpmd_vpmap[npages - 1]; vpm++) { struct vpmfree *vpmflp; union vpm_freeq *releq; struct vpmap *vpmapf; /* * Use prefetch as we have to walk thru a large number of * these data structures. We just use the smap's prefetch * routine as it does the same. This should work fine * for x64(this needs to be modifed when enabled on sparc). */ prefetch_smap_w((void *)vpm); vpm->vpm_free_ndx = VPMAP2VMF_NDX(vpm); vpmflp = VPMAP2VMF(vpm); releq = vpmflp->vpm_releq; vpmapf = releq->vpmq_free; if (vpmapf == NULL) { releq->vpmq_free = vpm->vpm_next = vpm->vpm_prev = vpm; } else { vpm->vpm_next = vpmapf; vpm->vpm_prev = vpmapf->vpm_prev; vpmapf->vpm_prev = vpm; vpm->vpm_prev->vpm_next = vpm; releq->vpmq_free = vpm->vpm_next; } /* * Indicate that the vpmap is on the releq at start */ vpm->vpm_ndxflg = VPMRELEQ; } }
static int vdev_disk_open(vdev_t *vd, uint64_t *size, uint64_t *max_size, uint64_t *ashift) { vdev_disk_t *dvd = NULL; vnode_t *devvp = NULLVP; vfs_context_t context = NULL; uint64_t blkcnt; uint32_t blksize; int fmode = 0; int error = 0; /* * We must have a pathname, and it must be absolute. */ if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; return (EINVAL); } dvd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP); if (dvd == NULL) return ENOMEM; /* * When opening a disk device, we want to preserve the user's original * intent. We always want to open the device by the path the user gave * us, even if it is one of multiple paths to the save device. But we * also want to be able to survive disks being removed/recabled. * Therefore the sequence of opening devices is: * * 1. Try opening the device by path. For legacy pools without the * 'whole_disk' property, attempt to fix the path by appending 's0'. * * 2. If the devid of the device matches the stored value, return * success. * * 3. Otherwise, the device may have moved. Try opening the device * by the devid instead. * */ /* ### APPLE TODO ### */ /* ddi_devid_str_decode */ context = vfs_context_create((vfs_context_t)0); /* Obtain an opened/referenced vnode for the device. */ error = vnode_open(vd->vdev_path, spa_mode(vd->vdev_spa), 0, 0, &devvp, context); if (error) { goto out; } if (!vnode_isblk(devvp)) { error = ENOTBLK; goto out; } /* ### APPLE TODO ### */ /* vnode_authorize devvp for KAUTH_VNODE_READ_DATA and * KAUTH_VNODE_WRITE_DATA */ /* * Disallow opening of a device that is currently in use. * Flush out any old buffers remaining from a previous use. */ if ((error = vfs_mountedon(devvp))) { goto out; } if (VNOP_FSYNC(devvp, MNT_WAIT, context) != 0) { error = ENOTBLK; goto out; } if ((error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0))) { goto out; } /* * Determine the actual size of the device. */ if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&blksize, 0, context) != 0 || VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, context) != 0) { error = EINVAL; goto out; } *size = blkcnt * (uint64_t)blksize; /* * ### APPLE TODO ### * If we own the whole disk, try to enable disk write caching. */ /* * Take the device's minimum transfer size into account. */ *ashift = highbit(MAX(blksize, SPA_MINBLOCKSIZE)) - 1; /* * Setting the vdev_ashift did in fact break the pool for import * on ZEVO. This puts the logic into question. It appears that vdev_top * will also then change. It then panics in space_map from metaslab_alloc */ //vd->vdev_ashift = *ashift; dvd->vd_ashift = *ashift; /* * Clear the nowritecache bit, so that on a vdev_reopen() we will * try again. */ vd->vdev_nowritecache = B_FALSE; vd->vdev_tsd = dvd; dvd->vd_devvp = devvp; out: if (error) { if (devvp) vnode_close(devvp, fmode, context); if (dvd) kmem_free(dvd, sizeof (vdev_disk_t)); /* * Since the open has failed, vd->vdev_tsd should * be NULL when we get here, signaling to the * rest of the spa not to try and reopen or close this device */ vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; } if (context) { (void) vfs_context_rele(context); } return (error); }
void cpu_intrq_cleanup(struct cpu *cpu) { struct machcpu *mcpup = &cpu->cpu_m; int cpu_list_size; uint64_t cpu_q_size; uint64_t dev_q_size; uint64_t cpu_rq_size; uint64_t cpu_nrq_size; /* * Free mondo data for xcalls. */ if (mcpup->mondo_data) { contig_mem_free(mcpup->mondo_data, INTR_REPORT_SIZE); mcpup->mondo_data = NULL; mcpup->mondo_data_ra = NULL; } /* * Free per-cpu list of ncpu_guest_max for xcalls */ cpu_list_size = ncpu_guest_max * sizeof (uint16_t); if (cpu_list_size < INTR_REPORT_SIZE) cpu_list_size = INTR_REPORT_SIZE; /* * contig_mem_alloc() requires size to be a power of 2. * Increase size to a power of 2 if necessary. */ if ((cpu_list_size & (cpu_list_size - 1)) != 0) { cpu_list_size = 1 << highbit(cpu_list_size); } if (mcpup->cpu_list) { contig_mem_free(mcpup->cpu_list, cpu_list_size); mcpup->cpu_list = NULL; mcpup->cpu_list_ra = NULL; } /* * Free sun4v interrupt and error queues. */ if (mcpup->cpu_q_va) { cpu_q_size = cpu_q_entries * INTR_REPORT_SIZE; contig_mem_free(mcpup->cpu_q_va, cpu_q_size); mcpup->cpu_q_va = NULL; mcpup->cpu_q_base_pa = NULL; mcpup->cpu_q_size = 0; } if (mcpup->dev_q_va) { dev_q_size = dev_q_entries * INTR_REPORT_SIZE; contig_mem_free(mcpup->dev_q_va, dev_q_size); mcpup->dev_q_va = NULL; mcpup->dev_q_base_pa = NULL; mcpup->dev_q_size = 0; } if (mcpup->cpu_rq_va) { cpu_rq_size = cpu_rq_entries * Q_ENTRY_SIZE; contig_mem_free(mcpup->cpu_rq_va, 2 * cpu_rq_size); mcpup->cpu_rq_va = NULL; mcpup->cpu_rq_base_pa = NULL; mcpup->cpu_rq_size = 0; } if (mcpup->cpu_nrq_va) { cpu_nrq_size = cpu_nrq_entries * Q_ENTRY_SIZE; contig_mem_free(mcpup->cpu_nrq_va, 2 * cpu_nrq_size); mcpup->cpu_nrq_va = NULL; mcpup->cpu_nrq_base_pa = NULL; mcpup->cpu_nrq_size = 0; } }
int cpu_intrq_setup(struct cpu *cpu) { struct machcpu *mcpup = &cpu->cpu_m; size_t size; /* * This routine will return with an error return if any * contig_mem_alloc() fails. It is expected that the caller will * call cpu_intrq_cleanup() (or cleanup_cpu_common() which will). * That will cleanly free only those blocks that were alloc'd. */ /* * Allocate mondo data for xcalls. */ mcpup->mondo_data = contig_mem_alloc(INTR_REPORT_SIZE); if (mcpup->mondo_data == NULL) { cmn_err(CE_NOTE, "cpu%d: cpu mondo_data allocation failed", cpu->cpu_id); return (ENOMEM); } /* * va_to_pa() is too expensive to call for every crosscall * so we do it here at init time and save it in machcpu. */ mcpup->mondo_data_ra = va_to_pa(mcpup->mondo_data); /* * Allocate a per-cpu list of ncpu_guest_max for xcalls */ size = ncpu_guest_max * sizeof (uint16_t); if (size < INTR_REPORT_SIZE) size = INTR_REPORT_SIZE; /* * contig_mem_alloc() requires size to be a power of 2. * Increase size to a power of 2 if necessary. */ if ((size & (size - 1)) != 0) { size = 1 << highbit(size); } mcpup->cpu_list = contig_mem_alloc(size); if (mcpup->cpu_list == NULL) { cmn_err(CE_NOTE, "cpu%d: cpu cpu_list allocation failed", cpu->cpu_id); return (ENOMEM); } mcpup->cpu_list_ra = va_to_pa(mcpup->cpu_list); /* * Allocate sun4v interrupt and error queues. */ size = cpu_q_entries * INTR_REPORT_SIZE; mcpup->cpu_q_va = contig_mem_alloc(size); if (mcpup->cpu_q_va == NULL) { cmn_err(CE_NOTE, "cpu%d: cpu intrq allocation failed", cpu->cpu_id); return (ENOMEM); } mcpup->cpu_q_base_pa = va_to_pa(mcpup->cpu_q_va); mcpup->cpu_q_size = size; /* * Allocate device queues */ size = dev_q_entries * INTR_REPORT_SIZE; mcpup->dev_q_va = contig_mem_alloc(size); if (mcpup->dev_q_va == NULL) { cmn_err(CE_NOTE, "cpu%d: dev intrq allocation failed", cpu->cpu_id); return (ENOMEM); } mcpup->dev_q_base_pa = va_to_pa(mcpup->dev_q_va); mcpup->dev_q_size = size; /* * Allocate resumable queue and its kernel buffer */ size = cpu_rq_entries * Q_ENTRY_SIZE; mcpup->cpu_rq_va = contig_mem_alloc(2 * size); if (mcpup->cpu_rq_va == NULL) { cmn_err(CE_NOTE, "cpu%d: resumable queue allocation failed", cpu->cpu_id); return (ENOMEM); } mcpup->cpu_rq_base_pa = va_to_pa(mcpup->cpu_rq_va); mcpup->cpu_rq_size = size; /* zero out the memory */ bzero(mcpup->cpu_rq_va, 2 * size); /* * Allocate non-resumable queues */ size = cpu_nrq_entries * Q_ENTRY_SIZE; mcpup->cpu_nrq_va = contig_mem_alloc(2 * size); if (mcpup->cpu_nrq_va == NULL) { cmn_err(CE_NOTE, "cpu%d: nonresumable queue allocation failed", cpu->cpu_id); return (ENOMEM); } mcpup->cpu_nrq_base_pa = va_to_pa(mcpup->cpu_nrq_va); mcpup->cpu_nrq_size = size; /* zero out the memory */ bzero(mcpup->cpu_nrq_va, 2 * size); return (0); }
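cpu_intrq_setup() and cpu_intrq_cleanup() must agree on the exact size handed to contig_mem_alloc() and contig_mem_free(), and both round a non-power-of-two cpu_list size up with 1 << highbit(size). That guard-plus-shift pattern, pulled out as a hedged helper sketch using the hypothetical my_highbit() from earlier:

/*
 * Illustration only: round size up to the next power of two when it is
 * not one already, as required by contig_mem_alloc() in the snippets
 * above.
 */
static size_t
example_roundup_pow2(size_t size)
{
    if ((size & (size - 1)) != 0)
        size = (size_t)1 << my_highbit((unsigned long)size);
    return (size);
}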
static void XSetupDisplay(int nframes) { XGCValues xgcv; XtAccelerators keys; /* Had to do it this way since embedding the keystrokes in the fallback resources failed to work properly -- what a kludge. */ keys = XtParseAcceleratorTable(ckeys); xi.depth = DefaultDepthOfScreen(DefaultScreenOfDisplay(xi.disp)); // give me TrueColor if (!XMatchVisualInfo(xi.disp, xi.screenno, xi.depth, TrueColor, &(xi.vi))) ErrorExit(ERROR_BADPARM, "Could not find a TrueColor visual"); xi.vis = xi.vi.visual; xi.root = RootWindow(xi.disp, xi.screenno); // AllocNone -- clients can allocate the colormap entries // For TrueColor, alloc must be AloocNone xi.colormap = XCreateColormap(xi.disp, xi.root, xi.vis, AllocNone); toplevel = XtVaAppCreateShell("NMovie", "NMovie", applicationShellWidgetClass, xi.disp, XtNvisual, xi.vis, XtNcolormap, xi.colormap, NULL); XtAppAddActions(xi.context,actions,XtNumber(actions)); // frame frame = XtVaCreateManagedWidget("Frame", formWidgetClass, toplevel, NULL); // create buttons buttons = XtVaCreateManagedWidget("Buttons", formWidgetClass, frame, NULL ); loop_bt = XtVaCreateManagedWidget("Loop", commandWidgetClass, buttons, NULL); swing_bt = XtVaCreateManagedWidget("Swing", commandWidgetClass, buttons, NULL); fast_bt = XtVaCreateManagedWidget("Faster", commandWidgetClass, buttons, NULL); slow_bt = XtVaCreateManagedWidget("Slower", commandWidgetClass, buttons, NULL); stop_bt = XtVaCreateManagedWidget("Stop", commandWidgetClass, buttons, NULL); back_bt = XtVaCreateManagedWidget("Back", commandWidgetClass, buttons, NULL); forward_bt = XtVaCreateManagedWidget("Forward", commandWidgetClass, buttons, NULL); quit_bt = XtVaCreateManagedWidget("Quit", commandWidgetClass, buttons, NULL); // canvas canvas = XtVaCreateManagedWidget("Canvas", simpleWidgetClass, frame, XtNwidth, cols, XtNheight, rows, XtNaccelerators, keys, NULL); XtInstallAllAccelerators(canvas,toplevel); XtRealizeWidget(toplevel); xi.canvas = XtWindow(canvas); xi.theGC = XCreateGC(xi.disp, xi.canvas, 0L, &xgcv); xi.rmask = xi.vis->red_mask; // 0xFF0000 xi.gmask = xi.vis->green_mask; // 0x00FF00 xi.bmask = xi.vis->blue_mask; // 0x0000FF xi.rshift = 7 - highbit(xi.rmask); // -16 xi.gshift = 7 - highbit(xi.gmask); // -8 xi.bshift = 7 - highbit(xi.bmask); // 0 // format is ZPixmap offset,data ximg = XCreateImage(xi.disp,xi.vis,xi.depth,ZPixmap, 0, NULL, // bytes_per_line = 0 means assume contiguous and calculated cols, rows, 32, 0); if ((imgdata = (char *)calloc((size_t)(rows*ximg->bytes_per_line*nframes), sizeof(byte))) ==NULL) ErrorExit(ERROR_NO_MEMORY,"Failed to allocate image buffer"); ximg->data = (char *) imgdata; }
Pixmap xskin_loadBMP( Display *d, Window w, char *filename, int *width, int *height ) { Pixmap ret=0; BMPHeader *bmp; struct timidity_file *fp; GC gc; if ( width!=NULL ) *width=-1; if ( height!=NULL ) *height=-1; sc = DefaultScreen( d ); gc = DefaultGC( d, sc ); cmap = DefaultColormap( d, sc ); rshift = 15-highbit(xskin_vis->red_mask); gshift = 15-highbit(xskin_vis->green_mask); bshift = 15-highbit(xskin_vis->blue_mask); fp = open_file( filename, 1, OF_SILENT ); if ( fp == NULL ) return ret; if ( fp->url->url_tell == NULL ) { fp->url = url_buff_open( fp->url, 1 ); } bmp = loadBMPHeader( fp ); if ( bmp==NULL ) goto finish1; if ( !loadBMPColors( d, bmp, fp ) ) goto finish1; ret = XCreatePixmap( d, w, bmp->w, bmp->h, xskin_depth ); XSetForeground( d, gc, 0 ); XFillRectangle( d, ret, gc, 0, 0, bmp->w, bmp->h ); XSetForeground( d, gc, WhitePixel( d, sc )); switch( bmp->bitcounts ) { case 4: if ( bmp->compress == 0 ) Draw4bit( d, ret, gc, bmp, fp ); else if ( bmp->compress == 2 ) DrawCompressed4bit( d, ret, gc, bmp, fp ); break; case 8: if ( bmp->compress == 0 ) Draw8bit( d, ret, gc, bmp, fp ); else if ( bmp->compress == 1 ) DrawCompressed8bit( d, ret, gc, bmp, fp ); break; case 24: Draw24bit( d, ret, gc, bmp, fp ); break; default: break; } if ( width!=NULL ) *width = bmp->w; if ( height!=NULL ) *height = bmp->h; finish1: close_file( fp ); return ret; }
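The X11 snippets (xskin_getcolor(), XSetupDisplay(), xskin_loadBMP()) appear to use a different, application-local highbit() that returns the 0-based position of the highest set bit of a visual channel mask, which is what makes the XSetupDisplay() comments such as 7 - highbit(0xFF0000) == -16 work out. A hedged sketch of that xv-style variant, given a distinct name here to avoid confusion with the kernel routine:

/*
 * Hedged sketch of the xv-style helper these X11 programs rely on:
 * returns the 0-based position of the highest set bit, or -1 for 0,
 * so x11_highbit(0x00FF0000) == 23.
 */
static int
x11_highbit(unsigned long mask)
{
    int pos = -1;

    while (mask != 0) {
        pos++;
        mask >>= 1;
    }
    return (pos);
}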
static int vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) { spa_t *spa = vd->vdev_spa; vdev_disk_t *dvd; struct dk_minfo_ext dkmext; int error; dev_t dev; int otyp; /* * We must have a pathname, and it must be absolute. */ if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; return (EINVAL); } /* * Reopen the device if it's not currently open. Otherwise, * just update the physical size of the device. */ if (vd->vdev_tsd != NULL) { ASSERT(vd->vdev_reopening); dvd = vd->vdev_tsd; goto skip_open; } dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP); /* * When opening a disk device, we want to preserve the user's original * intent. We always want to open the device by the path the user gave * us, even if it is one of multiple paths to the save device. But we * also want to be able to survive disks being removed/recabled. * Therefore the sequence of opening devices is: * * 1. Try opening the device by path. For legacy pools without the * 'whole_disk' property, attempt to fix the path by appending 's0'. * * 2. If the devid of the device matches the stored value, return * success. * * 3. Otherwise, the device may have moved. Try opening the device * by the devid instead. */ if (vd->vdev_devid != NULL) { if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid, &dvd->vd_minor) != 0) { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; return (EINVAL); } } error = EINVAL; /* presume failure */ if (vd->vdev_path != NULL) { ddi_devid_t devid; if (vd->vdev_wholedisk == -1ULL) { size_t len = strlen(vd->vdev_path) + 3; char *buf = kmem_alloc(len, KM_SLEEP); ldi_handle_t lh; (void) snprintf(buf, len, "%ss0", vd->vdev_path); if (ldi_open_by_name(buf, spa_mode(spa), kcred, &lh, zfs_li) == 0) { spa_strfree(vd->vdev_path); vd->vdev_path = buf; vd->vdev_wholedisk = 1ULL; (void) ldi_close(lh, spa_mode(spa), kcred); } else { kmem_free(buf, len); } } error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); /* * Compare the devid to the stored value. */ if (error == 0 && vd->vdev_devid != NULL && ldi_get_devid(dvd->vd_lh, &devid) == 0) { if (ddi_devid_compare(devid, dvd->vd_devid) != 0) { error = EINVAL; (void) ldi_close(dvd->vd_lh, spa_mode(spa), kcred); dvd->vd_lh = NULL; } ddi_devid_free(devid); } /* * If we succeeded in opening the device, but 'vdev_wholedisk' * is not yet set, then this must be a slice. */ if (error == 0 && vd->vdev_wholedisk == -1ULL) vd->vdev_wholedisk = 0; } /* * If we were unable to open by path, or the devid check fails, open by * devid instead. */ if (error != 0 && vd->vdev_devid != NULL) error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); /* * If all else fails, then try opening by physical path (if available) * or the logical path (if we failed due to the devid check). While not * as reliable as the devid, this will give us something, and the higher * level vdev validation will prevent us from opening the wrong device. */ if (error) { if (vd->vdev_physpath != NULL && (dev = ddi_pathname_to_dev_t(vd->vdev_physpath)) != NODEV) error = ldi_open_by_dev(&dev, OTYP_BLK, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); /* * Note that we don't support the legacy auto-wholedisk support * as above. This hasn't been used in a very long time and we * don't need to propagate its oddities to this edge condition. 
*/ if (error && vd->vdev_path != NULL) error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); } if (error) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; return (error); } /* * Once a device is opened, verify that the physical device path (if * available) is up to date. */ if (ldi_get_dev(dvd->vd_lh, &dev) == 0 && ldi_get_otyp(dvd->vd_lh, &otyp) == 0) { char *physpath, *minorname; physpath = kmem_alloc(MAXPATHLEN, KM_SLEEP); minorname = NULL; if (ddi_dev_pathname(dev, otyp, physpath) == 0 && ldi_get_minor_name(dvd->vd_lh, &minorname) == 0 && (vd->vdev_physpath == NULL || strcmp(vd->vdev_physpath, physpath) != 0)) { if (vd->vdev_physpath) spa_strfree(vd->vdev_physpath); (void) strlcat(physpath, ":", MAXPATHLEN); (void) strlcat(physpath, minorname, MAXPATHLEN); vd->vdev_physpath = spa_strdup(physpath); } if (minorname) kmem_free(minorname, strlen(minorname) + 1); kmem_free(physpath, MAXPATHLEN); } skip_open: /* * Determine the actual size of the device. */ if (ldi_get_size(dvd->vd_lh, psize) != 0) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; return (EINVAL); } /* * If we own the whole disk, try to enable disk write caching. * We ignore errors because it's OK if we can't do it. */ if (vd->vdev_wholedisk == 1) { int wce = 1; (void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce, FKIOCTL, kcred, NULL); } /* * Determine the device's minimum transfer size. * If the ioctl isn't supported, assume DEV_BSIZE. */ if (ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFOEXT, (intptr_t)&dkmext, FKIOCTL, kcred, NULL) != 0) dkmext.dki_pbsize = DEV_BSIZE; *ashift = highbit(MAX(dkmext.dki_pbsize, SPA_MINBLOCKSIZE)) - 1; /* * Clear the nowritecache bit, so that on a vdev_reopen() we will * try again. */ vd->vdev_nowritecache = B_FALSE; return (0); }
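Every vdev_disk_open() variant in this collection ends the same way: ashift is computed as highbit() minus one of the larger of the device block size and SPA_MINBLOCKSIZE, i.e. log2 of a power-of-two sector size. A hedged worked example of that computation, assuming SPA_MINBLOCKSIZE is 512 and reusing the hypothetical my_highbit():

/*
 * Illustration of the ashift computation used by the vdev_disk_open()
 * variants above: a 4096-byte physical sector yields 12, a 512-byte
 * sector yields 9 (assuming SPA_MINBLOCKSIZE == 512).
 */
static int
example_ashift(unsigned long blksize)
{
    const unsigned long minblock = 512;    /* assumed SPA_MINBLOCKSIZE */
    unsigned long sz = (blksize > minblock) ? blksize : minblock;

    return (my_highbit(sz) - 1);
}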
/* * tavor_srq_modify() * Context: Can be called only from user or kernel context. */ int tavor_srq_modify(tavor_state_t *state, tavor_srqhdl_t srq, uint_t size, uint_t *real_size, uint_t sleepflag) { tavor_qalloc_info_t new_srqinfo, old_srqinfo; tavor_rsrc_t *mtt, *mpt, *old_mtt; tavor_bind_info_t bind; tavor_bind_info_t old_bind; tavor_rsrc_pool_info_t *rsrc_pool; tavor_mrhdl_t mr; tavor_hw_mpt_t mpt_entry; tavor_wrid_entry_t *wre_new, *wre_old; uint64_t mtt_ddrbaseaddr, mtt_addr; uint64_t srq_desc_off; uint32_t *buf, srq_old_bufsz; uint32_t wqesz; uint_t max_srq_size; uint_t dma_xfer_mode, mtt_pgsize_bits; uint_t srq_sync, log_srq_size, maxprot; uint_t wq_location; int status; char *errormsg; TAVOR_TNF_ENTER(tavor_srq_modify); /* * Check the "inddr" flag. This flag tells the driver whether or not * the SRQ's work queues should be come from normal system memory or * whether they should be allocated from DDR memory. */ wq_location = state->ts_cfg_profile->cp_srq_wq_inddr; /* * If size requested is larger than device capability, return * Insufficient Resources */ max_srq_size = (1 << state->ts_cfg_profile->cp_log_max_srq_sz); if (size > max_srq_size) { TNF_PROBE_0(tavor_srq_modify_size_larger_than_maxsize, TAVOR_TNF_ERROR, ""); TAVOR_TNF_EXIT(tavor_srq_modify); return (IBT_HCA_WR_EXCEEDED); } /* * Calculate the appropriate size for the SRQ. * Note: All Tavor SRQs must be a power-of-2 in size. Also * they may not be any smaller than TAVOR_SRQ_MIN_SIZE. This step * is to round the requested size up to the next highest power-of-2 */ size = max(size, TAVOR_SRQ_MIN_SIZE); log_srq_size = highbit(size); if ((size & (size - 1)) == 0) { log_srq_size = log_srq_size - 1; } /* * Next we verify that the rounded-up size is valid (i.e. consistent * with the device limits and/or software-configured limits). */ if (log_srq_size > state->ts_cfg_profile->cp_log_max_srq_sz) { /* Set "status" and "errormsg" and goto failure */ TAVOR_TNF_FAIL(IBT_HCA_WR_EXCEEDED, "max SRQ size"); goto srqmodify_fail; } /* * Allocate the memory for newly resized Shared Receive Queue. * * Note: If SRQ is not user-mappable, then it may come from either * kernel system memory or from HCA-attached local DDR memory. * * Note2: We align this queue on a pagesize boundary. This is required * to make sure that all the resulting IB addresses will start at 0, * for a zero-based queue. By making sure we are aligned on at least a * page, any offset we use into our queue will be the same as it was * when we allocated it at tavor_srq_alloc() time. */ wqesz = (1 << srq->srq_wq_log_wqesz); new_srqinfo.qa_size = (1 << log_srq_size) * wqesz; new_srqinfo.qa_alloc_align = PAGESIZE; new_srqinfo.qa_bind_align = PAGESIZE; if (srq->srq_is_umap) { new_srqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND; } else { new_srqinfo.qa_location = wq_location; } status = tavor_queue_alloc(state, &new_srqinfo, sleepflag); if (status != DDI_SUCCESS) { /* Set "status" and "errormsg" and goto failure */ TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed srq"); goto srqmodify_fail; } buf = (uint32_t *)new_srqinfo.qa_buf_aligned; _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf)) /* * Allocate the memory for the new WRE list. This will be used later * when we resize the wridlist based on the new SRQ size. 
*/ wre_new = (tavor_wrid_entry_t *)kmem_zalloc((1 << log_srq_size) * sizeof (tavor_wrid_entry_t), sleepflag); if (wre_new == NULL) { /* Set "status" and "errormsg" and goto failure */ TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed wre_new alloc"); goto srqmodify_fail; } /* * Fill in the "bind" struct. This struct provides the majority * of the information that will be used to distinguish between an * "addr" binding (as is the case here) and a "buf" binding (see * below). The "bind" struct is later passed to tavor_mr_mem_bind() * which does most of the "heavy lifting" for the Tavor memory * registration routines. */ _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(bind)) bzero(&bind, sizeof (tavor_bind_info_t)); bind.bi_type = TAVOR_BINDHDL_VADDR; bind.bi_addr = (uint64_t)(uintptr_t)buf; bind.bi_len = new_srqinfo.qa_size; bind.bi_as = NULL; bind.bi_flags = sleepflag == TAVOR_SLEEP ? IBT_MR_SLEEP : IBT_MR_NOSLEEP | IBT_MR_ENABLE_LOCAL_WRITE; if (srq->srq_is_umap) { bind.bi_bypass = state->ts_cfg_profile->cp_iommu_bypass; } else { if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) { bind.bi_bypass = state->ts_cfg_profile->cp_iommu_bypass; dma_xfer_mode = state->ts_cfg_profile->cp_streaming_consistent; if (dma_xfer_mode == DDI_DMA_STREAMING) { bind.bi_flags |= IBT_MR_NONCOHERENT; } } else { bind.bi_bypass = TAVOR_BINDMEM_BYPASS; } } status = tavor_mr_mtt_bind(state, &bind, new_srqinfo.qa_dmahdl, &mtt, &mtt_pgsize_bits); if (status != DDI_SUCCESS) { /* Set "status" and "errormsg" and goto failure */ TAVOR_TNF_FAIL(status, "failed mtt bind"); kmem_free(wre_new, srq->srq_wq_bufsz * sizeof (tavor_wrid_entry_t)); tavor_queue_free(state, &new_srqinfo); goto srqmodify_fail; } /* * Calculate the offset between the kernel virtual address space * and the IB virtual address space. This will be used when * posting work requests to properly initialize each WQE. * * Note: bind addr is zero-based (from alloc) so we calculate the * correct new offset here. */ bind.bi_addr = bind.bi_addr & ((1 << mtt_pgsize_bits) - 1); srq_desc_off = (uint64_t)(uintptr_t)new_srqinfo.qa_buf_aligned - (uint64_t)bind.bi_addr; /* * Get the base address for the MTT table. This will be necessary * below when we are modifying the MPT entry. */ rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT]; mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset; /* * Fill in the MPT entry. This is the final step before passing * ownership of the MPT entry to the Tavor hardware. We use all of * the information collected/calculated above to fill in the * requisite portions of the MPT. */ bzero(&mpt_entry, sizeof (tavor_hw_mpt_t)); mpt_entry.reg_win_len = bind.bi_len; mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT); mpt_entry.mttseg_addr_h = mtt_addr >> 32; mpt_entry.mttseg_addr_l = mtt_addr >> 6; /* * Now we grab the SRQ lock. Since we will be updating the actual * SRQ location and the producer/consumer indexes, we should hold * the lock. * * We do a TAVOR_NOSLEEP here (and below), though, because we are * holding the "srq_lock" and if we got raised to interrupt level * by priority inversion, we would not want to block in this routine * waiting for success. 
*/ mutex_enter(&srq->srq_lock); /* * Copy old entries to new buffer */ srq_old_bufsz = srq->srq_wq_bufsz; bcopy(srq->srq_wq_buf, buf, srq_old_bufsz * wqesz); /* Determine if later ddi_dma_sync will be necessary */ srq_sync = TAVOR_SRQ_IS_SYNC_REQ(state, srq->srq_wqinfo); /* Sync entire "new" SRQ for use by hardware (if necessary) */ if (srq_sync) { (void) ddi_dma_sync(bind.bi_dmahdl, 0, new_srqinfo.qa_size, DDI_DMA_SYNC_FORDEV); } /* * Setup MPT information for use in the MODIFY_MPT command */ mr = srq->srq_mrhdl; mutex_enter(&mr->mr_lock); mpt = srq->srq_mrhdl->mr_mptrsrcp; /* * MODIFY_MPT * * If this fails for any reason, then it is an indication that * something (either in HW or SW) has gone seriously wrong. So we * print a warning message and return. */ status = tavor_modify_mpt_cmd_post(state, &mpt_entry, mpt->tr_indx, TAVOR_CMD_MODIFY_MPT_RESIZESRQ, sleepflag); if (status != TAVOR_CMD_SUCCESS) { cmn_err(CE_CONT, "Tavor: MODIFY_MPT command failed: %08x\n", status); TNF_PROBE_1(tavor_mr_common_reg_sw2hw_mpt_cmd_fail, TAVOR_TNF_ERROR, "", tnf_uint, status, status); TAVOR_TNF_FAIL(status, "MODIFY_MPT command failed"); (void) tavor_mr_mtt_unbind(state, &srq->srq_mrhdl->mr_bindinfo, srq->srq_mrhdl->mr_mttrsrcp); /* Free wre_new with the same size it was allocated with */ kmem_free(wre_new, (1 << log_srq_size) * sizeof (tavor_wrid_entry_t)); tavor_queue_free(state, &new_srqinfo); mutex_exit(&mr->mr_lock); mutex_exit(&srq->srq_lock); return (ibc_get_ci_failure(0)); } /* * Update the Tavor Shared Receive Queue handle with all the new * information. At the same time, save away all the necessary * information for freeing up the old resources */ old_srqinfo = srq->srq_wqinfo; old_mtt = srq->srq_mrhdl->mr_mttrsrcp; bcopy(&srq->srq_mrhdl->mr_bindinfo, &old_bind, sizeof (tavor_bind_info_t)); /* Now set the new info */ srq->srq_wqinfo = new_srqinfo; srq->srq_wq_buf = buf; srq->srq_wq_bufsz = (1 << log_srq_size); bcopy(&bind, &srq->srq_mrhdl->mr_bindinfo, sizeof (tavor_bind_info_t)); srq->srq_mrhdl->mr_mttrsrcp = mtt; srq->srq_desc_off = srq_desc_off; srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size); /* Update MR mtt pagesize */ mr->mr_logmttpgsz = mtt_pgsize_bits; mutex_exit(&mr->mr_lock); #ifdef __lock_lint mutex_enter(&srq->srq_wrid_wql->wql_lock); #else if (srq->srq_wrid_wql != NULL) { mutex_enter(&srq->srq_wrid_wql->wql_lock); } #endif /* * Initialize new wridlist, if needed. * * If a wridlist is already set up on an SRQ (the QP associated with an * SRQ has moved "from_reset") then we must update this wridlist based * on the new SRQ size. We allocate the new size of Work Request ID * Entries, copy over the old entries to the new list, and * re-initialize the srq wridlist in the non-umap case */ wre_old = NULL; if (srq->srq_wridlist != NULL) { wre_old = srq->srq_wridlist->wl_wre; bcopy(wre_old, wre_new, srq_old_bufsz * sizeof (tavor_wrid_entry_t)); /* Setup new sizes in wre */ srq->srq_wridlist->wl_wre = wre_new; srq->srq_wridlist->wl_size = srq->srq_wq_bufsz; if (!srq->srq_is_umap) { tavor_wrid_list_srq_init(srq->srq_wridlist, srq, srq_old_bufsz); } } #ifdef __lock_lint mutex_exit(&srq->srq_wrid_wql->wql_lock); #else if (srq->srq_wrid_wql != NULL) { mutex_exit(&srq->srq_wrid_wql->wql_lock); } #endif /* * If the "old" SRQ was a user-mappable SRQ that is currently mmap()'d out * to a user process, then we need to call devmap_devmem_remap() to * invalidate the mapping to the SRQ memory. We also need to * invalidate the SRQ tracking information for the user mapping. * * Note: The remap really shouldn't ever fail.
If it * does, it is an indication that something has gone seriously wrong. * So we print a warning message and return an error (knowing, of course, * that the "old" SRQ memory will be leaked) */ if ((srq->srq_is_umap) && (srq->srq_umap_dhp != NULL)) { maxprot = (PROT_READ | PROT_WRITE | PROT_USER); status = devmap_devmem_remap(srq->srq_umap_dhp, state->ts_dip, 0, 0, srq->srq_wqinfo.qa_size, maxprot, DEVMAP_MAPPING_INVALID, NULL); if (status != DDI_SUCCESS) { mutex_exit(&srq->srq_lock); TAVOR_WARNING(state, "failed in SRQ memory " "devmap_devmem_remap()"); /* We can, however, free the memory for the old wre */ if (wre_old != NULL) { kmem_free(wre_old, srq_old_bufsz * sizeof (tavor_wrid_entry_t)); } TAVOR_TNF_EXIT(tavor_srq_modify); return (ibc_get_ci_failure(0)); } srq->srq_umap_dhp = (devmap_cookie_t)NULL; } /* * Drop the SRQ lock now. The only thing left to do is to free up * the old resources. */ mutex_exit(&srq->srq_lock); /* * Unbind the MTT entries. */ status = tavor_mr_mtt_unbind(state, &old_bind, old_mtt); if (status != DDI_SUCCESS) { TAVOR_WARNING(state, "failed to unbind old SRQ memory"); /* Set "status" and "errormsg" and goto failure */ TAVOR_TNF_FAIL(ibc_get_ci_failure(0), "failed to unbind (old)"); goto srqmodify_fail; } /* Free the memory for the old wre */ if (wre_old != NULL) { kmem_free(wre_old, srq_old_bufsz * sizeof (tavor_wrid_entry_t)); } /* Free the memory for the old SRQ */ tavor_queue_free(state, &old_srqinfo); /* * Fill in the return arguments (if necessary). This includes the * real new SRQ size. */ if (real_size != NULL) { *real_size = (1 << log_srq_size); } TAVOR_TNF_EXIT(tavor_srq_modify); return (DDI_SUCCESS); srqmodify_fail: TNF_PROBE_1(tavor_srq_modify_fail, TAVOR_TNF_ERROR, "", tnf_string, msg, errormsg); TAVOR_TNF_EXIT(tavor_srq_modify); return (status); }
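The sizing step in tavor_srq_modify() rounds the requested SRQ depth up to a power of two: highbit(size) returns the 1-based index of the highest set bit, so subtracting one only when size is already a power of two yields ceil(log2(size)). The following stand-alone sketch (user-space C; highbit() is re-implemented locally here because the kernel routine is not available outside the DDI) illustrates just that idiom, not the driver itself:

#include <stdio.h>

static int
highbit(unsigned long long v)
{
	int h = 0;

	while (v != 0) {
		v >>= 1;
		h++;
	}
	return (h);
}

int
main(void)
{
	unsigned int sizes[] = { 1, 3, 256, 257, 1000, 1024 };
	unsigned int i;

	for (i = 0; i < sizeof (sizes) / sizeof (sizes[0]); i++) {
		unsigned int size = sizes[i];
		int log = highbit(size);

		/* highbit() overshoots by one when size is already a power of two */
		if ((size & (size - 1)) == 0)
			log--;
		printf("size %4u -> log %2d -> rounded %4u\n",
		    size, log, 1U << log);
	}
	return (0);
}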
/* ARGSUSED */ int socket_vop_getattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cr, caller_context_t *ct) { dev_t fsid; struct sonode *so; static int sonode_shift = 0; /* * Calculate the amount of bitshift to a sonode pointer which will * still keep it unique. See below. */ if (sonode_shift == 0) sonode_shift = highbit(sizeof (struct sonode)); ASSERT(sonode_shift > 0); so = VTOSO(vp); fsid = sockdev; if (so->so_version == SOV_STREAM) { /* * The imaginary "sockmod" has been popped - act * as a stream */ vap->va_type = VCHR; vap->va_mode = 0; } else { vap->va_type = vp->v_type; vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP| S_IROTH|S_IWOTH; } vap->va_uid = vap->va_gid = 0; vap->va_fsid = fsid; /* * If the va_nodeid is > MAX_USHORT, then i386 stats might fail. * So we shift down the sonode pointer to try and get the most * uniqueness into 16-bits. */ vap->va_nodeid = ((ino_t)so >> sonode_shift) & 0xFFFF; vap->va_nlink = 0; vap->va_size = 0; /* * We need to zero out the va_rdev to avoid some fstats getting * EOVERFLOW. This also mimics SunOS 4.x and BSD behavior. */ vap->va_rdev = (dev_t)0; vap->va_blksize = MAXBSIZE; vap->va_nblocks = btod(vap->va_size); if (!SOCK_IS_NONSTR(so)) { sotpi_info_t *sti = SOTOTPI(so); mutex_enter(&so->so_lock); vap->va_atime.tv_sec = sti->sti_atime; vap->va_mtime.tv_sec = sti->sti_mtime; vap->va_ctime.tv_sec = sti->sti_ctime; mutex_exit(&so->so_lock); } else { vap->va_atime.tv_sec = 0; vap->va_mtime.tv_sec = 0; vap->va_ctime.tv_sec = 0; } vap->va_atime.tv_nsec = 0; vap->va_mtime.tv_nsec = 0; vap->va_ctime.tv_nsec = 0; vap->va_seq = 0; return (0); }
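socket_vop_getattr() squeezes a sonode pointer into the 16-bit va_nodeid by first shifting it right by highbit(sizeof (struct sonode)): because sonodes are at least that large, the low-order bits of their addresses carry little distinguishing information, so discarding them keeps more useful variation in the masked result. A user-space sketch of the same idea follows; the structure size and the local highbit() are illustrative stand-ins, not kernel definitions:

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

/* Hypothetical object size; the real struct sonode is defined by the kernel. */
struct fake_sonode { char pad[448]; };

static int
highbit(uint64_t v)
{
	int h = 0;

	while (v != 0) {
		v >>= 1;
		h++;
	}
	return (h);
}

int
main(void)
{
	int shift = highbit(sizeof (struct fake_sonode));
	struct fake_sonode *objs[4];
	int i;

	for (i = 0; i < 4; i++) {
		objs[i] = malloc(sizeof (struct fake_sonode));
		/* Discard low-order address bits, then keep 16 bits of the rest. */
		printf("object %p -> 16-bit id 0x%04x (shift %d)\n",
		    (void *)objs[i],
		    (unsigned int)(((uintptr_t)objs[i] >> shift) & 0xFFFF),
		    shift);
	}
	for (i = 0; i < 4; i++)
		free(objs[i]);
	return (0);
}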
static zap_t * mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db) { zap_t *winner; zap_t *zap; int i; ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t)); zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP); rw_init(&zap->zap_rwlock, NULL, 0, 0); rw_enter(&zap->zap_rwlock, RW_WRITER); zap->zap_objset = os; zap->zap_object = obj; zap->zap_dbuf = db; if (((uint64_t *)db->db_data)[0] != ZBT_MICRO) { mutex_init(&zap->zap_f.zap_num_entries_mtx, NULL, 0, 0); zap->zap_f.zap_block_shift = highbit(db->db_size) - 1; } else { zap->zap_ismicro = TRUE; } /* * Make sure that zap_ismicro is set before we let others see * it, because zap_lockdir() checks zap_ismicro without the lock * held. */ winner = dmu_buf_set_user(db, zap, &zap->zap_m.zap_phys, zap_evict); if (winner != NULL) { #ifdef __APPLE__ if (!zap->zap_ismicro) mutex_destroy(&zap->zap_f.zap_num_entries_mtx); #endif kmem_free(zap, sizeof (zap_t)); return (winner); } if (zap->zap_ismicro) { zap->zap_salt = zap->zap_m.zap_phys->mz_salt; zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1; avl_create(&zap->zap_m.zap_avl, mze_compare, sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node)); for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i]; if (mze->mze_name[0]) { zap->zap_m.zap_num_entries++; mze_insert(zap, i, zap_hash(zap, mze->mze_name), mze); } } } else { zap->zap_salt = zap->zap_f.zap_phys->zap_salt; ASSERT3U(sizeof (struct zap_leaf_header), ==, 2*ZAP_LEAF_CHUNKSIZE); /* * The embedded pointer table should not overlap the * other members. */ ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >, &zap->zap_f.zap_phys->zap_salt); /* * The embedded pointer table should end at the end of * the block */ ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap, 1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) - (uintptr_t)zap->zap_f.zap_phys, ==, zap->zap_dbuf->db_size); } rw_exit(&zap->zap_rwlock); return (zap); }
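mzap_open() (like fzap_upgrade()) derives the ZAP block shift directly from the dbuf size with highbit(db->db_size) - 1, which is exactly log2 of the size whenever the size is a power of two, as ZFS block sizes are. A minimal sketch of that relationship, with highbit() re-implemented locally:

#include <stdio.h>
#include <stdint.h>

static int
highbit(uint64_t v)
{
	int h = 0;

	while (v != 0) {
		v >>= 1;
		h++;
	}
	return (h);
}

int
main(void)
{
	uint64_t size;

	/* Walk the usual power-of-two block sizes, 512 bytes through 128K. */
	for (size = 512; size <= 131072; size <<= 1) {
		int shift = highbit(size) - 1;

		printf("db_size %7llu -> block shift %2d (1 << %2d = %7llu)\n",
		    (unsigned long long)size, shift, shift,
		    (unsigned long long)(1ULL << shift));
	}
	return (0);
}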
static int vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, uint64_t *ashift) { spa_t *spa = vd->vdev_spa; vdev_disk_t *dvd = vd->vdev_tsd; ldi_ev_cookie_t ecookie; vdev_disk_ldi_cb_t *lcb; union { struct dk_minfo_ext ude; struct dk_minfo ud; } dks; struct dk_minfo_ext *dkmext = &dks.ude; struct dk_minfo *dkm = &dks.ud; int error; dev_t dev; int otyp; boolean_t validate_devid = B_FALSE; ddi_devid_t devid; uint64_t capacity = 0, blksz = 0, pbsize; /* * We must have a pathname, and it must be absolute. */ if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; return (SET_ERROR(EINVAL)); } /* * Reopen the device if it's not currently open. Otherwise, * just update the physical size of the device. */ if (dvd != NULL) { if (dvd->vd_ldi_offline && dvd->vd_lh == NULL) { /* * If we are opening a device in its offline notify * context, the LDI handle was just closed. Clean * up the LDI event callbacks and free vd->vdev_tsd. */ vdev_disk_free(vd); } else { ASSERT(vd->vdev_reopening); goto skip_open; } } /* * Create vd->vdev_tsd. */ vdev_disk_alloc(vd); dvd = vd->vdev_tsd; /* * When opening a disk device, we want to preserve the user's original * intent. We always want to open the device by the path the user gave * us, even if it is one of multiple paths to the same device. But we * also want to be able to survive disks being removed/recabled. * Therefore the sequence of opening devices is: * * 1. Try opening the device by path. For legacy pools without the * 'whole_disk' property, attempt to fix the path by appending 's0'. * * 2. If the devid of the device matches the stored value, return * success. * * 3. Otherwise, the device may have moved. Try opening the device * by the devid instead. */ if (vd->vdev_devid != NULL) { if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid, &dvd->vd_minor) != 0) { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; return (SET_ERROR(EINVAL)); } } error = EINVAL; /* presume failure */ if (vd->vdev_path != NULL) { if (vd->vdev_wholedisk == -1ULL) { size_t len = strlen(vd->vdev_path) + 3; char *buf = kmem_alloc(len, KM_SLEEP); (void) snprintf(buf, len, "%ss0", vd->vdev_path); error = ldi_open_by_name(buf, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); if (error == 0) { spa_strfree(vd->vdev_path); vd->vdev_path = buf; vd->vdev_wholedisk = 1ULL; } else { kmem_free(buf, len); } } /* * If we have not yet opened the device, try to open it by the * specified path. */ if (error != 0) { error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); } /* * Compare the devid to the stored value. */ if (error == 0 && vd->vdev_devid != NULL && ldi_get_devid(dvd->vd_lh, &devid) == 0) { if (ddi_devid_compare(devid, dvd->vd_devid) != 0) { error = SET_ERROR(EINVAL); (void) ldi_close(dvd->vd_lh, spa_mode(spa), kcred); dvd->vd_lh = NULL; } ddi_devid_free(devid); } /* * If we succeeded in opening the device, but 'vdev_wholedisk' * is not yet set, then this must be a slice. */ if (error == 0 && vd->vdev_wholedisk == -1ULL) vd->vdev_wholedisk = 0; } /* * If we were unable to open by path, or the devid check fails, open by * devid instead. */ if (error != 0 && vd->vdev_devid != NULL) { error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); } /* * If all else fails, then try opening by physical path (if available) * or the logical path (if we failed due to the devid check).
While not * as reliable as the devid, this will give us something, and the higher * level vdev validation will prevent us from opening the wrong device. */ if (error) { if (vd->vdev_devid != NULL) validate_devid = B_TRUE; if (vd->vdev_physpath != NULL && (dev = ddi_pathname_to_dev_t(vd->vdev_physpath)) != NODEV) error = ldi_open_by_dev(&dev, OTYP_BLK, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); /* * Note that we don't support the legacy auto-wholedisk support * as above. This hasn't been used in a very long time and we * don't need to propagate its oddities to this edge condition. */ if (error && vd->vdev_path != NULL) error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); } if (error) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; return (error); } /* * Now that the device has been successfully opened, update the devid * if necessary. */ if (validate_devid && spa_writeable(spa) && ldi_get_devid(dvd->vd_lh, &devid) == 0) { if (ddi_devid_compare(devid, dvd->vd_devid) != 0) { char *vd_devid; vd_devid = ddi_devid_str_encode(devid, dvd->vd_minor); zfs_dbgmsg("vdev %s: update devid from %s, " "to %s", vd->vdev_path, vd->vdev_devid, vd_devid); spa_strfree(vd->vdev_devid); vd->vdev_devid = spa_strdup(vd_devid); ddi_devid_str_free(vd_devid); } ddi_devid_free(devid); } /* * Once a device is opened, verify that the physical device path (if * available) is up to date. */ if (ldi_get_dev(dvd->vd_lh, &dev) == 0 && ldi_get_otyp(dvd->vd_lh, &otyp) == 0) { char *physpath, *minorname; physpath = kmem_alloc(MAXPATHLEN, KM_SLEEP); minorname = NULL; if (ddi_dev_pathname(dev, otyp, physpath) == 0 && ldi_get_minor_name(dvd->vd_lh, &minorname) == 0 && (vd->vdev_physpath == NULL || strcmp(vd->vdev_physpath, physpath) != 0)) { if (vd->vdev_physpath) spa_strfree(vd->vdev_physpath); (void) strlcat(physpath, ":", MAXPATHLEN); (void) strlcat(physpath, minorname, MAXPATHLEN); vd->vdev_physpath = spa_strdup(physpath); } if (minorname) kmem_free(minorname, strlen(minorname) + 1); kmem_free(physpath, MAXPATHLEN); } /* * Register callbacks for the LDI offline event. */ if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_OFFLINE, &ecookie) == LDI_EV_SUCCESS) { lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP); list_insert_tail(&dvd->vd_ldi_cbs, lcb); (void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie, &vdev_disk_off_callb, (void *) vd, &lcb->lcb_id); } /* * Register callbacks for the LDI degrade event. */ if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_DEGRADE, &ecookie) == LDI_EV_SUCCESS) { lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP); list_insert_tail(&dvd->vd_ldi_cbs, lcb); (void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie, &vdev_disk_dgrd_callb, (void *) vd, &lcb->lcb_id); } skip_open: /* * Determine the actual size of the device. */ if (ldi_get_size(dvd->vd_lh, psize) != 0) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; return (SET_ERROR(EINVAL)); } *max_psize = *psize; /* * Determine the device's minimum transfer size. * If the ioctl isn't supported, assume DEV_BSIZE. 
*/ if ((error = ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFOEXT, (intptr_t)dkmext, FKIOCTL, kcred, NULL)) == 0) { capacity = dkmext->dki_capacity - 1; blksz = dkmext->dki_lbsize; pbsize = dkmext->dki_pbsize; } else if ((error = ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFO, (intptr_t)dkm, FKIOCTL, kcred, NULL)) == 0) { VDEV_DEBUG( "vdev_disk_open(\"%s\"): fallback to DKIOCGMEDIAINFO\n", vd->vdev_path); capacity = dkm->dki_capacity - 1; blksz = dkm->dki_lbsize; pbsize = blksz; } else { VDEV_DEBUG("vdev_disk_open(\"%s\"): " "both DKIOCGMEDIAINFO{,EXT} calls failed, %d\n", vd->vdev_path, error); pbsize = DEV_BSIZE; } *ashift = highbit(MAX(pbsize, SPA_MINBLOCKSIZE)) - 1; if (vd->vdev_wholedisk == 1) { int wce = 1; if (error == 0) { /* * If we have the capability to expand, we'd have * found out via success from DKIOCGMEDIAINFO{,EXT}. * Adjust max_psize upward accordingly since we know * we own the whole disk now. */ *max_psize += vdev_disk_get_space(vd, capacity, blksz); zfs_dbgmsg("capacity change: vdev %s, psize %llu, " "max_psize %llu", vd->vdev_path, *psize, *max_psize); } /* * Since we own the whole disk, try to enable disk write * caching. We ignore errors because it's OK if we can't do it. */ (void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce, FKIOCTL, kcred, NULL); } /* * Clear the nowritecache bit, so that on a vdev_reopen() we will * try again. */ vd->vdev_nowritecache = B_FALSE; return (0); }
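The ashift computation in vdev_disk_open() is highbit(MAX(pbsize, SPA_MINBLOCKSIZE)) - 1, i.e. log2 of the larger of the reported physical sector size and the smallest block ZFS will use; when neither DKIOCGMEDIAINFO ioctl succeeds, pbsize falls back to DEV_BSIZE. A user-space sketch follows; the constants and highbit() are local stand-ins for the kernel definitions (both SPA_MINBLOCKSIZE and DEV_BSIZE are 512 on the platforms this code targets):

#include <stdio.h>
#include <stdint.h>

#define	SPA_MINBLOCKSIZE	512	/* smallest block ZFS will use */
#define	DEV_BSIZE		512	/* fallback sector size */
#define	MAX(a, b)		((a) > (b) ? (a) : (b))

static int
highbit(uint64_t v)
{
	int h = 0;

	while (v != 0) {
		v >>= 1;
		h++;
	}
	return (h);
}

int
main(void)
{
	/* Physical sector sizes as a disk might report them; 0 = ioctl failed. */
	uint64_t reported[] = { 512, 4096, 0 };
	unsigned int i;

	for (i = 0; i < sizeof (reported) / sizeof (reported[0]); i++) {
		uint64_t pbsize = reported[i] != 0 ? reported[i] : DEV_BSIZE;
		int ashift = highbit(MAX(pbsize, SPA_MINBLOCKSIZE)) - 1;

		printf("pbsize %4llu -> ashift %d\n",
		    (unsigned long long)pbsize, ashift);
	}
	return (0);
}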
static taskq_t * taskq_create_common(const char *name, int instance, int nthreads, pri_t pri, int minalloc, int maxalloc, uint_t flags) { taskq_t *tq = kmem_cache_alloc(taskq_cache, KM_NOSLEEP); uint_t ncpus = ((boot_max_ncpus == -1) ? max_ncpus : boot_max_ncpus); uint_t bsize; /* # of buckets - always power of 2 */ ASSERT(instance == 0); ASSERT(flags == (TASKQ_PREPOPULATE | TASKQ_NOINSTANCE)); /* * TASKQ_CPR_SAFE and TASKQ_DYNAMIC flags are mutually exclusive. */ ASSERT((flags & (TASKQ_DYNAMIC | TASKQ_CPR_SAFE)) != ((TASKQ_DYNAMIC | TASKQ_CPR_SAFE))); ASSERT(tq->tq_buckets == NULL); bsize = 1 << (highbit(ncpus) - 1); ASSERT(bsize >= 1); bsize = MIN(bsize, taskq_maxbuckets); tq->tq_maxsize = nthreads; (void) strncpy(tq->tq_name, name, TASKQ_NAMELEN + 1); tq->tq_name[TASKQ_NAMELEN] = '\0'; /* Make sure the name conforms to the rules for C identifiers */ strident_canon(tq->tq_name, TASKQ_NAMELEN); tq->tq_flags = flags | TASKQ_ACTIVE; tq->tq_active = nthreads; tq->tq_nthreads = nthreads; tq->tq_minalloc = minalloc; tq->tq_maxalloc = maxalloc; tq->tq_nbuckets = bsize; tq->tq_pri = pri; if (flags & TASKQ_PREPOPULATE) { mutex_enter(&tq->tq_lock); while (minalloc-- > 0) taskq_ent_free(tq, taskq_ent_alloc(tq, TQ_SLEEP)); mutex_exit(&tq->tq_lock); } if (nthreads == 1) { tq->tq_thread = thread_create(NULL, 0, taskq_thread, tq, 0, NULL, TS_RUN, pri); } else { kthread_t **tpp = kmem_alloc(sizeof (kthread_t *) * nthreads, KM_SLEEP); tq->tq_threadlist = tpp; mutex_enter(&tq->tq_lock); while (nthreads-- > 0) { *tpp = thread_create(NULL, 0, taskq_thread, tq, 0, NULL, TS_RUN, pri); tpp++; } mutex_exit(&tq->tq_lock); } return (tq); }
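taskq_create_common() sizes its bucket array with 1 << (highbit(ncpus) - 1), which rounds the CPU count down to a power of two before clamping it to taskq_maxbuckets (the opposite of the round-up idiom in tavor_srq_modify()). A small sketch of that computation; the cap value and highbit() are local stand-ins, not the kernel tunables:

#include <stdio.h>

#define	TASKQ_MAXBUCKETS	128	/* assumed cap, standing in for taskq_maxbuckets */
#define	MIN(a, b)		((a) < (b) ? (a) : (b))

static int
highbit(unsigned long v)
{
	int h = 0;

	while (v != 0) {
		v >>= 1;
		h++;
	}
	return (h);
}

int
main(void)
{
	unsigned int cases[] = { 1, 2, 3, 6, 8, 48, 200 };
	unsigned int i;

	for (i = 0; i < sizeof (cases) / sizeof (cases[0]); i++) {
		unsigned int ncpus = cases[i];
		/* Largest power of two not exceeding ncpus, capped at the maximum. */
		unsigned int bsize = 1U << (highbit(ncpus) - 1);

		bsize = MIN(bsize, TASKQ_MAXBUCKETS);
		printf("ncpus %3u -> %3u buckets\n", ncpus, bsize);
	}
	return (0);
}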