/*
 * Clock tick initialization is done in two phases:
 *
 * 1. Before clock_init() is called, clock_tick_init_pre() is called to set
 *    up single-threading so the clock() can begin to do its job.
 *
 * 2. After the slave CPUs are initialized at boot time, we know the number
 *    of CPUs. clock_tick_init_post() is called to set up multi-threading if
 *    required.
 */
void
clock_tick_init_pre(void)
{
	clock_tick_cpu_t	*ctp;
	int			i, n;
	clock_tick_set_t	*csp;
	uintptr_t		buf;
	size_t			size;

	clock_tick_single_threaded = 1;

	size = P2ROUNDUP(sizeof (clock_tick_cpu_t), CLOCK_TICK_ALIGN);
	buf = (uintptr_t)kmem_zalloc(size * NCPU + CLOCK_TICK_ALIGN, KM_SLEEP);
	buf = P2ROUNDUP(buf, CLOCK_TICK_ALIGN);

	/*
	 * Perform initialization in case multi-threading is chosen later.
	 */
	if (&create_softint != NULL) {
		clock_tick_intr = create_softint(LOCK_LEVEL,
		    clock_tick_execute, (caddr_t)NULL);
	}
	for (i = 0; i < NCPU; i++, buf += size) {
		ctp = (clock_tick_cpu_t *)buf;
		clock_tick_cpu[i] = ctp;
		mutex_init(&ctp->ct_lock, NULL, MUTEX_DEFAULT, NULL);
		if (&create_softint != NULL) {
			ctp->ct_intr = clock_tick_intr;
		}
		ctp->ct_pending = 0;
	}

	mutex_init(&clock_tick_lock, NULL, MUTEX_DEFAULT, NULL);

	/*
	 * Compute clock_tick_ncpus here. We need it to compute the
	 * maximum number of tick sets we need to support.
	 */
	ASSERT(clock_tick_ncpus >= 0);
	if (clock_tick_ncpus == 0)
		clock_tick_ncpus = CLOCK_TICK_NCPUS;
	if (clock_tick_ncpus > max_ncpus)
		clock_tick_ncpus = max_ncpus;

	/*
	 * Allocate and initialize the tick sets.
	 */
	n = (max_ncpus + clock_tick_ncpus - 1) / clock_tick_ncpus;
	clock_tick_set = kmem_zalloc(sizeof (clock_tick_set_t) * n, KM_SLEEP);
	for (i = 0; i < n; i++) {
		csp = &clock_tick_set[i];
		csp->ct_start = i * clock_tick_ncpus;
		csp->ct_scan = csp->ct_start;
		csp->ct_end = csp->ct_start;
	}
}
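/*
 * Every snippet in this collection leans on the power-of-two alignment
 * macros from <sys/sysmacros.h>.  Below is a minimal stand-alone sketch of
 * their usual definitions for reference; it is an illustration only (the
 * real header defines many more variants) and assumes the alignment
 * argument is a power of two.
 */
#include <stdio.h>
#include <stdint.h>

#define	P2ALIGN(x, align)	((x) & -(align))	/* round down */
#define	P2ROUNDUP(x, align)	(-(-(x) & -(align)))	/* round up */

int
main(void)
{
	uint64_t x = 1000;

	/* 1000 rounded to a 512-byte boundary: down -> 512, up -> 1024 */
	printf("%llu %llu\n",
	    (unsigned long long)P2ALIGN(x, 512ULL),
	    (unsigned long long)P2ROUNDUP(x, 512ULL));
	return (0);
}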
/*
 * Propagate the bootblock on the source disk to the destination disk and
 * version it with 'updt_str' in the process. Since we cannot trust any data
 * on the attaching disk, we do not perform any specific check on a potential
 * target extended information structure and we just blindly update.
 */
static int
propagate_bootblock(ig_data_t *source, ig_data_t *target, char *updt_str)
{
	ig_device_t	*src_device = &source->device;
	ig_device_t	*dest_device = &target->device;
	ig_stage2_t	*src_stage2 = &source->stage2;
	ig_stage2_t	*dest_stage2 = &target->stage2;
	uint32_t	buf_size;
	int		retval;

	assert(source != NULL);
	assert(target != NULL);

	/* read in stage1 from the source disk. */
	if (read_stage1_from_disk(src_device->part_fd, target->stage1_buf)
	    != BC_SUCCESS)
		return (BC_ERROR);

	/* Prepare target stage2 for commit_to_disk. */
	cleanup_stage2(dest_stage2);

	if (updt_str != NULL)
		do_version = B_TRUE;
	else
		do_version = B_FALSE;

	buf_size = src_stage2->file_size + SECTOR_SIZE;

	dest_stage2->buf_size = P2ROUNDUP(buf_size, SECTOR_SIZE);
	dest_stage2->buf = malloc(dest_stage2->buf_size);
	if (dest_stage2->buf == NULL) {
		perror(gettext("Memory allocation failed"));
		return (BC_ERROR);
	}
	dest_stage2->file = dest_stage2->buf;
	dest_stage2->file_size = src_stage2->file_size;
	memcpy(dest_stage2->file, src_stage2->file, dest_stage2->file_size);
	dest_stage2->extra = dest_stage2->buf +
	    P2ROUNDUP(dest_stage2->file_size, 8);

	/* If we get down here we do have a mboot structure. */
	assert(src_stage2->mboot);

	dest_stage2->mboot_off = src_stage2->mboot_off;
	dest_stage2->mboot = (multiboot_header_t *)(dest_stage2->buf +
	    dest_stage2->mboot_off);

	(void) fprintf(stdout, gettext("Propagating %s stage1/stage2 to %s\n"),
	    src_device->path, dest_device->path);
	retval = commit_to_disk(target, updt_str);

	return (retval);
}
/*
 * _sbrk_grow_aligned() aligns the old break to a low_align boundary,
 * adds min_size, aligns to a high_align boundary, and calls _brk_unlocked()
 * to set the new break.  The low_align-aligned value is returned, and
 * the actual space allocated is returned through actual_size.
 *
 * Unlike sbrk(2), _sbrk_grow_aligned takes an unsigned size, and does
 * not allow shrinking the heap.
 */
void *
_sbrk_grow_aligned(size_t min_size, size_t low_align, size_t high_align,
    size_t *actual_size)
{
	uintptr_t old_brk;
	uintptr_t ret_brk;
	uintptr_t high_brk;
	uintptr_t new_brk;
	int brk_result;

	if (!primary_link_map) {
		errno = ENOTSUP;
		return ((void *)-1);
	}
	if ((low_align & (low_align - 1)) != 0 ||
	    (high_align & (high_align - 1)) != 0) {
		errno = EINVAL;
		return ((void *)-1);
	}
	low_align = MAX(low_align, ALIGNSZ);
	high_align = MAX(high_align, ALIGNSZ);

	lmutex_lock(&__sbrk_lock);

	old_brk = (uintptr_t)BRKALIGN(_nd);
	ret_brk = P2ROUNDUP(old_brk, low_align);
	high_brk = ret_brk + min_size;
	new_brk = P2ROUNDUP(high_brk, high_align);

	/*
	 * Check for overflow
	 */
	if (ret_brk < old_brk || high_brk < ret_brk || new_brk < high_brk) {
		lmutex_unlock(&__sbrk_lock);
		errno = ENOMEM;
		return ((void *)-1);
	}

	if ((brk_result = _brk_unlocked((void *)new_brk)) == 0)
		_nd = (void *)new_brk;

	lmutex_unlock(&__sbrk_lock);

	if (brk_result != 0)
		return ((void *)-1);

	if (actual_size != NULL)
		*actual_size = (new_brk - ret_brk);
	return ((void *)ret_brk);
}
static void
sa_attr_iter(objset_t *os, sa_hdr_phys_t *hdr, dmu_object_type_t type,
    sa_iterfunc_t func, sa_lot_t *tab, void *userp)
{
	void *data_start;
	sa_lot_t *tb = tab;
	sa_lot_t search;
	avl_index_t loc;
	sa_os_t *sa = os->os_sa;
	int i;
	uint16_t *length_start = NULL;
	uint8_t length_idx = 0;

	if (tab == NULL) {
		search.lot_num = SA_LAYOUT_NUM(hdr, type);
		tb = avl_find(&sa->sa_layout_num_tree, &search, &loc);
		ASSERT(tb);
	}

	if (IS_SA_BONUSTYPE(type)) {
		data_start = (void *)P2ROUNDUP(((uintptr_t)hdr +
		    offsetof(sa_hdr_phys_t, sa_lengths) +
		    (sizeof (uint16_t) * tb->lot_var_sizes)), 8);
		length_start = hdr->sa_lengths;
	} else {
		data_start = hdr;
	}

	for (i = 0; i != tb->lot_attr_count; i++) {
		int attr_length, reg_length;
		uint8_t idx_len;

		reg_length = sa->sa_attr_table[tb->lot_attrs[i]].sa_length;
		if (reg_length) {
			attr_length = reg_length;
			idx_len = 0;
		} else {
			attr_length = length_start[length_idx];
			idx_len = length_idx++;
		}

		func(hdr, data_start, tb->lot_attrs[i], attr_length,
		    idx_len, reg_length == 0 ? B_TRUE : B_FALSE, userp);

		data_start = (void *)P2ROUNDUP(((uintptr_t)data_start +
		    attr_length), 8);
	}
}
static void
fmd_ckpt_resv_buf(fmd_buf_t *bp, fmd_ckpt_t *ckp)
{
	ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, _MAX_ALIGNMENT) + bp->buf_size;
	ckp->ckp_strn += strlen(bp->buf_name) + 1;
	ckp->ckp_secs++;
}
int
dtrace_getstackdepth(dtrace_mstate_t *mstate, int aframes)
{
	uintptr_t old = mstate->dtms_scratch_ptr;
	size_t size;
	struct stacktrace_state st = {
		NULL, NULL, 0, aframes, STACKTRACE_KERNEL
	};

	st.pcs = (uint64_t *)P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
	size = (uintptr_t)st.pcs - mstate->dtms_scratch_ptr +
	    aframes * sizeof(uint64_t);

	if (mstate->dtms_scratch_ptr + size >
	    mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
		return 0;
	}

	dtrace_stacktrace(&st);

	mstate->dtms_scratch_ptr = old;
	return st.depth;
}
/*ARGSUSED*/
static int
smb_open(dev_t *dp, int flag, int otyp, cred_t *cred)
{
	minor_t c;

	if (ksmbios == NULL)
		return (ENXIO);

	/*
	 * Locate and reserve a clone structure.  We skip clone 0 as that is
	 * the real minor number, and we assign a new minor to each clone.
	 */
	for (c = 1; c < smb_nclones; c++) {
		if (casptr(&smb_clones[c].c_hdl, NULL, ksmbios) == NULL)
			break;
	}

	if (c >= smb_nclones)
		return (EAGAIN);

	smb_clones[c].c_eplen = P2ROUNDUP(sizeof (smbios_entry_t), 16);
	smb_clones[c].c_stlen = smbios_buflen(smb_clones[c].c_hdl);

	*dp = makedevice(getemajor(*dp), c);

	(void) ddi_prop_update_int(*dp, smb_devi, "size",
	    smb_clones[c].c_eplen + smb_clones[c].c_stlen);

	return (0);
}
/*
 * Update cache contents upon write completion.
 */
void
vdev_cache_write(zio_t *zio)
{
	vdev_cache_t *vc = &zio->io_vd->vdev_cache;
	vdev_cache_entry_t *ve, ve_search;
	uint64_t io_start = zio->io_offset;
	uint64_t io_end = io_start + zio->io_size;
	uint64_t min_offset = P2ALIGN(io_start, VCBS);
	uint64_t max_offset = P2ROUNDUP(io_end, VCBS);
	avl_index_t where;

	ASSERT(zio->io_type == ZIO_TYPE_WRITE);

	mutex_enter(&vc->vc_lock);

	ve_search.ve_offset = min_offset;
	ve = avl_find(&vc->vc_offset_tree, &ve_search, &where);

	if (ve == NULL)
		ve = avl_nearest(&vc->vc_offset_tree, where, AVL_AFTER);

	while (ve != NULL && ve->ve_offset < max_offset) {
		uint64_t start = MAX(ve->ve_offset, io_start);
		uint64_t end = MIN(ve->ve_offset + VCBS, io_end);

		if (ve->ve_fill_io != NULL) {
			ve->ve_missed_update = 1;
		} else {
			bcopy((char *)zio->io_data + start - io_start,
			    ve->ve_data + start - ve->ve_offset, end - start);
		}
		ve = AVL_NEXT(&vc->vc_offset_tree, ve);
	}
	mutex_exit(&vc->vc_lock);
}
static fcf_secidx_t
fmd_ckpt_section(fmd_ckpt_t *ckp, const void *data, uint_t type, uint64_t size)
{
	const fmd_ckpt_desc_t *dp;

	ASSERT(type < sizeof (_fmd_ckpt_sections) / sizeof (fmd_ckpt_desc_t));
	dp = &_fmd_ckpt_sections[type];

	ckp->ckp_ptr = (uchar_t *)
	    P2ROUNDUP((uintptr_t)ckp->ckp_ptr, dp->secd_align);

	ckp->ckp_secp->fcfs_type = type;
	ckp->ckp_secp->fcfs_align = dp->secd_align;
	ckp->ckp_secp->fcfs_flags = 0;
	ckp->ckp_secp->fcfs_entsize = dp->secd_entsize;
	ckp->ckp_secp->fcfs_offset = (size_t)(ckp->ckp_ptr - ckp->ckp_buf);
	ckp->ckp_secp->fcfs_size = size;

	/*
	 * If the data pointer is non-NULL, copy the data to our buffer; else
	 * the caller is responsible for doing so and updating ckp->ckp_ptr.
	 */
	if (data != NULL) {
		bcopy(data, ckp->ckp_ptr, size);
		ckp->ckp_ptr += size;
	}

	ckp->ckp_secp++;
	return (ckp->ckp_secs++);
}
/*ARGSUSED*/
void
mach_cpucontext_free(struct cpu *cp, void *arg, int err)
{
	struct cpu_tables *ct = arg;

	ASSERT(&ct->ct_tss == cp->cpu_tss);

	switch (err) {
	case 0:
		break;
	case ETIMEDOUT:
		/*
		 * The processor was poked, but failed to start before
		 * we gave up waiting for it.  In case it starts later,
		 * don't free anything.
		 */
		break;
	default:
		/*
		 * Some other, passive, error occurred.
		 */
		kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
		cp->cpu_tss = NULL;
		break;
	}
}
void
flowadv_init(void)
{
	STAILQ_INIT(&fadv_list);

	/* Setup lock group and attribute for fadv_lock */
	fadv_lock_grp_attr = lck_grp_attr_alloc_init();
	fadv_lock_grp = lck_grp_alloc_init("fadv_lock", fadv_lock_grp_attr);
	lck_mtx_init(&fadv_lock, fadv_lock_grp, NULL);

	fadv_zone_size = P2ROUNDUP(sizeof (struct flowadv_fcentry),
	    sizeof (u_int64_t));
	fadv_zone = zinit(fadv_zone_size,
	    FADV_ZONE_MAX * fadv_zone_size, 0, FADV_ZONE_NAME);
	if (fadv_zone == NULL) {
		panic("%s: failed allocating %s", __func__, FADV_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(fadv_zone, Z_EXPAND, TRUE);
	zone_change(fadv_zone, Z_CALLERACCT, FALSE);

	if (kernel_thread_start(flowadv_thread_func, NULL,
	    &fadv_thread) != KERN_SUCCESS) {
		panic("%s: couldn't create flow event advisory thread",
		    __func__);
		/* NOTREACHED */
	}
	thread_deallocate(fadv_thread);
}
static void
fmd_ckpt_resv(fmd_ckpt_t *ckp, size_t size, size_t align)
{
	if (size != 0) {
		ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, align) + size;
		ckp->ckp_secs++;
	}
}
static void
sem_dtor(kipc_perm_t *perm)
{
	ksemid_t *sp = (ksemid_t *)perm;

	kmem_free(sp->sem_base,
	    P2ROUNDUP(sp->sem_nsems * sizeof (struct sem), 64));
	list_destroy(&sp->sem_undos);
}
static void
trim_map_vdev_commit(spa_t *spa, zio_t *zio, vdev_t *vd)
{
	trim_map_t *tm = vd->vdev_trimmap;
	trim_seg_t *ts;
	uint64_t size, offset, txgtarget, txgsafe;
	int64_t hard, soft;
	hrtime_t timelimit;

	ASSERT(vd->vdev_ops->vdev_op_leaf);

	if (tm == NULL)
		return;

	timelimit = gethrtime() - (hrtime_t)trim_timeout * NANOSEC;
	if (vd->vdev_isl2cache) {
		txgsafe = UINT64_MAX;
		txgtarget = UINT64_MAX;
	} else {
		txgsafe = MIN(spa_last_synced_txg(spa), spa_freeze_txg(spa));
		if (txgsafe > trim_txg_delay)
			txgtarget = txgsafe - trim_txg_delay;
		else
			txgtarget = 0;
	}

	mutex_enter(&tm->tm_lock);
	hard = 0;
	if (tm->tm_pending > trim_vdev_max_pending)
		hard = (tm->tm_pending - trim_vdev_max_pending) / 4;
	soft = P2ROUNDUP(hard + tm->tm_pending / trim_timeout + 1, 64);
	/* Loop until we have sent all outstanding free's */
	while (soft > 0 &&
	    (ts = trim_map_first(tm, txgtarget, txgsafe, timelimit,
	    hard > 0)) != NULL) {
		TRIM_MAP_REM(tm, ts);
		avl_remove(&tm->tm_queued_frees, ts);
		avl_add(&tm->tm_inflight_frees, ts);
		size = ts->ts_end - ts->ts_start;
		offset = ts->ts_start;
		/*
		 * We drop the lock while we call zio_nowait as the IO
		 * scheduler can result in a different IO being run e.g.
		 * a write which would result in a recursive lock.
		 */
		mutex_exit(&tm->tm_lock);
		zio_nowait(zio_trim(zio, spa, vd, offset, size));
		soft -= TRIM_MAP_SEGS(size);
		hard -= TRIM_MAP_SEGS(size);
		mutex_enter(&tm->tm_lock);
	}
	mutex_exit(&tm->tm_lock);
}
static void
zvol_discard(void *arg)
{
	struct request *req = (struct request *)arg;
	struct request_queue *q = req->q;
	zvol_state_t *zv = q->queuedata;
	uint64_t start = blk_rq_pos(req) << 9;
	uint64_t end = start + blk_rq_bytes(req);
	int error;
	rl_t *rl;

	/*
	 * Annotate this call path with a flag that indicates that it is
	 * unsafe to use KM_SLEEP during memory allocations due to the
	 * potential for a deadlock.  KM_PUSHPAGE should be used instead.
	 */
	ASSERT(!(current->flags & PF_NOFS));
	current->flags |= PF_NOFS;

	if (end > zv->zv_volsize) {
		blk_end_request(req, -EIO, blk_rq_bytes(req));
		goto out;
	}

	/*
	 * Align the request to volume block boundaries. If we don't,
	 * then this will force dnode_free_range() to zero out the
	 * unaligned parts, which is slow (read-modify-write) and
	 * useless since we are not freeing any space by doing so.
	 */
	start = P2ROUNDUP(start, zv->zv_volblocksize);
	end = P2ALIGN(end, zv->zv_volblocksize);

	if (start >= end) {
		blk_end_request(req, 0, blk_rq_bytes(req));
		goto out;
	}

	rl = zfs_range_lock(&zv->zv_znode, start, end - start, RL_WRITER);

	error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, start,
	    end - start);

	/*
	 * TODO: maybe we should add the operation to the log.
	 */

	zfs_range_unlock(rl);

	blk_end_request(req, -error, blk_rq_bytes(req));
out:
	current->flags &= ~PF_NOFS;
}
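/*
 * The several zvol_discard() variants in this collection all shrink a
 * discard to whole volume blocks before freeing anything.  The stand-alone
 * sketch below illustrates that trimming with made-up numbers (an 8 KiB
 * volblocksize and the byte range are assumptions, not values from the ZFS
 * sources).
 */
#include <assert.h>
#include <stdint.h>

#define	P2ALIGN(x, align)	((x) & -(align))
#define	P2ROUNDUP(x, align)	(-(-(x) & -(align)))

int
main(void)
{
	uint64_t volblocksize = 8192;
	uint64_t start = 4096;		/* unaligned discard [4 KiB, 20 KiB) */
	uint64_t end = 20480;

	/* Only the fully covered block [8 KiB, 16 KiB) is actually freed. */
	start = P2ROUNDUP(start, volblocksize);
	end = P2ALIGN(end, volblocksize);
	assert(start == 8192 && end == 16384);
	return (0);
}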
/*ARGSUSED*/
static int
smb_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
    uint_t prot, uint_t maxprot, uint_t flags, cred_t *cred)
{
	smb_clone_t *cp = &smb_clones[getminor(dev)];
	size_t alen = P2ROUNDUP(len, PAGESIZE);
	caddr_t addr;

	iovec_t iov;
	uio_t uio;
	int err;

	if (len <= 0 || (flags & MAP_FIXED))
		return (EINVAL);

	if ((prot & PROT_WRITE) && (flags & MAP_SHARED))
		return (EACCES);

	if (off < 0 || off + len < off || off + len > cp->c_eplen + cp->c_stlen)
		return (ENXIO);

	as_rangelock(as);
	map_addr(&addr, alen, 0, 1, 0);

	if (addr != NULL)
		err = as_map(as, addr, alen, segvn_create, zfod_argsp);
	else
		err = ENOMEM;

	as_rangeunlock(as);
	*addrp = addr;

	if (err != 0)
		return (err);

	iov.iov_base = addr;
	iov.iov_len = len;
	bzero(&uio, sizeof (uio_t));
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = off;
	uio.uio_segflg = UIO_USERSPACE;
	uio.uio_extflg = UIO_COPY_DEFAULT;
	uio.uio_resid = len;

	if ((err = smb_uiomove(cp, &uio)) != 0)
		(void) as_unmap(as, addr, alen);

	return (err);
}
static int
zvol_discard(struct bio *bio)
{
	zvol_state_t *zv = bio->bi_bdev->bd_disk->private_data;
	uint64_t start = BIO_BI_SECTOR(bio) << 9;
	uint64_t size = BIO_BI_SIZE(bio);
	uint64_t end = start + size;
	int error;
	rl_t *rl;
	dmu_tx_t *tx;

	ASSERT(zv && zv->zv_open_count > 0);

	if (end > zv->zv_volsize)
		return (SET_ERROR(EIO));

	/*
	 * Align the request to volume block boundaries when REQ_SECURE is
	 * available, but not requested. If we don't, then this will force
	 * dnode_free_range() to zero out the unaligned parts, which is slow
	 * (read-modify-write) and useless since we are not freeing any space
	 * by doing so. Kernels that do not support REQ_SECURE (2.6.32 through
	 * 2.6.35) will not receive this optimization.
	 */
#ifdef REQ_SECURE
	if (!(bio->bi_rw & REQ_SECURE)) {
		start = P2ROUNDUP(start, zv->zv_volblocksize);
		end = P2ALIGN(end, zv->zv_volblocksize);
		size = end - start;
	}
#endif

	if (start >= end)
		return (0);

	rl = zfs_range_lock(&zv->zv_znode, start, size, RL_WRITER);
	tx = dmu_tx_create(zv->zv_objset);
	dmu_tx_mark_netfree(tx);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error != 0) {
		dmu_tx_abort(tx);
	} else {
		zvol_log_truncate(zv, tx, start, size, B_TRUE);
		dmu_tx_commit(tx);
		error = dmu_free_long_range(zv->zv_objset,
		    ZVOL_OBJ, start, size);
	}

	zfs_range_unlock(rl);

	return (error);
}
kthread_t *
zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg,
    uint64_t len, proc_t *pp, int state, pri_t pri, int detachstate)
{
	kthread_t *kt;
	pthread_attr_t attr;
	char *stkstr;

	ASSERT0(state & ~TS_RUN);
	ASSERT0(len);

	kt = umem_zalloc(sizeof (kthread_t), UMEM_NOFAIL);
	kt->t_func = func;
	kt->t_arg = arg;
	kt->t_pri = pri;

	VERIFY0(pthread_attr_init(&attr));
	VERIFY0(pthread_attr_setdetachstate(&attr, detachstate));

	/*
	 * We allow the default stack size in user space to be specified by
	 * setting the ZFS_STACK_SIZE environment variable.  This allows us
	 * the convenience of observing and debugging stack overruns in
	 * user space.  Explicitly specified stack sizes will be honored.
	 * The usage of ZFS_STACK_SIZE is discussed further in the
	 * ENVIRONMENT VARIABLES sections of the ztest(1) man page.
	 */
	if (stksize == 0) {
		stkstr = getenv("ZFS_STACK_SIZE");

		if (stkstr == NULL)
			stksize = TS_STACK_MAX;
		else
			stksize = MAX(atoi(stkstr), TS_STACK_MIN);
	}

	VERIFY3S(stksize, >, 0);
	stksize = P2ROUNDUP(MAX(stksize, TS_STACK_MIN), PAGESIZE);

	/*
	 * If this ever fails, it may be because the stack size is not a
	 * multiple of system page size.
	 */
	VERIFY0(pthread_attr_setstacksize(&attr, stksize));
	VERIFY0(pthread_attr_setguardsize(&attr, PAGESIZE));

	VERIFY0(pthread_create(&kt->t_tid, &attr, &zk_thread_helper, kt));
	VERIFY0(pthread_attr_destroy(&attr));

	return (kt);
}
/*
 * Default asize function: return the MAX of psize with the asize of
 * all children.  This is what's used by anything other than RAID-Z.
 */
uint64_t
vdev_default_asize(vdev_t *vd, uint64_t psize)
{
	uint64_t asize = P2ROUNDUP(psize, 1ULL << vd->vdev_top->vdev_ashift);
	uint64_t csize;
	uint64_t c;

	for (c = 0; c < vd->vdev_children; c++) {
		csize = vdev_psize_to_asize(vd->vdev_child[c], psize);
		asize = MAX(asize, csize);
	}

	return (asize);
}
static void
fmd_ckpt_resv_case(fmd_ckpt_t *ckp, fmd_case_t *cp)
{
	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
	fmd_case_susp_t *cis;
	uint_t n;

	if (cip->ci_xprt != NULL)
		return; /* do not checkpoint cases from remote transports */

	n = fmd_buf_hash_count(&cip->ci_bufs);
	fmd_buf_hash_apply(&cip->ci_bufs, (fmd_buf_f *)fmd_ckpt_resv_buf, ckp);
	fmd_ckpt_resv(ckp, sizeof (fcf_buf_t) * n, sizeof (uint32_t));

	if (cip->ci_principal != NULL)
		fmd_ckpt_resv(ckp, sizeof (fcf_event_t), sizeof (uint64_t));

	fmd_ckpt_resv(ckp,
	    sizeof (fcf_event_t) * cip->ci_nitems, sizeof (uint64_t));

	if (cip->ci_nsuspects != 0)
		ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, sizeof (uint64_t));

	cip->ci_nvsz = 0; /* compute size of packed suspect nvlist array */

	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
		size_t nvsize = 0;

		(void) nvlist_size(cis->cis_nvl, &nvsize, NV_ENCODE_NATIVE);
		cip->ci_nvsz += sizeof (fcf_nvl_t) + nvsize;
		cip->ci_nvsz = P2ROUNDUP(cip->ci_nvsz, sizeof (uint64_t));
	}

	fmd_ckpt_resv(ckp, cip->ci_nvsz, sizeof (uint64_t));
	fmd_ckpt_resv(ckp, sizeof (fcf_case_t), sizeof (uint32_t));
	ckp->ckp_strn += strlen(cip->ci_uuid) + 1;
}
static void
add_stage2_einfo(ig_stage2_t *stage2, char *updt_str)
{
	bblk_hs_t	hs;
	uint32_t	avail_space;

	assert(stage2 != NULL);

	/* Fill bootblock hashing source information. */
	hs.src_buf = (unsigned char *)stage2->file;
	hs.src_size = stage2->file_size;
	/* How much space for the extended information structure? */
	avail_space = stage2->buf_size - P2ROUNDUP(stage2->file_size, 8);
	add_einfo(stage2->extra, updt_str, &hs, avail_space);
}
static int
zvol_discard(struct bio *bio)
{
	zvol_state_t *zv = bio->bi_bdev->bd_disk->private_data;
	uint64_t start = BIO_BI_SECTOR(bio) << 9;
	uint64_t size = BIO_BI_SIZE(bio);
	uint64_t end = start + size;
	int error;
	rl_t *rl;
	dmu_tx_t *tx;

	ASSERT(zv && zv->zv_open_count > 0);

	if (end > zv->zv_volsize)
		return (SET_ERROR(EIO));

	/*
	 * Align the request to volume block boundaries when a secure erase is
	 * not required.  This will prevent dnode_free_range() from zeroing out
	 * the unaligned parts which is slow (read-modify-write) and useless
	 * since we are not freeing any space by doing so.
	 */
	if (!bio_is_secure_erase(bio)) {
		start = P2ROUNDUP(start, zv->zv_volblocksize);
		end = P2ALIGN(end, zv->zv_volblocksize);
		size = end - start;
	}

	if (start >= end)
		return (0);

	rl = zfs_range_lock(&zv->zv_range_lock, start, size, RL_WRITER);
	tx = dmu_tx_create(zv->zv_objset);
	dmu_tx_mark_netfree(tx);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error != 0) {
		dmu_tx_abort(tx);
	} else {
		zvol_log_truncate(zv, tx, start, size, B_TRUE);
		dmu_tx_commit(tx);
		error = dmu_free_long_range(zv->zv_objset,
		    ZVOL_OBJ, start, size);
	}

	zfs_range_unlock(rl);

	return (error);
}
static void
fmd_ckpt_save_nvlist(fmd_ckpt_t *ckp, nvlist_t *nvl)
{
	fcf_nvl_t *fcfn = (void *)ckp->ckp_ptr;
	char *nvbuf = (char *)ckp->ckp_ptr + sizeof (fcf_nvl_t);
	size_t nvsize = 0;

	(void) nvlist_size(nvl, &nvsize, NV_ENCODE_NATIVE);
	fcfn->fcfn_size = (uint64_t)nvsize;

	(void) nvlist_pack(nvl, &nvbuf, &nvsize, NV_ENCODE_NATIVE, 0);
	ckp->ckp_ptr += sizeof (fcf_nvl_t) + nvsize;

	ckp->ckp_ptr = (uchar_t *)
	    P2ROUNDUP((uintptr_t)ckp->ckp_ptr, sizeof (uint64_t));
}
/*ARGSUSED*/
kthread_t *
zk_thread_create(void (*func)(void *), void *arg, size_t stksize, int state)
{
	pthread_attr_t attr;
	pthread_t tid;
	char *stkstr;
	int detachstate = PTHREAD_CREATE_DETACHED;

	VERIFY0(pthread_attr_init(&attr));

	if (state & TS_JOINABLE)
		detachstate = PTHREAD_CREATE_JOINABLE;

	VERIFY0(pthread_attr_setdetachstate(&attr, detachstate));

	/*
	 * We allow the default stack size in user space to be specified by
	 * setting the ZFS_STACK_SIZE environment variable.  This allows us
	 * the convenience of observing and debugging stack overruns in
	 * user space.  Explicitly specified stack sizes will be honored.
	 * The usage of ZFS_STACK_SIZE is discussed further in the
	 * ENVIRONMENT VARIABLES sections of the ztest(1) man page.
	 */
	if (stksize == 0) {
		stkstr = getenv("ZFS_STACK_SIZE");

		if (stkstr == NULL)
			stksize = TS_STACK_MAX;
		else
			stksize = MAX(atoi(stkstr), TS_STACK_MIN);
	}

	VERIFY3S(stksize, >, 0);
	stksize = P2ROUNDUP(MAX(stksize, TS_STACK_MIN), PAGESIZE);

	/*
	 * If this ever fails, it may be because the stack size is not a
	 * multiple of system page size.
	 */
	VERIFY0(pthread_attr_setstacksize(&attr, stksize));
	VERIFY0(pthread_attr_setguardsize(&attr, PAGESIZE));

	VERIFY0(pthread_create(&tid, &attr, (void *(*)(void *))func, arg));
	VERIFY0(pthread_attr_destroy(&attr));

	return ((void *)(uintptr_t)tid);
}
int
main(void)
{
	pstatus_t status;
	void *buf;

	/*
	 * The alignment must itself be word aligned, i.e. a multiple of
	 * sizeof (void *).
	 */
	VERIFY3P(aligned_alloc(sizeof (void *) - 1, 16), ==, NULL);
	VERIFY3S(errno, ==, EINVAL);

	VERIFY3P(aligned_alloc(sizeof (void *) + 1, 16), ==, NULL);
	VERIFY3S(errno, ==, EINVAL);

	VERIFY3P(aligned_alloc(23, 16), ==, NULL);
	VERIFY3S(errno, ==, EINVAL);

	buf = aligned_alloc(sizeof (void *), 16);
	VERIFY3P(buf, !=, NULL);
	free(buf);

	/*
	 * Cause ENOMEM
	 */
	VERIFY0(proc_get_status(getpid(), &status));
	VERIFY3P(mmap((caddr_t)P2ROUNDUP(status.pr_brkbase +
	    status.pr_brksize, 0x1000), 0x1000, PROT_READ,
	    MAP_ANON | MAP_FIXED | MAP_PRIVATE, -1, 0), !=, (void *)-1);

	for (;;) {
		if (malloc(16) == NULL)
			break;
	}

	for (;;) {
		if (aligned_alloc(sizeof (void *), 16) == NULL)
			break;
	}

	VERIFY3P(aligned_alloc(sizeof (void *), 16), ==, NULL);
	VERIFY3S(errno, ==, ENOMEM);

	return (0);
}
/*
 * Copy in a memory list from boot to kernel, with a filter function
 * to remove pages. The filter function can increase the address and/or
 * decrease the size to filter out pages.  It will also align addresses and
 * sizes to PAGESIZE.
 */
void
copy_memlist_filter(
	struct memlist *src,
	struct memlist **dstp,
	void (*filter)(uint64_t *, uint64_t *))
{
	struct memlist *dst, *prev;
	uint64_t addr;
	uint64_t size;
	uint64_t eaddr;

	dst = *dstp;
	prev = dst;

	/*
	 * Move through the memlist applying a filter against
	 * each range of memory. Note that we may apply the
	 * filter multiple times against each memlist entry.
	 */
	for (; src; src = src->ml_next) {
		addr = P2ROUNDUP(src->ml_address, PAGESIZE);
		eaddr = P2ALIGN(src->ml_address + src->ml_size, PAGESIZE);
		while (addr < eaddr) {
			size = eaddr - addr;
			if (filter != NULL)
				filter(&addr, &size);
			if (size == 0)
				break;

			dst->ml_address = addr;
			dst->ml_size = size;
			dst->ml_next = 0;
			if (prev == dst) {
				dst->ml_prev = 0;
				dst++;
			} else {
				dst->ml_prev = prev;
				prev->ml_next = dst;
				dst++;
				prev++;
			}
			addr += size;
		}
	}

	*dstp = dst;
}
static void
zvol_discard(void *arg)
{
	struct request *req = (struct request *)arg;
	struct request_queue *q = req->q;
	zvol_state_t *zv = q->queuedata;
	fstrans_cookie_t cookie = spl_fstrans_mark();
	uint64_t start = blk_rq_pos(req) << 9;
	uint64_t end = start + blk_rq_bytes(req);
	int error;
	rl_t *rl;

	if (end > zv->zv_volsize) {
		error = EIO;
		goto out;
	}

	/*
	 * Align the request to volume block boundaries. If we don't,
	 * then this will force dnode_free_range() to zero out the
	 * unaligned parts, which is slow (read-modify-write) and
	 * useless since we are not freeing any space by doing so.
	 */
	start = P2ROUNDUP(start, zv->zv_volblocksize);
	end = P2ALIGN(end, zv->zv_volblocksize);

	if (start >= end) {
		error = 0;
		goto out;
	}

	rl = zfs_range_lock(&zv->zv_znode, start, end - start, RL_WRITER);

	error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, start, end-start);

	/*
	 * TODO: maybe we should add the operation to the log.
	 */
	zfs_range_unlock(rl);
out:
	blk_end_request(req, -error, blk_rq_bytes(req));
	spl_fstrans_unmark(cookie);
}
static int
zvol_discard(struct bio *bio)
{
	zvol_state_t *zv = bio->bi_bdev->bd_disk->private_data;
	uint64_t start = BIO_BI_SECTOR(bio) << 9;
	uint64_t size = BIO_BI_SIZE(bio);
	uint64_t end = start + size;
	int error;
	rl_t *rl;

	if (end > zv->zv_volsize)
		return (SET_ERROR(EIO));

	/*
	 * Align the request to volume block boundaries when REQ_SECURE is
	 * available, but not requested. If we don't, then this will force
	 * dnode_free_range() to zero out the unaligned parts, which is slow
	 * (read-modify-write) and useless since we are not freeing any space
	 * by doing so. Kernels that do not support REQ_SECURE (2.6.32 through
	 * 2.6.35) will not receive this optimization.
	 */
#ifdef REQ_SECURE
	if (!(bio->bi_rw & REQ_SECURE)) {
		start = P2ROUNDUP(start, zv->zv_volblocksize);
		end = P2ALIGN(end, zv->zv_volblocksize);
		size = end - start;
	}
#endif

	if (start >= end)
		return (0);

	rl = zfs_range_lock(&zv->zv_znode, start, size, RL_WRITER);

	error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, start, size);

	/*
	 * TODO: maybe we should add the operation to the log.
	 */

	zfs_range_unlock(rl);

	return (error);
}
static ctf_id_t
ctf_add_encoded(ctf_file_t *fp, uint_t flag,
    const char *name, const ctf_encoding_t *ep, uint_t kind)
{
	ctf_dtdef_t *dtd;
	ctf_id_t type;

	if (ep == NULL)
		return (ctf_set_errno(fp, EINVAL));

	if ((type = ctf_add_generic(fp, flag, name, &dtd)) == CTF_ERR)
		return (CTF_ERR); /* errno is set for us */

	dtd->dtd_data.ctt_info = CTF_TYPE_INFO(kind, flag, 0);
	dtd->dtd_data.ctt_size = clp2(P2ROUNDUP(ep->cte_bits, NBBY) / NBBY);
	dtd->dtd_u.dtu_enc = *ep;

	return (type);
}
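/*
 * The ctt_size computation above rounds a bit width up to whole bytes
 * (P2ROUNDUP(bits, NBBY) / NBBY) and then up to the next power of two.
 * A stand-alone sketch follows; the clp2() reimplementation here is a
 * hypothetical stand-in for illustration (the real helper lives in the
 * CTF library), not the library's code.
 */
#include <assert.h>
#include <stddef.h>

#define	NBBY	8
#define	P2ROUNDUP(x, align)	(-(-(x) & -(align)))

static size_t
clp2(size_t x)
{
	size_t p;

	/* smallest power of two that is >= x */
	for (p = 1; p < x; p <<= 1)
		continue;
	return (p);
}

int
main(void)
{
	/* A 17-bit field occupies 3 bytes, which clp2() pads to 4. */
	assert(clp2(P2ROUNDUP(17U, (unsigned)NBBY) / NBBY) == 4);
	return (0);
}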
int
smbios_write(smbios_hdl_t *shp, int fd)
{
	smbios_entry_t ep;
	off64_t off = lseek64(fd, 0, SEEK_CUR) + P2ROUNDUP(sizeof (ep), 16);

	if (off > UINT32_MAX)
		return (smb_set_errno(shp, EOVERFLOW));

	bcopy(&shp->sh_ent, &ep, sizeof (ep));
	ep.smbe_staddr = (uint32_t)off;
	smbios_checksum(shp, &ep);

	if (smbios_xwrite(shp, fd, &ep, sizeof (ep)) == -1 ||
	    lseek64(fd, off, SEEK_SET) != off ||
	    smbios_xwrite(shp, fd, shp->sh_buf, shp->sh_buflen) == -1)
		return (-1);

	return (0);
}