/* * Initialize the buffer I/O system by freeing * all buffers and setting all device hash buffer lists to empty. */ void binit(void) { struct buf *bp; unsigned int i, pct; ulong_t bio_max_hwm, bio_default_hwm; /* * Maximum/Default values for bufhwm are set to the smallest of: * - BIO_MAX_PERCENT resp. BIO_BUF_PERCENT of real memory * - 1/4 of kernel virtual memory * - INT32_MAX to prevent overflows of v.v_bufhwm (which is int). * Additionally, in order to allow simple tuning by percentage of * physical memory, bufhwm_pct is used to calculate the default if * the value of this tunable is between 0 and BIO_MAX_PERCENT. * * Since the unit for v.v_bufhwm is kilobytes, this allows for * a maximum of 1024 * 2GB == 2TB memory usage by buffer headers. */ bio_max_hwm = MIN(physmem / BIO_MAX_PERCENT, btop(vmem_size(heap_arena, VMEM_FREE)) / 4) * (PAGESIZE / 1024); bio_max_hwm = MIN(INT32_MAX, bio_max_hwm); pct = BIO_BUF_PERCENT; if (bufhwm_pct != 0 && ((pct = 100 / bufhwm_pct) < BIO_MAX_PERCENT)) { pct = BIO_BUF_PERCENT; /* * Invalid user specified value, emit a warning. */ cmn_err(CE_WARN, "binit: bufhwm_pct(%d) out of \ range(1..%d). Using %d as default.", bufhwm_pct, 100 / BIO_MAX_PERCENT, 100 / BIO_BUF_PERCENT); }
/* * This test creates N threads with a shared kmem cache. They then all * concurrently allocate and free from the cache to stress the locking and * concurrent cache performance. If any one test takes longer than 5 * seconds to complete it is treated as a failure and may indicate a * performance regression. On my test system no one test takes more * than 1 second to complete so a 5x slowdown likely a problem. */ static int splat_kmem_test10(struct file *file, void *arg) { uint64_t size, alloc, maxsize, limit, rc = 0; #if defined(CONFIG_64BIT) maxsize = (1024 * 1024); #else maxsize = (128 * 1024); #endif for (size = 32; size <= maxsize; size *= 2) { splat_vprint(file, SPLAT_KMEM_TEST10_NAME, "%-22s %s", "name", "time (sec)\tslabs \tobjs \thash\n"); splat_vprint(file, SPLAT_KMEM_TEST10_NAME, "%-22s %s", "", " \ttot/max/calc\ttot/max/calc\n"); for (alloc = 1; alloc <= 1024; alloc *= 2) { /* Skip tests which exceed 1/2 of memory. */ limit = MIN(physmem * PAGE_SIZE, vmem_size(NULL, VMEM_ALLOC | VMEM_FREE)) / 2; if (size * alloc * SPLAT_KMEM_THREADS > limit) continue; rc = splat_kmem_cache_thread_test(file, arg, SPLAT_KMEM_TEST10_NAME, size, alloc, 5); if (rc) break; } } return rc; }
/*
 * Sysctl handler: report the number of free bytes in kmem_arena.
 */
static int
sysctl_kmem_map_free(SYSCTL_HANDLER_ARGS)
{
	u_long avail;

	avail = vmem_size(kmem_arena, VMEM_FREE);

	return (sysctl_handle_long(oidp, &avail, 0, req));
}
/*
 * segkmem_alloc_lpi() imports virtual memory from the large page heap
 * arena into the kmem_lp arena.  In the process it maps the imported
 * segment with large pages.
 */
static void *
segkmem_alloc_lpi(vmem_t *vmp, size_t size, int vmflag)
{
	segkmem_lpcb_t *lpcb = &segkmem_lpcb;

	ASSERT(size != 0);
	ASSERT(vmp == heap_lp_arena);

	/* Do not allow the large page heap to grow beyond its limit. */
	if (vmem_size(vmp, VMEM_ALLOC) >= segkmem_kmemlp_max) {
		lpcb->allocs_limited++;
		return (NULL);
	}

	return (segkmem_xalloc_lp(vmp, NULL, size, vmflag, 0,
	    segkmem_page_create_large, NULL));
}
/* * Functions to allocate node id's starting from 1. Based on vmem routines. * The vmem arena is extended in NM_INOQUANT chunks. */ uint64_t namenodeno_alloc(void) { uint64_t nno; mutex_enter(&nm_inolock); nno = (uint64_t)(uintptr_t) vmem_alloc(nm_inoarena, 1, VM_NOSLEEP + VM_FIRSTFIT); if (nno == 0) { (void) vmem_add(nm_inoarena, (void *)(vmem_size(nm_inoarena, VMEM_ALLOC | VMEM_FREE) + 1), NM_INOQUANT, VM_SLEEP); nno = (uint64_t)(uintptr_t) vmem_alloc(nm_inoarena, 1, VM_SLEEP + VM_FIRSTFIT); ASSERT(nno != 0); } mutex_exit(&nm_inolock); ASSERT32(nno <= ULONG_MAX); return (nno); }
static void
segkmem_dump_range(void *arg, void *start, size_t size)
{
	caddr_t lo = start;
	caddr_t hi = lo + size;
	int sparse_lp_heap;

	/*
	 * If we are about to start dumping the range of addresses we
	 * carved out of the kernel heap for the large page heap, walk
	 * heap_lp_arena instead to find which segments are actually
	 * populated.
	 */
	sparse_lp_heap = SEGKMEM_USE_LARGEPAGES &&
	    lo == heap_lp_base && hi == heap_lp_end &&
	    vmem_size(heap_lp_arena, VMEM_ALLOC) < size;

	if (sparse_lp_heap) {
		vmem_walk(heap_lp_arena, VMEM_ALLOC | VMEM_REENTRANT,
		    segkmem_xdump_range, arg);
	} else {
		segkmem_xdump_range(arg, start, size);
	}
}
/*
 * Check vmem_size() behavior by acquiring the alloc/free/total vmem
 * space, then allocate a known buffer size from vmem space.  We can
 * then check that vmem_size() values were updated properly within
 * a fairly small tolerance.  The tolerance is important because we
 * are not the only vmem consumer on the system.  Other unrelated
 * allocations might occur during the small test window.  The vmem
 * allocation itself may also add in a little extra private space to
 * the buffer.  Finally, verify total space always remains unchanged.
 */
static int
splat_kmem_test12(struct file *file, void *arg)
{
	size_t alloc1, free1, total1;
	size_t alloc2, free2, total2;
	int size = 8*1024*1024;		/* 8 MiB test allocation */
	void *ptr;

	/* Baseline readings before the test allocation. */
	alloc1 = vmem_size(NULL, VMEM_ALLOC);
	free1 = vmem_size(NULL, VMEM_FREE);
	total1 = vmem_size(NULL, VMEM_ALLOC | VMEM_FREE);
	splat_vprint(file, SPLAT_KMEM_TEST12_NAME, "Vmem alloc=%lu "
	    "free=%lu total=%lu\n", (unsigned long)alloc1,
	    (unsigned long)free1, (unsigned long)total1);

	splat_vprint(file, SPLAT_KMEM_TEST12_NAME, "Alloc %d bytes\n", size);
	ptr = vmem_alloc(size, KM_SLEEP);
	if (!ptr) {
		splat_vprint(file, SPLAT_KMEM_TEST12_NAME,
		    "Failed to alloc %d bytes\n", size);
		return -ENOMEM;
	}

	/* Readings taken while the test buffer is outstanding. */
	alloc2 = vmem_size(NULL, VMEM_ALLOC);
	free2 = vmem_size(NULL, VMEM_FREE);
	total2 = vmem_size(NULL, VMEM_ALLOC | VMEM_FREE);
	splat_vprint(file, SPLAT_KMEM_TEST12_NAME, "Vmem alloc=%lu "
	    "free=%lu total=%lu\n", (unsigned long)alloc2,
	    (unsigned long)free2, (unsigned long)total2);

	splat_vprint(file, SPLAT_KMEM_TEST12_NAME, "Free %d bytes\n", size);
	vmem_free(ptr, size);

	/* VMEM_ALLOC must have grown by ~size, within a 1% tolerance. */
	if (alloc2 < (alloc1 + size - (size / 100)) ||
	    alloc2 > (alloc1 + size + (size / 100))) {
		splat_vprint(file, SPLAT_KMEM_TEST12_NAME, "Failed "
		    "VMEM_ALLOC size: %lu != %lu+%d (+/- 1%%)\n",
		    (unsigned long)alloc2,(unsigned long)alloc1,size);
		return -ERANGE;
	}

	/* VMEM_FREE must have shrunk by ~size, within the same tolerance. */
	if (free2 < (free1 - size - (size / 100)) ||
	    free2 > (free1 - size + (size / 100))) {
		splat_vprint(file, SPLAT_KMEM_TEST12_NAME, "Failed "
		    "VMEM_FREE size: %lu != %lu-%d (+/- 1%%)\n",
		    (unsigned long)free2, (unsigned long)free1, size);
		return -ERANGE;
	}

	/* Alloc + free is the arena total and must be exactly unchanged. */
	if (total1 != total2) {
		splat_vprint(file, SPLAT_KMEM_TEST12_NAME, "Failed "
		    "VMEM_ALLOC | VMEM_FREE not constant: "
		    "%lu != %lu\n", (unsigned long)total2,
		    (unsigned long)total1);
		return -ERANGE;
	}

	splat_vprint(file, SPLAT_KMEM_TEST12_NAME,
	    "VMEM_ALLOC within tolerance: ~%ld%% (%ld/%d)\n",
	    (long)abs(alloc1 + (long)size - alloc2) * 100 / (long)size,
	    (long)abs(alloc1 + (long)size - alloc2), size);
	splat_vprint(file, SPLAT_KMEM_TEST12_NAME,
	    "VMEM_FREE within tolerance: ~%ld%% (%ld/%d)\n",
	    (long)abs((free1 - (long)size) - free2) * 100 / (long)size,
	    (long)abs((free1 - (long)size) - free2), size);

	return 0;
}
/*
 * Allocate a single object of specified size with specified flags
 * (either M_WAITOK or M_NOWAIT).
 *
 * Returns a pointer to the object, or NULL when out of KVA, past the
 * physical memory limit, or when backing pages cannot be obtained.
 */
void *
memguard_alloc(unsigned long req_size, int flags)
{
	vm_offset_t addr;
	u_long size_p, size_v;	/* page-rounded size / reserved KVA size */
	int do_guard, rv;

	size_p = round_page(req_size);
	if (size_p == 0)
		return (NULL);
	/*
	 * To ensure there are holes on both sides of the allocation,
	 * request 2 extra pages of KVA.  We will only actually add a
	 * vm_map_entry and get pages for the original request.  Save
	 * the value of memguard_options so we have a consistent
	 * value.
	 */
	size_v = size_p;
	do_guard = (memguard_options & MG_GUARD_AROUND) != 0;
	if (do_guard)
		size_v += 2 * PAGE_SIZE;

	/*
	 * When we pass our memory limit, reject sub-page allocations.
	 * Page-size and larger allocations will use the same amount
	 * of physical memory whether we allocate or hand off to
	 * uma_large_alloc(), so keep those.
	 */
	if (vmem_size(memguard_arena, VMEM_ALLOC) >= memguard_physlimit &&
	    req_size < PAGE_SIZE) {
		addr = (vm_offset_t)NULL;
		memguard_fail_pgs++;
		goto out;
	}
	/*
	 * Keep a moving cursor so we don't recycle KVA as long as
	 * possible.  It's not perfect, since we don't know in what
	 * order previous allocations will be free'd, but it's simple
	 * and fast, and requires O(1) additional storage if guard
	 * pages are not used.
	 *
	 * XXX This scheme will lead to greater fragmentation of the
	 * map, unless vm_map_findspace() is tweaked.
	 */
	for (;;) {
		if (vmem_xalloc(memguard_arena, size_v, 0, 0, 0,
		    memguard_cursor, VMEM_ADDR_MAX,
		    M_BESTFIT | M_NOWAIT, &addr) == 0)
			break;
		/*
		 * The map has no space.  This may be due to
		 * fragmentation, or because the cursor is near the
		 * end of the map.
		 */
		if (memguard_cursor == memguard_base) {
			/* Cursor already at base: KVA is truly exhausted. */
			memguard_fail_kva++;
			addr = (vm_offset_t)NULL;
			goto out;
		}
		/* Wrap the cursor back to the base and retry once more. */
		memguard_wrap++;
		memguard_cursor = memguard_base;
	}
	/* Skip the leading guard page; only size_p bytes get backing. */
	if (do_guard)
		addr += PAGE_SIZE;
	rv = kmem_back(kmem_object, addr, size_p, flags);
	if (rv != KERN_SUCCESS) {
		/* Could not get pages; release the reserved KVA span. */
		vmem_xfree(memguard_arena, addr, size_v);
		memguard_fail_pgs++;
		addr = (vm_offset_t)NULL;
		goto out;
	}
	memguard_cursor = addr + size_v;
	/* Record both sizes so memguard_free() can undo this allocation. */
	*v2sizep(trunc_page(addr)) = req_size;
	*v2sizev(trunc_page(addr)) = size_v;
	memguard_succ++;
	if (req_size < PAGE_SIZE) {
		memguard_wasted += (PAGE_SIZE - req_size);
		if (do_guard) {
			/*
			 * Align the request to 16 bytes, and return
			 * an address near the end of the page, to
			 * better detect array overrun.
			 */
			req_size = roundup2(req_size, 16);
			addr += (PAGE_SIZE - req_size);
		}
	}
out:
	return ((void *)addr);
}
pct = BIO_BUF_PERCENT; if (bufhwm_pct != 0 && ((pct = 100 / bufhwm_pct) < BIO_MAX_PERCENT)) { pct = BIO_BUF_PERCENT; /* * Invalid user specified value, emit a warning. */ cmn_err(CE_WARN, "binit: bufhwm_pct(%d) out of \ range(1..%d). Using %d as default.", bufhwm_pct, 100 / BIO_MAX_PERCENT, 100 / BIO_BUF_PERCENT); } bio_default_hwm = MIN(physmem / pct, btop(vmem_size(heap_arena, VMEM_FREE)) / 4) * (PAGESIZE / 1024); bio_default_hwm = MIN(INT32_MAX, bio_default_hwm); if ((v.v_bufhwm = bufhwm) == 0) v.v_bufhwm = bio_default_hwm; if (v.v_bufhwm < BIO_MIN_HWM || v.v_bufhwm > bio_max_hwm) { v.v_bufhwm = (int)bio_max_hwm; /* * Invalid user specified value, emit a warning. */ cmn_err(CE_WARN, "binit: bufhwm(%d) out \ of range(%d..%lu). Using %lu as default", bufhwm, BIO_MIN_HWM, bio_max_hwm, bio_max_hwm);
/*
 * Allocate from the kmem large page arena when possible, falling back
 * to the regular segkmem_alloc() path.  On success via the large page
 * path *sizep is updated to the (possibly rounded-up) size actually
 * allocated.  A throttle counter backs off large page attempts after
 * repeated failures.
 */
/*ARGSUSED*/
void *
segkmem_alloc_lp(vmem_t *vmp, size_t *sizep, size_t align, int vmflag)
{
	size_t size;
	kthread_t *t = curthread;
	segkmem_lpcb_t *lpcb = &segkmem_lpcb;

	ASSERT(sizep != NULL);

	size = *sizep;

	/* Large pages only when enabled, not panicking, not sharelocked. */
	if (lpcb->lp_uselp && !(t->t_flag & T_PANIC) &&
	    !(vmflag & SEGKMEM_SHARELOCKED)) {

		size_t kmemlp_qnt = segkmem_kmemlp_quantum;
		size_t asize = P2ROUNDUP(size, kmemlp_qnt);
		void *addr = NULL;
		ulong_t *lpthrtp = &lpcb->lp_throttle;
		ulong_t lpthrt = *lpthrtp;
		int dowakeup = 0;	/* we imported; wake waiters */
		int doalloc = 1;	/* attempt the large page alloc */

		ASSERT(kmem_lp_arena != NULL);
		ASSERT(asize >= size);

		if (lpthrt != 0) {
			/* try to update the throttle value */
			lpthrt = atomic_inc_ulong_nv(lpthrtp);
			if (lpthrt >= segkmem_lpthrottle_max) {
				/* cap the throttle; keep some backoff */
				lpthrt = atomic_cas_ulong(lpthrtp, lpthrt,
				    segkmem_lpthrottle_max / 4);
			}

			/*
			 * when we get above throttle start do an exponential
			 * backoff at trying large pages and reaping
			 */
			if (lpthrt > segkmem_lpthrottle_start &&
			    !ISP2(lpthrt)) {
				lpcb->allocs_throttled++;
				lpthrt--;
				/* reap only on power-of-2 throttle values */
				if (ISP2(lpthrt))
					kmem_reap();
				return (segkmem_alloc(vmp, size, vmflag));
			}
		}

		if (!(vmflag & VM_NOSLEEP) &&
		    segkmem_heaplp_quantum >= (8 * kmemlp_qnt) &&
		    vmem_size(kmem_lp_arena, VMEM_FREE) <= kmemlp_qnt &&
		    asize < (segkmem_heaplp_quantum - kmemlp_qnt)) {

			/*
			 * we are low on free memory in kmem_lp_arena
			 * we let only one guy to allocate heap_lp
			 * quantum size chunk that everybody is going to
			 * share
			 */
			mutex_enter(&lpcb->lp_lock);

			if (lpcb->lp_wait) {

				/* we are not the first one - wait */
				cv_wait(&lpcb->lp_cv, &lpcb->lp_lock);
				/* skip allocating if still no free space */
				if (vmem_size(kmem_lp_arena, VMEM_FREE) <
				    kmemlp_qnt) {
					doalloc = 0;
				}
			} else if (vmem_size(kmem_lp_arena, VMEM_FREE) <=
			    kmemlp_qnt) {

				/*
				 * we are the first one, make sure we import
				 * a large page
				 */
				if (asize == kmemlp_qnt)
					asize += kmemlp_qnt;
				dowakeup = 1;
				lpcb->lp_wait = 1;
			}

			mutex_exit(&lpcb->lp_lock);
		}

		/*
		 * VM_ABORT flag prevents sleeps in vmem_xalloc when
		 * large pages are not available. In that case this allocation
		 * attempt will fail and we will retry allocation with small
		 * pages. We also do not want to panic if this allocation fails
		 * because we are going to retry.
		 */
		if (doalloc) {
			addr = vmem_alloc(kmem_lp_arena, asize,
			    (vmflag | VM_ABORT) & ~VM_PANIC);

			if (dowakeup) {
				/* release waiters blocked in cv_wait above */
				mutex_enter(&lpcb->lp_lock);
				ASSERT(lpcb->lp_wait != 0);
				lpcb->lp_wait = 0;
				cv_broadcast(&lpcb->lp_cv);
				mutex_exit(&lpcb->lp_lock);
			}
		}

		if (addr != NULL) {
			/* success: report rounded size, reset throttle */
			*sizep = asize;
			*lpthrtp = 0;
			return (addr);
		}

		/* large page attempt failed; account for it */
		if (vmflag & VM_NOSLEEP)
			lpcb->nosleep_allocs_failed++;
		else
			lpcb->sleep_allocs_failed++;
		lpcb->alloc_bytes_failed += size;

		/* if large page throttling is not started yet do it */
		if (segkmem_use_lpthrottle && lpthrt == 0) {
			lpthrt = atomic_cas_ulong(lpthrtp, lpthrt, 1);
		}
	}
	/* fall back to the regular small page allocation path */
	return (segkmem_alloc(vmp, size, vmflag));
}