/*
 * Take rctl action when the requested file descriptor is too big.
 */
static void
fd_too_big(proc_t *p)
{
	mutex_enter(&p->p_lock);
	(void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
	    p->p_rctls, p, RCA_SAFE);
	mutex_exit(&p->p_lock);
}
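/*
 * Illustrative sketch only (not part of the original source): a descriptor
 * allocation path would typically compare the candidate fd against the
 * process fileno resource control and call fd_too_big() when the limit is
 * reached.  The fd_check_limit() name and the exact p_fno_ctl comparison
 * are assumptions for illustration.
 */
static int
fd_check_limit(proc_t *p, int fd)
{
	if ((rlim64_t)fd >= p->p_fno_ctl) {
		fd_too_big(p);		/* post the RLIMIT_NOFILE rctl action */
		return (EMFILE);
	}
	return (0);
}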
/*
 * wrxmem does the real work of write requests for xmemfs.
 */
static int
wrxmem(struct xmount *xm, struct xmemnode *xp, struct uio *uio,
	struct cred *cr, struct caller_context *ct)
{
	uint_t		blockoffset;	/* offset in the block */
	uint_t		blkwr;		/* offset in blocks into xmem file */
	uint_t		blkcnt;
	caddr_t		base;
	ssize_t		bytes;		/* bytes to uiomove */
	struct vnode	*vp;
	int		error = 0;
	size_t		bsize = xm->xm_bsize;
	rlim64_t	limit = uio->uio_llimit;
	long		oresid = uio->uio_resid;
	timestruc_t	now;
	offset_t	offset;

	/*
	 * xp->xn_size is incremented before the uiomove
	 * is done on a write.  If the move fails (bad user
	 * address) reset xp->xn_size.
	 * The better way would be to increment xp->xn_size
	 * only if the uiomove succeeds.
	 */
	long		xn_size_changed = 0;
	offset_t	old_xn_size;

	vp = XNTOV(xp);
	ASSERT(vp->v_type == VREG);

	XMEMPRINTF(1, ("wrxmem: vp %p resid %lx off %llx\n",
	    (void *)vp, uio->uio_resid, uio->uio_loffset));

	ASSERT(RW_WRITE_HELD(&xp->xn_contents));
	ASSERT(RW_WRITE_HELD(&xp->xn_rwlock));

	if (MANDLOCK(vp, xp->xn_mode)) {
		rw_exit(&xp->xn_contents);
		/*
		 * xmem_getattr ends up being called by chklock
		 */
		error = chklock(vp, FWRITE,
		    uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct);
		rw_enter(&xp->xn_contents, RW_WRITER);
		if (error != 0) {
			XMEMPRINTF(8, ("wrxmem: vp %p error %x\n",
			    (void *)vp, error));
			return (error);
		}
	}

	if ((offset = uio->uio_loffset) < 0)
		return (EINVAL);

	if (offset >= limit) {
		proc_t *p = ttoproc(curthread);

		mutex_enter(&p->p_lock);
		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
		    p, RCA_UNSAFE_SIGINFO);
		mutex_exit(&p->p_lock);
		return (EFBIG);
	}

	if (uio->uio_resid == 0) {
		XMEMPRINTF(8, ("wrxmem: vp %p resid %lx\n",
		    (void *)vp, uio->uio_resid));
		return (0);
	}

	/*
	 * Get the highest blocknumber and allocate page array if needed.
	 * Note that if xm_bsize != PAGESIZE, each ppa[] is pointer to
	 * a page array rather than just a page.
	 */
	blkcnt = howmany((offset + uio->uio_resid), bsize);
	blkwr = offset >> xm->xm_bshift;	/* write begins here */

	XMEMPRINTF(1, ("wrxmem: vp %p blkcnt %x blkwr %x xn_ppasz %lx\n",
	    (void *)vp, blkcnt, blkwr, xp->xn_ppasz));

	/* file size increase */
	if (xp->xn_ppasz < blkcnt) {
		page_t		***ppa;
		int		ppasz;
		uint_t		blksinfile = howmany(xp->xn_size, bsize);

		/*
		 * check if sufficient blocks available for the given offset.
		 */
		if (blkcnt - blksinfile > xm->xm_max - xm->xm_mem)
			return (ENOSPC);

		/*
		 * to prevent reallocating every time the file grows by a
		 * single block, double the size of the array.
		 */
		if (blkcnt < xp->xn_ppasz * 2)
			ppasz = xp->xn_ppasz * 2;
		else
			ppasz = blkcnt;

		ppa = kmem_zalloc(ppasz * sizeof (page_t **), KM_SLEEP);

		ASSERT(ppa);

		if (xp->xn_ppasz) {
			bcopy(xp->xn_ppa, ppa, blksinfile * sizeof (*ppa));
			kmem_free(xp->xn_ppa, xp->xn_ppasz * sizeof (*ppa));
		}
		xp->xn_ppa = ppa;
		xp->xn_ppasz = ppasz;

		/*
		 * fill in the 'hole' if write offset beyond file size. This
		 * helps in creating large files quickly; an application can
		 * lseek to a large offset and perform a single write
		 * operation to create the large file.
		 */
		if (blksinfile < blkwr) {

			old_xn_size = xp->xn_size;
			xp->xn_size = (offset_t)blkwr * bsize;

			XMEMPRINTF(4, ("wrxmem: fill vp %p blks %x to %x\n",
			    (void *)vp, blksinfile, blkcnt - 1));
			error = xmem_fillpages(xp, vp,
			    (offset_t)blksinfile * bsize,
			    (offset_t)(blkcnt - blksinfile) * bsize, 1);
			if (error) {
				/* truncate file back to original size */
				(void) xmemnode_trunc(xm, xp, old_xn_size);
				return (error);
			}
			/*
			 * if error on blkwr, this allows truncation of the
			 * filled hole.
			 */
			xp->xn_size = old_xn_size;
		}
	}

	do {
		offset_t	pagestart, pageend;
		page_t		**ppp;

		blockoffset = (uint_t)offset & (bsize - 1);
		/*
		 * A maximum of xm->xm_bsize bytes of data is transferred
		 * each pass through this loop
		 */
		bytes = MIN(bsize - blockoffset, uio->uio_resid);

		ASSERT(bytes);

		if (offset + bytes >= limit) {
			if (offset >= limit) {
				error = EFBIG;
				goto out;
			}
			bytes = limit - offset;
		}

		if (!xp->xn_ppa[blkwr]) {
			/* zero fill new pages - simplify partial updates */
			error = xmem_fillpages(xp, vp, offset, bytes, 1);
			if (error)
				return (error);
		}

		/* grow the file to the new length */
		if (offset + bytes > xp->xn_size) {
			xn_size_changed = 1;
			old_xn_size = xp->xn_size;
			xp->xn_size = offset + bytes;
		}

#ifdef LOCKNEST
		xmem_getpage();
#endif

		/* xn_ppa[] is a page_t * if ppb == 1 */
		if (xm->xm_ppb == 1)
			ppp = (page_t **)&xp->xn_ppa[blkwr];
		else
			ppp = &xp->xn_ppa[blkwr][btop(blockoffset)];

		pagestart = offset & ~(offset_t)(PAGESIZE - 1);
		/*
		 * subtract 1 in case (offset + bytes) is mod PAGESIZE
		 * so that pageend is the actual index of last page.
		 */
		pageend = (offset + bytes - 1) & ~(offset_t)(PAGESIZE - 1);

		base = segxmem_getmap(xm->xm_map, vp,
		    pagestart, pageend - pagestart + PAGESIZE,
		    ppp, S_WRITE);

		rw_exit(&xp->xn_contents);

		error = uiomove(base + (offset - pagestart), bytes,
		    UIO_WRITE, uio);
		segxmem_release(xm->xm_map, base,
		    pageend - pagestart + PAGESIZE);

		/*
		 * Re-acquire contents lock.
		 */
		rw_enter(&xp->xn_contents, RW_WRITER);

		/*
		 * If the uiomove failed, fix up xn_size.
		 */
		if (error) {
			if (xn_size_changed) {
				/*
				 * The uiomove failed, and we
				 * allocated blocks, so get rid
				 * of them.
				 */
				(void) xmemnode_trunc(xm, xp, old_xn_size);
			}
		} else {
			if ((xp->xn_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) &&
			    (xp->xn_mode & (S_ISUID | S_ISGID)) &&
			    secpolicy_vnode_setid_retain(cr,
			    (xp->xn_mode & S_ISUID) != 0 &&
			    xp->xn_uid == 0) != 0) {
				/*
				 * Clear Set-UID & Set-GID bits on
				 * successful write if not privileged
				 * and at least one of the execute bits
				 * is set.  If we always clear Set-GID,
				 * mandatory file and record locking is
				 * unusable.
				 */
				xp->xn_mode &= ~(S_ISUID | S_ISGID);
			}
			gethrestime(&now);
			xp->xn_mtime = now;
			xp->xn_ctime = now;
		}
		offset = uio->uio_loffset;	/* uiomove sets uio_loffset */
		blkwr++;
	} while (error == 0 && uio->uio_resid > 0 && bytes != 0);

out:
	/*
	 * If we've already done a partial-write, terminate
	 * the write but return no error.
	 */
	if (oresid != uio->uio_resid)
		error = 0;
	return (error);
}
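/*
 * Illustrative sketch only (not part of the original source): the block
 * arithmetic wrxmem() relies on, shown in isolation.  A write of resid bytes
 * at byte offset off into a file whose block size bsize is a power of two
 * (bshift == log2(bsize)) starts in block off >> bshift, ends in block
 * howmany(off + resid, bsize) - 1, and begins off & (bsize - 1) bytes into
 * its first block.  The xmem_blkspan() name and its signature are
 * assumptions for illustration.
 */
static void
xmem_blkspan(offset_t off, ssize_t resid, size_t bsize, uint_t bshift,
	uint_t *firstblk, uint_t *nblks, uint_t *blockoff)
{
	*firstblk = (uint_t)(off >> bshift);		/* write begins here */
	*nblks = (uint_t)howmany(off + resid, bsize);	/* highest block + 1 */
	*blockoff = (uint_t)off & (bsize - 1);		/* offset in 1st block */
}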
/*
 * This routine assumes that the stack grows downward.
 * Returns 0 on success, errno on failure.
 */
int
grow_internal(caddr_t sp, uint_t growszc)
{
	struct proc *p = curproc;
	size_t newsize;
	size_t oldsize;
	int error;
	size_t pgsz;
	uint_t szc;
	struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

	ASSERT(sp < p->p_usrstack);
	sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE);

	/*
	 * grow to growszc alignment but use current p->p_stkpageszc for
	 * the segvn_crargs szc passed to segvn_create. For memcntl to
	 * increase the szc, this allows the new extension segment to be
	 * concatenated successfully with the existing stack segment.
	 */
	if ((szc = growszc) != 0) {
		pgsz = page_get_pagesize(szc);
		ASSERT(pgsz > PAGESIZE);
		newsize = p->p_usrstack - (caddr_t)P2ALIGN((uintptr_t)sp, pgsz);
		if (newsize > (size_t)p->p_stk_ctl) {
			szc = 0;
			pgsz = PAGESIZE;
			newsize = p->p_usrstack - sp;
		}
	} else {
		pgsz = PAGESIZE;
		newsize = p->p_usrstack - sp;
	}

	if (newsize > (size_t)p->p_stk_ctl) {
		(void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls,
		    p, RCA_UNSAFE_ALL);

		return (ENOMEM);
	}

	oldsize = p->p_stksize;
	ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);

	if (newsize <= oldsize) {	/* prevent the stack from shrinking */
		return (0);
	}

	if (!(p->p_stkprot & PROT_EXEC)) {
		crargs.prot &= ~PROT_EXEC;
	}
	/*
	 * extend stack with the proposed new growszc, which is different
	 * than p_stkpageszc only on a memcntl to increase the stack pagesize.
	 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via
	 * map_pgszcvec(). Use AS_MAP_STACK to get intermediate page sizes
	 * if not aligned to szc's pgsz.
	 */
	if (szc > 0) {
		caddr_t oldsp = p->p_usrstack - oldsize;
		caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack,
		    pgsz);

		if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) {
			crargs.szc = p->p_stkpageszc ? p->p_stkpageszc :
			    AS_MAP_NO_LPOOB;
		} else if (oldsp == austk) {
			crargs.szc = szc;
		} else {
			crargs.szc = AS_MAP_STACK;
		}
	} else {
		crargs.szc = AS_MAP_NO_LPOOB;
	}
	crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;

	if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize,
	    segvn_create, &crargs)) != 0) {
		if (error == EAGAIN) {
			cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
			    "for pid %d (%s)", p->p_pid, PTOU(p)->u_comm);
		}
		return (error);
	}
	p->p_stksize = newsize;
	return (0);
}
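/*
 * Illustrative sketch only (not part of the original source): a fault-path
 * wrapper in the spirit of the kernel's stack-growth entry point would take
 * the address-space range lock, ask grow_internal() to extend the stack down
 * to the faulting address using the process's preferred stack page size, and
 * report whether the address is now covered.  The stack_grow_for_fault()
 * name and the exact checks are assumptions for illustration.
 */
static int
stack_grow_for_fault(caddr_t fault_addr)
{
	struct proc *p = curproc;
	int error;

	if (fault_addr >= p->p_usrstack)
		return (0);		/* not a downward stack reference */

	as_rangelock(p->p_as);
	error = grow_internal(fault_addr, p->p_stkpageszc);
	as_rangeunlock(p->p_as);

	return (error == 0);
}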
/*
 * Returns 0 on success.
 */
int
brk_internal(caddr_t nva, uint_t brkszc)
{
	caddr_t ova;			/* current break address */
	size_t size;
	int	error;
	struct proc *p = curproc;
	struct as *as = p->p_as;
	size_t pgsz;
	uint_t szc;
	rctl_qty_t as_rctl;

	/*
	 * extend heap to brkszc alignment but use current p->p_brkpageszc
	 * for the newly created segment. This allows the new extension
	 * segment to be concatenated successfully with the existing brk
	 * segment.
	 */
	if ((szc = brkszc) != 0) {
		pgsz = page_get_pagesize(szc);
		ASSERT(pgsz > PAGESIZE);
	} else {
		pgsz = PAGESIZE;
	}

	mutex_enter(&p->p_lock);
	as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
	    p->p_rctls, p);
	mutex_exit(&p->p_lock);

	/*
	 * If p_brkbase has not yet been set, the first call
	 * to brk() will initialize it.
	 */
	if (p->p_brkbase == 0)
		p->p_brkbase = nva;

	/*
	 * Before multiple page size support existed p_brksize was the value
	 * not rounded to the pagesize (i.e. it stored the exact user request
	 * for heap size). If pgsz is greater than PAGESIZE calculate the
	 * heap size as the real new heap size by rounding it up to pgsz.
	 * This is useful since we may want to know where the heap ends
	 * without knowing heap pagesize (e.g. some old code) and also if
	 * heap pagesize changes we can update p_brkpageszc but delay adding
	 * the new mapping yet still know from p_brksize where the heap really
	 * ends. The user-requested heap end is stored in a libc variable.
	 */
	if (pgsz > PAGESIZE) {
		caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
		size = tnva - p->p_brkbase;
		if (tnva < p->p_brkbase || (size > p->p_brksize &&
		    size > (size_t)as_rctl)) {
			szc = 0;
			pgsz = PAGESIZE;
			size = nva - p->p_brkbase;
		}
	} else {
		size = nva - p->p_brkbase;
	}

	/*
	 * use PAGESIZE to round up ova because we want to know the real value
	 * of the current heap end in case p_brkpageszc changes since the last
	 * p_brksize was computed.
	 */
	nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
	ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
	    PAGESIZE);

	if ((nva < p->p_brkbase) || (size > p->p_brksize &&
	    size > as_rctl)) {
		mutex_enter(&p->p_lock);
		(void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
		    RCA_SAFE);
		mutex_exit(&p->p_lock);
		return (ENOMEM);
	}

	if (nva > ova) {
		struct segvn_crargs crargs =
		    SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

		if (!(p->p_datprot & PROT_EXEC)) {
			crargs.prot &= ~PROT_EXEC;
		}

		/*
		 * Add new zfod mapping to extend UNIX data segment
		 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies
		 * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate
		 * page sizes if ova is not aligned to szc's pgsz.
		 */
		if (szc > 0) {
			caddr_t rbss;

			rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
			    pgsz);
			if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) {
				crargs.szc = p->p_brkpageszc ? p->p_brkpageszc :
				    AS_MAP_NO_LPOOB;
			} else if (ova == rbss) {
				crargs.szc = szc;
			} else {
				crargs.szc = AS_MAP_HEAP;
			}
		} else {
			crargs.szc = AS_MAP_NO_LPOOB;
		}
		crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
		error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
		    &crargs);
		if (error) {
			return (error);
		}

	} else if (nva < ova) {
		/*
		 * Release mapping to shrink UNIX data segment.
		 */
		(void) as_unmap(as, nva, (size_t)(ova - nva));
	}

	p->p_brksize = size;
	return (0);
}
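/*
 * Illustrative sketch only (not part of the original source): a brk(2)-style
 * entry point would typically serialize heap operations with the address-
 * space range lock and hand the user-supplied break address to
 * brk_internal() along with the current preferred heap page size.  The
 * brk_sketch() name is an assumption; the real system-call wrapper may
 * differ in detail.
 */
static int
brk_sketch(caddr_t nva)
{
	proc_t *p = curproc;
	int error;

	/*
	 * Serialize brk operations on this address space.
	 */
	as_rangelock(p->p_as);
	error = brk_internal(nva, p->p_brkpageszc);
	as_rangeunlock(p->p_as);

	return ((error != 0) ? set_errno(error) : 0);
}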