/*
 * pin_and_count_noncontigs
 *
 * Walks the virtual range [addr, addr + datalen) page by page.  For each
 * page touched, the backing physical page (looked up via va2ptep) is
 * pinned, and a counter is bumped whenever that physical page does not
 * immediately follow the previous one.
 *
 * Returns the number of such physically discontiguous runs; 0 when
 * datalen is 0.  No validity check is made on the page table entries.
 */
static int
pin_and_count_noncontigs (char *addr, u_int datalen)
{
  u_int va = (u_int) addr;
  /* seeded with si_nppages; presumably chosen so that the first page
     always opens a new run -- confirm */
  u_int last_ppn = si->si_nppages;
  u_int runs = 0;

  while (datalen > 0) {
    u_int cur_ppn = PGNO (*va2ptep (va));
    u_int page_end;
    u_int chunk;

    if (cur_ppn != (last_ppn + 1)) {
      runs++;
    }
    ppage_pin (&ppages[cur_ppn]);
    last_ppn = cur_ppn;

    /* bytes of the range that live on the current page */
    page_end = (va & ~PGMASK) + NBPG;
    chunk = page_end - va;
    if (chunk >= datalen) {
      break;
    }

    /* advance to the start of the next page of data */
    datalen -= chunk;
    va = page_end;
  }

  return (runs);
}
/*
 * disk_prepare_bc_request
 *
 * Builds a single NBPG-byte disk request for page-sized block `blkno' of
 * disk `devno', transferring to/from `vaddr', and either installs it as
 * the head of the request list (*headbpp == NULL) or appends it to the
 * end of the scatter/gather chain already rooted at *headbpp.
 *
 * The page behind vaddr is pinned (via kva2pp, so vaddr is presumably a
 * kernel virtual address), and so is the page behind resptr when resptr
 * is non-NULL.
 *
 * Returns 0 on success, -E_INVAL for a bad devno/blkno, a NULL headbpp,
 * a malformed scatter/gather list, or a block that is not contiguous
 * with the end of the existing chain, and -E_NO_MEM if no buf could be
 * allocated.
 */
int
disk_prepare_bc_request (u_int devno, u_quad_t blkno, void *vaddr,
                         u_int flags, int *resptr, struct buf **headbpp)
{
  struct buf *bp;

  /* Reject blkno values for which blkno * NBPG would wrap around: a
     wrapped product could otherwise pass the size check below and alias
     a completely different (low) disk block. */
  if ((devno >= si->si_ndisks)
      || (blkno > ((~(u_quad_t) 0) / NBPG))
      || (((blkno * NBPG) / si->si_disks[devno].d_bsize)
          >= si->si_disks[devno].d_size)) {
    warn ("disk_prepare_bc_request: invalid devno (%u) or blkno (%qu)",
          devno, blkno);
    return (-E_INVAL);
  }

  if (headbpp == NULL) {
    warn ("disk_prepare_bc_request: headbpp == NULL");
    return (-E_INVAL);
  }

  bp = disk_buf_alloc();
  if (!bp)
    return (-E_NO_MEM);

  bp->b_next = NULL;
  bp->b_sgnext = NULL;
  bp->b_flags = flags;
  bp->b_dev = devno;
  /* convert from page-sized blocks to device blocks */
  bp->b_blkno = (blkno * NBPG) / si->si_disks[devno].d_bsize;
  bp->b_bcount = NBPG;
  bp->b_sgtot = 0;
  bp->b_memaddr = vaddr;
  bp->b_envid = curenv->env_id;
  bp->b_resid = 0;
  bp->b_resptr = NULL;

  ppage_pin (kva2pp((u_int) vaddr));

  if (*headbpp) {
    struct buf *tmpbp = *headbpp;

    /* find the last element of the existing scatter/gather chain */
    while (tmpbp->b_flags & B_SCATGATH) {
      tmpbp = (struct buf *) tmpbp->b_sgnext;
      if (tmpbp == NULL) {
        warn ("disk_prepare_bc_request: bad scatter/gather list");
        ppage_unpin (kva2pp((u_int) vaddr));
        free (bp);
        return (-E_INVAL);
      }
    }

    /* the new block must directly follow the chain's last block */
    /* XXX - test for big blkno wraparound */
    if (bp->b_blkno != (tmpbp->b_blkno
                        + (tmpbp->b_bcount
                           / si->si_disks[bp->b_dev].d_bsize))) {
      warn ("disk_prepare_bc_request: noncontiguous requests "
            "(prevblk %qu, size %u, curblk %qu) can't be merged",
            tmpbp->b_blkno, tmpbp->b_bcount, bp->b_blkno);
      ppage_unpin (kva2pp((u_int) vaddr));
      free (bp);
      return (-E_INVAL);
    }

    (*headbpp)->b_sgtot += NBPG;
    tmpbp->b_flags |= B_SCATGATH;
    tmpbp->b_sgnext = bp;
  } else {
    *headbpp = bp;
    bp->b_sgtot = NBPG;
  }

  if (resptr) {
    ppage_pin (kva2pp(((u_int) resptr)));
  }
  bp->b_resptr = resptr;

  return (0);
}
/*
 * sys_disk_request
 *
 * Disk I/O without going through the buffer cache.
 *
 * xn_user is the name of a pxn that grants access to the disk
 * reqbp is a list of scatter/gather requests
 * k is which capability in the env should be checked
 *
 * permission is granted to perform the operation if:
 * 1) the blocks in reqbp are covered by the pxn
 * 2) the capability gives access to the pxn
 *
 * Requests flagged B_SCSICMD are forwarded to sys_disk_scsicmd.
 *
 * Returns 0 once the request has been handed to the disk's strategy
 * routine (or immediately when the total byte count is 0, after
 * decrementing *resptr if one was supplied).  Failures return the error
 * from env_getcap or pxn_authorizes_xtnt, -E_NOT_FOUND (no pxn),
 * -E_FAULT (unreadable request, bad data buffer or bad resptr),
 * -E_INVAL (inconsistent scatter/gather total, bad byte count, too many
 * scatter/gather entries) or -E_NO_MEM (copy_and_pin failed).
 */
int
sys_disk_request (u_int sn, struct Xn_name *xn_user, struct buf *reqbp,
                  u_int k)
{
  struct Xn_name xn;
  struct Xn_xtnt xtnt;
  struct Pxn *pxn;
  cap c;
  int ret;
  int access;
  struct disk *di;
  int *resptr = NULL;
  u_int bcount = 0;
  struct buf *bp, *segbp, *nsegbp;
  int noncontigs = 0, nctemp;

#ifdef MEASURE_DISK_TIMES
  disk_pctr_start = rdtsc();
#endif

  /* XXX - use PFM or copyin instead of isreadable_* */

  /* reqbp comes straight from the caller: make sure it can be read
     before looking at b_flags for the SCSI bypass below */
  if (! (isreadable_varange ((u_int) reqbp, sizeof (struct buf)))) {
    warn ("sys_disk_request: bad reqbp (%p)", reqbp);
    return (-E_FAULT);
  }

  /* bypass for direct scsi commands */
  if (reqbp->b_flags & B_SCSICMD) {
    return sys_disk_scsicmd (sn, k, reqbp);
  }

  /* get the capability */
  if ((ret = env_getcap (curenv, k, &c)) < 0)
    return ret;

  /* and the pxn */
  copyin (xn_user, &xn, sizeof (xn));
  if (! (pxn = lookup_pxn (&xn))) {
    warn ("sys_disk_request: no pxn found");
    return (-E_NOT_FOUND);
  }

  /* XXX - do we need to check that this is a physical disk? */

  /* get a reference to the disk unit for this command */
  di = &(si->si_disks[xn.xa_dev]);

  /* Iterate over the request list checking:
       -- if the request is transferring data to/from memory that this
          user can read/write.
       -- if the pxn and capability specified give access to these blocks
     The walk ends at the first element without B_SCATGATH set. */
  for (segbp = reqbp; ; segbp = (struct buf *) segbp->b_sgnext) {
    if (! (isreadable_varange ((u_int)segbp, sizeof(struct buf)))) {
      warn ("sys_disk_request: bad reqbp (%p)", segbp);
      return (-E_FAULT);
    }

    if (segbp->b_flags & B_READ) {
      access = ACL_R;
    } else {
      access = ACL_W;
    }

    xtnt.xtnt_block = segbp->b_blkno;
    xtnt.xtnt_size = segbp->b_bcount / di->d_bsize;
    bcount += segbp->b_bcount;

    if (! pxn_authorizes_xtnt (pxn, &c, &xtnt, access, &ret)) {
      warn ("sys_disk_request: pxn/cap does not grant access to block(s)");
      return ret;
    }

    /* a disk read stores into memory, so the buffer must be writeable;
       a disk write only needs a readable buffer.
       NOTE(review): the direction here is taken from the head request
       while the ACL check above uses the segment's own flags -- confirm
       that this asymmetry is intended. */
    if (! ((reqbp->b_flags & B_READ) ? iswriteable_varange
                                     : isreadable_varange)
          ((u_int) segbp->b_memaddr, segbp->b_bcount)) {
      warn ("sys_disk_request: bad b_memaddr: %p (b_bcount %d)",
            segbp->b_memaddr, segbp->b_bcount);
      return (-E_FAULT);
    }

    if (! (segbp->b_flags & B_SCATGATH)) {
      /* last element: pick up and translate the completion counter */
      if (segbp->b_resptr) {
        resptr = segbp->b_resptr;
        if ((((u_int) resptr) % sizeof(u_int))
            || !(isvawriteable (resptr))) {
          warn ("sys_disk_request: bad resptr (%p)", resptr);
          return (-E_FAULT);
        }
        resptr = (int *) pa2kva (va2pa (resptr));
      }
      break;
    }
  }

  if ((reqbp->b_flags & B_SCATGATH) && bcount != reqbp->b_sgtot) {
    warn ("sys_disk_request: invalid scatter/gather, with total (%u) unequal "
          "to sum of parts (%u)", reqbp->b_sgtot, bcount);
    return (-E_INVAL);
  }

  /* are we done before we've started? */
  if (bcount == 0) {
    if (resptr)
      (*resptr)--;
    return (0);
  }

  if (bcount & di->d_bmod) {
    warn ("sys_disk_request: bad bcount %u", bcount);
    return (-E_INVAL);
  }

  /* copy request into kernel buffer */
  segbp = reqbp;
  nsegbp = NULL;
  reqbp = NULL;		/* from here on reqbp is the head of the kernel copy */
  do {
    segbp->b_dev = di->d_id;
    bp = copy_and_pin(segbp, segbp->b_bcount, &nctemp);
    if (!bp) {
      warn ("sys_disk_request: could not copy_and_pin");
      /* XXX - cleanup before returning */
      return (-E_NO_MEM);
    }
    noncontigs += nctemp;
    if (nsegbp)
      nsegbp->b_sgnext = bp;
    if (!reqbp)
      reqbp = bp;
    if (noncontigs >= DISK_MAX_SCATTER) {
      warn ("sys_disk_request: would require too many scatter/gather entries "
            "(%d)", noncontigs);
      /* XXX - cleanup before returning */
      return (-E_INVAL);
    }
    nsegbp = bp;
    segbp = segbp->b_sgnext;
  } while (nsegbp->b_flags & B_SCATGATH);

  nsegbp->b_resptr = resptr;
  if (resptr)
    ppage_pin (kva2pp((u_int) resptr));

  /* call appropriate strategy routine */
  di->d_strategy (reqbp);

#ifdef MEASURE_DISK_TIMES
  disk_pctr_return = rdtsc();
#endif

  return (0);
}
/*
 * sys_disk_mbr
 *
 * Read/Write the master boot record of a disk.
 *
 * The mbr contains the bootstrap code that the BIOS
 * loads on startup. This is always in sector 0
 * of the disk being booted. The mbr also contains the
 * partition table for the disk.
 *
 * write is non-zero to write `buffer' to the mbr, zero to read the mbr
 * into `buffer'.  dev selects the disk, k names a capability that must
 * be a root capability, and resptr is a word-aligned, writeable user
 * address that is translated and stored in the request's b_resptr.
 *
 * Returns 0 once the request has been handed to the strategy routine,
 * the env_getcap error, -E_CAP_INSUFF, -E_NOT_FOUND, -E_FAULT (bad
 * resptr or buffer) or -E_NO_MEM.
 */
int
sys_disk_mbr (u_int sn, int write, u_int dev, int k, char *buffer,
              int *resptr)
{
  cap c;
  int ret;
  struct buf *diskbuf;

  /* get the capability */
  if ((ret = env_getcap (curenv, k, &c)) < 0)
    return ret;

  /* make sure the root cap was passed in */
  if (!cap_isroot (&c))
    return -E_CAP_INSUFF;

  /* verify the dev */
  if (dev >= si->si_ndisks)
    return -E_NOT_FOUND;

  /* check the buffers we were given; nothing is pinned until every
     check (and the allocation) has succeeded, so that the failure
     paths below leave no page pinned */
  if ((((u_int) resptr) % sizeof(u_int)) || !(isvawriteable (resptr))) {
    warn ("sys_disk_mbr: bad resptr (%p)", resptr);
    return (-E_FAULT);
  }

  if (write) {
    /* writing the mbr only reads from the caller's buffer */
    if (! (isreadable_varange ((u_int)buffer, 512))) {
      warn ("sys_disk_mbr: bad buffer (%p)", buffer);
      return (-E_FAULT);
    }
  } else {
    /* reading the mbr stores into the caller's buffer, so the caller
       must be allowed to write it (same rule as sys_disk_request) */
    if (! (iswriteable_varange ((u_int)buffer, 512))) {
      warn ("sys_disk_mbr: bad buffer (%p)", buffer);
      return (-E_FAULT);
    }
  }

  /* get a disk req buffer and fill it in */
  diskbuf = disk_buf_alloc ();
  if (!diskbuf)
    return -E_NO_MEM;

  /* pin and translate the result pointer */
  ppage_pin (pa2pp ((va2pa (resptr))));
  resptr = (int *) pa2kva (va2pa (resptr));

  diskbuf->b_next = NULL;
  diskbuf->b_sgnext = NULL;
  diskbuf->b_dev = dev;
  diskbuf->b_blkno = 0;
  diskbuf->b_bcount = 512;	/* only want to read the first sector */
  diskbuf->b_sgtot = 512;
  diskbuf->b_memaddr = buffer;
  diskbuf->b_envid = curenv->env_id;
  diskbuf->b_resid = 0;
  diskbuf->b_resptr = resptr;
  diskbuf->b_flags = B_ABSOLUTE;	/* bypass partitions table */
  if (write) {
    diskbuf->b_flags |= B_WRITE;
  } else {
    diskbuf->b_flags |= B_READ;
  }

  /* pin it in case the user frees it before the request
     completes. This will be unpinned when sched_reqcomplete
     is called which in turn calls disk_buf_free which calls
     ppage_unpin.
     NOTE(review): only the page containing buffer[0] is pinned; a
     buffer that straddles a page boundary leaves the second page
     unpinned -- confirm whether callers guarantee alignment. */
  ppage_pin (pa2pp ((va2pa (buffer))));

  /* start the request */
  si->si_disks[dev].d_strategy (diskbuf);

  return 0;
}
/* XXX - we should use copyin, etc, instead of isreadable_* so that user
   will get pagefaults he can handle transparently */

/*
 * sys_disk_scsicmd
 *
 * Issues a raw SCSI command described by reqbp (which must carry
 * B_SCSICMD and whose b_memaddr points at a struct scsicmd).  k must
 * name a valid root capability in the current environment.
 *
 * The buf, the struct scsicmd and the command bytes are copied into
 * kernel memory; the pages behind the result pointer and the data area
 * are pinned; the copy is then passed to disk 0's strategy routine.
 *
 * Returns 0 on success, or -E_CAP_INVALID, -E_CAP_INSUFF, -E_FAULT,
 * -E_INVAL, -E_NOT_FOUND, -E_NO_MEM or -E_TOO_BIG on failure.
 */
static int
sys_disk_scsicmd (u_int sn, u_int k, struct buf *reqbp)
{
  struct buf *bp;
  struct scsicmd *scsicmd;
  struct scsicmd *scsicmd2;
  int noncontigs;
  struct disk *di;

  /* must have root capability for system to do a raw SCSI command!! */
  /* XXX -- later, if desired, deeper checking of validity can reduce */
  /* this restriction...                                              */
  if (k >= curenv->env_clen || ! curenv->env_clist[k].c_valid) {
    warn ("sys_disk_scsicmd: bad capability number %u\n", k);
    return (-E_CAP_INVALID);
  }
  if (! cap_isroot(&curenv->env_clist[k])) {
    warn ("sys_disk_scsicmd: cap %u is not root capability for system\n", k);
    return (-E_CAP_INSUFF);
  }

  /* must be able to read the reqbp ... */
  if (! (isreadable_varange ((u_int) reqbp, sizeof (struct buf)))) {
    warn ("sys_disk_scsicmd: bad reqbp (%p)", reqbp);
    return (-E_FAULT);
  }

  /* ... and only now is it safe to pull the command description
     pointer out of it */
  scsicmd = (struct scsicmd *) reqbp->b_memaddr;

  /* Should be a SCSICMD */
  if (! (reqbp->b_flags & B_SCSICMD)) {
    warn ("sys_disk_scsicmd: not a B_SCSICMD\n");
    return (-E_INVAL);
  }

  /* Must be proper environment */
  if (reqbp->b_envid != curenv->env_id) {
    warn ("sys_disk_scsicmd: bad envid\n");
    return (-E_INVAL);
  }

  /* no scatter/gather support for raw SCSI commands */
  if (reqbp->b_flags & B_SCATGATH) {
    warn ("sys_disk_scsicmd: B_SCATGATH not allowed with B_SCSICMD\n");
    return (-E_INVAL);
  }

  /* can't send request to non-existent disk... */
  if (reqbp->b_dev >= si->si_ndevs) {
    warn ("sys_disk_scsicmd: there is no disk %u in system\n", reqbp->b_dev);
    return (-E_NOT_FOUND);
  }

  /* check that everything is readable */
  if (! isreadable_varange ((u_int) scsicmd, sizeof (struct scsicmd))) {
    warn ("sys_disk_scsicmd: SCSI command description is not readable\n");
    return (-E_FAULT);
  }
  if (! isreadable_varange ((u_int) scsicmd->scsi_cmd, scsicmd->cmdlen) ) {
    warn ("sys_disk_scsicmd: SCSI command itself is not readable\n");
    return (-E_FAULT);
  }
  if (! isreadable_varange ((u_int)scsicmd->data_addr, scsicmd->datalen) ) {
    warn ("sys_disk_scsicmd: data area for SCSI command is not readable\n");
    return (-E_FAULT);
  }

  /* length of SCSI command must not be greater than B_SCSICMD_MAXLEN */
  if (scsicmd->cmdlen > B_SCSICMD_MAXLEN) {
    warn ("sys_disk_scsicmd: specified SCSI command too large (%d > %d)\n",
          scsicmd->cmdlen, B_SCSICMD_MAXLEN);
    return (-E_INVAL);
  }

  /* copy the SCSI command to avoid sharing it with app */
  bp = bp_copy (reqbp);
  if (bp == NULL) {
    warn ("sys_disk_scsicmd: kernel malloc for bp failed\n");
    return (-E_NO_MEM);
  }

  bp->b_memaddr = malloc (sizeof (struct scsicmd));
  if (bp->b_memaddr == NULL) {
    warn ("sys_disk_scsicmd: kernel malloc for scsicmd failed\n");
    free (bp);
    return (-E_NO_MEM);
  }

  scsicmd2 = (struct scsicmd *) bp->b_memaddr;
  bcopy (scsicmd, scsicmd2, sizeof (struct scsicmd));

  scsicmd2->scsi_cmd = (struct scsi_generic *) malloc (scsicmd->cmdlen);
  if (scsicmd2->scsi_cmd == NULL) {
    warn ("sys_disk_scsicmd: second kernel malloc failed\n");
    free (bp->b_memaddr);
    free (bp);
    return (-E_NO_MEM);
  }
  bcopy (scsicmd->scsi_cmd, scsicmd2->scsi_cmd, scsicmd->cmdlen);

  scsicmd2->bp = bp;
  bp->b_resid = scsicmd->datalen;
  /* NOTE(review): unlike sys_disk_request and sys_disk_mbr, b_resptr is
     translated here without an alignment/writeability check -- confirm
     whether one is needed. */
  bp->b_resptr = (int *) pa2kva (va2pa (reqbp->b_resptr));

  /* pin down the app pages that will later be used by the driver */
  ppage_pin (kva2pp ((u_int) bp->b_resptr));
  noncontigs = pin_and_count_noncontigs (scsicmd2->data_addr,
                                         scsicmd2->datalen);
  if (noncontigs >= DISK_MAX_SCATTER) {
    warn ("sys_disk_scsicmd: will require too many scatter/gather entries "
          "(%d)", noncontigs);
    disk_buf_free (bp);
    return (-E_TOO_BIG);
  }

  /* XXX */
  /* call down to the low-level driver.  GROK -- since the partition stuff */
  /* creates an abstract disk that is separate from the real one, a hack   */
  /* is needed to get the actual disk strategy routine for raw SCSI        */
  /* commands.  This is fine as long as all disks actually go to the same  */
  /* strategy routine.                                                     */
  di = &(si->si_disks[0]);
  di->d_strategy (bp);

  return (0);
}
/*
 * msgringent_setup
 *
 * Builds a kernel-side msgringent for the application entry
 * u_msgringent:
 *   - the owner word must be int-aligned and mapped with WRITE_MASK
 *     permissions; it is translated to a kernel address and pinned;
 *   - the body may have at most one region; that region is cut at page
 *     boundaries, each piece must be mapped with READ_MASK permissions,
 *     and each piece is translated, recorded in body.r[] and pinned.
 *
 * Returns the new entry, or NULL on any failure (after releasing the
 * partially built entry with msgringent_free).
 */
static msgringent *
msgringent_setup (msgringent * u_msgringent)
{
  msgringent *kent;
  Pte *pte = NULL;
  int nseg = 0;
  int copied = 0;
  int len;
  caddr_t addr;
  u_int room;

  kent = (msgringent *) malloc (sizeof (msgringent));
  if (kent == NULL) {
    warn ("msgringent_setup: failed malloc");
    return (NULL);
  }

  kent->appaddr = u_msgringent;
  kent->owner = NULL;
  kent->body.n = 0;

  /* Verify and translate owner field */
  if ((u_int) u_msgringent->owner % sizeof (int)) {
    goto bad_owner;
  }
  pte = va2ptep ((u_int) u_msgringent->owner);
  if (!pte || ((*pte & WRITE_MASK) != WRITE_MASK)) {
    goto bad_owner;
  }
  kent->owner = (u_int *) pa2kva (va2pa (u_msgringent->owner));
  ppage_pin (kva2pp ((u_long) kent->owner));

  /* Verify and translate data field */
  if (u_msgringent->body.n > 1) {
    warn ("msgringent_setup: not allowed to setup disjoint message body\n");
    goto fail;
  }

  len = u_msgringent->body.r[0].sz;
  addr = u_msgringent->body.r[0].data;
  /* bytes left on the first page; every later piece gets a full page */
  room = NBPG - (((u_long) addr) & (NBPG - 1));

  for (; len > 0; room = NBPG) {
    u_int slen = min (len, room);

    pte = va2ptep ((u_int) addr);
    if (!pte || ((*pte & READ_MASK) != READ_MASK)) {
      /* physical page is not accessible */
      warn ("msgringent_setup: can't read scatter ptr\n");
      goto fail;
    }

    kent->body.r[nseg].data = (char *) pa2kva (va2pa (addr));
    kent->body.r[nseg].sz = slen;
    kent->body.n++;
    /* pin the page to prevent re-allocation */
    ppage_pin (kva2pp ((u_long) kent->body.r[nseg].data));

    len -= slen;
    addr += slen;
    copied += slen;
    nseg++;

    /* NOTE(review): the limit is tested after the slot has been filled
       and with '>', so slot IPC_MAX_SCATTER_PTR can be written on the
       next pass -- confirm body.r[] is large enough for that. */
    if (nseg > IPC_MAX_SCATTER_PTR || copied > IPC_MAX_MSG_SIZE) {
      msgringent_free (kent);
      warn ("msgringent_setup: message body too big\n");
      return (NULL);
    }
  }

  return (kent);

bad_owner:
  warn ("msgringent_setup: owner field failed\n");
fail:
  msgringent_free (kent);
  return (NULL);
}
/* A predicate is represented as a sum-of-products, that is
   (A1 A2 ... ) OR (B1 B2 ...) OR ...
   where each element in a product (the A?'s and B?'s) are simple
   predicates like v > 10.

   Predicates are represented in memory as an array of wk_term's,
   one term for each immediate, variable, operator, conjunction or
   disjunction. A single product is considered to be a group of
   contiguous wk_term's that are not WK_ORs. The whole mess
   is terminated by a WK_END. */

#include <vcode/vcode.h>
#include <xok/wk.h>
#include <xok/mmu.h>
#include <xok/sys_proto.h>
#include <xok/kerrno.h>
#include <xok/malloc.h>
#include <xok_include/assert.h>
#include <xok/printf.h>
#ifndef __CAP__
#include <xok/pmapP.h>
#else
#include <xok/pmap.h>
#endif

#define WK_MAX_CODE_BYTES 4096
#define OVERRUN_SAFETY 20
#define OVERRUN_CHECK \
{ \
  if (v_ip > code + WK_MAX_CODE_BYTES - OVERRUN_SAFETY) { \
    warn ("wk_compile: out of code space\n"); \
    ret = -E_INVAL; \
    goto error; \
  } \
}

static int next_pp; /* outside function so can be used by cleanup code */

/*
 * wk_compile
 *
 * Translates the sz terms in t[] into vcode emitted into `code'.
 *
 *   WK_VAR  after a capability check, the physical page holding the
 *           variable is pinned and recorded in pred_pages[] (PP_USER
 *           pages only; PP_KERNRO pages are used as they are) and a
 *           load of the variable into the next operand register is
 *           emitted;
 *   WK_IMM  loads an immediate into the next operand register;
 *   WK_TAG  sets the value returned when the current product holds;
 *   WK_OP   comparison operators emit a branch to the end of the
 *           current product when the comparison fails; WK_OR emits the
 *           return of the tag and starts a new product.
 *
 * Operands alternate between two registers, first one and then the
 * other.
 *
 * On both success and failure pred_pages[] is zero-terminated and
 * pred_pages/code are stored in curenv, so that the caller can clean up
 * uniformly.  Returns 0 on success or a negative error code.
 */
static int
wk_compile (struct wk_term *t, int sz, char *code, u_int *pred_pages)
{
  v_reg_t lhs, rhs, zero, tag;
  v_label_t end_of_term;
  struct Ppage *pp;
  cap c;
  u_int ppn;
  int idx;
  int need_label = 1;	/* next term opens a new product */
  int fill_lhs = 1;	/* next operand goes into lhs (else rhs) */
  int ret = 0;		/* also written by OVERRUN_CHECK */

  next_pp = 0;

  v_lambda ("", "", NULL, 1, code, WK_MAX_CODE_BYTES);
  if (!v_getreg (&lhs, V_U, V_TEMP) ||
      !v_getreg (&rhs, V_U, V_TEMP) ||
      !v_getreg (&zero, V_U, V_TEMP) ||
      !v_getreg (&tag, V_U, V_TEMP)) {
    panic ("wk_compile: architecture doesn't have enough registers.");
  }

  v_setu (tag, -1);
  v_setu (zero, 0);

  for (idx = 0; idx < sz; idx++) {
    struct wk_term *term = &t[idx];

    if (need_label) {
      end_of_term = v_genlabel ();
      need_label = 0;
    }

    OVERRUN_CHECK;

    switch (term->wk_type) {
    case WK_VAR:
      /* keep one slot free for the terminating 0 entry */
      if (next_pp >= WK_MAX_PP-1) {
        warn ("wk_compile: too many pages in predicate\n");
        ret = -E_INVAL;
        goto error;
      }
      ret = env_getcap (curenv, term->wk_cap, &c);
      if (ret < 0) {
        goto error;
      }
      ppn = PGNO((u_int)term->wk_var);
      if (!ppn || ppn >= nppage) {
        printf ("at index %d\n", idx);
        warn ("wk_compile: invalid physical page\n");
        ret = -E_INVAL;
        goto error;
      }
      pp = ppages_get(ppn);
      switch (Ppage_pp_status_get(pp)) {
      case PP_USER:
        ret = ppage_acl_check(pp,&c,PP_ACL_LEN,0);
        if (ret < 0) {
          goto error;
        }
        ppage_pin (pp);
        pred_pages[next_pp++] = ppn;
        break;
      case PP_KERNRO:
        /* user can access pages that each env gets mapped r/o */
        break;
      default:
        printf ("at index %d\n", idx);
        warn ("wk_compile: attempt to reference non PP_KERNRO or PP_USER page\n");
        ret = -E_INVAL;
        goto error;
      }
      if (fill_lhs) {
        v_ldui (lhs, zero, (int )ptov (term->wk_var));
      } else {
        v_ldui (rhs, zero, (int )ptov (term->wk_var));
      }
      fill_lhs = !fill_lhs;
      break;

    case WK_IMM:
      if (fill_lhs) {
        v_setu (lhs, term->wk_imm);
      } else {
        v_setu (rhs, term->wk_imm);
      }
      fill_lhs = !fill_lhs;
      break;

    case WK_TAG:
      {
        v_setu (tag, term->wk_tag);
      }
      break;

    case WK_OP:
      /* each comparison branches out of the product when it is false */
      switch (term->wk_op) {
      case WK_GT:
        {
          v_bleu (lhs, rhs, end_of_term);
        }
        break;
      case WK_GTE:
        {
          v_bltu (lhs, rhs, end_of_term);
        }
        break;
      case WK_LT:
        {
          v_bgeu (lhs, rhs, end_of_term);
        }
        break;
      case WK_LTE:
        {
          v_bgtu (lhs, rhs, end_of_term);
        }
        break;
      case WK_EQ:
        {
          v_bneu (lhs, rhs, end_of_term);
        }
        break;
      case WK_NEQ:
        {
          v_bequ (lhs, rhs, end_of_term);
        }
        break;
      case WK_OR:
        {
          /* product held: return its tag, then start the next one */
          v_retu (tag);
          v_label (end_of_term);
          need_label = 1;
        }
        break;
      default:
        {
          printf ("at index %d\n", idx);
          warn ("wk_compile: invalid wk-pred instruction\n");
          ret = -E_INVAL;
          goto error;
        }
      }
      break;

    default:
      printf ("at index %d\n", idx);
      warn ("wk_compile: invalid wk-pred type\n");
      ret = -E_INVAL;
      goto error;
    }
  }

  /* end the last term */
  OVERRUN_CHECK;
  v_retu (tag);
  v_label (end_of_term);
  v_retui (0);
  v_end (NULL);

error:
  /* have to do this even on error so that our caller can just call
     wk_free to clean memory/ref counts up */
  pred_pages[next_pp] = 0;
  curenv->env_pred_pgs = pred_pages;
  curenv->env_pred = (Spred)code;
  return ret;
}