/*
 * sys_disk_mbr
 *
 * Read/Write the master boot record of a disk.
 *
 * The mbr contains the bootstrap code that the BIOS
 * loads on startup. This is always in sector 0
 * of the disk being booted. The mbr also contains the
 * partition table for the disk.
 *
 * sn     - not used by this routine
 * write  - non-zero: write the 512 bytes at buffer to sector 0;
 *          zero: read sector 0 into buffer
 * dev    - index into si->si_disks; must be below si->si_ndisks
 * k      - index of a capability in curenv; it must be a root capability
 * buffer - user address of a 512-byte buffer
 * resptr - user address of an int; must be u_int-aligned and writeable.
 *          Its kernel address is stored in the request's b_resptr.
 *
 * Returns 0 once the request has been handed to the disk's strategy
 * routine, or a negative error code (-E_CAP_INSUFF, -E_NOT_FOUND,
 * -E_FAULT, -E_NO_MEM, or whatever env_getcap returned).
 */
int
sys_disk_mbr (u_int sn, int write, u_int dev, int k, char *buffer, int *resptr)
{
  cap c;
  int ret;
  struct buf *diskbuf;

  /* get the capability */
  if ((ret = env_getcap (curenv, k, &c)) < 0)
    return ret;

  /* make sure the root cap was passed in */
  if (!cap_isroot (&c))
    return -E_CAP_INSUFF;

  /* verify the dev */
  if (dev >= si->si_ndisks)
    return -E_NOT_FOUND;

  /* check the buffers we were given */
  if ((((u_int) resptr) % sizeof(u_int)) || !(isvawriteable (resptr))) {
    warn ("sys_disk_mbr: bad resptr (%p)", resptr);
    return (-E_FAULT);
  }

  /* Writing to the disk only reads from the user's buffer, while reading
     from the disk stores into it, so a disk read is the case that needs
     a writeable buffer (same rule sys_disk_request applies). */
  if (write) {
    if (! (isreadable_varange ((u_int)buffer, 512))) {
      warn ("sys_disk_mbr: bad buffer (%p)", buffer);
      return (-E_FAULT);
    }
  } else {
    if (! (iswriteable_varange ((u_int)buffer, 512))) {
      warn ("sys_disk_mbr: bad buffer (%p)", buffer);
      return (-E_FAULT);
    }
  }

  /* get a disk req buffer */
  diskbuf = disk_buf_alloc ();
  if (!diskbuf)
    return -E_NO_MEM;

  /* Nothing below can fail, so pin and translate resptr only now; pinning
     it earlier would leave the page pinned on every error return above.
     NOTE(review): assumes the mapping of resptr cannot change between the
     isvawriteable check and this point -- confirm. */
  ppage_pin (pa2pp ((va2pa (resptr))));
  resptr = (int *) pa2kva (va2pa (resptr));

  /* fill in the request */
  diskbuf->b_next = NULL;
  diskbuf->b_sgnext = NULL;
  diskbuf->b_dev = dev;
  diskbuf->b_blkno = 0;
  diskbuf->b_bcount = 512;	/* only want to read the first sector */
  diskbuf->b_sgtot = 512;
  diskbuf->b_memaddr = buffer;
  diskbuf->b_envid = curenv->env_id;
  diskbuf->b_resid = 0;
  diskbuf->b_resptr = resptr;
  diskbuf->b_flags = B_ABSOLUTE;	/* bypass partitions table */
  if (write) {
    diskbuf->b_flags |= B_WRITE;
  } else {
    diskbuf->b_flags |= B_READ;
  }

  /* pin it in case the user frees it before the request completes.
     This will be unpinned when sched_reqcomplete is called which
     in turn calls disk_buf_free which calls ppage_unpin.
     NOTE(review): only the page holding the first byte of buffer is
     pinned; a buffer that straddles a page boundary leaves its second
     page unpinned -- confirm whether callers guarantee alignment. */
  ppage_pin (pa2pp ((va2pa (buffer))));

  /* start the request */
  si->si_disks[dev].d_strategy (diskbuf);

  return 0;
}
/*
 * sys_disk_request
 *
 * Disk I/O without going through the buffer cache.
 *
 * sn is not used except to be forwarded to sys_disk_scsicmd
 * xn_user is the name of a pxn that grants access to the disk
 * reqbp is a list of scatter/gather requests
 * k is which capability in the env should be checked
 *
 * permission is granted to perform the operation if:
 * 1) the blocks in reqbp are covered by the pxn
 * 2) the capability gives access to the pxn
 *
 * Returns 0 when the request has been handed to the disk's strategy
 * routine (or when there was nothing to transfer), the result of
 * sys_disk_scsicmd for B_SCSICMD requests, or a negative error code.
 */
int
sys_disk_request (u_int sn, struct Xn_name *xn_user, struct buf *reqbp,
		  u_int k)
{
  struct Xn_name xn;
  struct Xn_xtnt xtnt;
  struct Pxn *pxn;
  cap c;
  int ret;
  int access;
  struct disk *di;
  int *resptr = NULL;
  u_int bcount = 0;
  struct buf *bp, *segbp, *nsegbp;
  int noncontigs = 0, nctemp;

#ifdef MEASURE_DISK_TIMES
  disk_pctr_start = rdtsc();
#endif

  /* XXX - use PFM or copyin instead of isreadable_* */

  /* reqbp comes straight from the user, so make sure the header is
     readable before looking at b_flags for the bypass test below */
  if (! (isreadable_varange ((u_int)reqbp, sizeof(struct buf)))) {
    warn ("sys_disk_request: bad reqbp (%p)", reqbp);
    return (-E_FAULT);
  }

  /* bypass for direct scsi commands */
  if (reqbp->b_flags & B_SCSICMD) {
    return sys_disk_scsicmd (sn, k, reqbp);
  }

  /* get the capability */
  if ((ret = env_getcap (curenv, k, &c)) < 0)
    return ret;

  /* and the pxn */
  copyin (xn_user, &xn, sizeof (xn));
  if (! (pxn = lookup_pxn (&xn))) {
    warn ("sys_disk_request: no pxn found");
    return (-E_NOT_FOUND);
  }

  /* the device named by the xn indexes si->si_disks below, so reject
     anything that is not one of the si->si_ndisks known disks */
  if ((u_int) xn.xa_dev >= si->si_ndisks) {
    warn ("sys_disk_request: xn does not name a disk");
    return (-E_NOT_FOUND);
  }

  /* get a reference to the disk unit for this command */
  di = &(si->si_disks[xn.xa_dev]);

  /* Iterate over the request list checking:
     -- if the request is transfering data to/from memory that this
        user can read/write.
     -- if the pxn and capability specified give access to these blocks
     The walk ends at the first entry without B_SCATGATH set; that
     entry's b_resptr (if any) is validated and translated. */
  for (segbp = reqbp; ; segbp = (struct buf *) segbp->b_sgnext) {
    if (! (isreadable_varange ((u_int)segbp, sizeof(struct buf)))) {
      warn ("sys_disk_request: bad reqbp (%p)", segbp);
      return (-E_FAULT);
    }

    if (segbp->b_flags & B_READ) {
      access = ACL_R;
    } else {
      access = ACL_W;
    }

    xtnt.xtnt_block = segbp->b_blkno;
    xtnt.xtnt_size = segbp->b_bcount / di->d_bsize;
    bcount += segbp->b_bcount;

    if (! pxn_authorizes_xtnt (pxn, &c, &xtnt, access, &ret)) {
      warn ("sys_disk_request: pxn/cap does not grant access to block(s)");
      return ret;
    }

    /* a disk read stores into memory, so the target must be writeable;
       a disk write only needs to read it.  The direction is taken from
       the head of the list. */
    if (! ((reqbp->b_flags & B_READ) ? iswriteable_varange
				     : isreadable_varange)
	  ((u_int) segbp->b_memaddr, segbp->b_bcount)) {
      warn ("sys_disk_request: bad b_memaddr: %p (b_bcount %d)",
	    segbp->b_memaddr, segbp->b_bcount);
      return (-E_FAULT);
    }

    if (! (segbp->b_flags & B_SCATGATH)) {
      if (segbp->b_resptr) {
	resptr = segbp->b_resptr;
	if ((((u_int) resptr) % sizeof(u_int)) ||
	    !(isvawriteable (resptr))) {
	  warn ("sys_disk_request: bad resptr (%p)", resptr);
	  return (-E_FAULT);
	}
	resptr = (int *) pa2kva (va2pa (resptr));
      }
      break;
    }
  }

  if ((reqbp->b_flags & B_SCATGATH) && bcount != reqbp->b_sgtot) {
    warn ("sys_disk_request: invalid scatter/gather, with total (%u) unequal "
	  "to sum of parts (%u)", reqbp->b_sgtot, bcount);
    return (-E_INVAL);
  }

  /* are we done before we've started? */
  if (bcount == 0) {
    if (resptr)
      (*resptr)--;
    return (0);
  }

  if (bcount & di->d_bmod) {
    warn ("sys_disk_request: bad bcount %u", bcount);
    return (-E_INVAL);
  }

  /* copy request into kernel buffer: each user entry is copied (and its
     memory pinned) by copy_and_pin, and the copies are chained through
     b_sgnext with reqbp pointing at the first copy */
  segbp = reqbp;
  nsegbp = NULL;
  reqbp = NULL;
  do {
    segbp->b_dev = di->d_id;
    bp = copy_and_pin(segbp, segbp->b_bcount, &nctemp);
    if (!bp) {
      warn ("sys_disk_request: could not copy_and_pin");
      /* XXX - cleanup before returning */
      return (-E_NO_MEM);
    }
    noncontigs += nctemp;
    if (nsegbp)
      nsegbp->b_sgnext = bp;
    if (!reqbp)
      reqbp = bp;
    if (noncontigs >= DISK_MAX_SCATTER) {
      warn ("sys_disk_request: would require too many scatter/gather entries "
	    "(%d)", noncontigs);
      /* XXX - cleanup before returning */
      return (-E_INVAL);
    }
    nsegbp = bp;
    segbp = segbp->b_sgnext;
  } while (nsegbp->b_flags & B_SCATGATH);

  /* the last kernel entry carries the (already translated) resptr */
  nsegbp->b_resptr = resptr;
  if (resptr)
    ppage_pin (kva2pp((u_int) resptr));

  /* call appropriate strategy routine */
  di->d_strategy (reqbp);

#ifdef MEASURE_DISK_TIMES
  disk_pctr_return = rdtsc();
#endif

  return (0);
}
/* A predicate is represented as a sum-of-products, that is
   (A1 A2 ... ) OR (B1 B2 ...) OR ... where each element in a product
   (the A?'s and B?'s) is a simple predicate like v > 10.

   Predicates are represented in memory as an array of wk_term's,
   one term for each immediate, variable, operator, conjunction or
   disjunction. A single product is considered to be a group of
   contiguous wk_term's that are not WK_ORs. The whole mess is
   terminated by a WK_END. */

#include <vcode/vcode.h>
#include <xok/wk.h>
#include <xok/mmu.h>
#include <xok/sys_proto.h>
#include <xok/kerrno.h>
#include <xok/malloc.h>
#include <xok_include/assert.h>
#include <xok/printf.h>
#ifndef __CAP__
#include <xok/pmapP.h>
#else
#include <xok/pmap.h>
#endif

#define WK_MAX_CODE_BYTES 4096
#define OVERRUN_SAFETY 20

/* Bail out of wk_compile (via its `error' label, with ret set) once the
   vcode instruction pointer gets within OVERRUN_SAFETY bytes of the end
   of the code buffer.  Relies on wk_compile's locals `code' and `ret'. */
#define OVERRUN_CHECK \
{ \
  if (v_ip > code + WK_MAX_CODE_BYTES - OVERRUN_SAFETY) { \
    warn ("wk_compile: out of code space\n"); \
    ret = -E_INVAL; \
    goto error; \
  } \
}

static int next_pp;	/* outside function so can be used by cleanup code */

/*
 * wk_compile
 *
 * Translate the sz terms in t into vcode-generated code stored in code.
 *
 * The generated function keeps two operand registers that WK_VAR and
 * WK_IMM terms fill alternately.  Each comparison operator branches to
 * the end of the current product when the comparison does not hold.
 * WK_TAG sets the value returned for the product, which starts out
 * as -1.  A WK_OR ends the product: it emits "return tag" and places
 * the end-of-product label.  After the last term the same is done,
 * followed by "return 0".
 *
 * Every PP_USER page referenced by a WK_VAR is access-checked against
 * the capability named by the term, pinned, and recorded in pred_pages.
 *
 * t          - array of predicate terms
 * sz         - number of terms in t
 * code       - buffer of WK_MAX_CODE_BYTES that receives the code
 * pred_pages - receives the pinned page numbers, terminated by a 0
 *              entry (presumably sized for WK_MAX_PP entries -- confirm
 *              against the caller)
 *
 * Returns 0 on success or a negative error code.  In both cases
 * pred_pages is terminated and curenv->env_pred_pgs / curenv->env_pred
 * are set, so the caller can clean up through wk_free.
 */
static int
wk_compile (struct wk_term *t, int sz, char *code, u_int *pred_pages)
{
  int i;
  v_reg_t r1, r2, z, tag;
  v_label_t end_of_term;
  int start_term = 0;
  int op1 = 1;
  cap c;
  struct Ppage *pp;
  u_int ppn;
  int ret = 0;

  next_pp = 0;

  v_lambda ("", "", NULL, 1, code, WK_MAX_CODE_BYTES);
  if (!v_getreg (&r1, V_U, V_TEMP) ||
      !v_getreg (&r2, V_U, V_TEMP) ||
      !v_getreg (&z, V_U, V_TEMP) ||
      !v_getreg (&tag, V_U, V_TEMP))
    panic ("wk_compile: architecture doesn't have enough registers.");

  v_setu (tag, -1);
  v_setu (z, 0);

  /* Create the label for the first product before the loop.  It used to
     be created on the first iteration, which left end_of_term
     uninitialized for the v_label below whenever sz <= 0. */
  end_of_term = v_genlabel ();

  for (i = 0; i < sz; i++) {
    /* a WK_OR closed the previous product: start a fresh label */
    if (start_term) {
      end_of_term = v_genlabel ();
      start_term = 0;
    }

    OVERRUN_CHECK;

    switch (t[i].wk_type) {
    case WK_VAR:
      /* keep one slot free for the terminating 0 entry */
      if (next_pp >= WK_MAX_PP-1) {
	warn ("wk_compile: too many pages in predicate\n");
	ret = -E_INVAL;
	goto error;
      }
      if ((ret = env_getcap (curenv, t[i].wk_cap, &c)) < 0) {
	goto error;
      }
      ppn = PGNO((u_int)t[i].wk_var);
      if (!ppn || ppn >= nppage) {
	printf ("at index %d\n", i);
	warn ("wk_compile: invalid physical page\n");
	ret = -E_INVAL;
	goto error;
      }
      pp = ppages_get(ppn);
      switch (Ppage_pp_status_get(pp)) {
      case PP_USER:
	if ((ret = ppage_acl_check(pp,&c,PP_ACL_LEN,0)) < 0) {
	  goto error;
	}
	ppage_pin (pp);
	pred_pages[next_pp++] = ppn;
	break;
      case PP_KERNRO:
	/* user can access pages that each env gets mapped r/o */
	break;
      default:
	printf ("at index %d\n", i);
	warn ("wk_compile: attempt to reference non PP_KERNRO or PP_USER page\n");
	ret = -E_INVAL;
	goto error;
      }
      /* load the variable into whichever operand register is next */
      if (op1) {
	v_ldui (r1, z, (int )ptov (t[i].wk_var));
	op1 = 0;
      } else {
	v_ldui (r2, z, (int )ptov (t[i].wk_var));
	op1 = 1;
      }
      break;

    case WK_IMM:
      if (op1) {
	v_setu (r1, t[i].wk_imm);
	op1 = 0;
      } else {
	v_setu (r2, t[i].wk_imm);
	op1 = 1;
      }
      break;

    case WK_TAG: {
      v_setu (tag, t[i].wk_tag);
      break;
    }

    case WK_OP: {
      /* each comparison emits the opposite branch: leave the product
	 as soon as the comparison is false */
      switch (t[i].wk_op) {
      case WK_GT: {
	v_bleu (r1, r2, end_of_term);
	break;
      }
      case WK_GTE: {
	v_bltu (r1, r2, end_of_term);
	break;
      }
      case WK_LT: {
	v_bgeu (r1, r2, end_of_term);
	break;
      }
      case WK_LTE: {
	v_bgtu (r1, r2, end_of_term);
	break;
      }
      case WK_EQ: {
	v_bneu (r1, r2, end_of_term);
	break;
      }
      case WK_NEQ: {
	v_bequ (r1, r2, end_of_term);
	break;
      }
      case WK_OR: {
	/* reaching this point means the whole product held */
	v_retu (tag);
	v_label (end_of_term);
	start_term = 1;
	break;
      }
      default: {
	printf ("at index %d\n", i);
	warn ("wk_compile: invalid wk-pred instruction\n");
	ret = -E_INVAL;
	goto error;
      }
      }
      break;
    }

    default:
      printf ("at index %d\n", i);
      warn ("wk_compile: invalid wk-pred type\n");
      ret = -E_INVAL;
      goto error;
    }
  }

  /* end the last term */
  OVERRUN_CHECK;
  v_retu (tag);
  v_label (end_of_term);
  v_retui (0);
  v_end (NULL);

error:
  /* have to do this even on error so that our caller can just call
     wk_free to clean memory/ref counts up */
  pred_pages[next_pp] = 0;
  curenv->env_pred_pgs = pred_pages;
  curenv->env_pred = (Spred)code;
  return ret;
}