Exemple #1
0
/*
 * pin_and_count_noncontigs
 *
 * Walk the virtual range [addr, addr + datalen) one page at a time, pin
 * the physical page behind each virtual page, and count how many times
 * the physical page number is not the successor of the previous one.
 *
 * The "previous" page number is seeded with si->si_nppages, so the first
 * page visited is counted as the start of a run (presumably no valid page
 * has number si_nppages + 1 -- confirm).
 *
 * The page table entry returned by va2ptep is dereferenced without a
 * check, so the caller must already have verified that the range is
 * mapped.
 *
 * Returns the number of discontinuities found (0 when datalen is 0).
 */
static int
pin_and_count_noncontigs (char *addr, u_int datalen)
{
  u_int cur = (u_int) addr;
  u_int last_ppn = si->si_nppages;
  u_int breaks = 0;

  while (datalen > 0) {
    u_int page_end, span;
    u_int ppn = PGNO (*va2ptep (cur));

    if (ppn != (last_ppn + 1))
      breaks++;

    ppage_pin (&ppages[ppn]);
    last_ppn = ppn;

    /* bytes of the range that live in the current page */
    page_end = (cur & ~PGMASK) + NBPG;
    span = page_end - cur;
    if (span >= datalen)
      break;			/* the range ends inside this page */

    datalen -= span;
    cur = page_end;
  }

  return (breaks);
}
Exemple #2
0
/*
 * disk_prepare_bc_request
 *
 * Build a one-page (NBPG bytes) buf describing block `blkno' of disk
 * `devno' with data at `vaddr', and either start a new request with it
 * (*headbpp == NULL) or append it to the scatter/gather chain that
 * starts at *headbpp.  Appending is only allowed when the new buf
 * begins exactly where the last buf of the chain ends on disk.
 *
 * `blkno' is in units of NBPG; it is converted to device blocks using
 * the disk's d_bsize.  `vaddr' and `resptr' are handed to kva2pp, and
 * their pages are pinned on success (`resptr' only if non-NULL).
 *
 * Returns 0 on success, -E_INVAL for a bad devno/blkno, a NULL headbpp,
 * a malformed chain or a non-mergeable request, and -E_NO_MEM when no
 * buf can be allocated.
 */
int
disk_prepare_bc_request (u_int devno, u_quad_t blkno, void *vaddr, u_int flags,
			 int *resptr, struct buf **headbpp)
{
  struct buf *newbp;
  struct buf *tail;

  /* XXX - test for big blkno wraparound */
  if (devno >= si->si_ndisks
      || ((blkno * NBPG) / si->si_disks[devno].d_bsize)
	 >= si->si_disks[devno].d_size) {
    warn ("disk_prepare_bc_request: invalid devno (%u) or blkno (%qu)",
	  devno, blkno);
    return (-E_INVAL);
  }

  if (headbpp == NULL) {
    warn ("disk_prepare_bc_request: headbpp == NULL");
    return (-E_INVAL);
  }

  newbp = disk_buf_alloc();
  if (newbp == NULL)
    return (-E_NO_MEM);

  /* describe a single-page transfer */
  newbp->b_dev = devno;
  newbp->b_blkno = (blkno * NBPG) / si->si_disks[devno].d_bsize;
  newbp->b_bcount = NBPG;
  newbp->b_memaddr = vaddr;
  newbp->b_flags = flags;
  newbp->b_envid = curenv->env_id;
  newbp->b_next = NULL;
  newbp->b_sgnext = NULL;
  newbp->b_sgtot = 0;
  newbp->b_resid = 0;
  newbp->b_resptr = NULL;

  ppage_pin (kva2pp((u_int) vaddr));

  if (*headbpp == NULL) {
    /* first buf of a new request */
    *headbpp = newbp;
    newbp->b_sgtot = NBPG;
  } else {
    /* find the last buf of the existing chain */
    for (tail = *headbpp; tail->b_flags & B_SCATGATH; ) {
      tail = (struct buf *) tail->b_sgnext;
      if (tail == NULL) {
	warn ("disk_prepare_bc_request: bad scatter/gather list");
	goto reject;
      }
    }

    /* XXX - test for big blkno wraparound */
    if (newbp->b_blkno != (tail->b_blkno +
			   (tail->b_bcount /
			    si->si_disks[newbp->b_dev].d_bsize))) {
      warn ("disk_prepare_bc_request: noncontiguous requests "
	    "(prevblk %qu, size %u, curblk %qu) can't be merged",
	    tail->b_blkno, tail->b_bcount, newbp->b_blkno);
      goto reject;
    }

    /* link the new buf behind the old tail */
    (*headbpp)->b_sgtot += NBPG;
    tail->b_flags |= B_SCATGATH;
    tail->b_sgnext = newbp;
  }

  if (resptr) {
    ppage_pin (kva2pp(((u_int) resptr)));
  }
  newbp->b_resptr = resptr;

  return (0);

reject:
  /* undo the pin and allocation made for the rejected buf */
  ppage_unpin (kva2pp((u_int) vaddr));
  free (newbp);
  return (-E_INVAL);
}
Exemple #3
0
/*
 * sys_disk_request
 *
 * Disk I/O without going through the buffer cache.
 *
 * xn_user is the name of a pxn that grants access to the disk
 * reqbp is a list of scatter/gather requests
 * k is which capability in the env should be checked
 *
 * permission is granted to perform the operation if:
 * 1) the blocks in reqbp are covered by the pxn
 * 2) the capability gives access to the pxn
 *
 * Requests flagged B_SCSICMD are handed straight to sys_disk_scsicmd.
 *
 * Returns 0 once the request has been handed to the disk's strategy
 * routine (or when the total byte count is 0), otherwise a negative
 * error: -E_FAULT for an unreadable buf, bad data address or bad
 * resptr; -E_NOT_FOUND when the pxn is unknown; -E_INVAL for an
 * inconsistent scatter/gather total, a byte count rejected by d_bmod,
 * or too many scatter/gather entries; -E_NO_MEM when copy_and_pin
 * fails; or whatever env_getcap / pxn_authorizes_xtnt report.
 */
int
sys_disk_request (u_int sn, struct Xn_name *xn_user, struct buf *reqbp,
		  u_int k)
{
  struct Xn_name xn;
  struct Xn_xtnt xtnt;
  struct Pxn *pxn;
  cap c;
  int ret;
  int access;
  struct disk *di;
  int *resptr = NULL;
  u_int bcount = 0;
  struct buf *bp, *segbp, *nsegbp;
  int noncontigs = 0, nctemp;

#ifdef MEASURE_DISK_TIMES
  disk_pctr_start = rdtsc();
#endif

  /* XXX - use PFM or copyin instead of isreadable_* */

  /* reqbp comes from the caller: make sure the head descriptor is
     readable before any of its fields (b_flags below) is looked at */
  if (! (isreadable_varange ((u_int) reqbp, sizeof (struct buf)))) {
    warn ("sys_disk_request: bad reqbp (%p)", reqbp);
    return (-E_FAULT);
  }

  /* bypass for direct scsi commands */
  if (reqbp->b_flags & B_SCSICMD) {
    return sys_disk_scsicmd (sn, k, reqbp);
  }

  /* get the capability */
  if ((ret = env_getcap (curenv, k, &c)) < 0)
    return ret;

  /* and the pxn */
  copyin (xn_user, &xn, sizeof (xn));
  if (! (pxn = lookup_pxn (&xn))) {
    warn ("sys_disk_request: no pxn found");
    return (-E_NOT_FOUND);
  }

  /* XXX - do we need to check that this is a physical disk? */
  /* get a reference to the disk unit for this command */
  di = &(si->si_disks[xn.xa_dev]);

  /* Iterate over the request list checking:
     -- if the request is transferring data to/from
     memory that this user can read/write.
     -- if the pxn and capability specified give
     access to these blocks */
  for (segbp = reqbp; ; segbp = (struct buf *) segbp->b_sgnext) {
    if (! (isreadable_varange ((u_int)segbp, sizeof(struct buf)))) {
      warn ("sys_disk_request: bad reqbp (%p)", segbp);
      return (-E_FAULT);
    }

    if (segbp->b_flags & B_READ) {
      access = ACL_R;
    } else {
      access = ACL_W;
    }

    xtnt.xtnt_block = segbp->b_blkno;
    xtnt.xtnt_size = segbp->b_bcount / di->d_bsize;
    bcount += segbp->b_bcount;

    if (! pxn_authorizes_xtnt (pxn, &c, &xtnt, access, &ret)) {
      warn ("sys_disk_request: pxn/cap does not grant access to block(s)");
      return ret;
    }

    /* a disk read stores into memory, so the target must be writeable;
       a disk write only needs to read it.
       NOTE(review): the direction here is taken from the head buf
       (reqbp) while the ACL check above uses this segment's own flags;
       confirm which of the two the driver actually honours. */
    if (! ((reqbp->b_flags & B_READ) ? iswriteable_varange
	   : isreadable_varange) ((u_int) segbp->b_memaddr, segbp->b_bcount)) {
      warn ("sys_disk_request: bad b_memaddr: %p (b_bcount %d)",
	    segbp->b_memaddr, segbp->b_bcount);
      return (-E_FAULT);
    }

    /* the last segment is the one without B_SCATGATH; only its resptr
       is used */
    if (! (segbp->b_flags & B_SCATGATH)) {
      if (segbp->b_resptr) {
	resptr = segbp->b_resptr;
	if ((((u_int) resptr) % sizeof(u_int)) ||
	    !(isvawriteable (resptr))) {
	  warn ("sys_disk_request: bad resptr (%p)", resptr);
	  return (-E_FAULT);
	}
	resptr = (int *) pa2kva (va2pa (resptr));
      }
      break;
    }
  }

  if ((reqbp->b_flags & B_SCATGATH) && bcount != reqbp->b_sgtot) {
    warn ("sys_disk_request: invalid scatter/gather, with total (%u) unequal "
	  "to sum of parts (%u)", reqbp->b_sgtot, bcount);
    return (-E_INVAL);
  }

  /* are we done before we've started? */
  if (bcount == 0) {
    if (resptr)
      (*resptr)--;
    return (0);
  }

  if (bcount & di->d_bmod) {
    warn ("sys_disk_request: bad bcount %u", bcount);
    return (-E_INVAL);
  }

  /* copy request into kernel buffer */
  segbp = reqbp;
  nsegbp = NULL;
  reqbp = NULL;
  do {
    segbp->b_dev = di->d_id;
    bp = copy_and_pin(segbp, segbp->b_bcount, &nctemp);
    if (!bp) {
      warn ("sys_disk_request: could not copy_and_pin");
      /* XXX - cleanup before returning */
      return (-E_NO_MEM);
    }
    noncontigs += nctemp;
    if (nsegbp) nsegbp->b_sgnext = bp;	/* chain behind previous copy */
    if (!reqbp) reqbp = bp;		/* first copy becomes the head */
    if (noncontigs >= DISK_MAX_SCATTER) {
      warn ("sys_disk_request: would require too many scatter/gather entries "
	    "(%d)", noncontigs);
      /* XXX - cleanup before returning */
      return (-E_INVAL);
    }
    nsegbp = bp;
    segbp = segbp->b_sgnext;
  } while (nsegbp->b_flags & B_SCATGATH);

  nsegbp->b_resptr = resptr;

  if (resptr) ppage_pin (kva2pp((u_int) resptr));

  /* call appropriate strategy routine */
  di->d_strategy (reqbp);

#ifdef MEASURE_DISK_TIMES
  disk_pctr_return = rdtsc();
#endif

  return (0);
}
Exemple #4
0
/*
 * sys_disk_mbr
 *
 * Read/Write the master boot record of a disk.
 *
 * The mbr contains the bootstrap code that the BIOS
 * loads on startup. This is always in sector 0
 * of the disk being booted. The mbr also contains the
 * partition table for the disk.
 *
 * write  non-zero to write the mbr from buffer, zero to read it into buffer
 * dev    index into si->si_disks
 * k      capability slot; must hold a root capability
 * buffer 512-byte user buffer
 * resptr user word, 4-byte aligned and writeable, stored in the request
 *        as b_resptr
 *
 * Returns 0 once the request has been handed to the disk's strategy
 * routine, the env_getcap error, -E_CAP_INSUFF for a non-root
 * capability, -E_NOT_FOUND for a bad dev, -E_FAULT for a bad resptr or
 * buffer, or -E_NO_MEM when no request buffer is available.
 *
 * All validation and the allocation are done before anything is pinned,
 * so no failure path leaves a page pinned.
 */
int
sys_disk_mbr (u_int sn, int write, u_int dev, int k, char *buffer, int *resptr)
{
  cap c;
  int ret;
  struct buf *diskbuf;

  /* get the capability */
  if ((ret = env_getcap (curenv, k, &c)) < 0)
    return ret;

  /* make sure the root cap was passed in */
  if (!cap_isroot (&c))
    return -E_CAP_INSUFF;

  /* verify the dev */
  if (dev >= si->si_ndisks)
    return -E_NOT_FOUND;

  /* check the buffers we were given */
  if ((((u_int) resptr) % sizeof(u_int)) ||
      !(isvawriteable (resptr))) {
    warn ("sys_disk_mbr: bad resptr (%p)", resptr);
    return (-E_FAULT);
  }

  /* writing the mbr only reads from buffer; reading the mbr stores into
     buffer, so it must be writeable (same rule as sys_disk_request) */
  if (write) {
    if (! (isreadable_varange ((u_int)buffer, 512))) {
      warn ("sys_disk_mbr: bad buffer (%p)", buffer);
      return (-E_FAULT);
    }
  } else {
    if (! (iswriteable_varange ((u_int)buffer, 512))) {
      warn ("sys_disk_mbr: bad buffer (%p)", buffer);
      return (-E_FAULT);
    }
  }

  /* get a disk req buffer */
  diskbuf = disk_buf_alloc ();
  if (!diskbuf)
    return -E_NO_MEM;

  /* nothing below can fail: now pin and translate resptr */
  ppage_pin (pa2pp ((va2pa (resptr))));
  resptr = (int *) pa2kva (va2pa (resptr));

  /* fill in the request */
  diskbuf->b_next = NULL;
  diskbuf->b_sgnext = NULL;
  diskbuf->b_dev = dev;
  diskbuf->b_blkno = 0;
  diskbuf->b_bcount = 512;	/* only want to read the first sector */
  diskbuf->b_sgtot = 512;
  diskbuf->b_memaddr = buffer;
  diskbuf->b_envid = curenv->env_id;
  diskbuf->b_resid = 0;
  diskbuf->b_resptr = resptr;
  diskbuf->b_flags = B_ABSOLUTE;		/* bypass partitions table */
  if (write) {
    diskbuf->b_flags |= B_WRITE;
  } else {
    diskbuf->b_flags |= B_READ;
  }

  /* pin it in case the user frees it before the request completes.
     This will be unpinned when sched_reqcomplete is called which
     in turn calls disk_buf_free which calls ppage_unpin.
     NOTE(review): only the page holding the first byte of buffer is
     pinned; a buffer that straddles a page boundary leaves its second
     page unpinned. */

  ppage_pin (pa2pp ((va2pa (buffer))));

  /* start the request */
  si->si_disks[dev].d_strategy (diskbuf);

  return 0;
}
Exemple #5
0
/*
 * sys_disk_scsicmd
 *
 * Issue a raw SCSI command described by reqbp, whose b_memaddr points
 * at a struct scsicmd in the caller's address space.  Capability slot k
 * must hold a root capability.  The buf, the struct scsicmd and the
 * command bytes are copied into kernel memory before the request is
 * handed to the strategy routine of si->si_disks[0].
 *
 * Returns 0 once the request has been queued, otherwise
 * -E_CAP_INVALID, -E_CAP_INSUFF, -E_FAULT, -E_INVAL, -E_NOT_FOUND,
 * -E_NO_MEM or -E_TOO_BIG as reported by the checks below.
 */
/* XXX - we should use copyin, etc, instead of isreadable_* so that user will
   get pagefaults he can handle transparently */
static int
sys_disk_scsicmd (u_int sn, u_int k, struct buf *reqbp)
{
  struct buf *bp;
  struct scsicmd *scsicmd;
  struct scsicmd *scsicmd2;
  int noncontigs;
  struct disk *di;

  /* must have root capability for system to do a raw SCSI command!!   */
  /* XXX -- later, if desired, deeper checking of validity can reduce  */
  /* this restriction...                                               */
  if (k >= curenv->env_clen || ! curenv->env_clist[k].c_valid) {
    warn ("sys_disk_scsicmd: bad capability number %u\n", k);
    return (-E_CAP_INVALID);
  }
  if (! cap_isroot(&curenv->env_clist[k])) {
    warn ("sys_disk_scsicmd: cap %u is not root capability for system\n", k);
    return (-E_CAP_INSUFF);
  }

  /* must be able to read the reqbp ... */
  if (! (isreadable_varange ((u_int) reqbp, sizeof (struct buf)))) {
    warn ("sys_disk_scsicmd: bad reqbp (%p)", reqbp);
    return (-E_FAULT);
  }

  /* ... and only now is it safe to look inside it */
  scsicmd = (struct scsicmd *) reqbp->b_memaddr;

  /* Should be a SCSICMD */
  if (! (reqbp->b_flags & B_SCSICMD)) {
    warn ("sys_disk_scsicmd: not a B_SCSICMD\n");
    return (-E_INVAL);
  }

  /* Must be proper environment */
  if (reqbp->b_envid != curenv->env_id) {
    warn ("sys_disk_scsicmd: bad envid\n");
    return (-E_INVAL);
  }

  /* no scatter/gather support for raw SCSI commands */
  if (reqbp->b_flags & B_SCATGATH) {
    warn ("sys_disk_scsicmd: B_SCATGATH not allowed with B_SCSICMD\n");
    return (-E_INVAL);
  }

  /* can't send request to non-existent disk... */
  if (reqbp->b_dev >= si->si_ndevs) {
    warn ("sys_disk_scsicmd: there is no disk %u in system\n", reqbp->b_dev);
    return (-E_NOT_FOUND);
  }

  /* check that everything is readable */
  if (! isreadable_varange ((u_int) reqbp->b_memaddr,
			    sizeof (struct scsicmd))) {
    warn ("sys_disk_scsicmd: SCSI command description is not readable\n");
    return (-E_FAULT);
  }

  if (! isreadable_varange ((u_int) scsicmd->scsi_cmd, scsicmd->cmdlen) ) {
    warn ("sys_disk_scsicmd: SCSI command itself is not readable\n");
    return (-E_FAULT);
  }

  if (! isreadable_varange ((u_int)scsicmd->data_addr, scsicmd->datalen) ) {
    warn ("sys_disk_scsicmd: data area for SCSI command is not readable\n");
    return (-E_FAULT);
  }

  /* length of SCSI command must not be greater than B_SCSICMD_MAXLEN */
  if (scsicmd->cmdlen > B_SCSICMD_MAXLEN) {
    /* report the value that was actually compared */
    warn ("sys_disk_scsicmd: specified SCSI command too large (%d > %d)\n",
	  (int) scsicmd->cmdlen, B_SCSICMD_MAXLEN);
    return (-E_INVAL);
  }

  /* copy the SCSI command to avoid sharing it with app */
  bp = bp_copy (reqbp);
  if (bp == NULL) {
    warn ("sys_disk_scsicmd: kernel malloc for bp failed\n");
    return (-E_NO_MEM);
  }
  bp->b_memaddr = malloc (sizeof (struct scsicmd));
  if (bp->b_memaddr == NULL) {
    warn ("sys_disk_scsicmd: kernel malloc for scsicmd failed\n");
    free (bp);
    return (-E_NO_MEM);
  }
  scsicmd2 = (struct scsicmd *) bp->b_memaddr;
  bcopy (scsicmd, scsicmd2, sizeof (struct scsicmd));
  scsicmd2->scsi_cmd = (struct scsi_generic *) malloc (scsicmd->cmdlen);
  if (scsicmd2->scsi_cmd == NULL) {
    warn ("sys_disk_scsicmd: second kernel malloc failed\n");
    free (bp->b_memaddr);
    free (bp);
    return (-E_NO_MEM);
  }
  bcopy (scsicmd->scsi_cmd, scsicmd2->scsi_cmd, scsicmd->cmdlen);
  scsicmd2->bp = bp;
  bp->b_resid = scsicmd->datalen;
  /* NOTE(review): unlike sys_disk_request, b_resptr is translated and
     pinned here without an alignment / isvawriteable check -- confirm
     whether callers are trusted to pass a valid pointer. */
  bp->b_resptr = (int *) pa2kva (va2pa (reqbp->b_resptr));

  /* pin down the app pages that will later be used by the driver */
  ppage_pin (kva2pp ((u_int) bp->b_resptr));
  noncontigs = pin_and_count_noncontigs (scsicmd2->data_addr,
					 scsicmd2->datalen);
  if (noncontigs >= DISK_MAX_SCATTER) {
    warn ("sys_disk_scsicmd: will require too many scatter/gather entries "
	  "(%d)", noncontigs);
    disk_buf_free (bp);
    return (-E_TOO_BIG);
  }

  /* XXX */
  /* call down to the low-level driver.  GROK -- since the partition stuff   */
  /* creates and abstract disk that is separate from the real one, a hack    */
  /* is needed to get the actual disk strategy routine for raw SCSI commands */
  /* This is fine as long as all disks actually go to the same strategy      */
  /* routine.                                                                */
   di = &(si->si_disks[0]);
   di->d_strategy (bp);

   return (0);
}
Exemple #6
0
/*
 * msgringent_setup
 *
 * Build a kernel-side copy of the message ring entry at u_msgringent.
 * The owner word and every page of the (single) body region are
 * checked through their page table entries, translated to kernel
 * virtual addresses and pinned.  A body that crosses page boundaries is
 * recorded as one scatter entry per page.
 *
 * Returns the new entry, or NULL if allocation fails, the owner word is
 * misaligned or not writeable, the body has more than one region, a
 * body page is not readable, or the body needs more than
 * IPC_MAX_SCATTER_PTR entries / IPC_MAX_MSG_SIZE bytes.  On every
 * failure after allocation the partial entry is released with
 * msgringent_free.
 */
static msgringent *
msgringent_setup (msgringent * u_msgringent)
{
  msgringent *kent;
  Pte *ptep = NULL;
  int nseg = 0;			/* scatter entries filled in so far */
  int mapped = 0;		/* body bytes translated so far */
  int owner_ok = 0;
  int remaining;
  caddr_t uaddr;
  u_int room;			/* bytes left in the current page */

  kent = (msgringent *) malloc (sizeof (msgringent));
  if (kent == NULL)
  {
    warn ("msgringent_setup: failed malloc");
    return (NULL);
  }

  /* start from a state msgringent_free can always be called on */
  kent->appaddr = u_msgringent;
  kent->owner = NULL;
  kent->body.n = 0;

  /* Verify and translate owner field: word aligned, mapped, writeable */
  if (((u_int) u_msgringent->owner % sizeof (int)) == 0)
  {
    ptep = va2ptep ((u_int) u_msgringent->owner);
    if (ptep != NULL && (*ptep & WRITE_MASK) == WRITE_MASK)
      owner_ok = 1;
  }
  if (!owner_ok)
  {
    warn ("msgringent_setup: owner field failed\n");
    msgringent_free (kent);
    return (NULL);
  }
  kent->owner = (u_int *) pa2kva (va2pa (u_msgringent->owner));
  ppage_pin (kva2pp ((u_long) kent->owner));

  /* Verify and translate data field */
  if (u_msgringent->body.n > 1)
  {
    warn ("msgringent_setup: not allowed to setup disjoint message body\n");
    msgringent_free (kent);
    return (NULL);
  }

  remaining = u_msgringent->body.r[0].sz;
  uaddr = u_msgringent->body.r[0].data;
  /* the first piece only runs to the end of the page uaddr starts in */
  room = NBPG-(((u_long)uaddr)&(NBPG - 1));

  while (remaining > 0)
  {
    u_int piece = min (remaining, room);

    ptep = va2ptep ((u_int) uaddr);
    if (ptep == NULL || (*ptep & READ_MASK) != READ_MASK)
    {
      /* physical page is not accessible */
      warn ("msgringent_setup: can't read scatter ptr\n");
      msgringent_free (kent);
      return (NULL);
    }

    kent->body.r[nseg].data = (char *) pa2kva (va2pa (uaddr));
    kent->body.r[nseg].sz = piece;
    kent->body.n++;

    /* pin the page to prevent re-allocation */
    ppage_pin (kva2pp ((u_long) kent->body.r[nseg].data));

    remaining -= piece;
    uaddr += piece;
    mapped += piece;
    room = NBPG;		/* later pieces start on a page boundary */
    nseg++;

    if (nseg > IPC_MAX_SCATTER_PTR || mapped > IPC_MAX_MSG_SIZE)
    {
      msgringent_free (kent);
      warn ("msgringent_setup: message body too big\n");
      return (NULL);
    }
  }

  return (kent);
}
Exemple #7
0
/* A predicate is represented as a sum-of-products, that is
   (A1 A2 ... ) OR (B1 B2 ...) OR ...
   where each element in a product (the A?'s and B?'s) is a simple
   predicate like v > 10.

   Predicates are represented in memory as an array of wk_term's,
   one term for each immediate, variable, operator, conjunction or
   disjunction. A single product is considered to be a group of
   contiguous wk_term's that are not WK_ORs. The whole mess is
   terminated by a WK_END.  */

#include <vcode/vcode.h>
#include <xok/wk.h>
#include <xok/mmu.h>
#include <xok/sys_proto.h>
#include <xok/kerrno.h>
#include <xok/malloc.h>
#include <xok_include/assert.h>
#include <xok/printf.h>

#ifndef __CAP__
#include <xok/pmapP.h>
#else
#include <xok/pmap.h>
#endif

/* Size in bytes of the code buffer that wk_compile passes to v_lambda. */
#define WK_MAX_CODE_BYTES 4096

/* Margin in bytes that OVERRUN_CHECK keeps free at the end of the code
   buffer. */
#define OVERRUN_SAFETY 20
/* Abort compilation with -E_INVAL once v_ip (presumably vcode's current
   emit position -- confirm) has advanced to within OVERRUN_SAFETY bytes
   of the end of the buffer.  Must be used where `code', `ret' and an
   `error' label are in scope. */
#define OVERRUN_CHECK						\
{								\
  if (v_ip > code + WK_MAX_CODE_BYTES - OVERRUN_SAFETY) {	\
    warn ("wk_compile: out of code space\n");			\
    ret = -E_INVAL;						\
    goto error;							\
  }								\
}

static int next_pp; /* outside function so can be used by cleanup code */

/*
 * wk_compile
 *
 * Translate the predicate in t[0..sz-1] into code generated with vcode
 * in the buffer `code' (WK_MAX_CODE_BYTES bytes).
 *
 * Operands (WK_VAR / WK_IMM) are loaded alternately into r1 and r2.
 * Each comparison emits a branch to the end of the current product on
 * the opposite condition (e.g. WK_GT emits v_bleu), so a failed
 * comparison skips the rest of that product.  WK_OR closes a product:
 * it emits "return tag" followed by the product's end label.  The
 * generated code returns 0 if no product returns its tag.
 *
 * Every PP_USER page named by a WK_VAR is ACL-checked against the
 * capability in the term, pinned, and recorded in pred_pages.
 *
 * Returns 0 on success or a negative error.  In both cases pred_pages
 * is zero-terminated and curenv->env_pred_pgs / curenv->env_pred are
 * set, so the caller can release everything with wk_free.
 */
static int wk_compile (struct wk_term *t, int sz, char *code,
		       u_int *pred_pages) {
  int i;
  v_reg_t r1, r2, z, tag;
  v_label_t end_of_term;
  int start_term = 0;	/* the first product's label is made before the loop */
  int op1 = 1;		/* non-zero: next operand goes to r1, else r2 */
  cap c;
  struct Ppage *pp;
  u_int ppn;
  int ret = 0;

  next_pp = 0;

  v_lambda ("", "", NULL, 1, code, WK_MAX_CODE_BYTES);
  if (!v_getreg (&r1, V_U, V_TEMP) ||
      !v_getreg (&r2, V_U, V_TEMP) ||
      !v_getreg (&z, V_U, V_TEMP) ||
      !v_getreg (&tag, V_U, V_TEMP))
    panic ("wk_compile: architecture doesn't have enough registers.");

  v_setu (tag, -1);
  v_setu (z, 0);

  /* Create the first product's end label up front so that end_of_term
     is always initialized, even when sz is 0 and the loop never runs.
     For sz >= 1 this is the same call the first iteration used to make. */
  end_of_term = v_genlabel ();

  for (i = 0; i < sz; i++) {
    if (start_term) {
      /* previous term was WK_OR: begin a new product */
      end_of_term = v_genlabel ();
      start_term = 0;
    }
    OVERRUN_CHECK;
    switch (t[i].wk_type) {
    case WK_VAR:
      /* keep one slot free for the terminating 0 */
      if (next_pp >= WK_MAX_PP-1) {
	warn ("wk_compile: too many pages in predicate\n");
	ret = -E_INVAL;
	goto error;
      }
      if ((ret = env_getcap (curenv, t[i].wk_cap, &c)) < 0) {
	goto error;
      }
      ppn = PGNO((u_int)t[i].wk_var);
      if (!ppn || ppn >= nppage) {
	printf ("at index %d\n", i);
	warn ("wk_compile: invalid physical page\n");
	ret = -E_INVAL;
	goto error;
      }
      pp = ppages_get(ppn);
      switch (Ppage_pp_status_get(pp)) {
      case PP_USER:
	if ((ret = ppage_acl_check(pp,&c,PP_ACL_LEN,0)) < 0) {
	  goto error;
	}
	ppage_pin (pp);
	pred_pages[next_pp++] = ppn;
	break;
      case PP_KERNRO:
	/* user can access pages that each env get's mapped r/o */
	break;
      default:
	printf ("at index %d\n", i);
	warn ("wk_compile: attempt to reference non PP_KERNRO or PP_USER page\n");
	ret = -E_INVAL;
	goto error;
      }
      if (op1) {
	v_ldui (r1, z, (int )ptov (t[i].wk_var));
	op1 = 0;
      } else {
	v_ldui (r2, z, (int )ptov (t[i].wk_var));
	op1 = 1;
      }
      break;
    case WK_IMM:
      if (op1) {
	v_setu (r1, t[i].wk_imm);
	op1 = 0;
      } else {
	v_setu (r2, t[i].wk_imm);
	op1 = 1;
      }
      break;
    case WK_TAG: {
      v_setu (tag, t[i].wk_tag);
      break;
    }
    case WK_OP: {
      /* branch out of the product when the comparison does NOT hold */
      switch (t[i].wk_op) {
      case WK_GT: {
	v_bleu (r1, r2, end_of_term);
	break;
      }
      case WK_GTE: {
	v_bltu (r1, r2, end_of_term);
	break;
      }
      case WK_LT: {
	v_bgeu (r1, r2, end_of_term);
	break;
      }
      case WK_LTE: {
	v_bgtu (r1, r2, end_of_term);
	break;
      }
      case WK_EQ: {
	v_bneu (r1, r2, end_of_term);
	break;
      }
      case WK_NEQ: {
	v_bequ (r1, r2, end_of_term);
	break;
      }
      case WK_OR: {
	/* every comparison in the product held: report its tag */
	v_retu (tag);
	v_label (end_of_term);
	start_term = 1;
	break;
      }
      default: {
	printf ("at index %d\n", i);
	warn ("wk_compile: invalid wk-pred instruction\n");
	ret = -E_INVAL;
	goto error;
      }
      }
      break;
    }
    default:
      printf ("at index %d\n", i);
      warn ("wk_compile: invalid wk-pred type\n");
      ret = -E_INVAL;
      goto error;
    }
  }

  /* end the last term.
     NOTE(review): if the predicate ends in WK_OR, end_of_term was
     already placed by that WK_OR and is placed a second time here --
     confirm that vcode tolerates this or that callers never do it. */
  OVERRUN_CHECK;
  v_retu (tag);
  v_label (end_of_term);

  v_retui (0);
  v_end (NULL);

error:
  /* have to do this even on error so that our caller can just call
     wk_free to clean memory/ref counts up */
  pred_pages[next_pp] = 0;
  curenv->env_pred_pgs = pred_pages;
  curenv->env_pred = (Spred)code;
  return ret;
}