Beispiel #1
0
/*
 * Move one of the reserved pages to virtual address va.
 *
 * Walks the reserved region described by __eea and picks the first slot whose
 * page-table entry is present.  The physical page behind that slot is mapped
 * at va with pte_flags, and the reserved mapping is then removed.
 *
 * Returns 0 on success, -1 if the remap fails or no reserved slot is present.
 */
int
__remap_reserved_page(u_int va, u_int pte_flags)
{
  u_int slot;

  for (slot = 0; slot < __eea->eea_reserved_pages; slot++) {
    u_int vpn = PGNO((u_int)__eea->eea_reserved_first) + slot;

    /* skip slots that have already been handed out */
    if (!(vpt[vpn] & PG_P))
      continue;

    if (_exos_self_insert_pte(CAP_ROOT,
			      ppnf2pte(PGNO(vpt[vpn]), pte_flags),
			      va, 0, NULL) < 0 ||
	_exos_self_unmap_page(CAP_ROOT,
			      (u_int)__eea->eea_reserved_first +
			      slot * NBPG) < 0) {
      sys_cputs("__remap_reserved_page: can't remap\n");
      return -1;
    }

    UAREA.u_reserved_pages--;
    return 0;
  }

  sys_cputs("__remap_reserved_page: none left\n");
  return -1;
}
Beispiel #2
0
/*
 * Allocate a physical page, map it at its buffer-cache address and insert it
 * into the buffer cache for block blk of device dev.
 *
 * The page is first mapped writable at a scratch address (the buffer-cache
 * address computed from __sysinfo.si_nppages) so that it can be zero-filled,
 * then mapped at the address derived from its own physical page number with
 * the caller's requested writability, and finally the scratch mapping is
 * removed before the page is inserted into the cache.
 *
 * Returns the address reported by exos_bufcache_insert, or NULL if the
 * initial allocation fails.  Later failures hit assert(0).
 */
void * exos_bufcache_alloc (u32 dev, u32 blk, int zerofill, int writeable,
			    int usexn)
{
   const unsigned int scratch_va = BUFCACHE_ADDR (__sysinfo.si_nppages);
   unsigned int final_va;
   int wr_bit = writeable ? PG_W : 0;
   int ret;

   /* Inserting a pte with no page number causes a physical page to be
      allocated.  Map it writeable for now since it might be zero-filled. */
   ret = _exos_self_insert_pte (CAP_ROOT, PG_W | PG_P | PG_U | PG_SHARED,
				scratch_va, ESIP_DONTPAGE, NULL);
   if ((ret < 0) || (vpt[PGNO(scratch_va)] == 0)) {
      kprintf ("exos_bufcache_alloc: _exos_self_insert_pte failed (ret %d)\n",
	       ret);
      return (NULL);
   }

   if (zerofill) {
      bzero ((char *)scratch_va, NBPG);
   }

   /* Map the same physical page at its final location, honouring the
      caller's "writeable" request. */
   final_va = BUFCACHE_ADDR (va2ppn(scratch_va));
   ret = _exos_self_insert_pte (CAP_ROOT,
				ppnf2pte(va2ppn(scratch_va),
					 wr_bit | PG_P | PG_U | PG_SHARED),
				final_va, ESIP_DONTPAGE,
				&__sysinfo.si_pxn[dev]);
   if ((ret < 0) || (vpt[PGNO(scratch_va)] == 0)) {
      kprintf ("exos_bufcache_alloc: failed to add real mapping (ret %d)\n",
	       ret);
      assert (0);
   }

   /* Drop the scratch mapping before inserting into the cache, so that a
      non-writeable entry is never reachable through a writable mapping. */
   ret = _exos_self_unmap_page (CAP_ROOT, scratch_va);
   if (ret < 0) {
      kprintf ("exos_bufcache_alloc: failed to clobber fake mapping "
	       "(ret %d)\n", ret);
      assert (0);
   }

   final_va = (u_int) exos_bufcache_insert (dev, blk, (void *)final_va, usexn);
   assert (final_va == BUFCACHE_ADDR (va2ppn(final_va)));

   return ((void *) final_va);
}
Beispiel #3
0
/*
 * Load one section of an executable into environment envid.
 *
 * Only ".text", ".data" and ".bss" are handled; any other section name is
 * ignored and reported as success.  Fresh pages are mapped at TEMP_REGION in
 * the caller, the same page-table entries are inserted into envid at the
 * section's address, the section contents are read from fd (except for
 * .bss), the remainder is zeroed, and the temporary mapping is removed.
 *
 * k is passed through to the pte-range syscalls unchanged.  The file offset
 * of fd is restored to its value on entry when the read path succeeds.
 *
 * Returns 0 on success (or for an ignored section), -1 on failure.  On
 * failure the target range in envid is cleared again.
 */
int
map_section(int k, int fd, SCNHDR *shdr, int envid) {
  u_int page_count;
  Pte *ptes;
  int i, retval = 0, type;
  off_t curloc = lseek(fd, 0, SEEK_CUR);
  u32 start, zero_start, len;

  if (!strcmp(shdr->s_name, ".text"))
    type = MS_TEXT;
  else if (!strcmp(shdr->s_name, ".data"))
    type = MS_DATA;
  else if (!strcmp(shdr->s_name, ".bss"))
    type = MS_BSS;
  else
    return 0; /* unknown sections are silently skipped */

  page_count = PGNO(PGROUNDUP(shdr->s_size));
  if (type == MS_BSS) {
    /* bss that starts mid-page shares its first page with the previous
       section, so mapping starts at the next page boundary */
    start = PGROUNDUP(shdr->s_vaddr);
    /* guard the decrement: page_count is unsigned and may already be 0 */
    if (start != shdr->s_vaddr && page_count > 0) page_count--;
  }
  else
    start = shdr->s_vaddr;
  if ((ptes = malloc(sizeof(Pte) * page_count)) == 0) {
    return -1;
  }
  for (i=0; i < page_count; i++)
    ptes[i] = PG_U|PG_W|PG_P;
  
  if (sys_self_insert_pte_range(k, ptes, page_count, TEMP_REGION) < 0 ||
      sys_insert_pte_range(k, &vpt[PGNO(TEMP_REGION)], page_count, 
			   start, k, envid) < 0 ||
      (type != MS_BSS &&
       (lseek(fd, shdr->s_scnptr, SEEK_SET) != shdr->s_scnptr ||
	read(fd, (void*)TEMP_REGION, shdr->s_size) != shdr->s_size ||
	lseek(fd, curloc, SEEK_SET) != curloc))) {
    retval = -1;
  }

  /* Only touch the temporary mapping when it was set up successfully;
     after a failure TEMP_REGION may not be mapped at all. */
  if (retval == 0) {
    if (type == MS_BSS) {
      zero_start = TEMP_REGION;
      len = page_count * NBPG;
    } else {
      /* zero from the end of the data to the end of its page.  When the
	 data ends exactly on a page boundary there is nothing to zero
	 (the old NBPG - offset form zeroed a whole page past the mapping) */
      zero_start = TEMP_REGION + shdr->s_size;
      len = PGROUNDUP(zero_start) - zero_start;
    }
    bzero((void*)zero_start, len);
    if (type == MS_TEXT)
      mprotect((void*)TEMP_REGION, page_count*NBPG, PROT_READ);
  }

  /* remove the temporary mapping, and on failure the target mapping too */
  for (i=0; i < page_count; i++)
    ptes[i] = 0;
  sys_self_insert_pte_range(k, ptes, page_count, TEMP_REGION);
  if (retval == -1)
    sys_insert_pte_range(k, ptes, page_count, start, k, envid);

  free(ptes); /* previously leaked on every path */
  return retval;
}
Beispiel #4
0
/*
 * Pin every physical page backing the datalen bytes starting at addr and
 * count how many of those pages do not directly follow the previous one
 * (ppn != previous ppn + 1).
 *
 * The "previous" page number is seeded with si->si_nppages, so the first
 * page is counted unless its number happens to equal si_nppages + 1.
 * Returns the count; 0 when datalen is 0.
 */
static int
pin_and_count_noncontigs (char *addr, u_int datalen)
{
  u_int cur = (u_int) addr;
  u_int remaining = datalen;
  u_int last_ppn = si->si_nppages;
  u_int breaks = 0;

  while (remaining > 0) {
    u_int ppn = PGNO (*va2ptep (cur));
    /* first byte of the page after the one holding cur */
    u_int page_end = (cur & ~PGMASK) + NBPG;
    u_int chunk = page_end - cur;

    if (ppn != (last_ppn + 1))
      breaks++;
    ppage_pin (&ppages[ppn]);
    last_ppn = ppn;

    if (chunk >= remaining)
      break;
    remaining -= chunk;
    cur = page_end;
  }

  return (breaks);
}
Beispiel #5
0
/*
 * Refill the reserved-page region.
 *
 * For every slot of the reserved region (as described by __eea) whose
 * page-table entry is not present, ask for a new page to be mapped there and
 * bump UAREA.u_reserved_pages.  Stops early once the counter reaches
 * __eea->eea_reserved_pages.  A static flag makes nested calls return
 * immediately while a refill is in progress.
 */
void
__replinish(void)
{
  static int refilling = 0;
  u_int slot;

  if (refilling) return;
  if (UAREA.u_reserved_pages == __eea->eea_reserved_pages) return;

  refilling = 1;

  for (slot = 0; slot < __eea->eea_reserved_pages; slot++) {
    /* slot still holds a page: nothing to do */
    if (vpt[PGNO((u_int)__eea->eea_reserved_first) + slot] & PG_P)
      continue;

    if (_exos_self_insert_pte(CAP_ROOT, PG_P | PG_U | PG_W,
			      (u_int)__eea->eea_reserved_first + slot * NBPG,
			      0, NULL) < 0) {
      sys_cputs("__replinish: can't get new page\n");
      continue;
    }

    if (++UAREA.u_reserved_pages == __eea->eea_reserved_pages)
      break;
  }

  refilling = 0;
}
Beispiel #6
0
/*
 * Currently disabled: the function returns immediately, so everything after
 * the early return is unreachable.
 *
 * The unreachable part walks [addr, addr + len) one page at a time and, for
 * each page whose pte has PG_D set, looks up the matching buffer-cache entry
 * and marks it dirty unless it already is.
 */
static void msync_mark_bc(struct Mmap *m, caddr_t addr, size_t len) {
  u_int va;

  /* XXX */ return;

  va = (u_int)addr;
  while (va < (u_int)addr + len) {
    if (vpt[PGNO(va)] & PG_D) {
      /* dirty block */
      struct bc_entry *b = __bc_lookup(m->mmap_dev,
				       va - (u_int)m->mmap_addr +
				       m->mmap_offset);

      assert(b);
      if (b->buf_dirty != BUF_DIRTY) {
	assert(sys_bc_set_dirty(b->buf_dev, b->buf_blk, 1) == 0);
      }
    }
    va += NBPG;
  }
}
Beispiel #7
0
/*
 * Insert page-table entries covering `length` bytes (rounded up to whole
 * pages) at va, pointing at consecutive pages starting at pa, with `prot`
 * and PG_GUEST or-ed into each entry.  The entries are inserted for
 * vmstate.eid.
 *
 * Returns 0 on success, -1 if the syscall reports an error or did not
 * complete all pages.
 */
int memmap(void *va, size_t length, int prot, void *pa)
{
    u_int completed = 0;
    u_int npages = PGNO(PGROUNDUP(length));
    Pte *ptes = alloca(npages * sizeof(Pte));
    u_int idx;
    int rc;

    for (idx = 0; idx < npages; idx++)
	ptes[idx] = ((int)pa + idx*NBPG) | prot | PG_GUEST;

    rc = sys_insert_pte_range(0, ptes, npages, (u_int)va, &completed,
			      0 /*  u_int ke FIXME */, vmstate.eid);

    return (rc || completed != npages) ? -1 : 0;
}
Beispiel #8
0
void
disk_buf_free (struct buf *bp)
{
  u_int datalen;
  u_int data, next;

  /* unpin pages */
  if (bp->b_flags & B_SCSICMD) {
    struct scsicmd *scsicmd = (struct scsicmd *) bp->b_memaddr;
    data = (u_int) scsicmd->data_addr;
    datalen = scsicmd->datalen;
    free (scsicmd->scsi_cmd);
    free (scsicmd);
  } else {
    data = (u_int)bp->b_memaddr;
    datalen = bp->b_bcount;
  }

  while (datalen > 0) {
    if (bp->b_flags & B_BC_REQ) {
      ppage_unpin (kva2pp (data));
    } else {
      struct Env *e;
      int r;

      e = env_id2env (bp->b_envid, &r);
      assert (e); /* XXX - what if an env with an outstanding request dies? */
      ppage_unpin (&ppages[PGNO (*env_va2ptep (e, data))]);
    }
    /* go to start of next page of data */
    next = (data & ~PGMASK) + NBPG;
    if (next - data >= datalen) break;
    assert (next > data);
    datalen -= next - data;
    data = next;
  }

  if (bp->b_resptr) {
    ppage_unpin (kva2pp ((u_int) bp->b_resptr));
  }

  free (bp);
}
Beispiel #9
0
/* XXX currently if you mmap a file with a non page-aligned length, what you
   write past what you wanted to mmap will be written to disk (though not
   reflected in the size meta data on the disk, which is good).  It should
   zero it first to be correct. */
/*
 * Page-fault handler for an mmap'd region.
 *
 * mru is the region's ustruct (it embeds the struct Mmap), faddr the
 * faulting address and errcode the fault error code (FEC_WR = write fault,
 * FEC_PR = fault on a present page).
 *
 * Anonymous mappings get a fresh zeroed page.  File mappings look the block
 * up in the buffer cache (reading it in first if necessary) and map it;
 * write faults on MAP_PRIVATE mappings get a private copy instead.
 *
 * Returns 1 when the fault was handled, 0 when it is not one this handler
 * resolves (write to a non-writable mapping, or a fault on a present page
 * that does not need copy-on-write).
 */
static int mmap_fault_handler(struct mregion_ustruct *mru, void *faddr,
			      unsigned int errcode) {
  struct Mmap *m = &(((struct mmap_ustruct*)mru)->m);
  u_int va = (u_int)faddr;
  Pte pte = PG_U | PG_P; /* new page should be present and user space */
  struct Xn_name *xn;
  struct Xn_name xn_nfs;

  /* if it's a write to a page that's not mapped writable then return */
  if ((errcode & FEC_WR) && !(m->mmap_prot & PROT_WRITE)) return 0;

  /* if writable requested... */
  if (m->mmap_prot & PROT_WRITE) pte |= PG_W;

  /* if shared requested... */
  if (m->mmap_flags & MAP_SHARED) pte |= PG_SHARED;

  /* if reading a page that's not present but is mapped private from a file
     then mark it copy-on-write so that it will reflect changes as long as
     possible (must be mapped writable as well) */
  if (!(errcode & FEC_WR) && ((m->mmap_flags &
			       (MAP_PRIVATE | MAP_FILE)) ==
			      (MAP_PRIVATE | MAP_FILE)) &&
      (pte & PG_W)) {
    pte |= PG_COW;
    pte &= ~PG_W;
  }

  /* if mapped anonymous... */
  if (m->mmap_flags & MAP_ANON) {
    /* currently maps a free page and zero's it.  The call is kept outside
       assert() so the mapping still happens if assertions are compiled out */
    int r = _exos_self_insert_pte(0, pte, PGROUNDDOWN(va), 0, NULL);

    assert(r == 0);
    bzero((void*)PGROUNDDOWN(va), NBPG);

    return 1;
  }
  else { /* if mapping from a file */
    u_int seq;
    u_quad_t pblock;
    int done = 0, ret, fd;
    struct bc_entry *b;

    /* find a free file descriptor to use with the file pointer during
       the fault */
    for (fd = NR_OPEN - 1; fd >= 0; fd--)
      if (__current->fd[fd] == NULL) {
	__current->fd[fd] = m->mmap_filp;
	break;
      }
    assert (fd >= 0);

    /* if fault is from non-mapped page... */
    if (!(errcode & FEC_PR)) {
      /* map a page from the file */
      ret = bmap(fd, &pblock, m->mmap_offset + PGROUNDDOWN(va) -
		 (u_int)m->mmap_addr, &seq);
      if (ret == -EINVAL && !(m->mmap_flags & MAP_NOEXTEND)) {
	/* XXX File extension not possible for ExOS */
	assert(0);
      } else
	assert(ret == 0);
      assert(seq >= 0);
    mmap_retryMapPage:
      /* check if the block is in the buffer cache */
      while (!(b = __bc_lookup64(m->mmap_dev, pblock))) {
	if ((int)m->mmap_dev >= 0) {
	  /* disk device */
	  int count = 1;

	  /* Reset the completion flag for every request.  Without this a
	     second pass through the loop (or a retry via the label above)
	     would see the value left by the previous read and the wait
	     below would return immediately instead of sleeping. */
	  done = 0;
	  /* _exos_bc_read_and_insert returns -E_EXISTS if *any* of the 
	     requested blocks are in the cache... */
	  /* read in up to 64k at a time */
	  while ((count <= seq) && (count < 16) &&
		 (!__bc_lookup64 (m->mmap_dev, (pblock+count)))) {
	    count++;
	  }
	  ret = _exos_bc_read_and_insert(m->mmap_dev, (unsigned int) pblock,
					 count, &done);
	  if (ret == 0)
	    /* sleep until request is completed... */
	    wk_waitfor_value_neq (&done, 0, 0);
	  else if (ret < 0 && ret != -E_EXISTS) {
	    kprintf ("_exos_bc_read_and_insert in mmap returned %d\n", ret);
	    panic ("mmap: error reading in block\n");
	  }
	} else {
	  /* nfs device */
	  if (nfs_bmap_read(fd, pblock) < 0)
	    panic ("mmap: error reading block from nfs\n");
	}
      }
      /* map the page */

      if (b->buf_dev > MAX_DISKS) {
	xn_nfs.xa_dev = b->buf_dev;
	xn_nfs.xa_name = 0;
	xn = &xn_nfs;
      } else {
	xn = &__sysinfo.si_pxn[b->buf_dev];
      }

      /* a private write fault maps the cache page at a temporary address
	 so it can be copied; everything else maps it at the fault address */
      ret = _exos_self_insert_pte(0, (b->buf_ppn << PGSHIFT) | pte,
				  ((m->mmap_flags & MAP_PRIVATE) &&
				   (errcode & FEC_WR)) ?
				  MMAP_TEMP_REGION : PGROUNDDOWN(va),
				  ESIP_MMAPED, xn);
      /* make sure the page is completely read in */
      if (b->buf_state & BC_COMING_IN)
	wk_waitfor_value_neq(&b->buf_state, BC_VALID | BC_COMING_IN, 0);
      /* recheck that bc entry is still what we want */
      if (b == __bc_lookup64(m->mmap_dev, pblock)) {
	if (ret < 0) {
	  kprintf ("mmap: ret = %d\n", ret);
	  kprintf ("mmap: b->buf_dev = %d\n", b->buf_dev);
	  assert (0);
	}
      }
      else
	goto mmap_retryMapPage;

      /* if writing to a private page, then make a copy */
      if ((m->mmap_flags & MAP_PRIVATE) && (errcode & FEC_WR)) {
	ret = _exos_self_insert_pte(0, PG_P | PG_U | PG_W,
				    PGROUNDDOWN(va), 0, NULL);
	assert(ret == 0);
	bcopy((void*)MMAP_TEMP_REGION, (void*)PGROUNDDOWN(va), NBPG);
	ret = _exos_self_unmap_page(0, MMAP_TEMP_REGION);
	assert(ret == 0);
      }
    } else if ((m->mmap_flags & MAP_PRIVATE) && (errcode & FEC_WR) &&
	       (m->mmap_prot & PROT_WRITE)) {
      /* if fault is from a mapped page, but it needs copying... */
      /* perform cow: copy into a fresh page at the temporary address, then
	 map that page at the fault address and drop the temporary mapping */
      ret = _exos_self_insert_pte(0, PG_P | PG_U | PG_W,
				  MMAP_TEMP_REGION, ESIP_DONTPAGE, NULL);
      assert(ret == 0);
      bcopy((void*)PGROUNDDOWN(va), (void*)MMAP_TEMP_REGION, NBPG);
      ret = _exos_self_insert_pte(0, vpt[PGNO(MMAP_TEMP_REGION)],
				  PGROUNDDOWN(va), 0, NULL);
      assert(ret == 0);
      ret = _exos_self_unmap_page(0, MMAP_TEMP_REGION);
      assert(ret == 0);
    } else { /* trying to write to a page that's mmap'd RO
		or read from system page???... */
      __current->fd[fd] = NULL;
      return 0;
    }

    /* free the file descriptor */
    __current->fd[fd] = NULL;
    return 1;
  }
}
Beispiel #10
0
/*
 * Eagerly map len bytes of file fd, starting at offset, into environment
 * envid at addr (or at a freshly allocated region when addr is NULL).
 *
 * offset is rounded down to a page boundary and len grown accordingly; the
 * returned pointer is adjusted by the same amount.  Every page is looked up
 * in the buffer cache (and read in if absent) and then either mapped
 * directly, or, with MAP_COPY, copied into a new page that is mapped instead.
 * ke is passed through to the pte syscalls unchanged.
 *
 * Returns the mapped address, or (caddr_t)-1 with errno set on failure.
 * Read errors panic.
 *
 * NOTE(review): a region allocated here is not released when bmap() fails
 * part-way through; confirm whether callers rely on that.
 */
caddr_t __mmap (void *addr, size_t len, int prot, int flags, int fd, 
		off_t offset, u_int ke, int envid) {
  Pte pte = PG_U | PG_P;
  u_quad_t pblock;
  struct stat sb;
  u_int vp = 0;
  dev_t dev;
  int ret = 0;
  u_int uaddr = (u_int )addr;
  u_int seq = 0;
  int done;
  struct bc_entry *b;
  off_t block_offset;

  block_offset = (offset & PGMASK);
  offset &= ~PGMASK;
  len += block_offset;

  /* MAP_COPY takes precedence over MAP_PRIVATE */
  if ((flags & MAP_PRIVATE) && (flags & MAP_COPY))
    flags &= ~MAP_PRIVATE;
  /* figure out which pte bits we want to set for the pages in this segment */
  if (prot & PROT_WRITE) {
    if (flags & MAP_PRIVATE)
      pte |= PG_COW;
    else
      pte |= PG_W;
  } else {
    pte |= PG_RO;
  }

  if (flags & MAP_SHARED) pte |= PG_SHARED;

  /* XXX -- need to check access on fd */

  /* get the device that this fd refers to.  Done before any address space
     is allocated so that a bad fd does not leak the allocation. */
  if (fstat (fd, &sb) < 0) {
    errno = EINVAL;
    return (caddr_t )-1;
  }
  dev = sb.st_dev;

  /* deal with the address they want to map segment at */
  if (uaddr == 0) {
    uaddr = (u_int)__malloc(len);
    if (uaddr == 0) {
      /* allocation failed: do not go on to map at address 0 */
      errno = ENOMEM;
      return (caddr_t )-1;
    }
    assert (!(uaddr & PGMASK));
  } else {
    uaddr = uaddr & ~PGMASK;
  }

  /* make sure all the blocks we're mapping are in the cache (if not
     we read them in) and map them in */

  for (vp = 0; vp < len;) {
    struct Xn_name *xn;
    struct Xn_name xn_nfs;

    /* get the largest extent that starts at this offset */
    if (bmap (fd, &pblock, offset, &seq) < 0) {
      errno = EBADF;
      return (caddr_t )-1;
    }

    /* can not close the race between a bc lookup and attempts to   */
    /* map the associated page (or read it in), so simply do things */
    /* optimisticly and repeat them if necessary.                   */
  __mmap_retryMapPage:
    /* check if the block is in the buffer cache */
    while (!(b = __bc_lookup64 (dev, pblock))) {
      if (dev >= 0) {
	/* disk device */
	int count = 1;
	done = 0;
	assert (seq >= 0);
	/* _exos_bc_read_and_insert returns -E_EXISTS if *any* of the */
	/* requested blocks is in the cache...                      */
	while ((count <= seq) && (count < 16) &&
	       (!__bc_lookup64 (dev, (pblock+count)))) {
	  count++;
	}
	ret = _exos_bc_read_and_insert (dev, (unsigned int)pblock, count,
					&done);
	if (ret == -E_EXISTS) {
	  /* somebody else got there first: look the block up again */
	  continue;
	}
	if (ret < 0) {
	  kprintf ("_exos_bc_read_and_insert in mmap returned %d\n", ret);
	  panic ("mmap: error reading in block\n");
	}
	/* sleep until request is completed... */
	wk_waitfor_value_neq (&done, 0, 0);
      } else {
	/* nfs device */
	if (nfs_bmap_read (fd, pblock) < 0) {
	  panic ("mmap: error reading block from nfs\n");
	}
      }
    }

    if (b->buf_dev > MAX_DISKS) {
      xn_nfs.xa_dev = b->buf_dev;
      xn_nfs.xa_name = 0;
      xn = &xn_nfs;
    } else {
      xn = &__sysinfo.si_pxn[b->buf_dev];
    }

    if (flags & MAP_COPY) {
      /* This branch deliberately uses the function-level ret.  A block-local
	 ret used to shadow it here, which left the outer value stale
	 (possibly -E_EXISTS from the read loop) for the check below. */

      /* map the cache page at the first temporary page... */
      ret = _exos_self_insert_pte (0, (b->buf_ppn << PGSHIFT) |
				    PG_P | PG_U | PG_W, MMAP_TEMP_REGION,
				    ESIP_DONTPAGE, xn);
      if (ret < 0) {
	kprintf ("mmap: ret = %d\n", ret);
	assert (0);
      }
      /* ...and a fresh page right after it to receive the copy */
      ret = _exos_self_insert_pte (0, PG_P | PG_U | PG_W,
				    MMAP_TEMP_REGION + NBPG,
				    ESIP_DONTPAGE, NULL);
      if (ret < 0) {
	kprintf ("mmap (2nd): ret = %d\n", ret);
	assert (0);
      }      
      if (b->buf_state & BC_COMING_IN)
	wk_waitfor_value_neq(&b->buf_state, BC_VALID | BC_COMING_IN, 0);
      bcopy((void*)MMAP_TEMP_REGION, (void*)(MMAP_TEMP_REGION + NBPG), NBPG);

      /* the syscalls are kept outside assert() so they still run when
	 assertions are compiled out */
      ret = _exos_insert_pte (0, (vpt[PGNO(MMAP_TEMP_REGION + NBPG)] & ~PGMASK)
			      | pte | PG_D, uaddr + vp, ke, envid, 0, NULL);
      assert(ret >= 0);
      ret = _exos_self_unmap_page (0, MMAP_TEMP_REGION);
      assert(ret >= 0);
      ret = _exos_self_unmap_page (0, MMAP_TEMP_REGION + NBPG);
      assert(ret >= 0);
    } else {
      ret = sys_bc_buffer_map (xn, CAP_ROOT, (b->buf_ppn << PGSHIFT) | pte, uaddr + vp,
			       ke, envid);
      if (b->buf_state & BC_COMING_IN)
	wk_waitfor_value_neq(&b->buf_state, BC_VALID | BC_COMING_IN, 0);
    }

    /* recheck that bc entry is still what we want */
    if (b == __bc_lookup64 (dev, pblock)) {
      assert (ret >= 0);
    } else {
      goto __mmap_retryMapPage;
    }
    
    offset += NBPG;
    vp += NBPG;
  }
  
  return (caddr_t )uaddr + block_offset;
}
Beispiel #11
0
/* load an EXOS_MAGIC binary */
int
__do_simple_load (int fd, struct Env *e)
{
  // struct Uenv cu;

  u_int start_text_addr, start_text_pg;
  struct exec hdr;
  u_int text_size, data_size, bss_size, overlap_size;
  u_int envid = e->env_id;


  /* read a.out headers */
  if (lseek(fd, 0, SEEK_SET) == -1 ||
      read(fd, &hdr, sizeof(hdr)) != sizeof(hdr) ||
      lseek(fd, sizeof(hdr) + hdr.a_text, SEEK_SET) == -1 ||
      read(fd, &start_text_addr, sizeof(start_text_addr)) != 
          sizeof(start_text_addr)) 
  {
    errornf("Invalid executable format.\n");
  }

  start_text_pg = PGROUNDDOWN(start_text_addr);
  text_size = hdr.a_text + sizeof(hdr);
  data_size = hdr.a_data;
  if (text_size % NBPG) {
    data_size += text_size % NBPG;
    text_size = PGROUNDDOWN(text_size);
  }
  bss_size = hdr.a_bss;
  
  
  if (!(data_size % NBPG))
    overlap_size = 0;
  else
  {
    /* read in the page that contains both bss and inited data */
    u_int temp_page;
     
    temp_page = (u_int)__malloc(NBPG);
    overlap_size = NBPG;
   
    if (temp_page == 0 || 
	lseek(fd, text_size + PGROUNDDOWN(data_size), SEEK_SET) == -1 ||
        read(fd, (void*)temp_page, data_size % NBPG) != data_size % NBPG ||
        _exos_insert_pte
	  (0, vpt[PGNO(temp_page)], start_text_pg + text_size +
	   PGROUNDDOWN(data_size), 0, envid, 0, NULL) != 0) 
    {
      _exos_self_unmap_page(0, temp_page);
      error("Error mmaping text segment\n");
    }
    
    bzero((void*)temp_page + (data_size % NBPG),
          NBPG - (data_size % NBPG));
    _exos_self_unmap_page(0, temp_page);
    __free((void*)temp_page);
    bss_size -= NBPG - (data_size % NBPG);
    bss_size = PGROUNDUP(bss_size);
    data_size = PGROUNDDOWN(data_size);

  }


  /* mmap the text segment readonly */
  if ((u_int)__mmap((void*)start_text_pg, text_size, PROT_READ  | PROT_EXEC, 
		    MAP_FILE | MAP_FIXED | MAP_COPY, fd, (off_t)0, 0, envid)
	!= start_text_pg) 
  {
    errornf("Error mmaping text segment\n");
  }

  /* mmap the data segment read/write */
  if ((u_int)__mmap((void*)(start_text_pg + text_size), data_size,
		    PROT_READ | PROT_WRITE | PROT_EXEC,
		    MAP_FILE | MAP_FIXED | MAP_COPY,
		    fd, text_size, (off_t)0, envid)
	!= start_text_pg + text_size) 
  {
    errornf("Error mmaping data segment\n");
  }

#if 0 /* we set up a stack page later on when setting up arguments */
  /* allocate a stack page */
  if (_exos_insert_pte (0, PG_U|PG_W|PG_P, USTACKTOP-NBPG, 0, envid, 0,
			NULL) < 0) 
  {
    errornf("could not allocate stack\n");
  }
#endif

  /* set the entry point */
  assert(e->env_id == envid);
  e->env_tf.tf_eip = start_text_addr;

  return 1;
}
Beispiel #12
0
/*
 * Build the initial stack image for a new environment and map it below
 * USTACKTOP in environment envid.
 *
 * Layout, from the new stack pointer upwards: argc, a pointer to the argv
 * array, a pointer to the env array, the NULL-terminated argv and env pointer
 * arrays, then the argument and environment strings.  All stored pointers are
 * expressed as addresses in the new environment (relative to USTACKTOP).
 *
 * On success stores the new stack pointer in *newesp and returns 0.
 * Returns -EFAULT if argv or env is NULL and -ENOMEM if memory cannot be
 * allocated or shared.
 */
static int
setup_new_stack_simple(char *const argv[], char *const env[], 
                       u_int envid, u_int *newesp)
{
  int len = 0, argc, envc, pages_needed, i, failed;
  void *pages;
  u_int p, top;
  char **argvstr;
  char **envstr;

  if (!argv || !env) return -EFAULT;

  /* figure out how much we'll be putting on the new stack */

  /* the strings themselves, including their terminators */
  for (argc = 0; argv[argc]; argc++)
    len += strlen(argv[argc]) + 1;
  for (envc = 0; env[envc]; envc++)
    len += strlen(env[envc]) + 1;

  /* both pointer arrays with their NULL terminators, then argc and the
     argv and env pointers */
  len += (argc + 1) * sizeof(void*);
  len += (envc + 1) * sizeof(void*);
  len += sizeof(int) + sizeof(char*) + sizeof(char*);

  len = ALIGN(len);

  /* calculate how many pages we need, and always allocate at least 1 page */
  pages_needed = PGNO(PGROUNDUP(len))+1;
  pages = __malloc(pages_needed*NBPG);
  if (!pages) return -ENOMEM;

  /* `top` is the local address that will correspond to USTACKTOP; a local
     address x therefore becomes USTACKTOP - (top - x) in the child */
  top = (u_int)pages + pages_needed*NBPG;
  p = top - len;

  /* first, the argc */
  *(int*)p = argc;
  p += sizeof(int);

  /* then, the argv pointer: the array starts after the two pointer slots */
  *(int*)p = (USTACKTOP - (top - (p+sizeof(char*)+sizeof(char*))));
  p += sizeof(char*);

  /* then, the env pointer: the array starts after this slot and argv[] */
  *(int*)p = (USTACKTOP - (top - (p+sizeof(char*)+(argc+1)*sizeof(char*))));
  p += sizeof(char*);

  /* reserve the two pointer arrays */
  argvstr = (char**)p;
  p += (argc+1)*sizeof(char*);
  envstr = (char**)p;
  p += (envc+1)*sizeof(char*);

  /* copy in arguments, set up pointers to arguments */
  for (i = 0; i < argc; i++) {
    argvstr[i] = (char*)(USTACKTOP - (top - p));
    strcpy((char*)p, argv[i]);
    p += strlen(argv[i])+1;
  }
  argvstr[argc] = NULL;

  /* copy in env variables, set up pointers to env vars */
  for (i = 0; i < envc; i++) {
    envstr[i] = (char*)(USTACKTOP - (top - p));
    strcpy((char*)p, env[i]);
    p += strlen(env[i])+1;
  }
  envstr[envc] = NULL;

  /* now map the pages into the new process */
  *(char*)pages = 0;
  failed = (__vm_share_region((u_int)pages, pages_needed*NBPG, 0, 0, envid,
			      USTACKTOP-pages_needed*NBPG) == -1 ||
	    __vm_free_region((u_int)pages, pages_needed*NBPG, 0) == -1);
  __free(pages);
  if (failed)
    return -ENOMEM;

  /* dude, this is totally a GCC thing: gcc convention says when starting
   * a program first argument is esp+4. That's the -4 here */
  *newesp = USTACKTOP - len - 4;
  
  return 0;
}
Beispiel #13
0
/*
 * Build the initial stack image for a new environment and map it below
 * USTACKTOP in environment envid.
 *
 * From the top of the stack downwards the image holds: a struct ps_strings,
 * a copy of *eea, padding up to a page boundary, __RESERVED_PAGES reserved
 * pages, the argument and environment strings, the env and argv pointer
 * arrays, and argc.  eea is updated with the location and count of the
 * reserved pages before it is copied.  One extra page is allocated so the
 * child has at least one completely free stack page.
 *
 * On success stores the new stack pointer in *newesp and returns 0.
 * Returns -EFAULT if argv or env is NULL and -ENOMEM if memory cannot be
 * allocated or shared.
 */
static int
setup_new_stack(char *const argv[], char *const env[], u_int envid,
		u_int *newesp, struct _exos_exec_args *eea)
{
  int len=0, envsize=0, argsize=0, argc, envc, pages_needed, i, failed;
  void *pages;
  u_int p, top, padding, reserved_start;
  struct ps_strings *ps;
  char **argvstr, **envstr;

  /* XXX - sanity check ponters */
  if (!argv || !env) return -EFAULT;

  /* figure out how much we'll be putting on the new stack */
  len += sizeof(struct ps_strings);
  len = ALIGN(len);
  len += sizeof(struct _exos_exec_args);
  len = ALIGN(len);
  /* the reserved (mapped, but unused) pages, preceded by padding so that
     they start on a page boundary */
  padding = PGROUNDUP(len) - len;
  len += padding;
  len += NBPG * __RESERVED_PAGES;
  reserved_start = len;
  /* size of new args */
  for (argc = 0; argv[argc]; argc++)
    argsize += strlen(argv[argc]) + 1;
  /* size of new environment */
  for (envc = 0; env[envc]; envc++)
    envsize += strlen(env[envc]) + 1;
  len += envsize + argsize;
  len = ALIGN(len);
  /* for the pointers to the args & envs and their null termination */
  len += (envc + 1 + argc + 1) * sizeof(void*);
  len += sizeof(int); /* the argc */

  /* extra page so child has at least one totally free stack page */
  pages_needed = PGNO(PGROUNDUP(len)) + 1;
  pages = __malloc(pages_needed*NBPG);
  if (!pages) return -ENOMEM;

  /* `top` is the local address that will correspond to USTACKTOP; a local
     address x therefore becomes USTACKTOP - (top - x) in the child */
  top = (u_int)pages + pages_needed*NBPG;
  p = top - len;

  *(int*)p = argc;
  p += sizeof(int);
  argvstr = (char**)p;
  p += (argc+1)*sizeof(char*);
  envstr = (char**)p;
  p += (envc+1)*sizeof(char*);

  for (i = 0; i < argc; i++) {
    strcpy((char*)p, argv[i]);
    argvstr[i] = (char*)(USTACKTOP - (top - p));
    p += strlen(argv[i])+1;
  }
  argvstr[argc] = NULL;

  for (i = 0; i < envc; i++) {
    strcpy((char*)p, env[i]);
    envstr[i] = (char*)(USTACKTOP - (top - p));
    p += strlen(env[i])+1;
  }
  envstr[envc] = NULL;

  /* map (via touching) the reserved pages */
  for (i = 0; i < __RESERVED_PAGES; i++) {
    *(u_int*)p = 0;
    p += NBPG;
  }
  p += padding;

  /* record where the reserved pages live, then copy the exec args */
  eea->eea_reserved_pages = __RESERVED_PAGES;
  eea->eea_reserved_first = (char*)(USTACKTOP - reserved_start);
  p = ALIGN(p);
  *(struct _exos_exec_args*)p = *eea;
  p += sizeof(struct _exos_exec_args);
  p = ALIGN(p);

  ps = (struct ps_strings*)p;
  ps->ps_argvstr = (char**)(USTACKTOP - (top - (u_int)argvstr));
  ps->ps_nargvstr = argc;
  ps->ps_envstr = (char**)(USTACKTOP - (top - (u_int)envstr));
  ps->ps_nenvstr = envc;

  /* now map the pages into the new process */
  /* XXX - touch clean page so it'll be mapped */
  *(char*)pages = 0;
  failed = (__vm_share_region((u_int)pages, pages_needed*NBPG, 0, 0, envid,
			      USTACKTOP-pages_needed*NBPG) == -1 ||
	    __vm_free_region((u_int)pages, pages_needed*NBPG, 0) == -1);
  __free(pages);
  if (failed)
    return -ENOMEM;

  *newesp = USTACKTOP - len;

  return 0;
}
Beispiel #14
0
/*
 * Load the a.out program in fd.
 *
 * With _static set, the text and data segments are mapped into environment
 * envid with __mmap (MAP_COPY).  Otherwise the program is mapped into the
 * calling environment with mmap: text shared (or copied when the
 * NO_DEMAND_LOAD environment variable is set), data private, and the bss as
 * an anonymous mapping.
 *
 * The file header is treated as part of the text; any partial last page of
 * text is accounted to the data segment, and a partial last page of data is
 * loaded separately with its tail zeroed.
 *
 * Returns the entry address read from the file, or 0 on failure (errno is
 * set to ENOEXEC only for a malformed header).
 */
u_int
__load_prog_fd(int fd, int _static, u_int envid)
{
  struct exec hdr;
  u_int entry, text_base, dynamic;
  u_int text_size, data_size, bss_size, overlap_size;
  u_int frag, overlap_va;

  /* read a.out headers; two words follow the text in the file */
  if (lseek(fd, 0, SEEK_SET) == -1 ||
      read(fd, &hdr, sizeof(hdr)) != sizeof(hdr) ||
      lseek(fd, sizeof(hdr) + hdr.a_text, SEEK_SET) == -1 ||
      read(fd, &dynamic, sizeof(dynamic)) != sizeof(dynamic) ||
      read(fd, &entry, sizeof(entry)) != sizeof(entry)) {
    fprintf(stderr,"Invalid executable format.\n");
    errno = ENOEXEC;
    return 0;
  }

  text_base = PGROUNDDOWN(entry);
  text_size = hdr.a_text + sizeof(hdr);
  /* the partial last page of text (if any) is loaded as data */
  data_size = hdr.a_data + text_size % NBPG;
  text_size = PGROUNDDOWN(text_size);
  bss_size = hdr.a_bss;

  /* frag is the number of data bytes in the page shared with the bss;
     overlap_va is where that page goes */
  frag = data_size % NBPG;
  overlap_va = text_base + text_size + PGROUNDDOWN(data_size);
  overlap_size = frag ? NBPG : 0;

  if (_static) {
    if (frag) {
      /* read in the page that contains both bss and inited data */
      u_int temp_page = (u_int)__malloc(NBPG);

      if (temp_page == 0 ||
	  lseek(fd, text_size + PGROUNDDOWN(data_size), SEEK_SET) == -1 ||
	  read(fd, (void*)temp_page, frag) != frag ||
	  _exos_insert_pte(0, vpt[PGNO(temp_page)], overlap_va,
			   0, envid, 0, NULL) != 0) {
	_exos_self_unmap_page(0, temp_page);
	__free((void*)temp_page);
	fprintf(stderr,"Error mmaping text segment\n");
	return 0;
      }
      bzero((void*)(temp_page + frag), NBPG - frag);
      _exos_self_unmap_page(0, temp_page);
      __free((void*)temp_page);
      bss_size -= NBPG - frag;
      bss_size = PGROUNDUP(bss_size);
      data_size = PGROUNDDOWN(data_size);
    }

    /* mmap the text segment readonly */
    if ((u_int)__mmap((void*)text_base, text_size,
		      PROT_READ | PROT_EXEC,
		      MAP_FILE | MAP_FIXED | MAP_COPY, fd, (off_t)0, 0,
		      envid) != text_base) {
      fprintf(stderr,"Error mmaping text segment\n");
      return 0;
    }

    /* mmap the data segment read/write */
    if ((u_int)__mmap((void*)(text_base + text_size), data_size,
		      PROT_READ | PROT_WRITE | PROT_EXEC,
		      MAP_FILE | MAP_FIXED | MAP_COPY,
		      fd, (off_t)text_size, 0, envid)
	!= text_base + text_size) {
      fprintf(stderr,"Error mmaping data segment\n");
      return 0;
    }
  } else {
    /* if dynamic... */
    u_int mflags;
    u_int bss_va;

    if (frag) {
      /* read in the page that contains both bss and inited data */
      if (_exos_self_insert_pte(0, PG_P | PG_W | PG_U,
				overlap_va, 0, NULL) < 0 ||
	  lseek(fd, text_size + PGROUNDDOWN(data_size), SEEK_SET) == -1 ||
	  read(fd, (void*)overlap_va, frag) != frag) {
	fprintf(stderr,"Error mmaping text segment\n");
	return 0;
      }
      bzero((void*)(overlap_va + frag), NBPG - frag);
      bss_size -= NBPG - frag;
      bss_size = PGROUNDUP(bss_size);
      data_size = PGROUNDDOWN(data_size);
    }

    /* mmap the text segment readonly */
    mflags = MAP_FILE | MAP_FIXED;
    mflags |= getenv("NO_DEMAND_LOAD") ? MAP_COPY : MAP_SHARED;
    if ((u_int)mmap((void*)text_base, text_size,
		    PROT_READ | PROT_EXEC,
		    mflags, fd, (off_t)0) != text_base) {
      fprintf(stderr,"Error mmaping text segment\n");
      return 0;
    }

    /* mmap the data segment read/write */
    if (!(mflags & MAP_COPY))
      mflags = MAP_FILE | MAP_FIXED | MAP_PRIVATE;
    if ((u_int)mmap((void*)(text_base + text_size), data_size,
		    PROT_READ | PROT_WRITE | PROT_EXEC, mflags, fd,
		    (off_t)text_size) != text_base + text_size) {
      fprintf(stderr,"Error mmaping data segment: %d\n", errno);
      return 0;
    }

    /* mmap the bss as demand zero'd */
    bss_va = text_base + text_size + data_size + overlap_size;
    if ((u_int)mmap((void*)bss_va, bss_size,
		    PROT_READ | PROT_WRITE | PROT_EXEC,
		    MAP_ANON | MAP_FIXED | MAP_PRIVATE,
		    -1, (off_t)0) != bss_va) {
      fprintf(stderr,"Error mmaping bss\n");
      return 0;
    }
  }

  return entry;
}
Beispiel #15
0
/*
 * Load one section of an executable into environment envid.
 *
 * Only ".text", ".data" and ".bss" are handled; any other section name is
 * ignored and reported as success.  Fresh pages are mapped over a temporary
 * region allocated with __malloc, the same page-table entries are inserted
 * into envid at the section's address, the section contents are read from fd
 * (except for .bss), the remainder is zeroed, and the temporary mapping is
 * removed again.
 *
 * k is passed through to the pte-range syscalls unchanged.  The file offset
 * of fd is restored to its value on entry when the read path succeeds.
 *
 * Returns 0 on success (or for an ignored section), -1 on failure.  On
 * failure the target range in envid is cleared again.
 */
int
map_section(int k, int fd, SCNHDR *shdr, int envid)
{
  u_int page_count;
  Pte *ptes;
  int i, retval = 0, type;
  off_t curloc = lseek(fd, 0, SEEK_CUR);
  u32 start, zero_start, len;
  u_int temp_pages, num_completed, num_completed2;

  if (!strcmp(shdr->s_name, ".text"))
    type = MS_TEXT;
  else if (!strcmp(shdr->s_name, ".data"))
    type = MS_DATA;
  else if (!strcmp(shdr->s_name, ".bss"))
    type = MS_BSS;
  else
    return 0; /* unknown sections are silently skipped */

  page_count = PGNO(PGROUNDUP(shdr->s_size));
  if (type == MS_BSS) {
    /* bss that starts mid-page shares its first page with the previous
       section, so mapping starts at the next page boundary */
    start = PGROUNDUP(shdr->s_vaddr);
    /* guard the decrement: page_count is unsigned and may already be 0 */
    if (start != shdr->s_vaddr && page_count > 0) page_count--;
  }
  else
    start = shdr->s_vaddr;
  if ((ptes = __malloc(sizeof(Pte) * page_count)) == 0) {
    return -1;
  }
  for (i=0; i < page_count; i++)
    ptes[i] = PG_U|PG_W|PG_P;
  
  temp_pages = (u_int)__malloc(page_count * NBPG);
  num_completed = 0;
  num_completed2 = 0;
  if (temp_pages == 0 ||
      _exos_self_insert_pte_range(k, ptes, page_count, temp_pages,
				  &num_completed, 0, NULL) < 0 ||
      _exos_insert_pte_range(k, &vpt[PGNO(temp_pages)], page_count, 
			     start, &num_completed2, k, envid, 0, NULL) < 0 ||
      (type != MS_BSS &&
       (lseek(fd, shdr->s_scnptr, SEEK_SET) != shdr->s_scnptr ||
	read(fd, (void*)temp_pages, shdr->s_size) != shdr->s_size ||
	lseek(fd, curloc, SEEK_SET) != curloc))) {
    /* temp_pages is released exactly once, at the end of the function;
       freeing it here as well led to a use after free and a double free */
    retval = -1;
  }

  /* Only touch the temporary region when everything was set up; after a
     failure it may be unmapped or not allocated at all. */
  if (retval == 0) {
    if (type == MS_BSS) {
      zero_start = temp_pages;
      len = page_count * NBPG;
    } else {
      /* zero from the end of the data to the end of its page.  When the
	 data ends exactly on a page boundary there is nothing to zero
	 (the old NBPG - offset form zeroed a whole page past the region) */
      zero_start = temp_pages + shdr->s_size;
      len = PGROUNDUP(zero_start) - zero_start;
    }
    bzero((void*)zero_start, len);
    if (type == MS_TEXT)
      mprotect((void*)temp_pages, page_count*NBPG, PROT_READ);
  }

  /* remove the temporary mapping, and on failure the target mapping too */
  for (i=0; i < page_count; i++)
    ptes[i] = 0;
  if (temp_pages) {
    /* never clear mappings starting at address 0 when allocation failed */
    num_completed = 0;
    _exos_self_insert_pte_range(k, ptes, page_count, temp_pages,
				&num_completed, 0, NULL);
  }
  if (retval == -1) {
    num_completed2 = 0;
    _exos_insert_pte_range(k, ptes, page_count, start, &num_completed2, k,
			   envid, 0, NULL);
  }

  if (temp_pages) __free((void*)temp_pages);
  __free(ptes);
  return retval;
}
Beispiel #16
0
/*
 * Map the shared-library image in fd at SHARED_LIBRARY_START_RODATA.
 *
 * The a.out header is counted as part of the text.  Whole pages of text are
 * mapped read/execute; the remaining text fragment plus the data are mapped
 * read-only right after it.  If the data does not end on a page boundary the
 * rest of its last page is zeroed, which requires making that page writable
 * for a moment.  Finally the first page is marked copy-on-write and the
 * segment sizes (in pages) are recorded in sl_data.
 *
 * Returns 0 on success, -ENOEXEC if the header cannot be read or a segment
 * cannot be mapped.
 */
int
__load_sl_image (int fd)
{
  struct exec hdr;
  int full_data_bytes;
  int full_text_bytes;
  int full_bss_bytes;
  int text_fragment;
  int data_fragment;
  int rc;

  /* read a.out headers */
  if (lseek(fd, 0, SEEK_SET) == -1 ||
      read(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
    fprintf(stderr, "Invalid executable format.\n");
    return -ENOEXEC;
  }

  full_text_bytes = hdr.a_text + sizeof (hdr);
  full_data_bytes = hdr.a_data;
  full_bss_bytes = hdr.a_bss;
  /* move the partial last page of text over to the data segment */
  text_fragment = full_text_bytes % NBPG;
  if (text_fragment) {
    full_text_bytes -= text_fragment;
    full_data_bytes += text_fragment;
  }
  /* the unused tail of the last data page counts as bss */
  data_fragment = full_data_bytes % NBPG;
  if (data_fragment) full_bss_bytes += NBPG - data_fragment;

  if (full_text_bytes > 0) {
    /* map in text segment */
    if (mmap((void*)SHARED_LIBRARY_START_RODATA, full_text_bytes,
	     PROT_READ | PROT_EXEC,
	     MAP_COPY | MAP_SHARED | MAP_FIXED | MAP_FILE, fd, (off_t)0) !=
	(void*)SHARED_LIBRARY_START_RODATA) {
      fprintf(stderr, "__load_sl_image: could not mmap text (errno = %d)\n",
	      errno);
      return -ENOEXEC;
    }
  }

  if (full_data_bytes > 0) {
    /* map in data segment */
    if (mmap((void*)(SHARED_LIBRARY_START_RODATA + full_text_bytes),
	     full_data_bytes, PROT_READ,
	     MAP_COPY | MAP_SHARED | MAP_FIXED | MAP_FILE, fd,
	     (off_t)full_text_bytes) !=
	(void*)(SHARED_LIBRARY_START_RODATA + full_text_bytes)) {
      fprintf(stderr, "__load_sl_image: could not mmap data (errno = %d)\n",
	      errno);
      return -ENOEXEC;
    }
    if (data_fragment) {
      /* The pte changes are made outside assert() so that they still happen
	 when assertions are compiled out; otherwise the bzero below would
	 hit a read-only page. */
      rc = sys_self_mod_pte_range(0, PG_W, 0, SHARED_LIBRARY_START_RODATA +
				  full_text_bytes + full_data_bytes, 1);
      assert(rc == 0);
      bzero((void*)(SHARED_LIBRARY_START_RODATA + full_text_bytes +
		    full_data_bytes), NBPG - data_fragment);
      rc = sys_self_mod_pte_range(0, 0, PG_W, SHARED_LIBRARY_START_RODATA +
				  full_text_bytes + full_data_bytes, 1);
      assert(rc == 0);
    }
  }

  /* bss will be mmaped at startup */

  /* make a private copy of the first page so we can store sl data at the */
  /* beginning.  alternatively, you might want to put it in the uarea */
  /* note: we know the text segment will have at least one page because the */
  /*  shared library is big! */
  rc = sys_self_mod_pte_range(0, PG_COW, PG_RO,
			      SHARED_LIBRARY_START_RODATA, 1);
  assert(rc == 0);
  sl_data->text_pages = PGNO(full_text_bytes);
  sl_data->data_pages = PGNO(PGROUNDUP(full_data_bytes));
  sl_data->bss_pages = PGNO(PGROUNDUP(full_bss_bytes));
    
  return (0);
}
Beispiel #17
0
/* A predicate is represented as a sum-of-products, that is
   (A1 A2 ... ) OR (B1 B2 ...) OR ...
   where each element in a product (the A?'s and B?'s) is a simple
   predicate like v > 10.

   Predicates are represented in memory as an array of wk_term's,
   one term for each immediate, variable, operator, conjunction or
   disjunction. A single product is considered to be a group of
   contiguous wk_term's that are not WK_ORs. The whole mess is
   terminated by a WK_END.  */

#include <vcode/vcode.h>
#include <xok/wk.h>
#include <xok/mmu.h>
#include <xok/sys_proto.h>
#include <xok/kerrno.h>
#include <xok/malloc.h>
#include <xok_include/assert.h>
#include <xok/printf.h>

#ifndef __CAP__
#include <xok/pmapP.h>
#else
#include <xok/pmap.h>
#endif

/* Size in bytes of the code buffer that wk_compile passes to v_lambda. */
#define WK_MAX_CODE_BYTES 4096

/* Number of bytes at the end of the code buffer that OVERRUN_CHECK keeps
   in reserve: code generation is abandoned once v_ip moves into them. */
#define OVERRUN_SAFETY 20

/* Bail out of the enclosing function if the code buffer is nearly full.
   Expects the following names in scope at the point of use: `v_ip`
   (presumably vcode's current emit pointer -- confirm against vcode.h),
   `code` (start of the buffer), an int `ret`, and an `error` label.
   Wrapped in do { } while (0) so that `OVERRUN_CHECK;` behaves as a
   single statement, including inside an unbraced if/else. */
#define OVERRUN_CHECK						\
do {								\
  if (v_ip > code + WK_MAX_CODE_BYTES - OVERRUN_SAFETY) {	\
    warn ("wk_compile: out of code space\n");			\
    ret = -E_INVAL;						\
    goto error;							\
  }								\
} while (0)

static int next_pp; /* outside function so can be used by cleanup code */

/*
 * Compile the wakeup predicate t[0..sz-1] into code using vcode.
 *
 *   t          array of predicate terms to compile
 *   sz         number of terms in t
 *   code       buffer of WK_MAX_CODE_BYTES bytes receiving the generated code
 *   pred_pages array receiving the page numbers of the PP_USER pages that
 *              were pinned for this predicate; always terminated with a 0
 *              entry, and limited to WK_MAX_PP entries including that 0
 *
 * Operands (WK_VAR / WK_IMM) are loaded alternately into r1 and r2; each
 * comparison operator emits a branch to the end of the current product
 * using the opposite condition (presumably so that a failed comparison
 * skips the product -- confirm against the vcode branch semantics).  At
 * the end of every product the generated code returns the current tag;
 * after the last product it returns 0.
 *
 * Returns 0 on success or a negative error code.  In both cases
 * curenv->env_pred_pgs and curenv->env_pred are set so that the caller
 * can release everything through wk_free.
 */
static int wk_compile (struct wk_term *t, int sz, char *code,
                       u_int *pred_pages) {
  int i;
  v_reg_t r1, r2, z, tag;
  v_label_t end_of_term;
  int start_term = 1;   /* a new product starts at the next term */
  int op1 = 1;          /* next operand goes into r1 (else r2) */
  cap c;
  struct Ppage *pp;
  u_int ppn;
  int ret = 0;

  next_pp = 0;

  v_lambda ("", "", NULL, 1, code, WK_MAX_CODE_BYTES);
  if (!v_getreg (&r1, V_U, V_TEMP) ||
      !v_getreg (&r2, V_U, V_TEMP) ||
      !v_getreg (&z, V_U, V_TEMP) ||
      !v_getreg (&tag, V_U, V_TEMP))
    panic ("wk_compile: architecture doesn't have enough registers.");

  v_setu (tag, -1);
  v_setu (z, 0);

  for (i = 0; i < sz; i++) {
    if (start_term) {
      end_of_term = v_genlabel ();
      start_term = 0;
    }
    OVERRUN_CHECK;
    switch (t[i].wk_type) {
    case WK_VAR:
      /* keep one slot free for the terminating 0 entry */
      if (next_pp >= WK_MAX_PP-1) {
        warn ("wk_compile: too many pages in predicate\n");
        ret = -E_INVAL;
        goto error;
      }
      if ((ret = env_getcap (curenv, t[i].wk_cap, &c)) < 0) {
        goto error;
      }
      ppn = PGNO((u_int)t[i].wk_var);
      if (!ppn || ppn >= nppage) {
        printf ("at index %d\n", i);
        warn ("wk_compile: invalid physical page\n");
        ret = -E_INVAL;
        goto error;
      }
      pp = ppages_get(ppn);
      switch (Ppage_pp_status_get(pp)) {
      case PP_USER:
        if ((ret = ppage_acl_check(pp,&c,PP_ACL_LEN,0)) < 0) {
          goto error;
        }
        /* pin the page and remember it so it can be released later */
        ppage_pin (pp);
        pred_pages[next_pp++] = ppn;
        break;
      case PP_KERNRO:
        /* user can access pages that each env gets mapped r/o */
        break;
      default:
        printf ("at index %d\n", i);
        warn ("wk_compile: attempt to reference non PP_KERNRO or PP_USER page\n");
        ret = -E_INVAL;
        goto error;
      }
      if (op1) {
        v_ldui (r1, z, (int )ptov (t[i].wk_var));
        op1 = 0;
      } else {
        v_ldui (r2, z, (int )ptov (t[i].wk_var));
        op1 = 1;
      }
      break;
    case WK_IMM:
      if (op1) {
        v_setu (r1, t[i].wk_imm);
        op1 = 0;
      } else {
        v_setu (r2, t[i].wk_imm);
        op1 = 1;
      }
      break;
    case WK_TAG: {
      v_setu (tag, t[i].wk_tag);
      break;
    }
    case WK_OP: {
      switch (t[i].wk_op) {
      case WK_GT: {
        v_bleu (r1, r2, end_of_term);
        break;
      }
      case WK_GTE: {
        v_bltu (r1, r2, end_of_term);
        break;
      }
      case WK_LT: {
        v_bgeu (r1, r2, end_of_term);
        break;
      }
      case WK_LTE: {
        v_bgtu (r1, r2, end_of_term);
        break;
      }
      case WK_EQ: {
        v_bneu (r1, r2, end_of_term);
        break;
      }
      case WK_NEQ: {
        v_bequ (r1, r2, end_of_term);
        break;
      }
      case WK_OR: {
        /* end of this product: return its tag, then place the label
           that the product's comparisons branch to */
        v_retu (tag);
        v_label (end_of_term);
        start_term = 1;
        break;
      }
      default: {
        printf ("at index %d\n", i);
        warn ("wk_compile: invalid wk-pred instruction\n");
        ret = -E_INVAL;
        goto error;
      }
      }
      break;
    }
    default:
      printf ("at index %d\n", i);
      warn ("wk_compile: invalid wk-pred type\n");
      ret = -E_INVAL;
      goto error;
    }
  }

  /* end the last term */
  OVERRUN_CHECK;
  if (start_term) {
    /* No term was compiled since the last label was placed: either sz was
       0 (end_of_term was never assigned) or the predicate ended with a
       WK_OR (end_of_term has already been placed).  Use a fresh label so
       the v_label below never sees an uninitialized or reused label. */
    end_of_term = v_genlabel ();
  }
  v_retu (tag);
  v_label (end_of_term);

  v_retui (0);
  v_end (NULL);

error:
  /* have to do this even on error so that our caller can just call
     wk_free to clean memory/ref counts up */
  pred_pages[next_pp] = 0;
  curenv->env_pred_pgs = pred_pages;
  curenv->env_pred = (Spred)code;
  return ret;
}
Beispiel #18
0
/* everything is based on this exec, we have an extra argument
   (execonly) to differentiate between the exec and fork_exec
   families.  the difference is that the latter forks and then execs
   the process thereby returning in the parent

   path      file to execute
   argv_ori  NULL-terminated argument vector for the new program
   envptr    NULL-terminated environment vector, may be NULL
   execonly  non-zero for a true exec, zero for fork+exec

   The function allocates a new environment, loads the binary (old a.out
   format or ZMAGIC COFF) into it, sets up stack, argv pages and the
   environment strings, and gives the new environment a quantum.  Files
   starting with "#!" are run through the named interpreter (no nesting);
   files with an unknown magic number are handed to the program named by
   the EMULATOR environment variable.

   Returns NewPid on success; if execonly is set the calling environment
   is freed just before that return statement.  Returns -1 on failure,
   with errno set on most paths.
   */
static int
fork_execve0(const char *path, char *const argv_ori[], char * const envptr[],
             int execonly) {
  int envid;
  int fd;

  u_int k = 0;
  struct Uenv cu;
  int argc;
  char **argv_tmp,*argv_p,**argv;
  u32 lmagic;
  char *cmagic = (char*)&lmagic;
  u32 entry_point = 0;
  int running_emulator = 0;
  char *emu_path = NULL;
  char **argv_ori_tmp = (char**)argv_ori;
  char **argv_ori_tmp2;
  /* Holds the "#!" line.  argv[0] (and possibly argv[1]) point into this
     buffer and are read again when the arguments are copied for the
     child, so it must live for the whole function, not just the block
     that parses the line. */
  char inter[MAXINTERP+1];
#define FE_PATH (running_emulator ? emu_path : path)
#ifdef PROCESS_TABLE
  int NewPid = 0;
#endif
  /* NOTE(review): this buffer is only freed on the error path, and the
     emulator branch below overwrites the pointer without freeing an
     earlier interpreter allocation -- looks like a leak; confirm whether
     AddProcEntry keeps references to argv before adding a free. */
  char *extra_argv_space = NULL;

  proprintf("allocate env, and open file\n");
  ISTART(misc,execve);
  ISTART(misc,step1);
  if (! (envid = sys_env_alloc (0))) {
    fprintf(stderr,"could not sys_env_alloc\n");
    goto err;
  }

open_binary:
  /* verify executable permission */
  if (access(FE_PATH, X_OK) == -1) {
    /* access will set errno */
    goto fork_execve0_end_error;
  }
  /* open the executable */
  fd = open (FE_PATH, O_RDONLY, 0644);
  if (fd < 0) {
    /* open will set errno */
    goto fork_execve0_end_error;
  }
  STOPP(misc,step1);
  proprintf("read file and open interpreter if necessary\n");
  ISTART(misc,step2);

  /* read in the magic number */
  if (read(fd, cmagic, 2) < 2) {
    errno = ENOEXEC;
    goto fork_execve0_end_error_closefd;
  }

  /* check for interpreter */
  if (cmagic[0] == '#' && cmagic[1] == '!') {
    int intersize;
    char *interp;

    if ((intersize = read(fd,inter,MAXINTERP)) == -1) {
      errno = ENOEXEC;
      goto fork_execve0_end_error_closefd;
    }
    /* sentinel so the scans below always stop inside the buffer */
    inter[intersize] = '\n';
    interp = inter;

    /* skip spaces */
    while ((*interp == ' ' || *interp == '\t') &&
           *interp != '\n')
      interp++;

    {
      char **v = (char **)argv_ori_tmp;
      while(*v++);
      extra_argv_space = (char *)malloc((char *)v - (char *)argv_ori_tmp +
                                        MAXINTERP*(sizeof(char*)));
      if (!extra_argv_space) {
        fprintf(stderr,"execve could not allocate extra argv space for "
                "interpreted file\n");
        goto fork_execve0_end_error_closefd;
      }
    }
    argv = (char **)extra_argv_space;
    *argv++ = interp;

    while (*interp != ' ' && *interp != '\t' &&
           *interp != (char)0 && *interp != '\n')
      interp++;

    if (*interp != 0 && *interp != '\n') {
      /* more arguments, we only copy one more */
      *interp++ = 0;
      /* skip spaces */
      while ((*interp == ' ' || *interp == '\t') &&
             *interp != '\n')
        interp++;

      *argv++ = interp;
      while (*interp != ' ' && *interp != '\t' &&
             *interp != (char)0 && *interp != '\n')
        interp++;
      *interp = (char)0;
    } else {
      *interp = (char)0;
    }
    /* new argv: interpreter [arg] script-path original-argv[1..] */
    *argv++ = (char *)FE_PATH;
    argv_ori_tmp2 = argv_ori_tmp;
    argv_ori_tmp2++;
    while(*argv_ori_tmp2 != (char *) 0) {*argv++ = *argv_ori_tmp2++;}
    /* copy the 0 */
    *argv++ = *argv_ori_tmp2++;
    argv = (char **)extra_argv_space;

    close(fd);
    /* verify executable permission */
    if (access(argv[0], X_OK) == -1) {
      /* access will set errno */
      goto fork_execve0_end_error;
    }
    fd = open (argv[0], O_RDONLY, 0644);
    if (fd < 0) {
      /* open will set errno */
      goto fork_execve0_end_error;
    }
    /* read in the magic number (and nesting of interpreters not allowed) */
    if (read(fd, cmagic, 2) < 2 || (cmagic[0] == '#' && cmagic[1] == '!')) {
      errno = ENOEXEC;
      goto fork_execve0_end_error_closefd;
    }
  } else {
    argv = (char **)argv_ori_tmp;
  }

  STOPP(misc,step2);
  proprintf("read more magic and executable headers, check for emulation\n");
  ISTART(misc,step3);

  if (read(fd, &cmagic[2], 2) < 2) {
    errno = ENOEXEC;
    goto fork_execve0_end_error_closefd;
  }

  /* see whether we need to run an emulator */
  if (lmagic != 0700303000 && (lmagic & 0x0000ffff) != 0514) {
    if (running_emulator) {
      fprintf(stderr,"Emulator binary is in unrecognized executable "
              "format.\n");
      goto fork_execve0_end_error_closefd;
    }
    else
      fprintf(stderr,"Unrecognized executable format; attempting to run "
              "emulator.\n");
    close(fd);
    {
      char **v = (char **)argv_ori_tmp;
      while(*v++);
      extra_argv_space = (char *)malloc((char *)v - (char *)argv_ori_tmp +
                                        2 * sizeof(char*));
      if (!extra_argv_space) {
        fprintf(stderr,"execve could not allocate extra argv space for "
                "copying args for emulator\n");
        /* fd was closed just above, so skip the closefd label */
        goto fork_execve0_end_error;
      }
    }
    argv = (char **)extra_argv_space;
    running_emulator = 1;
    *argv++ = emu_path = getenv("EMULATOR");
    if (!FE_PATH) {
      fprintf(stderr,"EMULATOR environment variable not set to emulator path; "
              "cannot run emulator.\n");
      goto fork_execve0_end_error;
    }
    /* new argv: emulator original-path original-argv[1..] */
    *argv++ = (char *)path;
    argv_ori_tmp2 = argv_ori_tmp;
    argv_ori_tmp2++;
    while(*argv_ori_tmp2 != (char *) 0) {*argv++ = *argv_ori_tmp2++;}
    /* copy the 0 */
    *argv++ = *argv_ori_tmp2++;
    argv = (char **)extra_argv_space;
    argv_ori_tmp = argv;
    goto open_binary;
  }

  /* check for original ExOS (OpenBSD) format - remove eventually */
  if (lmagic == 0700303000)
    {
      struct exec hdr;
      u_int byte_count, page_count;
      Pte *ptes;
      int i;

      /* allocate physical pages and read data into it. */

      /* read a.out headers */
      if (lseek(fd,0,SEEK_SET) == -1 ||
          read(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
        fprintf(stderr,"Invalid executable format.\n");
        errno = ENOEXEC;
        goto fork_execve0_end_error_closefd;
      }
      entry_point = hdr.a_entry;

      /* alloc this much memory */
      byte_count = hdr.a_text + hdr.a_data + hdr.a_bss;
      page_count = PGNO(PGROUNDUP(byte_count));

      /* allocate it in both child and parent areas */
      if ((ptes = malloc(sizeof(Pte) * page_count)) == 0) {
        errno = ENOEXEC;
        goto fork_execve0_end_error_closefd;
      }
      for (i=0; i < page_count; i++)
        ptes[i] = PG_U|PG_W|PG_P;
      STOPP(misc,step3);
      proprintf("allocate childs vm in both parent and child, read text+data "
                "(sz %ld)\n", hdr.a_text + hdr.a_data);
      ISTART(misc,read0);
      if (sys_self_insert_pte_range(k, ptes, page_count, TEMP_REGION) < 0 ||
          sys_insert_pte_range(k, &vpt[PGNO(TEMP_REGION)], page_count,
                               UTEXT, k, envid) < 0 ||
          read(fd, (void*)TEMP_REGION, hdr.a_text + hdr.a_data) !=
          hdr.a_text + hdr.a_data) {
        fprintf (stderr,"Binary file invalid or corrupt.\n");
        /* undo the mappings in both environments */
        for (i=0; i < page_count; i++)
          ptes[i] = 0;
        sys_self_insert_pte_range(k, ptes, page_count, TEMP_REGION);
        sys_insert_pte_range(k, ptes, page_count, UTEXT, k, envid);
        free(ptes);
        errno = ENOEXEC;
        goto fork_execve0_end_error_closefd;
      }

      STOPP(misc,read0);
      /* zero the bss */
      proprintf("bzero bss  (sz %ld)\n",hdr.a_bss);
      ISTART(misc,read1);
      bzero((void*)(TEMP_REGION + hdr.a_text + hdr.a_data), hdr.a_bss);

      STOP(misc,read1);
      PRNAME(misc,read1);
      proprintf("unmap TEMP_REGION (sz %d pages)\n", page_count);
      ISTART(misc,step4);
      /* unmap the used TEMP_REGION */
      for (i=0; i < page_count; i++)
        ptes[i] = 0;
      sys_self_insert_pte_range(k, ptes, page_count, TEMP_REGION);
      free(ptes);
    }
  else if ((lmagic & 0x0000ffff) == 0514) /* check for ExOS COFF format */
    {
      FILHDR hdr;
      AOUTHDR opthdr;
      SCNHDR shdr;
      int s;

      /* allocate physical pages and read data into it. */
      /* read file headers */
      if (lseek(fd,0,SEEK_SET) == -1 ||
          read(fd, &hdr, FILHSZ) != FILHSZ ||
          hdr.f_opthdr != sizeof(opthdr) ||
          !(hdr.f_flags & F_EXEC) ||
          read(fd, &opthdr, hdr.f_opthdr) != hdr.f_opthdr) {
        fprintf(stderr,"Invalid executable format.\n");
        errno = ENOEXEC;
        goto fork_execve0_end_error_closefd;
      }
      entry_point = opthdr.entry;

      /* ensure ZMAGIC */
      if (opthdr.magic != ZMAGIC) {
        fprintf(stderr,"Exec cannot read non-ZMAGIC COFF executables.\n");
        errno = ENOEXEC;
        goto fork_execve0_end_error_closefd;
      }

      STOPP(misc,step3);
      proprintf("read in and map/zero COFF sections then unmap\n");
      ISTART(misc,read0);
      for (s=0; s < hdr.f_nscns; s++)
        if (read(fd, &shdr, SCNHSZ) != SCNHSZ ||
            map_section(k, fd, &shdr, envid) == -1) {
          fprintf(stderr,"Invalid executable format.\n");
          errno = ENOEXEC;
          goto fork_execve0_end_error_closefd;
        }
      STOPP(misc,read0);
      proprintf("nothing\n");
      ISTART(misc,read1);
      STOP(misc,read1);
      PRNAME(misc,read1);
      proprintf("nothing\n");
      ISTART(misc,step4);
    } else {
      fprintf(stderr, "Unknown file format.\n");
      errno = ENOEXEC;
      goto fork_execve0_end_error_closefd;
    }

  STOPP(misc,step4);

  /* from here on fd is closed: error paths must not use the closefd label */
  close (fd);

  proprintf("Allocate stack for child\n");
  ISTART(misc,step5);

  /* allocate stack space for child */
  if (sys_insert_pte (k, PG_U|PG_W|PG_P, USTACKTOP-NBPG, k, envid) < 0) {
    fprintf(stderr,"sys_insert_pte failed\n");
    errno = ENOEXEC;
    goto fork_execve0_end_error;
  }

  STOPP(misc,step5);
  proprintf("ExecuteOnExecHandlers\n");
  ISTART(misc,step6);
  if (ExecuteOnExecHandlers(k,envid,execonly) == -1) {
    fprintf(stderr,"cleanup code not done yet\n");
    /* NOTE(review): -1 is non-zero, so this assertion can never fire and
       execution continues; assert(0) was probably intended -- confirm
       before changing, since that would turn this path into an abort. */
    assert(-1);
  }
  STOPP(misc,step6);
  proprintf("Process table stuff\n");
  ISTART(misc,step7);
#ifdef PROCESS_TABLE
  if (execonly) {
    /* because true exec */
    NewPid = getpid();
    /* XXX -- this locking is ... up. I should really come up with a better
       convention as to what expects to be called with things locked and
       what doesn't */
    dlockputs(__PROCD_LD,"fork_execve0 get lock ");
    EXOS_LOCK(PROCINFO_LOCK);
    dlockputs(__PROCD_LD,"... got lock\n");
    EnterCritical ();

    ProcChangeEnv(NewPid,envid);

    EXOS_UNLOCK(PROCINFO_LOCK);
    dlockputs(__PROCD_LD,"fork_execve0 release lock\n");
    ExitCritical ();
  } else {
    /* because we are forking */
    NewPid = AllocateFreePid (envid);
  }
#endif

#ifdef PROCESS_TABLE
  cu = u;
  if (!execonly) {
    AddProcEntry (&cu, (char *)FE_PATH, (char **)argv, NewPid, UAREA.pid);
    if ((cu.parent_slot = GetChildSlot (NewPid)) < 0) {
      errno = ENOEXEC;
      goto fork_execve0_end_error;
    }
  } else {
    /* TO TOM: what do we do this for?  */
    strncpy (UAREA.name, argv[0], U_NAMEMAX-1);
    UAREA.name[U_NAMEMAX-1] = '\0';
  }
  /* XXX -- on an exec we'll forget to unref our children's pids */
  /* TO TOM: shouldnt this clearchildinfo be at the top */
  ClearChildInfo (&cu);
  strncpy (cu.name, FE_PATH, U_NAMEMAX-1);
  cu.name[U_NAMEMAX-1] = '\0';
  cu.u_chld_state_chng = 0;
#endif /* PROCESS_TABLE */
  cu.u_in_critical = 0;
  cu.u_status = U_RUN;
  cu.u_entprologue = entry_point;
  cu.u_next_timeout = 0;
  cu.u_in_pfault = 0;
  cu.u_revoked_pages = 0;
  cu.u_donate = -1;

  STOPP(misc,step7);
  proprintf("Argv and Environment copying\n");
  ISTART(misc,step8);

  /* ----------------------------------------------------------------------------- */
  /* allocate argv space for child */
  {
    char *buf = (char *)ARGV_START_LOCAL;
    char **bufv = (char **)ARGV_START_LOCAL;
    int len,i;

    argc = 0;
    while (argv[argc] != 0) {
      argc++;}

    /* allocate pages */
    {
      Pte *ptes;

      if ((ptes = malloc(sizeof(Pte) * NRARGVPG)) == 0) {
        /* release the child environment and extra_argv_space instead of
           returning directly */
        goto fork_execve0_end_error;
      }
      for (i=0; i < NRARGVPG; i++)
        ptes[i] = PG_U|PG_W|PG_P;

      if (sys_self_insert_pte_range(k, ptes, NRARGVPG, ARGV_START_LOCAL) < 0 ||
          sys_insert_pte_range(k, &vpt[PGNO(ARGV_START_LOCAL)], NRARGVPG,
                               ARGV_START, k, envid) < 0) {
        fprintf(stderr,"sys_insert_pte failed\n");
        for (i=0; i < NRARGVPG; i++)
          ptes[i] = 0;
        sys_self_insert_pte_range(k, ptes, NRARGVPG, ARGV_START_LOCAL);
        sys_insert_pte_range(k, ptes, NRARGVPG, ARGV_START, k, envid);
        free(ptes);
        errno = ENOEXEC;
        /* fd is already closed at this point */
        goto fork_execve0_end_error;
      }
      free(ptes);
    }

    /* copy the args: pointer table first, strings right behind it */
    buf += (argc + 1) * sizeof(char *);
    for (i = 0; i < argc; i++) {
      len = strlen(argv[i]) + 1;

      if ((int)(buf + len) > ARGV_START_LOCAL + NRARGVPG*NBPG) {
        kprintf("Argv too large truncating\n");
        break;
      }

      /* store the address the string will have in the child's mapping */
      bufv[i] = buf - (ARGV_START_LOCAL - ARGV_START);
      memcpy(buf, argv[i],len);
      buf += len;
    }
    bufv[argc] = (char *)0;
  }

  /* ----------------------------------------------------------------------------- */

  /* COPY ENVIRONMENT */
  argc = 0;
  argv_tmp = (char **)envptr;
  argv_p = (char *)&cu.u_env_space;
  if (argv_tmp)
  while(*argv_tmp != (char *)0) {
    int len = strlen(*argv_tmp) + 1;
    int used = argv_p - (char *)&cu.u_env_space[0];

    /* Check the bound before copying so the copy cannot run past
       UNIX_ENV_SIZE bytes of u_env_space (the limit is taken from the
       check this code previously made only after the copy). */
    if (len > UNIX_ENV_SIZE - used) {
      fprintf(stderr,"too much data in envp (%d) max is %d (%s)\n",
             used + len,UNIX_ENV_SIZE, __FILE__);
      break;
    }
    memcpy(argv_p,*argv_tmp,len);
    cu.u_env_lengths[argc] = len;
    argv_p += len;
    argc++;
    argv_tmp++;
    if (argc == (UNIX_NR_ENV - 1)) {
      fprintf(stderr,"envc (%d) is greater than maximum allowed (%d), truncating.\n",
             argc,UNIX_NR_ENV - 1);
      break;
    }
  }
  /* -1 marks the end of the length list */
  cu.u_env_lengths[argc] = -1;

  if (sys_wru (0, envid, &cu) < 0) {
    fprintf (stderr,"sys_wru failed\n");
    errno = ENOEXEC;
    goto fork_execve0_end_error;
  }
  STOPP(misc,step8);

  STOP(misc,execve);
  PRNAME(misc,execve);
  {
    extern void pr_fd_stat(void);
    pr_fd_stat();
  }

  if (sys_quantum_alloc (k, -1, 0, envid) < 0) {
    fprintf (stderr,"could not alloc quantum\n");
    errno = ENOEXEC;
    goto fork_execve0_end_error;
  }

  if (execonly) {
    ProcessFreeQuanta(__envid);
    sys_env_free (0, __envid);
  }

  return (NewPid);

fork_execve0_end_error_closefd:
  close(fd);
fork_execve0_end_error:
  /* NOTE(review): this passes __envid (the calling environment) while the
     next line frees envid (the new one); presumably envid was meant here
     too -- verify against ProcessFreeQuanta before changing. */
  ProcessFreeQuanta(__envid);
  sys_env_free (k, envid);
err:
  if (extra_argv_space != NULL) free(extra_argv_space);
  return -1;
}