// XXX horrible back door to map a page quickly regardless of translation
// map object, etc. Used only during VM setup.
int vm_translation_map_quick_map(kernel_args *ka, addr_t va, addr_t pa,
	unsigned int attributes, addr_t (*get_free_page)(kernel_args *))
{
	addr_t pgtable_phys;
	unsigned long *pgtable;
	int index;

	TMAP_TRACE("quick_map: va 0x%lx pa 0x%lx, attributes 0x%x\n", va, pa, attributes);

	// look up and dereference the first entry
	pgtable_phys = ka->arch_args.phys_pgdir;
	get_physical_page_tmap(pgtable_phys, (addr_t *)&pgtable, PHYSICAL_PAGE_NO_WAIT);
//	dprintf("phys 0x%lx, virt %p\n", pgtable_phys, pgtable);
	index = PGTABLE0_ENTRY(va);
	ASSERT(PGENT_PRESENT(pgtable[index]));

	// level 2
	pgtable_phys = PGENT_TO_ADDR(pgtable[index]);
	get_physical_page_tmap(pgtable_phys, (addr_t *)&pgtable, PHYSICAL_PAGE_NO_WAIT);
	index = PGTABLE1_ENTRY(va);
	if (!PGENT_PRESENT(pgtable[index])) {
		// no level 2 table yet; grab a fresh page and hook it in,
		// marked present and writable
		pgtable_phys = get_free_page(ka);
		pgtable[index] = pgtable_phys | 3;
		TMAP_TRACE("had to allocate level 2: paddr 0x%lx\n", pgtable_phys);
	} else {
		pgtable_phys = PGENT_TO_ADDR(pgtable[index]);
//		dprintf("level 2: paddr 0x%lx\n", pgtable_phys);
	}

	// level 3
	get_physical_page_tmap(pgtable_phys, (addr_t *)&pgtable, PHYSICAL_PAGE_NO_WAIT);
	index = PGTABLE2_ENTRY(va);
	if (!PGENT_PRESENT(pgtable[index])) {
		pgtable_phys = get_free_page(ka);
		pgtable[index] = pgtable_phys | 3;
		TMAP_TRACE("had to allocate level 3: paddr 0x%lx\n", pgtable_phys);
	} else {
		pgtable_phys = PGENT_TO_ADDR(pgtable[index]);
//		dprintf("level 3: paddr 0x%lx\n", pgtable_phys);
	}

	// map the page
	get_physical_page_tmap(pgtable_phys, (addr_t *)&pgtable, PHYSICAL_PAGE_NO_WAIT);
	index = PGTABLE3_ENTRY(va);
	pa = ROUNDOWN(pa, PAGE_SIZE);
	pgtable[index] = pa
		| ((attributes & LOCK_RW) ? (1 << 1) : 0)		// writable bit
		| ((attributes & LOCK_KERNEL) ? 0 : (1 << 2))	// user-accessible bit
		| 1;							// present bit

	return 0;
}
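/*
 * The PGTABLE*_ENTRY and PGENT_* macros are defined elsewhere in the tree.
 * A minimal sketch of what they presumably look like, assuming standard
 * x86-64 4-level paging (9 index bits per level, 4KB pages, consistent
 * with the 1UL << 39/30/21 strides used in unmap_tmap below). These
 * definitions are illustrative, not the real ones; under this reading the
 * raw "| 3" above sets the present and writable bits, i.e. the same
 * PT_PRESENT|PT_WRITE combination that map_tmap spells out.
 */
#if 0
#define PGENT_PRESENT(ent)	((ent) & 1)				// bit 0 is the present bit
#define PGENT_TO_ADDR(ent)	((addr_t)(ent) & ~(PAGE_SIZE - 1))	// strip the low flag bits
#define PGTABLE0_ENTRY(va)	(((va) >> 39) & 0x1ff)			// level 0 (PML4) index
#define PGTABLE1_ENTRY(va)	(((va) >> 30) & 0x1ff)			// level 1 (PDPT) index
#define PGTABLE2_ENTRY(va)	(((va) >> 21) & 0x1ff)			// level 2 (page directory) index
#define PGTABLE3_ENTRY(va)	(((va) >> 12) & 0x1ff)			// level 3 (page table) index
#endif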
/* simulate page fault for given virtual address range */
status_t vm_simulate_pf(addr_t start, addr_t end)
{
	status_t err = NO_ERROR;

	/* align address range to page boundaries */
	start = ROUNDOWN(start, PAGE_SIZE);
	end = ROUNDUP(end, PAGE_SIZE);

	while(start < end) {
		err = vm_soft_page_fault(start, false, false, true);
		if(err != NO_ERROR)
			break;
		start += PAGE_SIZE;
	}

	return err;
}
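/*
 * Hypothetical usage sketch: pre-fault a buffer so its pages are resident
 * before entering code that must not take page faults. prefault_buffer is
 * a made-up name, not part of this source.
 */
#if 0
static status_t prefault_buffer(void *buf, size_t len)
{
	addr_t start = (addr_t)buf;

	// vm_simulate_pf rounds the range out to page boundaries itself
	return vm_simulate_pf(start, start + len);
}
#endif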
image_id elf_load_kspace(const char *path, const char *sym_prepend)
{
	struct Elf32_Ehdr *eheader;
	struct Elf32_Phdr *pheaders;
	struct elf_image_info *image;
	void *vnode = NULL;
	int fd;
	int err;
	int i;
	ssize_t len;
	addr_t lowest_address = 0;
	addr_t highest_address = 0;
	// track these across all program headers, not per iteration,
	// so duplicate segments are actually skipped
	bool ro_segment_handled = false;
	bool rw_segment_handled = false;

	dprintf("elf_load_kspace: entry path '%s'\n", path);

	fd = sys_open(path, 0);
	if(fd < 0)
		return fd;

	err = vfs_get_vnode_from_fd(fd, true, &vnode);
	if(err < 0)
		goto error0;

	// XXX awful hack to keep someone else from trying to load this image
	// probably not a bad thing, shouldn't be too many races
	mutex_lock(&image_load_lock);

	// make sure it's not loaded already. Search by vnode
	image = find_image_by_vnode(vnode);
	if(image) {
		atomic_add(&image->ref_count, 1);
		// the existing image holds its own vnode reference; drop ours
		// and close the fd so neither leaks
		vfs_put_vnode_ptr(vnode);
		vnode = NULL;
		sys_close(fd);
		goto done;
	}

	eheader = (struct Elf32_Ehdr *)kmalloc(sizeof(*eheader));
	if(!eheader) {
		err = ERR_NO_MEMORY;
		goto error;
	}

	len = sys_read(fd, eheader, 0, sizeof(*eheader));
	if(len < 0) {
		err = len;
		goto error1;
	}
	if(len != sizeof(*eheader)) {
		// short read
		err = ERR_INVALID_BINARY;
		goto error1;
	}

	err = verify_eheader(eheader);
	if(err < 0)
		goto error1;

	image = create_image_struct();
	if(!image) {
		err = ERR_NO_MEMORY;
		goto error1;
	}
	image->vnode = vnode;
	image->eheader = eheader;

	pheaders = kmalloc(eheader->e_phnum * eheader->e_phentsize);
	if(pheaders == NULL) {
		dprintf("error allocating space for program headers\n");
		err = ERR_NO_MEMORY;
		goto error2;
	}

//	dprintf("reading in program headers at 0x%x, len 0x%x\n", eheader->e_phoff, eheader->e_phnum * eheader->e_phentsize);
	len = sys_read(fd, pheaders, eheader->e_phoff, eheader->e_phnum * eheader->e_phentsize);
	if(len < 0) {
		err = len;
		dprintf("error reading in program headers\n");
		goto error3;
	}
	if(len != eheader->e_phnum * eheader->e_phentsize) {
		dprintf("short read while reading in program headers\n");
		err = -1;
		goto error3;
	}

	for(i = 0; i < eheader->e_phnum; i++) {
		char region_name[64];
		int image_region;
		int lock;

//		dprintf("looking at program header %d\n", i);
		switch(pheaders[i].p_type) {
			case PT_LOAD:
				break;
			case PT_DYNAMIC:
				image->dynamic_ptr = pheaders[i].p_vaddr;
				continue;
			default:
				dprintf("unhandled pheader type 0x%x\n", pheaders[i].p_type);
				continue;
		}

		// we're here, so it must be a PT_LOAD segment
		if((pheaders[i].p_flags & (PF_R | PF_W | PF_X)) == (PF_R | PF_W)) {
			// this is the writable segment
			if(rw_segment_handled) {
				// we've already created this segment
				continue;
			}
			rw_segment_handled = true;
			image_region = 1;
			lock = LOCK_RW | LOCK_KERNEL;
			sprintf(region_name, "%s_seg1", path);
		} else if((pheaders[i].p_flags & (PF_R | PF_X)) == (PF_R | PF_X)) {
			// this is the non-writable segment
			if(ro_segment_handled) {
				// we've already created this segment
				continue;
			}
			ro_segment_handled = true;
			image_region = 0;
//			lock = LOCK_RO | LOCK_KERNEL;
			lock = LOCK_RW | LOCK_KERNEL;
			sprintf(region_name, "%s_seg0", path);
		} else {
			dprintf("weird program header flags 0x%x\n", pheaders[i].p_flags);
			continue;
		}

		image->regions[image_region].size =
			ROUNDUP(pheaders[i].p_memsz + (pheaders[i].p_vaddr % PAGE_SIZE), PAGE_SIZE);
		if(i == 0) {
			// put region zero anywhere
			image->regions[image_region].id = vm_create_anonymous_region(
				vm_get_kernel_aspace_id(), region_name,
				(void **)&image->regions[image_region].start,
				REGION_ADDR_ANY_ADDRESS,
				image->regions[image_region].size,
				REGION_WIRING_WIRED, lock);
		} else {
			// try to line the other regions up so that their relative
			// distances are according to the ELF header
			image->regions[image_region].start =
				ROUNDOWN(pheaders[i].p_vaddr + image->regions[0].delta, PAGE_SIZE);
//			dprintf("elf: region 0.delta 0x%x region %d.pvaddr 0x%x region %d.start 0x%x\n",
//				image->regions[0].delta, i, pheaders[i].p_vaddr, i, image->regions[image_region].start);
			image->regions[image_region].id = vm_create_anonymous_region(
				vm_get_kernel_aspace_id(), region_name,
				(void **)&image->regions[image_region].start,
				REGION_ADDR_EXACT_ADDRESS,
				image->regions[image_region].size,
				REGION_WIRING_WIRED, lock);
		}
		if(image->regions[image_region].id < 0) {
			dprintf("error allocating region!\n");
			err = ERR_INVALID_BINARY;
			goto error4;
		}
		image->regions[image_region].delta = image->regions[image_region].start -
			ROUNDOWN(pheaders[i].p_vaddr, PAGE_SIZE);

//		dprintf("elf_load_kspace: created a region at 0x%x\n", image->regions[image_region].start);

		len = sys_read(fd,
			(void *)(image->regions[image_region].start + (pheaders[i].p_vaddr % PAGE_SIZE)),
			pheaders[i].p_offset, pheaders[i].p_filesz);
		if(len < 0) {
			err = len;
			dprintf("error reading in seg %d\n", i);
			goto error4;
		}

		if(lowest_address == 0 || image->regions[image_region].start < lowest_address)
			lowest_address = image->regions[image_region].start;
		if(highest_address == 0 ||
				(image->regions[image_region].start + image->regions[image_region].size) > highest_address)
			highest_address = image->regions[image_region].start + image->regions[image_region].size;
	}

	if(image->regions[1].start != 0) {
		if(image->regions[0].delta != image->regions[1].delta) {
			dprintf("could not load binary, fix the region problem!\n");
			dump_image_info(image);
			err = ERR_NO_MEMORY;
			goto error4;
		}
	}

	// modify the dynamic ptr by the delta of the regions
	image->dynamic_ptr += image->regions[0].delta;

	err = elf_parse_dynamic_section(image);
	if(err < 0)
		goto error4;

	err = elf_relocate(image, sym_prepend);
	if(err < 0)
		goto error4;

	err = 0;
	kfree(pheaders);
	sys_close(fd);

	insert_image_in_list(image);

done:
	mutex_unlock(&image_load_lock);

	dprintf("elf_load_kspace: syncing icache from 0x%lx to 0x%lx\n", lowest_address, highest_address);
	arch_cpu_sync_icache((void *)lowest_address, highest_address - lowest_address);
	dprintf("elf_load_kspace: done!\n");

	return image->id;

	// unwind in reverse order of acquisition; note pheaders is freed
	// before image so the no-pheaders path can land on error2
error4:
	if(image->regions[1].id >= 0)
		vm_delete_region(vm_get_kernel_aspace_id(), image->regions[1].id);
	if(image->regions[0].id >= 0)
		vm_delete_region(vm_get_kernel_aspace_id(), image->regions[0].id);
error3:
	kfree(pheaders);
error2:
	kfree(image);
error1:
	kfree(eheader);
error:
	mutex_unlock(&image_load_lock);
error0:
	if(vnode)
		vfs_put_vnode_ptr(vnode);
	sys_close(fd);

	return err;
}
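/*
 * verify_eheader() is not shown in this excerpt. A minimal sketch of the
 * sanity checks it presumably performs, assuming the usual ELF32 header
 * fields; the constants (ELF_MAGIC, ELFCLASS32) follow standard ELF usage
 * and may be spelled differently in this tree:
 */
#if 0
static int verify_eheader(struct Elf32_Ehdr *eheader)
{
	if(memcmp(eheader->e_ident, ELF_MAGIC, 4) != 0)		// "\x7f" "ELF"
		return ERR_INVALID_BINARY;
	if(eheader->e_ident[4] != ELFCLASS32)			// e_ident[EI_CLASS]: 32-bit objects only
		return ERR_INVALID_BINARY;
	if(eheader->e_phoff == 0)				// must carry program headers
		return ERR_INVALID_BINARY;
	if(eheader->e_phentsize < sizeof(struct Elf32_Phdr))
		return ERR_INVALID_BINARY;
	return 0;
}
#endif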
int elf_load_uspace(const char *path, struct proc *p, int flags, addr_t *entry)
{
	struct Elf32_Ehdr eheader;
	struct Elf32_Phdr *pheaders = NULL;
	int fd;
	int err;
	int i;
	ssize_t len;

	dprintf("elf_load_uspace: entry path '%s', proc %p\n", path, p);

	fd = sys_open(path, 0);
	if(fd < 0)
		return fd;

	len = sys_read(fd, &eheader, 0, sizeof(eheader));
	if(len < 0) {
		err = len;
		goto error;
	}
	if(len != sizeof(eheader)) {
		// short read
		err = ERR_INVALID_BINARY;
		goto error;
	}

	err = verify_eheader(&eheader);
	if(err < 0)
		goto error;

	pheaders = kmalloc(eheader.e_phnum * eheader.e_phentsize);
	if(pheaders == NULL) {
		dprintf("error allocating space for program headers\n");
		err = ERR_NO_MEMORY;
		goto error;
	}

	dprintf("reading in program headers at 0x%x, len 0x%x\n",
		eheader.e_phoff, eheader.e_phnum * eheader.e_phentsize);
	len = sys_read(fd, pheaders, eheader.e_phoff, eheader.e_phnum * eheader.e_phentsize);
	if(len < 0) {
		err = len;
		dprintf("error reading in program headers\n");
		goto error;
	}
	if(len != eheader.e_phnum * eheader.e_phentsize) {
		dprintf("short read while reading in program headers\n");
		err = -1;
		goto error;
	}

	for(i = 0; i < eheader.e_phnum; i++) {
		char region_name[64];
		region_id id;
		char *region_addr;

		sprintf(region_name, "%s_seg%d", path, i);
		region_addr = (char *)ROUNDOWN(pheaders[i].p_vaddr, PAGE_SIZE);

		if(pheaders[i].p_flags & PF_W) {
			/*
			 * rw segment
			 */
			unsigned start_clearing;
			unsigned to_clear;
			// A/B are the page-aligned ends of the segment in memory
			// and in the file; if they differ, the bss spills past the
			// last file-backed page and needs its own anonymous region
			unsigned A = pheaders[i].p_vaddr + pheaders[i].p_memsz;
			unsigned B = pheaders[i].p_vaddr + pheaders[i].p_filesz;

			A = ROUNDOWN(A, PAGE_SIZE);
			B = ROUNDOWN(B, PAGE_SIZE);

			id = vm_map_file(p->aspace_id, region_name,
				(void **)&region_addr,
				REGION_ADDR_EXACT_ADDRESS,
				ROUNDUP(pheaders[i].p_filesz + (pheaders[i].p_vaddr % PAGE_SIZE), PAGE_SIZE),
				LOCK_RW, REGION_PRIVATE_MAP, path,
				ROUNDOWN(pheaders[i].p_offset, PAGE_SIZE));
			if(id < 0) {
				dprintf("error allocating region!\n");
				err = ERR_INVALID_BINARY;
				goto error;
			}

			/*
			 * clean garbage brought by mmap: zero from the end of the
			 * file data to the end of the last mapped page
			 */
			start_clearing = (unsigned)region_addr
				+ (pheaders[i].p_vaddr % PAGE_SIZE)
				+ pheaders[i].p_filesz;
			to_clear = ROUNDUP(pheaders[i].p_filesz + (pheaders[i].p_vaddr % PAGE_SIZE), PAGE_SIZE)
				- (pheaders[i].p_vaddr % PAGE_SIZE)
				- pheaders[i].p_filesz;
			memset((void *)start_clearing, 0, to_clear);

			/*
			 * check if we need extra storage for the bss
			 */
			if(A != B) {
				size_t bss_size;

				bss_size = ROUNDUP(pheaders[i].p_memsz + (pheaders[i].p_vaddr % PAGE_SIZE), PAGE_SIZE)
					- ROUNDUP(pheaders[i].p_filesz + (pheaders[i].p_vaddr % PAGE_SIZE), PAGE_SIZE);

				sprintf(region_name, "%s_bss%d", path, i);
				region_addr += ROUNDUP(pheaders[i].p_filesz + (pheaders[i].p_vaddr % PAGE_SIZE), PAGE_SIZE);
				id = vm_create_anonymous_region(p->aspace_id, region_name,
					(void **)&region_addr,
					REGION_ADDR_EXACT_ADDRESS,
					bss_size, REGION_WIRING_LAZY, LOCK_RW);
				if(id < 0) {
					dprintf("error allocating region!\n");
					err = ERR_INVALID_BINARY;
					goto error;
				}
			}
		} else {
			/*
			 * assume rx segment
			 */
			id = vm_map_file(p->aspace_id, region_name,
				(void **)&region_addr,
				REGION_ADDR_EXACT_ADDRESS,
				ROUNDUP(pheaders[i].p_memsz + (pheaders[i].p_vaddr % PAGE_SIZE), PAGE_SIZE),
				LOCK_RO, REGION_PRIVATE_MAP, path,
				ROUNDOWN(pheaders[i].p_offset, PAGE_SIZE));
			if(id < 0) {
				dprintf("error mapping text!\n");
				err = ERR_INVALID_BINARY;
				goto error;
			}
		}
	}

	dprintf("elf_load_uspace: done!\n");

	*entry = eheader.e_entry;
	err = 0;

error:
	if(pheaders)
		kfree(pheaders);
	sys_close(fd);

	return err;
}
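/*
 * To make the A/B and bss arithmetic in elf_load_uspace concrete, a worked
 * example with made-up segment values (PAGE_SIZE = 0x1000):
 *
 *   p_vaddr  = 0x8048abc   ->  p_vaddr % PAGE_SIZE = 0xabc
 *   p_filesz = 0x1200
 *   p_memsz  = 0x3400
 *
 *   A = ROUNDOWN(0x8048abc + 0x3400, 0x1000) = 0x804b000
 *   B = ROUNDOWN(0x8048abc + 0x1200, 0x1000) = 0x8049000
 *
 * A != B, so the bss extends past the last file-backed page:
 *
 *   file-backed size = ROUNDUP(0x1200 + 0xabc, 0x1000) = 0x2000
 *   bss_size         = ROUNDUP(0x3400 + 0xabc, 0x1000) - 0x2000
 *                    = 0x4000 - 0x2000 = 0x2000
 *
 * i.e. two extra pages of lazily wired, zero-filled anonymous memory.
 */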
static int map_tmap(vm_translation_map *map, addr_t va, addr_t pa, unsigned int attributes)
{
	addr_t pgtable_phys;
	unsigned long *pgtable;
	int index;
	vm_page *page;

	TMAP_TRACE("map_tmap: va 0x%lx pa 0x%lx, attributes 0x%x\n", va, pa, attributes);

	// look up and dereference the first entry
	pgtable = map->arch_data->pgdir_virt;
	ASSERT(pgtable);
	TMAP_TRACE("map_tmap top level pgdir virt %p\n", pgtable);
	index = PGTABLE0_ENTRY(va);
	if (!PGENT_PRESENT(pgtable[index])) {
		page = vm_page_allocate_page(PAGE_STATE_CLEAR);
		pgtable_phys = page->ppn * PAGE_SIZE;
		list_add_head(&map->arch_data->pagetable_list, &page->queue_node);
		pgtable[index] = pgtable_phys | (PT_PRESENT|PT_WRITE|PT_USER);
		map->map_count++;
		TMAP_TRACE("map_tmap: had to allocate level 1: paddr 0x%lx, ent @ %p = 0x%lx\n",
			pgtable_phys, &pgtable[index], pgtable[index]);
	} else {
		pgtable_phys = PGENT_TO_ADDR(pgtable[index]);
		TMAP_TRACE("map_tmap level 1: paddr 0x%lx\n", pgtable_phys);
	}

	// level 2
	pgtable = phys_to_virt(pgtable_phys);
	index = PGTABLE1_ENTRY(va);
	if (!PGENT_PRESENT(pgtable[index])) {
		page = vm_page_allocate_page(PAGE_STATE_CLEAR);
		pgtable_phys = page->ppn * PAGE_SIZE;
		list_add_head(&map->arch_data->pagetable_list, &page->queue_node);
		pgtable[index] = pgtable_phys | (PT_PRESENT|PT_WRITE|PT_USER);
		map->map_count++;
		TMAP_TRACE("map_tmap: had to allocate level 2: paddr 0x%lx, ent @ %p = 0x%lx\n",
			pgtable_phys, &pgtable[index], pgtable[index]);
	} else {
		pgtable_phys = PGENT_TO_ADDR(pgtable[index]);
		TMAP_TRACE("map_tmap level 2: paddr 0x%lx\n", pgtable_phys);
	}

	// level 3
	pgtable = phys_to_virt(pgtable_phys);
	index = PGTABLE2_ENTRY(va);
	if (!PGENT_PRESENT(pgtable[index])) {
		page = vm_page_allocate_page(PAGE_STATE_CLEAR);
		pgtable_phys = page->ppn * PAGE_SIZE;
		list_add_head(&map->arch_data->pagetable_list, &page->queue_node);
		pgtable[index] = pgtable_phys | (PT_PRESENT|PT_WRITE|PT_USER);
		map->map_count++;
		TMAP_TRACE("map_tmap: had to allocate level 3: paddr 0x%lx, ent @ %p = 0x%lx\n",
			pgtable_phys, &pgtable[index], pgtable[index]);
	} else {
		pgtable_phys = PGENT_TO_ADDR(pgtable[index]);
		TMAP_TRACE("map_tmap level 3: paddr 0x%lx\n", pgtable_phys);
	}

	// map the page
	pgtable = phys_to_virt(pgtable_phys);
	index = PGTABLE3_ENTRY(va);
	pa = ROUNDOWN(pa, PAGE_SIZE);
	pgtable[index] = pa
		| ((attributes & LOCK_RW) ? PT_WRITE : 0)
		| ((attributes & LOCK_KERNEL) ? 0 : PT_USER)
		| PT_PRESENT;
	map->map_count++;
	TMAP_TRACE("map_tmap: ent @ %p = 0x%lx\n", &pgtable[index], pgtable[index]);

	return 0;
}
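/*
 * phys_to_virt() is not defined in this excerpt. Under the common
 * direct-map assumption (all of physical memory mapped at a fixed kernel
 * offset) it would be little more than the following; KERNEL_PMAP_BASE is
 * a made-up name for that offset:
 */
#if 0
static inline unsigned long *phys_to_virt(addr_t pa)
{
	return (unsigned long *)(KERNEL_PMAP_BASE + pa);
}
#endif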
static int unmap_tmap(vm_translation_map *map, addr_t start, addr_t end)
{
	unsigned int index0, index1, index2, index3;
	addr_t addr;
	unsigned long *pgtable0;
	unsigned long *pgtable1;
	unsigned long *pgtable2;
	unsigned long *pgtable3;

	TMAP_TRACE("unmap_tmap: start 0x%lx, end 0x%lx\n", start, end);

	start = ROUNDOWN(start, PAGE_SIZE);
	end = ROUNDUP(end, PAGE_SIZE);
	if(start >= end)
		return 0;

	// walk the four levels in lockstep; each table holds 512 entries
	// (9 index bits), and addr always tracks the entry being examined
	addr = start;
	pgtable0 = map->arch_data->pgdir_virt;
	for (index0 = PGTABLE0_ENTRY(addr); addr < end; index0++) {
		if (!PGENT_PRESENT(pgtable0[index0])) {
			// nothing mapped here; skip to the next 512GB boundary
			// (addr + 1 so an already-aligned addr still advances)
			addr = ROUNDUP(addr + 1, 1UL << 39);
			continue;
		}
		pgtable1 = phys_to_virt(PGENT_TO_ADDR(pgtable0[index0]));
		for (index1 = PGTABLE1_ENTRY(addr); addr < end && index1 < 512; index1++) {
			if (!PGENT_PRESENT(pgtable1[index1])) {
				// skip to the next 1GB boundary
				addr = ROUNDUP(addr + 1, 1UL << 30);
				continue;
			}
			pgtable2 = phys_to_virt(PGENT_TO_ADDR(pgtable1[index1]));
			for (index2 = PGTABLE2_ENTRY(addr); addr < end && index2 < 512; index2++) {
				if (!PGENT_PRESENT(pgtable2[index2])) {
					// skip to the next 2MB boundary
					addr = ROUNDUP(addr + 1, 1UL << 21);
					continue;
				}
				pgtable3 = phys_to_virt(PGENT_TO_ADDR(pgtable2[index2]));
				for (index3 = PGTABLE3_ENTRY(addr); addr < end && index3 < 512; index3++) {
					if (!PGENT_PRESENT(pgtable3[index3])) {
						addr += PAGE_SIZE;
						continue;
					}
					TMAP_TRACE("unmap_tmap: unmapping at va 0x%lx\n", addr);

					// clear the present bit
					pgtable3[index3] &= ~PT_PRESENT;
					map->map_count--;
					addr += PAGE_SIZE;
				}
			}
		}
	}

	return 0;
}
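/*
 * Note that unmap_tmap only clears present bits in the page tables;
 * nothing here invalidates stale TLB entries, so that has to happen in the
 * translation map's flush path. A sketch of the kind of per-page shootdown
 * that path would need; both names below are assumptions, not part of this
 * source:
 */
#if 0
static void flush_tlb_range(addr_t start, addr_t end)
{
	addr_t va;

	// invalidate each page in the just-unmapped range, e.g. via an
	// invlpg-style primitive on x86-64
	for (va = ROUNDOWN(start, PAGE_SIZE); va < end; va += PAGE_SIZE)
		arch_cpu_invalidate_TLB_page(va);
}
#endif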