void rmcache(struct cached_page *cp)
{
    struct phys_block *pb = cp->page;
    int hv_dev = makehash(cp->dev, cp->dev_offset);

    assert(cp->page->flags & PBF_INCACHE);

    cp->page->flags &= ~PBF_INCACHE;

    rmhash_bydev(cp, &cache_hash_bydev[hv_dev]);
    if(cp->ino != VMC_NO_INODE) {
        int hv_ino = makehash(cp->ino, cp->ino_offset);
        rmhash_byino(cp, &cache_hash_byino[hv_ino]);
    }

    assert(cp->page->refcount >= 1);
    cp->page->refcount--;

    lru_rm(cp);

    if(pb->refcount == 0) {
        assert(pb->phys != MAP_NONE);
        free_mem(ABS2CLICK(pb->phys), 1);
        SLABFREE(pb);
    }

    SLABFREE(cp);
}
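/* Illustrative sketch only (not taken from the MINIX source): rmcache()
 * above relies on a makehash() helper that folds a device or inode number
 * together with an offset into a bucket index for cache_hash_bydev[] and
 * cache_hash_byino[]. A minimal XOR-and-modulo scheme could look like the
 * function below; the real mixing function, the parameter types and the
 * HASHSIZE constant in the cache code may well differ.
 */
static int makehash_sketch(u32_t id, u64_t offset)
{
    u32_t lo = (u32_t) offset;              /* low 32 bits of the offset */

    return (int) ((id ^ lo) % HASHSIZE);    /* HASHSIZE: assumed bucket count */
}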
static int mappedfile_unreference(struct phys_region *pr)
{
    assert(pr->ph->refcount == 0);
    if(pr->ph->phys != MAP_NONE)
        free_mem(ABS2CLICK(pr->ph->phys), 1);
    return OK;
}
/*===========================================================================*
 *                              vm_freepages                                 *
 *===========================================================================*/
PRIVATE void vm_freepages(vir_bytes vir, vir_bytes phys, int pages, int reason)
{
    vm_assert(reason >= 0 && reason < VMP_CATEGORIES);
    if(vir >= vmp->vm_stacktop) {
        vm_assert(!(vir % I386_PAGE_SIZE));
        vm_assert(!(phys % I386_PAGE_SIZE));
        FREE_MEM(ABS2CLICK(phys), pages);
        if(pt_writemap(&vmp->vm_pt, arch_vir2map(vmp, vir),
            MAP_NONE, pages*I386_PAGE_SIZE, 0, WMF_OVERWRITE) != OK)
            vm_panic("vm_freepages: pt_writemap failed", NO_NUM);
    } else {
        printf("VM: vm_freepages not freeing VM heap pages (%d)\n",
            pages);
    }
}
/*===========================================================================*
 *                              vm_freepages                                 *
 *===========================================================================*/
PRIVATE void vm_freepages(vir_bytes vir, vir_bytes phys, int pages, int reason)
{
    assert(reason >= 0 && reason < VMP_CATEGORIES);
    if(vir >= vmprocess->vm_stacktop) {
        assert(!(vir % I386_PAGE_SIZE));
        assert(!(phys % I386_PAGE_SIZE));
        free_mem(ABS2CLICK(phys), pages);
        if(pt_writemap(vmprocess, &vmprocess->vm_pt,
            arch_vir2map(vmprocess, vir),
            MAP_NONE, pages*I386_PAGE_SIZE, 0, WMF_OVERWRITE) != OK)
            panic("vm_freepages: pt_writemap failed");
    } else {
        printf("VM: vm_freepages not freeing VM heap pages (%d)\n",
            pages);
    }

#if SANITYCHECKS
    /* If SANITYCHECKS are on, flush tlb so accessing freed pages is
     * always trapped, also if not in tlb.
     */
    if((sys_vmctl(SELF, VMCTL_FLUSHTLB, 0)) != OK) {
        panic("VMCTL_FLUSHTLB failed");
    }
#endif
}
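/* Context for the free_mem(ABS2CLICK(...), n) calls in the functions above:
 * the MINIX allocator accounts memory in clicks, not bytes, so physical
 * byte addresses are shifted down before being handed back. The sketch
 * below shows the conventional form of these conversions; the names with
 * the _SKETCH suffix are illustrative placeholders, and the authoritative
 * CLICK_SHIFT/ABS2CLICK/CLICK2ABS definitions live in the tree's headers
 * (the 4096-byte click shown here is an assumption).
 */
#define CLICK_SHIFT_SKETCH      12                              /* log2(click size), assumed */
#define ABS2CLICK_SKETCH(a)     ((a) >> CLICK_SHIFT_SKETCH)     /* phys bytes -> clicks */
#define CLICK2ABS_SKETCH(v)     ((v) << CLICK_SHIFT_SKETCH)     /* clicks -> phys bytes */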
/*===========================================================================*
 *                              pt_writemap                                  *
 *===========================================================================*/
int pt_writemap(struct vmproc * vmp, pt_t *pt, vir_bytes v, phys_bytes physaddr,
    size_t bytes, u32_t flags, u32_t writemapflags)
{
/* Write mapping into page table. Allocate a new page table if necessary. */
/* Page directory and table entries for this virtual address. */
    int p, pages;
    int verify = 0;
    int ret = OK;

#ifdef CONFIG_SMP
    int vminhibit_clear = 0;
    /* FIXME
     * don't do it every time; stop the process only on the first change and
     * resume execution on the last change. Do this in a wrapper of this
     * function.
     */
    if (vmp && vmp->vm_endpoint != NONE && vmp->vm_endpoint != VM_PROC_NR &&
            !(vmp->vm_flags & VMF_EXITING)) {
        sys_vmctl(vmp->vm_endpoint, VMCTL_VMINHIBIT_SET, 0);
        vminhibit_clear = 1;
    }
#endif

    if(writemapflags & WMF_VERIFY)
        verify = 1;

    assert(!(bytes % VM_PAGE_SIZE));
    assert(!(flags & ~(PTF_ALLFLAGS)));

    pages = bytes / VM_PAGE_SIZE;

    /* MAP_NONE means to clear the mapping. It doesn't matter
     * what's actually written into the PTE if PRESENT
     * isn't on, so we can just write MAP_NONE into it.
     */
    assert(physaddr == MAP_NONE || (flags & ARCH_VM_PTE_PRESENT));
    assert(physaddr != MAP_NONE || !flags);

    /* First make sure all the necessary page tables are allocated,
     * before we start writing in any of them, because it's a pain
     * to undo our work properly.
     */
    ret = pt_ptalloc_in_range(pt, v, v + VM_PAGE_SIZE*pages, flags, verify);
    if(ret != OK) {
        printf("VM: writemap: pt_ptalloc_in_range failed\n");
        goto resume_exit;
    }

    /* Now write in them. */
    for(p = 0; p < pages; p++) {
        u32_t entry;
        int pde = ARCH_VM_PDE(v);
        int pte = ARCH_VM_PTE(v);

        assert(!(v % VM_PAGE_SIZE));
        assert(pte >= 0 && pte < ARCH_VM_PT_ENTRIES);
        assert(pde >= 0 && pde < ARCH_VM_DIR_ENTRIES);

        /* Page table has to be there. */
        assert(pt->pt_dir[pde] & ARCH_VM_PDE_PRESENT);

        /* We do not expect it to be a bigpage. */
        assert(!(pt->pt_dir[pde] & ARCH_VM_BIGPAGE));

        /* Make sure page directory entry for this page table
         * is marked present and page table entry is available.
         */
        assert(pt->pt_pt[pde]);

#if SANITYCHECKS
        /* We don't expect to overwrite a page. */
        if(!(writemapflags & (WMF_OVERWRITE|WMF_VERIFY)))
            assert(!(pt->pt_pt[pde][pte] & ARCH_VM_PTE_PRESENT));
#endif
        if(writemapflags & (WMF_WRITEFLAGSONLY|WMF_FREE)) {
#if defined(__i386__)
            physaddr = pt->pt_pt[pde][pte] & ARCH_VM_ADDR_MASK;
#elif defined(__arm__)
            physaddr = pt->pt_pt[pde][pte] & ARM_VM_PTE_MASK;
#endif
        }

        if(writemapflags & WMF_FREE) {
            free_mem(ABS2CLICK(physaddr), 1);
        }

        /* Entry we will write. */
#if defined(__i386__)
        entry = (physaddr & ARCH_VM_ADDR_MASK) | flags;
#elif defined(__arm__)
        entry = (physaddr & ARM_VM_PTE_MASK) | flags;
#endif

        if(verify) {
            u32_t maskedentry;
            maskedentry = pt->pt_pt[pde][pte];
#if defined(__i386__)
            maskedentry &= ~(I386_VM_ACC|I386_VM_DIRTY);
#endif
            /* Verify pagetable entry. */
#if defined(__i386__)
            if(entry & ARCH_VM_PTE_RW) {
                /* If we expect a writable page, allow a readonly page. */
                maskedentry |= ARCH_VM_PTE_RW;
            }
#elif defined(__arm__)
            if(!(entry & ARCH_VM_PTE_RO)) {
                /* If we expect a writable page, allow a readonly page. */
                maskedentry &= ~ARCH_VM_PTE_RO;
            }
            maskedentry &= ~(ARM_VM_PTE_WB|ARM_VM_PTE_WT);
#endif
            if(maskedentry != entry) {
                printf("pt_writemap: mismatch: ");
#if defined(__i386__)
                if((entry & ARCH_VM_ADDR_MASK) !=
                    (maskedentry & ARCH_VM_ADDR_MASK)) {
#elif defined(__arm__)
                if((entry & ARM_VM_PTE_MASK) !=
                    (maskedentry & ARM_VM_PTE_MASK)) {
#endif
                    printf("pt_writemap: physaddr mismatch (0x%lx, 0x%lx); ",
                        (long)entry, (long)maskedentry);
                } else printf("phys ok; ");
                printf(" flags: found %s; ",
                    ptestr(pt->pt_pt[pde][pte]));
                printf(" masked %s; ",
                    ptestr(maskedentry));
                printf(" expected %s\n", ptestr(entry));
                printf("found 0x%x, wanted 0x%x\n",
                    pt->pt_pt[pde][pte], entry);
                ret = EFAULT;
                goto resume_exit;
            }
        } else {
            /* Write pagetable entry. */
            pt->pt_pt[pde][pte] = entry;
        }

        physaddr += VM_PAGE_SIZE;
        v += VM_PAGE_SIZE;
    }

resume_exit:

#ifdef CONFIG_SMP
    if (vminhibit_clear) {
        assert(vmp && vmp->vm_endpoint != NONE &&
            vmp->vm_endpoint != VM_PROC_NR &&
            !(vmp->vm_flags & VMF_EXITING));
        sys_vmctl(vmp->vm_endpoint, VMCTL_VMINHIBIT_CLEAR, 0);
    }
#endif

    return ret;
}

/*===========================================================================*
 *                              pt_checkrange                                *
 *===========================================================================*/
int pt_checkrange(pt_t *pt, vir_bytes v, size_t bytes, int write)
{
    int p, pages;

    assert(!(bytes % VM_PAGE_SIZE));

    pages = bytes / VM_PAGE_SIZE;

    for(p = 0; p < pages; p++) {
        int pde = ARCH_VM_PDE(v);
        int pte = ARCH_VM_PTE(v);

        assert(!(v % VM_PAGE_SIZE));
        assert(pte >= 0 && pte < ARCH_VM_PT_ENTRIES);
        assert(pde >= 0 && pde < ARCH_VM_DIR_ENTRIES);

        /* Page table has to be there. */
        if(!(pt->pt_dir[pde] & ARCH_VM_PDE_PRESENT))
            return EFAULT;

        /* Make sure page directory entry for this page table
         * is marked present and page table entry is available.
         */
        assert((pt->pt_dir[pde] & ARCH_VM_PDE_PRESENT) && pt->pt_pt[pde]);

        if(!(pt->pt_pt[pde][pte] & ARCH_VM_PTE_PRESENT)) {
            return EFAULT;
        }

#if defined(__i386__)
        if(write && !(pt->pt_pt[pde][pte] & ARCH_VM_PTE_RW)) {
#elif defined(__arm__)
        if(write && (pt->pt_pt[pde][pte] & ARCH_VM_PTE_RO)) {
#endif
            return EFAULT;
        }

        v += VM_PAGE_SIZE;
    }

    return OK;
}

/*===========================================================================*
 *                              pt_new                                       *
 *===========================================================================*/
int pt_new(pt_t *pt)
{
/* Allocate a pagetable root. Allocate a page-aligned page directory
 * and set them to 0 (indicating no page tables are allocated). Lookup
 * its physical address as we'll need that in the future. Verify it's
 * page-aligned.
 */
    int i, r;

    /* Don't ever re-allocate/re-move a certain process slot's
     * page directory once it's been created. This is a fraction
     * faster, but also avoids having to invalidate the page
     * mappings from in-kernel page tables pointing to
     * the page directories (the page_directories data).
     */
    if(!pt->pt_dir &&
      !(pt->pt_dir = vm_allocpages((phys_bytes *)&pt->pt_dir_phys,
        VMP_PAGEDIR, ARCH_PAGEDIR_SIZE/VM_PAGE_SIZE))) {
        return ENOMEM;
    }

    assert(!((u32_t)pt->pt_dir_phys % ARCH_PAGEDIR_SIZE));

    for(i = 0; i < ARCH_VM_DIR_ENTRIES; i++) {
        pt->pt_dir[i] = 0;  /* invalid entry (PRESENT bit = 0) */
        pt->pt_pt[i] = NULL;
    }

    /* Where to start looking for free virtual address space? */
    pt->pt_virtop = 0;

    /* Map in kernel. */
    if((r=pt_mapkernel(pt)) != OK)
        return r;

    return OK;
}
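/* Hedged usage sketch of pt_writemap() as declared above: install one
 * present, writable mapping for a process, then clear it again. The flag
 * and helper names follow the code above; the caller context is assumed
 * for illustration, and a user-accessible mapping would additionally need
 * the architecture's user bit.
 */
static int map_one_page_demo(struct vmproc *vmp, vir_bytes vaddr, phys_bytes paddr)
{
    int r;

    /* Install a present, writable mapping of one page. */
    r = pt_writemap(vmp, &vmp->vm_pt, vaddr, paddr, VM_PAGE_SIZE,
        ARCH_VM_PTE_PRESENT | ARCH_VM_PTE_RW, 0);
    if(r != OK)
        return r;

    /* Clear it again: MAP_NONE with no flags, and WMF_OVERWRITE so the
     * SANITYCHECKS build does not assert on the still-present old entry.
     */
    return pt_writemap(vmp, &vmp->vm_pt, vaddr, MAP_NONE, VM_PAGE_SIZE,
        0, WMF_OVERWRITE);
}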
void init_vm(void)
{
    int s, i;
    static struct memory mem_chunks[NR_MEMS];
    static struct boot_image *ip;
    extern void __minix_init(void);
    multiboot_module_t *mod;
    vir_bytes kern_dyn, kern_static;

#if SANITYCHECKS
    incheck = nocheck = 0;
#endif

    /* Retrieve various crucial boot parameters */
    if(OK != (s=sys_getkinfo(&kernel_boot_info))) {
        panic("couldn't get bootinfo: %d", s);
    }

    /* Turn file mmap on? */
    env_parse("filemap", "d", 0, &enable_filemap, 0, 1);

    /* Sanity check */
    assert(kernel_boot_info.mmap_size > 0);
    assert(kernel_boot_info.mods_with_kernel > 0);

    /* Get chunks of available memory. */
    get_mem_chunks(mem_chunks);

    /* Set table to 0. This invalidates all slots (clear VMF_INUSE). */
    memset(vmproc, 0, sizeof(vmproc));

    for(i = 0; i < ELEMENTS(vmproc); i++) {
        vmproc[i].vm_slot = i;
    }

    /* Initialize ACL data structures. */
    acl_init();

    /* region management initialization. */
    map_region_init();

    /* Initialize tables to all physical memory. */
    mem_init(mem_chunks);

    /* Architecture-dependent initialization. */
    init_proc(VM_PROC_NR);
    pt_init();

    /* The kernel's freelist does not include boot-time modules; let
     * the allocator know that the total memory is bigger.
     */
    for (mod = &kernel_boot_info.module_list[0];
        mod < &kernel_boot_info.module_list[kernel_boot_info.mods_with_kernel-1];
        mod++) {
        phys_bytes len = mod->mod_end-mod->mod_start+1;
        len = roundup(len, VM_PAGE_SIZE);
        mem_add_total_pages(len/VM_PAGE_SIZE);
    }

    kern_dyn = kernel_boot_info.kernel_allocated_bytes_dynamic;
    kern_static = kernel_boot_info.kernel_allocated_bytes;
    kern_static = roundup(kern_static, VM_PAGE_SIZE);
    mem_add_total_pages((kern_dyn + kern_static)/VM_PAGE_SIZE);

    /* Give these processes their own page table. */
    for (ip = &kernel_boot_info.boot_procs[0];
        ip < &kernel_boot_info.boot_procs[NR_BOOT_PROCS]; ip++) {
        struct vmproc *vmp;

        if(ip->proc_nr < 0) continue;

        assert(ip->start_addr);

        /* VM has already been set up by the kernel and pt_init().
         * Any other boot process is already in memory and is set up
         * here.
         */
        if(ip->proc_nr == VM_PROC_NR) continue;

        vmp = init_proc(ip->proc_nr);

        exec_bootproc(vmp, ip);

        /* Free the file blob */
        assert(!(ip->start_addr % VM_PAGE_SIZE));
        ip->len = roundup(ip->len, VM_PAGE_SIZE);
        free_mem(ABS2CLICK(ip->start_addr), ABS2CLICK(ip->len));
    }

    /* Set up table of calls. */
#define CALLMAP(code, func) { int i;            \
    i=CALLNUMBER(code);                         \
    assert(i >= 0);                             \
    assert(i < NR_VM_CALLS);                    \
    vm_calls[i].vmc_func = (func);              \
    vm_calls[i].vmc_name = #code;               \
}

    /* Set call table to 0. This invalidates all calls (clear
     * vmc_func).
     */
    memset(vm_calls, 0, sizeof(vm_calls));

    /* Basic VM calls. */
    CALLMAP(VM_MMAP, do_mmap);
    CALLMAP(VM_MUNMAP, do_munmap);
    CALLMAP(VM_MAP_PHYS, do_map_phys);
    CALLMAP(VM_UNMAP_PHYS, do_munmap);

    /* Calls from PM. */
    CALLMAP(VM_EXIT, do_exit);
    CALLMAP(VM_FORK, do_fork);
    CALLMAP(VM_BRK, do_brk);
    CALLMAP(VM_WILLEXIT, do_willexit);
    CALLMAP(VM_NOTIFY_SIG, do_notify_sig);

    /* Calls from VFS. */
    CALLMAP(VM_VFS_REPLY, do_vfs_reply);
    CALLMAP(VM_VFS_MMAP, do_vfs_mmap);

    /* Calls from RS */
    CALLMAP(VM_RS_SET_PRIV, do_rs_set_priv);
    CALLMAP(VM_RS_UPDATE, do_rs_update);
    CALLMAP(VM_RS_MEMCTL, do_rs_memctl);

    /* Calls from RS/VFS */
    CALLMAP(VM_PROCCTL, do_procctl);

    /* Generic calls. */
    CALLMAP(VM_REMAP, do_remap);
    CALLMAP(VM_REMAP_RO, do_remap);
    CALLMAP(VM_GETPHYS, do_get_phys);
    CALLMAP(VM_SHM_UNMAP, do_munmap);
    CALLMAP(VM_GETREF, do_get_refcount);
    CALLMAP(VM_INFO, do_info);
    CALLMAP(VM_QUERY_EXIT, do_query_exit);
    CALLMAP(VM_WATCH_EXIT, do_watch_exit);

    /* Cache blocks. */
    CALLMAP(VM_MAPCACHEPAGE, do_mapcache);
    CALLMAP(VM_SETCACHEPAGE, do_setcache);

    /* getrusage */
    CALLMAP(VM_GETRUSAGE, do_getrusage);

    /* Initialize the structures for queryexit */
    init_query_exit();

    /* Acquire kernel ipc vectors that weren't available
     * before VM had determined kernel mappings
     */
    __minix_init();
}
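/* Hedged sketch of how the vm_calls[] table populated by CALLMAP above is
 * typically consumed: VM's request loop receives a message, translates the
 * message type into a table index with CALLNUMBER(), and invokes the
 * registered handler. This loop body is illustrative only; the real main
 * loop in the VM server also performs ACL checks and reply handling.
 */
static void dispatch_one_request_sketch(message *msg)
{
    int c = CALLNUMBER(msg->m_type);

    if(c < 0 || c >= NR_VM_CALLS || !vm_calls[c].vmc_func) {
        printf("VM: unknown or unmapped call %d\n", msg->m_type);
        return;
    }

    /* Call the handler registered by CALLMAP(code, func). */
    (void) vm_calls[c].vmc_func(msg);
}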
/*===========================================================================*
 *                              pt_init                                      *
 *===========================================================================*/
PUBLIC void pt_init(phys_bytes usedlimit)
{
/* By default, the kernel gives us a data segment with pre-allocated
 * memory that then can't grow. We want to be able to allocate memory
 * dynamically, however. So here we copy the part of the page table
 * that's ours, so we get a private page table. Then we increase the
 * hardware segment size so we can allocate memory above our stack.
 */
    pt_t *newpt;
    int s, r;
    vir_bytes v;
    phys_bytes lo, hi;
    vir_bytes extra_clicks;
    u32_t moveup = 0;
    int global_bit_ok = 0;
    int free_pde;
    int p;
    struct vm_ep_data ep_data;
    vir_bytes sparepages_mem;
    phys_bytes sparepages_ph;
    vir_bytes ptr;

    /* Shorthand. */
    newpt = &vmprocess->vm_pt;

    /* Get ourselves spare pages. */
    ptr = (vir_bytes) static_sparepages;
    ptr += I386_PAGE_SIZE - (ptr % I386_PAGE_SIZE);
    if(!(sparepages_mem = ptr))
        panic("pt_init: aalloc for spare failed");
    if((r=sys_umap(SELF, VM_D, (vir_bytes) sparepages_mem,
        I386_PAGE_SIZE*SPAREPAGES, &sparepages_ph)) != OK)
        panic("pt_init: sys_umap failed: %d", r);

    missing_spares = 0;
    assert(STATIC_SPAREPAGES < SPAREPAGES);
    for(s = 0; s < SPAREPAGES; s++) {
        if(s >= STATIC_SPAREPAGES) {
            sparepages[s].page = NULL;
            missing_spares++;
            continue;
        }
        sparepages[s].page = (void *) (sparepages_mem + s*I386_PAGE_SIZE);
        sparepages[s].phys = sparepages_ph + s*I386_PAGE_SIZE;
    }

    /* global bit and 4MB pages available? */
    global_bit_ok = _cpufeature(_CPUF_I386_PGE);
    bigpage_ok = _cpufeature(_CPUF_I386_PSE);

    /* Set bit for PTE's and PDE's if available. */
    if(global_bit_ok)
        global_bit = I386_VM_GLOBAL;

    /* The kernel and boot time processes need an identity mapping.
     * We use full PDE's for this without separate page tables.
     * Figure out which pde we can start using for other purposes.
     */
    id_map_high_pde = usedlimit / I386_BIG_PAGE_SIZE;

    /* We have to make mappings up till here. */
    free_pde = id_map_high_pde+1;

    /* Initial (current) range of our virtual address space. */
    lo = CLICK2ABS(vmprocess->vm_arch.vm_seg[T].mem_phys);
    hi = CLICK2ABS(vmprocess->vm_arch.vm_seg[S].mem_phys +
        vmprocess->vm_arch.vm_seg[S].mem_len);

    assert(!(lo % I386_PAGE_SIZE));
    assert(!(hi % I386_PAGE_SIZE));

    if(lo < VM_PROCSTART) {
        moveup = VM_PROCSTART - lo;
        assert(!(VM_PROCSTART % I386_PAGE_SIZE));
        assert(!(lo % I386_PAGE_SIZE));
        assert(!(moveup % I386_PAGE_SIZE));
    }

    /* Make new page table for ourselves, partly copied
     * from the current one.
     */
    if(pt_new(newpt) != OK)
        panic("pt_init: pt_new failed");

    /* Set up mappings for VM process. */
    for(v = lo; v < hi; v += I386_PAGE_SIZE) {
        phys_bytes addr;
        u32_t flags;

        /* We have to write the new position in the PT,
         * so we can move our segments.
         */
        if(pt_writemap(vmprocess, newpt, v+moveup, v, I386_PAGE_SIZE,
            I386_VM_PRESENT|I386_VM_WRITE|I386_VM_USER, 0) != OK)
            panic("pt_init: pt_writemap failed");
    }

    /* Move segments up too. */
    vmprocess->vm_arch.vm_seg[T].mem_phys += ABS2CLICK(moveup);
    vmprocess->vm_arch.vm_seg[D].mem_phys += ABS2CLICK(moveup);
    vmprocess->vm_arch.vm_seg[S].mem_phys += ABS2CLICK(moveup);

    /* Allocate us a page table in which to remember page directory
     * pointers.
     */
    if(!(page_directories = vm_allocpage(&page_directories_phys,
        VMP_PAGETABLE)))
        panic("no virt addr for vm mappings");

    memset(page_directories, 0, I386_PAGE_SIZE);

    /* Increase our hardware data segment to create virtual address
     * space above our stack. We want to increase it to VM_DATATOP,
     * like regular processes have.
     */
    extra_clicks = ABS2CLICK(VM_DATATOP - hi);
    vmprocess->vm_arch.vm_seg[S].mem_len += extra_clicks;

    /* We pretend to the kernel we have a huge stack segment to
     * increase our data segment.
     */
    vmprocess->vm_arch.vm_data_top =
        (vmprocess->vm_arch.vm_seg[S].mem_vir +
        vmprocess->vm_arch.vm_seg[S].mem_len) << CLICK_SHIFT;

    /* Where our free virtual address space starts.
     * This is only a hint to the VM system.
     */
    newpt->pt_virtop = 0;

    /* Let other functions know VM now has a private page table. */
    vmprocess->vm_flags |= VMF_HASPT;

    /* Now reserve another pde for kernel's own mappings. */
    {
        int kernmap_pde;
        phys_bytes addr, len;
        int flags, index = 0;
        u32_t offset = 0;

        kernmap_pde = free_pde++;
        offset = kernmap_pde * I386_BIG_PAGE_SIZE;

        while(sys_vmctl_get_mapping(index, &addr, &len, &flags) == OK) {
            vir_bytes vir;
            if(index >= MAX_KERNMAPPINGS)
                panic("VM: too many kernel mappings: %d", index);
            kern_mappings[index].phys_addr = addr;
            kern_mappings[index].len = len;
            kern_mappings[index].flags = flags;
            kern_mappings[index].lin_addr = offset;
            kern_mappings[index].flags =
                I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE |
                global_bit;
            if(flags & VMMF_UNCACHED)
                kern_mappings[index].flags |= PTF_NOCACHE;
            if(addr % I386_PAGE_SIZE)
                panic("VM: addr unaligned: %d", addr);
            if(len % I386_PAGE_SIZE)
                panic("VM: len unaligned: %d", len);
            vir = arch_map2vir(&vmproc[VMP_SYSTEM], offset);
            if(sys_vmctl_reply_mapping(index, vir) != OK)
                panic("VM: reply failed");
            offset += len;
            index++;
            kernmappings++;
        }
    }

    /* Find a PDE below processes available for mapping in the
     * page directories (readonly).
     */
    pagedir_pde = free_pde++;
    pagedir_pde_val = (page_directories_phys & I386_VM_ADDR_MASK) |
        I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE;

    /* Tell kernel about free pde's. */
    while(free_pde*I386_BIG_PAGE_SIZE < VM_PROCSTART) {
        if((r=sys_vmctl(SELF, VMCTL_I386_FREEPDE, free_pde++)) != OK) {
            panic("VMCTL_I386_FREEPDE failed: %d", r);
        }
    }

    /* first pde in use by process. */
    proc_pde = free_pde;

    /* Give our process the new, copied, private page table. */
    pt_mapkernel(newpt);    /* didn't know about vm_dir pages earlier */
    pt_bind(newpt, vmprocess);

    /* new segment limit for the kernel after paging is enabled */
    ep_data.data_seg_limit = free_pde*I386_BIG_PAGE_SIZE;
    /* the memory map which must be installed after paging is enabled */
    ep_data.mem_map = vmprocess->vm_arch.vm_seg;

    /* Now actually enable paging. */
    if(sys_vmctl_enable_paging(&ep_data) != OK)
        panic("pt_init: enable paging failed");

    /* Back to reality - this is where the stack actually is. */
    vmprocess->vm_arch.vm_seg[S].mem_len -= extra_clicks;

    /* Pretend VM stack top is the same as any regular process, not to
     * have discrepancies with new VM instances later on.
     */
    vmprocess->vm_stacktop = VM_STACKTOP;

    /* All OK. */
    return;
}
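/* Worked arithmetic behind id_map_high_pde and free_pde in pt_init() above,
 * under the standard non-PAE i386 assumption that one page directory entry
 * spans a "big page" of 1024 page-table entries * 4 KiB = 4 MiB
 * (I386_BIG_PAGE_SIZE). For example, a usedlimit of 0x01f00000 (31 MiB)
 * gives id_map_high_pde = 7, so PDEs 0..7 stay identity-mapped and the
 * first PDE available for other purposes is 8. Illustrative helper only.
 */
static int first_free_pde_sketch(phys_bytes usedlimit)
{
    int id_map_high = usedlimit / I386_BIG_PAGE_SIZE;   /* last identity-mapped PDE */

    return id_map_high + 1;                             /* first PDE free for reuse */
}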
/*===========================================================================*
 *                              pt_writemap                                  *
 *===========================================================================*/
PUBLIC int pt_writemap(struct vmproc * vmp, pt_t *pt, vir_bytes v,
    phys_bytes physaddr, size_t bytes, u32_t flags, u32_t writemapflags)
{
/* Write mapping into page table. Allocate a new page table if necessary. */
/* Page directory and table entries for this virtual address. */
    int p, r, pages;
    int verify = 0;
    int ret = OK;

    /* FIXME
     * don't do it every time; stop the process only on the first change and
     * resume execution on the last change. Do this in a wrapper of this
     * function.
     */
    if (vmp && vmp->vm_endpoint != NONE && vmp->vm_endpoint != VM_PROC_NR &&
            !(vmp->vm_flags & VMF_EXITING))
        sys_vmctl(vmp->vm_endpoint, VMCTL_VMINHIBIT_SET, 0);

    if(writemapflags & WMF_VERIFY)
        verify = 1;

    assert(!(bytes % I386_PAGE_SIZE));
    assert(!(flags & ~(PTF_ALLFLAGS)));

    pages = bytes / I386_PAGE_SIZE;

    /* MAP_NONE means to clear the mapping. It doesn't matter
     * what's actually written into the PTE if I386_VM_PRESENT
     * isn't on, so we can just write MAP_NONE into it.
     */
    assert(physaddr == MAP_NONE || (flags & I386_VM_PRESENT));
    assert(physaddr != MAP_NONE || !flags);

    /* First make sure all the necessary page tables are allocated,
     * before we start writing in any of them, because it's a pain
     * to undo our work properly.
     */
    ret = pt_ptalloc_in_range(pt, v, v + I386_PAGE_SIZE*pages, flags, verify);
    if(ret != OK) {
        goto resume_exit;
    }

    /* Now write in them. */
    for(p = 0; p < pages; p++) {
        u32_t entry;
        int pde = I386_VM_PDE(v);
        int pte = I386_VM_PTE(v);

        assert(!(v % I386_PAGE_SIZE));
        assert(pte >= 0 && pte < I386_VM_PT_ENTRIES);
        assert(pde >= 0 && pde < I386_VM_DIR_ENTRIES);

        /* Page table has to be there. */
        assert(pt->pt_dir[pde] & I386_VM_PRESENT);

        /* Make sure page directory entry for this page table
         * is marked present and page table entry is available.
         */
        assert((pt->pt_dir[pde] & I386_VM_PRESENT));
        assert(pt->pt_pt[pde]);

#if SANITYCHECKS
        /* We don't expect to overwrite a page. */
        if(!(writemapflags & (WMF_OVERWRITE|WMF_VERIFY)))
            assert(!(pt->pt_pt[pde][pte] & I386_VM_PRESENT));
#endif
        if(writemapflags & (WMF_WRITEFLAGSONLY|WMF_FREE)) {
            physaddr = pt->pt_pt[pde][pte] & I386_VM_ADDR_MASK;
        }

        if(writemapflags & WMF_FREE) {
            free_mem(ABS2CLICK(physaddr), 1);
        }

        /* Entry we will write. */
        entry = (physaddr & I386_VM_ADDR_MASK) | flags;

        if(verify) {
            u32_t maskedentry;
            maskedentry = pt->pt_pt[pde][pte];
            maskedentry &= ~(I386_VM_ACC|I386_VM_DIRTY);
            /* Verify pagetable entry. */
            if(entry & I386_VM_WRITE) {
                /* If we expect a writable page, allow a readonly page. */
                maskedentry |= I386_VM_WRITE;
            }
            if(maskedentry != entry) {
                printf("pt_writemap: mismatch: ");
                if((entry & I386_VM_ADDR_MASK) !=
                    (maskedentry & I386_VM_ADDR_MASK)) {
                    printf("pt_writemap: physaddr mismatch (0x%lx, 0x%lx); ",
                        entry, maskedentry);
                } else printf("phys ok; ");
                printf(" flags: found %s; ",
                    ptestr(pt->pt_pt[pde][pte]));
                printf(" masked %s; ",
                    ptestr(maskedentry));
                printf(" expected %s\n", ptestr(entry));
                ret = EFAULT;
                goto resume_exit;
            }
        } else {
            /* Write pagetable entry. */
#if SANITYCHECKS
            assert(vm_addrok(pt->pt_pt[pde], 1));
#endif
            pt->pt_pt[pde][pte] = entry;
        }

        physaddr += I386_PAGE_SIZE;
        v += I386_PAGE_SIZE;
    }

resume_exit:

    if (vmp && vmp->vm_endpoint != NONE && vmp->vm_endpoint != VM_PROC_NR &&
            !(vmp->vm_flags & VMF_EXITING))
        sys_vmctl(vmp->vm_endpoint, VMCTL_VMINHIBIT_CLEAR, 0);

    return ret;
}
/*===========================================================================*
 *                              pt_writemap                                  *
 *===========================================================================*/
PUBLIC int pt_writemap(pt_t *pt, vir_bytes v, phys_bytes physaddr,
    size_t bytes, u32_t flags, u32_t writemapflags)
{
/* Write mapping into page table. Allocate a new page table if necessary. */
/* Page directory and table entries for this virtual address. */
    int p, pages, pdecheck;
    int finalpde;
    int verify = 0;

    if(writemapflags & WMF_VERIFY)
        verify = 1;

    vm_assert(!(bytes % I386_PAGE_SIZE));
    vm_assert(!(flags & ~(PTF_ALLFLAGS)));

    pages = bytes / I386_PAGE_SIZE;

    /* MAP_NONE means to clear the mapping. It doesn't matter
     * what's actually written into the PTE if I386_VM_PRESENT
     * isn't on, so we can just write MAP_NONE into it.
     */
#if SANITYCHECKS
    if(physaddr != MAP_NONE && !(flags & I386_VM_PRESENT)) {
        vm_panic("pt_writemap: writing dir with !P\n", NO_NUM);
    }
    if(physaddr == MAP_NONE && flags) {
        vm_panic("pt_writemap: writing 0 with flags\n", NO_NUM);
    }
#endif

    finalpde = I386_VM_PDE(v + I386_PAGE_SIZE * pages);

    /* First make sure all the necessary page tables are allocated,
     * before we start writing in any of them, because it's a pain
     * to undo our work properly. Walk the range in page-directory-entry
     * sized leaps.
     */
    for(pdecheck = I386_VM_PDE(v); pdecheck <= finalpde; pdecheck++) {
        vm_assert(pdecheck >= 0 && pdecheck < I386_VM_DIR_ENTRIES);
        if(pt->pt_dir[pdecheck] & I386_VM_BIGPAGE) {
            printf("pt_writemap: trying to write 0x%lx into 0x%lx\n",
                physaddr, v);
            vm_panic("pt_writemap: BIGPAGE found", NO_NUM);
        }
        if(!(pt->pt_dir[pdecheck] & I386_VM_PRESENT)) {
            int r;
            if(verify) {
                printf("pt_writemap verify: no pde %d\n", pdecheck);
                return EFAULT;
            }
            vm_assert(!pt->pt_dir[pdecheck]);
            if((r=pt_ptalloc(pt, pdecheck, flags)) != OK) {
                /* Couldn't do (complete) mapping.
                 * Don't bother freeing any previously
                 * allocated page tables, they're
                 * still writable, don't point to nonsense,
                 * and pt_ptalloc leaves the directory
                 * and other data in a consistent state.
                 */
                printf("pt_writemap: pt_ptalloc failed for pde %d\n",
                    pdecheck);
                return r;
            }
        }
        vm_assert(pt->pt_dir[pdecheck] & I386_VM_PRESENT);
    }

    /* Now write in them. */
    for(p = 0; p < pages; p++) {
        u32_t entry;
        int pde = I386_VM_PDE(v);
        int pte = I386_VM_PTE(v);

        vm_assert(!(v % I386_PAGE_SIZE));
        vm_assert(pte >= 0 && pte < I386_VM_PT_ENTRIES);
        vm_assert(pde >= 0 && pde < I386_VM_DIR_ENTRIES);

        /* Page table has to be there. */
        vm_assert(pt->pt_dir[pde] & I386_VM_PRESENT);

        /* Make sure page directory entry for this page table
         * is marked present and page table entry is available.
         */
        vm_assert((pt->pt_dir[pde] & I386_VM_PRESENT) && pt->pt_pt[pde]);

#if SANITYCHECKS
        /* We don't expect to overwrite a page. */
        if(!(writemapflags & (WMF_OVERWRITE|WMF_VERIFY)))
            vm_assert(!(pt->pt_pt[pde][pte] & I386_VM_PRESENT));
#endif

        if(writemapflags & (WMF_WRITEFLAGSONLY|WMF_FREE)) {
            physaddr = pt->pt_pt[pde][pte] & I386_VM_ADDR_MASK;
        }

        if(writemapflags & WMF_FREE) {
            FREE_MEM(ABS2CLICK(physaddr), 1);
        }

        /* Entry we will write. */
        entry = (physaddr & I386_VM_ADDR_MASK) | flags;

        if(verify) {
            u32_t maskedentry;
            maskedentry = pt->pt_pt[pde][pte];
            maskedentry &= ~(I386_VM_ACC|I386_VM_DIRTY);
            /* Verify pagetable entry. */
            if(maskedentry != entry) {
                printf("pt_writemap: 0x%lx found, masked 0x%lx, 0x%lx expected\n",
                    pt->pt_pt[pde][pte], maskedentry, entry);
                return EFAULT;
            }
        } else {
            /* Write pagetable entry. */
            pt->pt_pt[pde][pte] = entry;
        }

        physaddr += I386_PAGE_SIZE;
        v += I386_PAGE_SIZE;
    }

    return OK;
}
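/* For reference on the I386_VM_PDE()/I386_VM_PTE() indexing used in the
 * two-level walks above: with non-PAE i386 paging a 32-bit virtual address
 * splits into a 10-bit directory index, a 10-bit table index and a 12-bit
 * page offset. The helper below shows that decomposition explicitly; the
 * macros in the tree should amount to the same bit operations, but this is
 * an illustrative sketch rather than their actual definitions.
 */
static void split_vaddr_sketch(u32_t v, int *pde, int *pte, u32_t *offset)
{
    *pde = (v >> 22) & 0x3ff;   /* bits 31..22: page directory index */
    *pte = (v >> 12) & 0x3ff;   /* bits 21..12: page table index */
    *offset = v & 0xfff;        /* bits 11..0 : offset within the page */
}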
/*===========================================================================*
 *                              main                                         *
 *===========================================================================*/
PUBLIC int main(void)
{
/* Start the ball rolling. */
    struct boot_image *ip;          /* boot image pointer */
    register struct proc *rp;       /* process pointer */
    register int i, j;
    size_t argsz;                   /* size of arguments passed to crtso on stack */

    BKL_LOCK();

    /* Global value to test segment sanity. */
    magictest = MAGICTEST;

    DEBUGEXTRA(("main()\n"));

    proc_init();

    /* Set up proc table entries for processes in boot image. The stacks
     * of the servers have been added to the data segment by the monitor, so
     * the stack pointer is set to the end of the data segment.
     */
    for (i=0; i < NR_BOOT_PROCS; ++i) {
        int schedulable_proc;
        proc_nr_t proc_nr;
        int ipc_to_m, kcalls;
        sys_map_t map;

        ip = &image[i];                         /* process' attributes */
        DEBUGEXTRA(("initializing %s... ", ip->proc_name));
        rp = proc_addr(ip->proc_nr);            /* get process pointer */
        ip->endpoint = rp->p_endpoint;          /* ipc endpoint */
        make_zero64(rp->p_cpu_time_left);
        strncpy(rp->p_name, ip->proc_name, P_NAME_LEN); /* set process name */

        reset_proc_accounting(rp);

        /* See if this process is immediately schedulable.
         * In that case, set its privileges now and allow it to run.
         * Only kernel tasks and the root system process get to run immediately.
         * All the other system processes are inhibited from running by the
         * RTS_NO_PRIV flag. They can only be scheduled once the root system
         * process has set their privileges.
         */
        proc_nr = proc_nr(rp);
        schedulable_proc = (iskerneln(proc_nr) || isrootsysn(proc_nr));
        if(schedulable_proc) {
            /* Assign privilege structure. Force a static privilege id. */
            (void) get_priv(rp, static_priv_id(proc_nr));

            /* Privileges for kernel tasks. */
            if(iskerneln(proc_nr)) {
                /* Privilege flags. */
                priv(rp)->s_flags = (proc_nr == IDLE ? IDL_F : TSK_F);
                /* Allowed traps. */
                priv(rp)->s_trap_mask = (proc_nr == CLOCK
                    || proc_nr == SYSTEM ? CSK_T : TSK_T);
                ipc_to_m = TSK_M;               /* allowed targets */
                kcalls = TSK_KC;                /* allowed kernel calls */
            }
            /* Privileges for the root system process. */
            else if(isrootsysn(proc_nr)) {
                priv(rp)->s_flags = RSYS_F;     /* privilege flags */
                priv(rp)->s_trap_mask = SRV_T;  /* allowed traps */
                ipc_to_m = SRV_M;               /* allowed targets */
                kcalls = SRV_KC;                /* allowed kernel calls */
                priv(rp)->s_sig_mgr = SRV_SM;   /* signal manager */
                rp->p_priority = SRV_Q;         /* priority queue */
                rp->p_quantum_size_ms = SRV_QT; /* quantum size */
            }
            /* Privileges for an ordinary process. */
            else {
                NOT_REACHABLE;
            }

            /* Fill in target mask. */
            memset(&map, 0, sizeof(map));

            if (ipc_to_m == ALL_M) {
                for(j = 0; j < NR_SYS_PROCS; j++)
                    set_sys_bit(map, j);
            }

            fill_sendto_mask(rp, &map);

            /* Fill in kernel call mask. */
            for(j = 0; j < SYS_CALL_MASK_SIZE; j++) {
                priv(rp)->s_k_call_mask[j] = (kcalls == NO_C ? 0 : (~0));
            }
        }
        else {
            /* Don't let the process run for now. */
            RTS_SET(rp, RTS_NO_PRIV | RTS_NO_QUANTUM);
        }

        rp->p_memmap[T].mem_vir  = ABS2CLICK(ip->memmap.text_vaddr);
        rp->p_memmap[T].mem_phys = ABS2CLICK(ip->memmap.text_paddr);
        rp->p_memmap[T].mem_len  = ABS2CLICK(ip->memmap.text_bytes);
        rp->p_memmap[D].mem_vir  = ABS2CLICK(ip->memmap.data_vaddr);
        rp->p_memmap[D].mem_phys = ABS2CLICK(ip->memmap.data_paddr);
        rp->p_memmap[D].mem_len  = ABS2CLICK(ip->memmap.data_bytes);
        rp->p_memmap[S].mem_phys = ABS2CLICK(ip->memmap.data_paddr +
            ip->memmap.data_bytes + ip->memmap.stack_bytes);
        rp->p_memmap[S].mem_vir  = ABS2CLICK(ip->memmap.data_vaddr +
            ip->memmap.data_bytes + ip->memmap.stack_bytes);
        rp->p_memmap[S].mem_len  = 0;

        /* Set initial register values. The processor status word for tasks
         * is different from that of other processes because tasks can
         * access I/O; this is not allowed to less-privileged processes.
         */
        rp->p_reg.pc = ip->memmap.entry;
        rp->p_reg.psw = (iskerneln(proc_nr)) ? INIT_TASK_PSW : INIT_PSW;

        /* Initialize the server stack pointer. Take it down three words
         * to give crtso.s something to use as "argc", "argv" and "envp".
         */
        if (isusern(proc_nr)) {                 /* user-space process? */
            rp->p_reg.sp = (rp->p_memmap[S].mem_vir +
                rp->p_memmap[S].mem_len) << CLICK_SHIFT;
            argsz = 3 * sizeof(reg_t);
            rp->p_reg.sp -= argsz;
            phys_memset(rp->p_reg.sp -
                (rp->p_memmap[S].mem_vir << CLICK_SHIFT) +
                (rp->p_memmap[S].mem_phys << CLICK_SHIFT),
                0, argsz);
        }

        /* scheduling functions depend on proc_ptr pointing somewhere. */
        if(!get_cpulocal_var(proc_ptr))
            get_cpulocal_var(proc_ptr) = rp;

        /* If this process has its own page table, VM will set the
         * PT up and manage it. VM will signal the kernel when it has
         * done this; until then, don't let it run.
         */
        if(ip->flags & PROC_FULLVM)
            rp->p_rts_flags |= RTS_VMINHIBIT;

        rp->p_rts_flags |= RTS_PROC_STOP;
        rp->p_rts_flags &= ~RTS_SLOT_FREE;
        alloc_segments(rp);
        DEBUGEXTRA(("done\n"));
    }

#define IPCNAME(n) { \
    assert((n) >= 0 && (n) <= IPCNO_HIGHEST); \
    assert(!ipc_call_names[n]); \
    ipc_call_names[n] = #n; \
}

    IPCNAME(SEND);
    IPCNAME(RECEIVE);
    IPCNAME(SENDREC);
    IPCNAME(NOTIFY);
    IPCNAME(SENDNB);
    IPCNAME(SENDA);

    /* Architecture-dependent initialization. */
    DEBUGEXTRA(("arch_init()... "));
    arch_init();
    DEBUGEXTRA(("done\n"));

    /* System and processes initialization */
    DEBUGEXTRA(("system_init()... "));
    system_init();
    DEBUGEXTRA(("done\n"));

#ifdef CONFIG_SMP
    if (config_no_apic) {
        BOOT_VERBOSE(printf("APIC disabled, disables SMP, using legacy PIC\n"));
        smp_single_cpu_fallback();
    } else if (config_no_smp) {
        BOOT_VERBOSE(printf("SMP disabled, using legacy PIC\n"));
        smp_single_cpu_fallback();
    } else {
        smp_init();
        /*
         * if smp_init() returns it means that it failed and we try to finish
         * single CPU booting
         */
        bsp_finish_booting();
    }
#else
    /*
     * If configured for a single CPU, we are already on the kernel stack which
     * we are going to use every time we execute kernel code. We finish booting
     * and we never return here.
     */
    bsp_finish_booting();
#endif

    NOT_REACHABLE;
    return 1;
}
void init_vm(void)
{
    int s, i;
    static struct memory mem_chunks[NR_MEMS];
    static struct boot_image *ip;
    extern void __minix_init(void);

#if SANITYCHECKS
    incheck = nocheck = 0;
#endif

    /* Retrieve various crucial boot parameters */
    if(OK != (s=sys_getkinfo(&kernel_boot_info))) {
        panic("couldn't get bootinfo: %d", s);
    }

    /* Sanity check */
    assert(kernel_boot_info.mmap_size > 0);
    assert(kernel_boot_info.mods_with_kernel > 0);

#if SANITYCHECKS
    env_parse("vm_sanitychecklevel", "d", 0, &vm_sanitychecklevel, 0, SCL_MAX);
#endif

    /* Get chunks of available memory. */
    get_mem_chunks(mem_chunks);

    /* Set table to 0. This invalidates all slots (clear VMF_INUSE). */
    memset(vmproc, 0, sizeof(vmproc));

    for(i = 0; i < ELEMENTS(vmproc); i++) {
        vmproc[i].vm_slot = i;
    }

    /* region management initialization. */
    map_region_init();

    /* Initialize tables to all physical memory. */
    mem_init(mem_chunks);

    /* Architecture-dependent initialization. */
    init_proc(VM_PROC_NR);
    pt_init();

    /* Give these processes their own page table. */
    for (ip = &kernel_boot_info.boot_procs[0];
        ip < &kernel_boot_info.boot_procs[NR_BOOT_PROCS]; ip++) {
        struct vmproc *vmp;

        if(ip->proc_nr < 0) continue;

        assert(ip->start_addr);

        /* VM has already been set up by the kernel and pt_init().
         * Any other boot process is already in memory and is set up
         * here.
         */
        if(ip->proc_nr == VM_PROC_NR) continue;

        vmp = init_proc(ip->proc_nr);

        exec_bootproc(vmp, ip);

        /* Free the file blob */
        assert(!(ip->start_addr % VM_PAGE_SIZE));
        ip->len = roundup(ip->len, VM_PAGE_SIZE);
        free_mem(ABS2CLICK(ip->start_addr), ABS2CLICK(ip->len));
    }

    /* Set up table of calls. */
#define CALLMAP(code, func) { int i;            \
    i=CALLNUMBER(code);                         \
    assert(i >= 0);                             \
    assert(i < NR_VM_CALLS);                    \
    vm_calls[i].vmc_func = (func);              \
    vm_calls[i].vmc_name = #code;               \
}

    /* Set call table to 0. This invalidates all calls (clear
     * vmc_func).
     */
    memset(vm_calls, 0, sizeof(vm_calls));

    /* Basic VM calls. */
    CALLMAP(VM_MMAP, do_mmap);
    CALLMAP(VM_MUNMAP, do_munmap);
    CALLMAP(VM_MAP_PHYS, do_map_phys);
    CALLMAP(VM_UNMAP_PHYS, do_munmap);

    /* Calls from PM. */
    CALLMAP(VM_EXIT, do_exit);
    CALLMAP(VM_FORK, do_fork);
    CALLMAP(VM_BRK, do_brk);
    CALLMAP(VM_WILLEXIT, do_willexit);
    CALLMAP(VM_NOTIFY_SIG, do_notify_sig);

    /* Calls from RS */
    CALLMAP(VM_RS_SET_PRIV, do_rs_set_priv);
    CALLMAP(VM_RS_UPDATE, do_rs_update);
    CALLMAP(VM_RS_MEMCTL, do_rs_memctl);

    /* Calls from RS/VFS */
    CALLMAP(VM_PROCCTL, do_procctl);

    /* Generic calls. */
    CALLMAP(VM_REMAP, do_remap);
    CALLMAP(VM_REMAP_RO, do_remap);
    CALLMAP(VM_GETPHYS, do_get_phys);
    CALLMAP(VM_SHM_UNMAP, do_munmap);
    CALLMAP(VM_GETREF, do_get_refcount);
    CALLMAP(VM_INFO, do_info);
    CALLMAP(VM_QUERY_EXIT, do_query_exit);
    CALLMAP(VM_WATCH_EXIT, do_watch_exit);
    CALLMAP(VM_FORGETBLOCKS, do_forgetblocks);
    CALLMAP(VM_FORGETBLOCK, do_forgetblock);
    CALLMAP(VM_YIELDBLOCKGETBLOCK, do_yieldblockgetblock);

    /* Initialize the structures for queryexit */
    init_query_exit();

    /* Acquire kernel ipc vectors that weren't available
     * before VM had determined kernel mappings
     */
    __minix_init();
}