/*===========================================================================*
 *				do_rs_update				     *
 *===========================================================================*/
int do_rs_update(message *m_ptr)
{
/* Handle an update request: swap the roles of two processes, first in the
 * kernel (sys_update) and then in VM's own tables (slot, dynamic data and
 * page table bindings).
 *
 * The source endpoint, destination endpoint and update flags are taken from
 * m_ptr->m_lsys_vm_update; the requester is m_ptr->m_source.
 *
 * Returns EINVAL for an unknown endpoint, ENOSYS when the destination holds
 * a VR_PREALLOC_MAP region while neither SF_VM_ROLLBACK nor SF_VM_NOMMAP is
 * set, the error code of sys_update/swap_proc_slot/swap_proc_dyn_data when
 * one of them fails, and SUSPEND on success (any reply is sent from here).
 *
 * NOTE(review): when one of the swap_* steps fails, the kernel-side update
 * has already been performed and is not undone here - confirm that callers
 * can cope with that.
 */
	endpoint_t from_ep, to_ep, requester;
	int from_slot, to_slot;
	struct vmproc *from_proc, *to_proc;
	int status, upd_flags;

	requester = m_ptr->m_source;
	from_ep = m_ptr->m_lsys_vm_update.src;
	to_ep = m_ptr->m_lsys_vm_update.dst;
	upd_flags = m_ptr->m_lsys_vm_update.flags;

	/* Translate both endpoints into VM process slots. */
	if(vm_isokendpt(from_ep, &from_slot) != OK) {
		printf("do_rs_update: bad src endpoint %d\n", from_ep);
		return EINVAL;
	}
	if(vm_isokendpt(to_ep, &to_slot) != OK) {
		printf("do_rs_update: bad dst endpoint %d\n", to_ep);
		return EINVAL;
	}
	from_proc = &vmproc[from_slot];
	to_proc = &vmproc[to_slot];

	/* Preallocated regions in the destination cannot be combined with
	 * transferring mmapped regions; that transfer is only skipped for
	 * rollback or when SF_VM_NOMMAP is requested.
	 */
	if(!(upd_flags & (SF_VM_ROLLBACK|SF_VM_NOMMAP))
		&& map_region_lookup_type(to_proc, VR_PREALLOC_MAP)) {
		return ENOSYS;
	}

	/* The kernel performs its part of the update before VM does. */
	status = sys_update(from_ep, to_ep,
		(upd_flags & SF_VM_ROLLBACK) ? SYS_UPD_ROLLBACK : 0);
	if(status != OK)
		return status;

	/* VM's part: swap slots, then dynamic data, then rebind both
	 * page tables to their (swapped) processes.
	 */
	if((status = swap_proc_slot(from_proc, to_proc)) != OK)
		return status;
	if((status = swap_proc_dyn_data(from_proc, to_proc, upd_flags)) != OK)
		return status;
	pt_bind(&from_proc->vm_pt, from_proc);
	pt_bind(&to_proc->vm_pt, to_proc);

	/* A request that VM sent to itself needs no reply. */
	if(requester == VM_PROC_NR)
		return SUSPEND;

	/* External request: if the requester is one of the two swapped
	 * processes, the reply has to go to the other endpoint.
	 */
	if(requester == from_ep)
		requester = to_ep;
	else if(requester == to_ep)
		requester = from_ep;

	m_ptr->m_type = OK;
	status = ipc_send(requester, m_ptr);
	if(status != OK) {
		panic("ipc_send() error");
	}

	return SUSPEND;
}
/*===========================================================================*
 *				pt_init_mem				     *
 *===========================================================================*/
PUBLIC void pt_init_mem()
{
/* Architecture-specific memory initialization. Make sure all the pages
 * shared with the kernel and VM's page tables are mapped above the stack,
 * so that we can easily transfer existing mappings for new VM instances.
 *
 * Concretely, every page handled here that currently lives below
 * vmprocess->vm_stacktop is replaced by a freshly allocated page (asserted
 * to be at or above vm_stacktop) holding a copy of the old contents:
 *  - the static spare pages (dropped, not copied),
 *  - the page holding the page directory pointers (page_directories),
 *  - VM's own page directory,
 *  - VM's own page tables for the PDEs from proc_pde upwards.
 * Takes no arguments and returns nothing; allocation failure panics.
 */
	phys_bytes new_page_directories_phys;
	u32_t *new_page_directories;
	phys_bytes new_pt_dir_phys;
	u32_t *new_pt_dir;
	phys_bytes new_pt_phys;
	u32_t *new_pt;
	pt_t *vmpt;
	int i;

	/* Shorthand for VM's own page table. */
	vmpt = &vmprocess->vm_pt;

	/* We should be running this when VM has been assigned a page
	 * table and memory initialization has already been performed.
	 */
	assert(vmprocess->vm_flags & VMF_HASPT);
	assert(meminit_done);

	/* Throw away static spare pages: any spare whose virtual address is
	 * below the stack top is marked missing. vm_checkspares() is called
	 * before and after (presumably to refill missing spares - confirm
	 * against its definition).
	 */
	vm_checkspares();
	for(i = 0; i < SPAREPAGES; i++) {
		if(sparepages[i].page && (vir_bytes) sparepages[i].page < vmprocess->vm_stacktop) {
			sparepages[i].page = NULL;
			missing_spares++;
		}
	}
	vm_checkspares();

	/* Reallocate the page for page directory pointers. */
	if(!(new_page_directories = vm_allocpage(&new_page_directories_phys, VMP_PAGETABLE)))
		panic("unable to reallocated page for page dir ptrs");
	assert((vir_bytes) new_page_directories >= vmprocess->vm_stacktop);
	memcpy(new_page_directories, page_directories, I386_PAGE_SIZE);
	page_directories = new_page_directories;
	/* Point the PDE value at the new physical page while keeping all
	 * of its non-address bits unchanged.
	 */
	pagedir_pde_val = (new_page_directories_phys & I386_VM_ADDR_MASK) | (pagedir_pde_val & ~I386_VM_ADDR_MASK);

	/* Remap in kernel. */
	pt_mapkernel(vmpt);

	/* Reallocate VM's page directory, but only if it is currently
	 * below the stack top; rebind afterwards so the new directory
	 * is the one in use.
	 */
	if((vir_bytes) vmpt->pt_dir < vmprocess->vm_stacktop) {
		if(!(new_pt_dir= vm_allocpage(&new_pt_dir_phys, VMP_PAGEDIR))) {
			panic("unable to reallocate VM's page directory");
		}
		assert((vir_bytes) new_pt_dir >= vmprocess->vm_stacktop);
		memcpy(new_pt_dir, vmpt->pt_dir, I386_PAGE_SIZE);
		vmpt->pt_dir = new_pt_dir;
		vmpt->pt_dir_phys = new_pt_dir_phys;
		pt_bind(vmpt, vmprocess);
	}

	/* Reallocate VM's page tables. Only PDEs from proc_pde upwards are
	 * considered; entries that are not present, or whose page table is
	 * already at or above the stack top, are left alone.
	 *
	 * NOTE(review): the PDEs rewritten in this loop are not followed by
	 * a pt_bind() in this function - confirm that no explicit
	 * rebinding/flush is required here.
	 */
	for(i = proc_pde; i < I386_VM_DIR_ENTRIES; i++) {
		if(!(vmpt->pt_dir[i] & I386_VM_PRESENT)) {
			continue;
		}
		assert(vmpt->pt_pt[i]);
		if((vir_bytes) vmpt->pt_pt[i] >= vmprocess->vm_stacktop) {
			continue;
		}
		vm_checkspares();
		if(!(new_pt = vm_allocpage(&new_pt_phys, VMP_PAGETABLE)))
			panic("unable to reallocate VM's page table");
		assert((vir_bytes) new_pt >= vmprocess->vm_stacktop);
		memcpy(new_pt, vmpt->pt_pt[i], I386_PAGE_SIZE);
		vmpt->pt_pt[i] = new_pt;
		/* New physical address, original flag bits. */
		vmpt->pt_dir[i] = (new_pt_phys & I386_VM_ADDR_MASK) | (vmpt->pt_dir[i] & ~I386_VM_ADDR_MASK);
	}
}
/*===========================================================================*
 *				pt_init					     *
 *===========================================================================*/
PUBLIC void pt_init(phys_bytes usedlimit)
{
/* By default, the kernel gives us a data segment with pre-allocated
 * memory that then can't grow. We want to be able to allocate memory
 * dynamically, however. So here we copy the part of the page table
 * that's ours, so we get a private page table. Then we increase the
 * hardware segment size so we can allocate memory above our stack.
 *
 * usedlimit determines the highest PDE of the identity mapping
 * (usedlimit / I386_BIG_PAGE_SIZE); the PDEs after it are handed out, in
 * order, to the kernel mappings, the page directory pointers, the kernel's
 * free PDEs and finally the processes (proc_pde).
 *
 * Returns nothing; every failure along the way panics.
 */
	pt_t *newpt;
	int s, r;
	vir_bytes v;
	phys_bytes lo, hi;
	vir_bytes extra_clicks;
	u32_t moveup = 0;
	int global_bit_ok = 0;
	int free_pde;
	struct vm_ep_data ep_data;
	vir_bytes sparepages_mem;
	phys_bytes sparepages_ph;
	vir_bytes ptr;
	int f = 0;

	/* Shorthand. */
	newpt = &vmprocess->vm_pt;

	/* Get ourselves spare pages. The start of static_sparepages is
	 * rounded up to a page boundary; note that an already aligned
	 * address is advanced by a whole page as well.
	 * NOTE(review): presumably static_sparepages is sized with one page
	 * of slack for this - confirm at its definition.
	 */
	ptr = (vir_bytes) static_sparepages;
	ptr += I386_PAGE_SIZE - (ptr % I386_PAGE_SIZE);
	if(!(sparepages_mem = ptr))
		panic("pt_init: aalloc for spare failed");
	/* Look up the physical address that backs the spare page area. */
	if((r=sys_umap(SELF, VM_D, (vir_bytes) sparepages_mem, I386_PAGE_SIZE*SPAREPAGES, &sparepages_ph)) != OK)
		panic("pt_init: sys_umap failed: %d", r);

	/* Only the first STATIC_SPAREPAGES slots are backed by the static
	 * area; the remaining slots start out empty and count as missing.
	 */
	missing_spares = 0;
	assert(STATIC_SPAREPAGES < SPAREPAGES);
	for(s = 0; s < SPAREPAGES; s++) {
		if(s >= STATIC_SPAREPAGES) {
			sparepages[s].page = NULL;
			missing_spares++;
			continue;
		}
		sparepages[s].page = (void *) (sparepages_mem + s*I386_PAGE_SIZE);
		sparepages[s].phys = sparepages_ph + s*I386_PAGE_SIZE;
	}

	/* global bit and 4MB pages available? */
	global_bit_ok = _cpufeature(_CPUF_I386_PGE);
	bigpage_ok = _cpufeature(_CPUF_I386_PSE);

	/* Set bit for PTE's and PDE's if available. */
	if(global_bit_ok)
		global_bit = I386_VM_GLOBAL;

	/* The kernel and boot time processes need an identity mapping.
	 * We use full PDE's for this without separate page tables.
	 * Figure out which pde we can start using for other purposes.
	 */
	id_map_high_pde = usedlimit / I386_BIG_PAGE_SIZE;

	/* We have to make mappings up till here. */
	free_pde = id_map_high_pde+1;

	/* Initial (current) range of our virtual address space: from the
	 * start of the text segment to the end of the stack segment.
	 */
	lo = CLICK2ABS(vmprocess->vm_arch.vm_seg[T].mem_phys);
	hi = CLICK2ABS(vmprocess->vm_arch.vm_seg[S].mem_phys + vmprocess->vm_arch.vm_seg[S].mem_len);

	assert(!(lo % I386_PAGE_SIZE));
	assert(!(hi % I386_PAGE_SIZE));

	/* If we currently start below VM_PROCSTART, everything is shifted
	 * up by 'moveup' bytes so that we start at VM_PROCSTART.
	 */
	if(lo < VM_PROCSTART) {
		moveup = VM_PROCSTART - lo;
		assert(!(VM_PROCSTART % I386_PAGE_SIZE));
		assert(!(lo % I386_PAGE_SIZE));
		assert(!(moveup % I386_PAGE_SIZE));
	}

	/* Make new page table for ourselves, partly copied
	 * from the current one.
	 */
	if(pt_new(newpt) != OK)
		panic("pt_init: pt_new failed");

	/* Set up mappings for VM process, one page at a time. */
	for(v = lo; v < hi; v += I386_PAGE_SIZE) {
		/* We have to write the new position in the PT,
		 * so we can move our segments.
		 */
		if(pt_writemap(vmprocess, newpt, v+moveup, v, I386_PAGE_SIZE, I386_VM_PRESENT|I386_VM_WRITE|I386_VM_USER, 0) != OK)
			panic("pt_init: pt_writemap failed");
	}

	/* Move segments up too. */
	vmprocess->vm_arch.vm_seg[T].mem_phys += ABS2CLICK(moveup);
	vmprocess->vm_arch.vm_seg[D].mem_phys += ABS2CLICK(moveup);
	vmprocess->vm_arch.vm_seg[S].mem_phys += ABS2CLICK(moveup);

	/* Allocate us a page table in which to remember page directory
	 * pointers.
	 */
	if(!(page_directories = vm_allocpage(&page_directories_phys, VMP_PAGETABLE)))
		panic("no virt addr for vm mappings");

	memset(page_directories, 0, I386_PAGE_SIZE);

	/* Increase our hardware data segment to create virtual address
	 * space above our stack. We want to increase it to VM_DATATOP,
	 * like regular processes have.
	 */
	extra_clicks = ABS2CLICK(VM_DATATOP - hi);
	vmprocess->vm_arch.vm_seg[S].mem_len += extra_clicks;

	/* We pretend to the kernel we have a huge stack segment to
	 * increase our data segment. (The extra length is taken off again
	 * after paging has been enabled, see the end of this function.)
	 */
	vmprocess->vm_arch.vm_data_top = (vmprocess->vm_arch.vm_seg[S].mem_vir + vmprocess->vm_arch.vm_seg[S].mem_len) << CLICK_SHIFT;

	/* Where our free virtual address space starts.
	 * This is only a hint to the VM system.
	 */
	newpt->pt_virtop = 0;

	/* Let other functions know VM now has a private page table. */
	vmprocess->vm_flags |= VMF_HASPT;

	/* Now reserve another pde for kernel's own mappings. */
	{
		int kernmap_pde;
		phys_bytes addr, len;
		int flags, index = 0;
		u32_t offset = 0;

		kernmap_pde = free_pde++;
		offset = kernmap_pde * I386_BIG_PAGE_SIZE;

		/* Ask the kernel for its mappings one index at a time until
		 * the request fails; each mapping is recorded in
		 * kern_mappings[] at consecutive linear offsets starting at
		 * the reserved pde, and its virtual address is reported back.
		 */
		while(sys_vmctl_get_mapping(index, &addr, &len, &flags) == OK) {
			vir_bytes vir;
			if(index >= MAX_KERNMAPPINGS)
				panic("VM: too many kernel mappings: %d", index);
			kern_mappings[index].phys_addr = addr;
			kern_mappings[index].len = len;
			/* NOTE(review): this store is overwritten two
			 * statements below; only the VMMF_UNCACHED bit of
			 * the kernel-supplied flags ends up being used.
			 */
			kern_mappings[index].flags = flags;
			kern_mappings[index].lin_addr = offset;
			kern_mappings[index].flags = I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE | global_bit;
			if(flags & VMMF_UNCACHED)
				kern_mappings[index].flags |= PTF_NOCACHE;
			/* NOTE(review): addr and len are phys_bytes but are
			 * printed with %d - confirm the format matches.
			 */
			if(addr % I386_PAGE_SIZE)
				panic("VM: addr unaligned: %d", addr);
			if(len % I386_PAGE_SIZE)
				panic("VM: len unaligned: %d", len);
			vir = arch_map2vir(&vmproc[VMP_SYSTEM], offset);
			if(sys_vmctl_reply_mapping(index, vir) != OK)
				panic("VM: reply failed");
			offset += len;
			index++;
			kernmappings++;
		}
	}

	/* Find a PDE below processes available for mapping in the
	 * page directories (readonly).
	 * NOTE(review): the value built here includes I386_VM_WRITE, which
	 * does not match "readonly" - confirm which one is intended.
	 */
	pagedir_pde = free_pde++;
	pagedir_pde_val = (page_directories_phys & I386_VM_ADDR_MASK) | I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE;

	/* Tell kernel about free pde's: at most FREE_PDES of them, and
	 * only while they lie below VM_PROCSTART.
	 */
	first_free_pde = free_pde;
	while(free_pde*I386_BIG_PAGE_SIZE < VM_PROCSTART && f < FREE_PDES) {
		if((r=sys_vmctl(SELF, VMCTL_I386_FREEPDE, free_pde++)) != OK) {
			panic("VMCTL_I386_FREEPDE failed: %d", r);
		}
		f++;
	}

	/* first pde in use by process. */
	proc_pde = free_pde;

	/* Give our process the new, copied, private page table. */
	pt_mapkernel(newpt); /* didn't know about vm_dir pages earlier */
	pt_bind(newpt, vmprocess);

	/* new segment limit for the kernel after paging is enabled */
	ep_data.data_seg_limit = free_pde*I386_BIG_PAGE_SIZE;
	/* the memory map which must be installed after paging is enabled */
	ep_data.mem_map = vmprocess->vm_arch.vm_seg;

	/* Now actually enable paging. */
	if(sys_vmctl_enable_paging(&ep_data) != OK)
		panic("pt_init: enable paging failed");

	/* Back to reality - this is where the stack actually is. */
	vmprocess->vm_arch.vm_seg[S].mem_len -= extra_clicks;

	/* Pretend VM stack top is the same as any regular process, not to
	 * have discrepancies with new VM instances later on.
	 */
	vmprocess->vm_stacktop = VM_STACKTOP;

	/* All OK. */
	return;
}
/*===========================================================================*
 *				do_fork					     *
 *===========================================================================*/
int do_fork(message *msg)
{
/* Handle a VM_FORK request: set up the child's VM process slot as a copy of
 * the parent's, give it its own page table and copy of the parent's memory
 * map, and let the kernel perform the fork.
 *
 * msg->VMF_ENDPOINT names the parent and msg->VMF_SLOTNO the slot for the
 * child. On success msg->VMF_CHILD_ENDPOINT holds the child's endpoint and
 * OK is returned. EINVAL is returned for a bad endpoint or slot number and
 * ENOMEM when the page table or the memory map copy cannot be created.
 * Failures after that point (sys_fork, pt_bind, handle_memory) panic.
 */
	struct vmproc *parent, *child;
	pt_t saved_pt;
	vir_bytes msg_vaddr;
	int parent_slot, child_slot, rc;

	SANITYCHECK(SCL_FUNCTIONS);

	if(vm_isokendpt(msg->VMF_ENDPOINT, &parent_slot) != OK) {
		printf("VM: bogus endpoint VM_FORK %d\n", msg->VMF_ENDPOINT);
		SANITYCHECK(SCL_FUNCTIONS);
		return EINVAL;
	}

	child_slot = msg->VMF_SLOTNO;
	if(!(child_slot >= 0 && child_slot < NR_PROCS)) {
		printf("VM: bogus slotno VM_FORK %d\n", msg->VMF_SLOTNO);
		SANITYCHECK(SCL_FUNCTIONS);
		return EINVAL;
	}

	parent = &vmproc[parent_slot];
	child = &vmproc[child_slot];
	assert(child->vm_slot == child_slot);

	/* Start the child off as a verbatim copy of the parent, then put
	 * back or reset the fields that must stay private to the child:
	 * its slot number, its (previously existing) page table structure,
	 * its region tree, and its endpoint, which stays NONE until the
	 * kernel assigns one below.
	 */
	saved_pt = child->vm_pt;
	*child = *parent;
	child->vm_slot = child_slot;
	region_init(&child->vm_regions_avl);
	child->vm_endpoint = NONE;
	child->vm_pt = saved_pt;

#if VMSTATS
	child->vm_bytecopies = 0;
#endif

	if(pt_new(&child->vm_pt) != OK)
		return ENOMEM;

	SANITYCHECK(SCL_DETAIL);

	if(map_proc_copy(child, parent) != OK) {
		printf("VM: fork: map_proc_copy failed\n");
		pt_free(&child->vm_pt);
		return ENOMEM;
	}

	/* VMF_INUSE is the only flag the child keeps from the parent. */
	child->vm_flags &= VMF_INUSE;

	/* The privileged call bitmaps are inherited as well. */
	memcpy(&child->vm_call_mask, &parent->vm_call_mask,
		sizeof(child->vm_call_mask));

	/* VM's side is ready; have the kernel do the fork. This yields the
	 * child's endpoint and the address of a message buffer.
	 */
	rc = sys_fork(parent->vm_endpoint, child_slot, &child->vm_endpoint,
		PFF_VMINHIBIT, &msg_vaddr);
	if(rc != OK) {
		panic("do_fork can't sys_fork: %d", rc);
	}

	rc = pt_bind(&child->vm_pt, child);
	if(rc != OK)
		panic("fork can't pt_bind: %d", rc);

	/* Run handle_memory over the message buffer in the child first and
	 * then in the parent (fourth argument 1 - presumably requesting
	 * write access, confirm against handle_memory). A failure in either
	 * process is fatal.
	 */
	if(handle_memory(child, msg_vaddr, sizeof(message), 1, NULL, 0, 0) != OK)
		panic("do_fork: handle_memory for child failed\n");
	if(handle_memory(parent, msg_vaddr, sizeof(message), 1, NULL, 0, 0) != OK)
		panic("do_fork: handle_memory for parent failed\n");

	/* Report the new child's endpoint back to the caller. */
	msg->VMF_CHILD_ENDPOINT = child->vm_endpoint;

	SANITYCHECK(SCL_FUNCTIONS);
	return OK;
}