int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
{
    pteval_t prot_val = create_mapping_protection(md);
    bool allow_block_mappings = (md->type != EFI_RUNTIME_SERVICES_CODE &&
                                 md->type != EFI_RUNTIME_SERVICES_DATA);

    if (!PAGE_ALIGNED(md->phys_addr) ||
        !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT)) {
        /*
         * If the end address of this region is not aligned to page
         * size, the mapping is rounded up, and may end up sharing a
         * page frame with the next UEFI memory region. If we create
         * a block entry now, we may need to split it again when mapping
         * the next region, and support for that is going to be removed
         * from the MMU routines. So avoid block mappings altogether in
         * that case.
         */
        allow_block_mappings = false;
    }

    create_pgd_mapping(mm, md->phys_addr, md->virt_addr,
                       md->num_pages << EFI_PAGE_SHIFT,
                       __pgprot(prot_val | PTE_NG), allow_block_mappings);
    return 0;
}
static void __init init_vdso_image(struct mips_vdso_image *image)
{
    unsigned long num_pages, i;

    BUG_ON(!PAGE_ALIGNED(image->data));
    BUG_ON(!PAGE_ALIGNED(image->size));

    num_pages = image->size / PAGE_SIZE;

    for (i = 0; i < num_pages; i++) {
        image->mapping.pages[i] =
            virt_to_page(image->data + (i * PAGE_SIZE));
    }
}
static void __init init_vdso_image(struct mips_vdso_image *image)
{
    unsigned long num_pages, i;
    unsigned long data_pfn;

    BUG_ON(!PAGE_ALIGNED(image->data));
    BUG_ON(!PAGE_ALIGNED(image->size));

    num_pages = image->size / PAGE_SIZE;

    data_pfn = __phys_to_pfn(__pa_symbol(image->data));
    for (i = 0; i < num_pages; i++)
        image->mapping.pages[i] = pfn_to_page(data_pfn + i);
}
/**
 * Verify that access to user space is permitted (implementation).
 * @param[in] as    Virtual address space to inspect
 * @param[in] start Start address of the range to check
 * @param[in] count Length of the range in bytes
 * @param[in] prot  Access rights to check (VMA protection attributes)
 * @retval true  Physical pages exist for the whole range and the
 *               required access rights are present
 * @retval false No physical pages exist for the range, or the
 *               required access rights are missing
 */
static bool
user_area_can_access_nolock(vm *as, void *start, size_t count, vma_prot prot)
{
    int rc;
    void *pg_start;
    void *pg_end;
    vma *vmap;

    kassert( as != NULL );

    pg_start = (void *)PAGE_START((uintptr_t)start);
    pg_end = ( PAGE_ALIGNED( (uintptr_t)(start + count) ) ?
        ( start + count ) :
        ( (void *)PAGE_NEXT( (uintptr_t)(start + count) ) ) );

    rc = _vm_find_vma_nolock(as, pg_start, &vmap);
    if ( rc != 0 )
        goto can_not_access;

    if ( vmap->end < pg_end )
        goto can_not_access;

    if ( !( vmap->prot & prot ) )
        goto can_not_access;

    return true;

can_not_access:
    return false;
}
void uvm_pmm_sysmem_mappings_reparent_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
                                                  NvU64 dma_addr,
                                                  uvm_va_block_t *va_block)
{
    NvU64 virt_addr;
    uvm_reverse_map_t *reverse_map;
    const NvU64 base_key = dma_addr / PAGE_SIZE;
    uvm_page_index_t new_start_page;

    UVM_ASSERT(PAGE_ALIGNED(dma_addr));
    UVM_ASSERT(va_block);
    UVM_ASSERT(va_block->va_range);

    if (!sysmem_mappings->gpu->access_counters_supported)
        return;

    uvm_spin_lock(&sysmem_mappings->reverse_map_lock);

    reverse_map = radix_tree_lookup(&sysmem_mappings->reverse_map_tree, base_key);
    UVM_ASSERT(reverse_map);

    // Compute virt address by hand since the old VA block may be messed up
    // during split
    virt_addr = reverse_map->va_block->start + reverse_map->region.first * PAGE_SIZE;
    new_start_page = uvm_va_block_cpu_page_index(va_block, virt_addr);

    reverse_map->region = uvm_va_block_region(new_start_page,
                                              new_start_page +
                                              uvm_va_block_region_num_pages(reverse_map->region));
    reverse_map->va_block = va_block;

    UVM_ASSERT(uvm_va_block_contains_address(va_block, uvm_reverse_map_start(reverse_map)));
    UVM_ASSERT(uvm_va_block_contains_address(va_block, uvm_reverse_map_end(reverse_map)));

    uvm_spin_unlock(&sysmem_mappings->reverse_map_lock);
}
/*
 * This function implements the munmap(2) syscall.
 *
 * As with do_mmap() it should perform the required error checking,
 * before calling upon vmmap_remove() to do most of the work.
 * Remember to clear the TLB.
 */
int
do_munmap(void *addr, size_t len)
{
    /*NOT_YET_IMPLEMENTED("VM: do_munmap");
    return -1;*/
    dbg(DBG_PRINT,"go into do_munmap\n");
    if((uint32_t)addr % PAGE_SIZE)
    {
        dbg(DBG_PRINT,"(GRADING3C)\n");
        return -EINVAL;
    }
    if(len <= 0||len > 0xc0000000)
    {
        dbg(DBG_PRINT,"(GRADING3C)\n");
        return -EINVAL;
    }
    KASSERT(PAGE_ALIGNED(addr));
    if(((uint32_t)addr < USER_MEM_LOW) || ((uint32_t)addr > USER_MEM_HIGH) ||
       ((uint32_t)addr+len > USER_MEM_HIGH))
    {
        dbg(DBG_PRINT,"(GRADING3C)\n");
        return -EINVAL;
    }
    vmmap_remove( curproc->p_vmmap, ADDR_TO_PN(addr), ((len - 1)/PAGE_SIZE + 1));
    tlb_flush_all();
    KASSERT(NULL != curproc->p_pagedir);
    dbg(DBG_PRINT,"(GRADING3A 2.b)\n");
    return 0;
}
/*
 * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be
 * executable, everything else can be mapped with the XN bits
 * set. Also take the new (optional) RO/XP bits into account.
 */
static __init pteval_t create_mapping_protection(efi_memory_desc_t *md)
{
    u64 attr = md->attribute;
    u32 type = md->type;

    if (type == EFI_MEMORY_MAPPED_IO)
        return PROT_DEVICE_nGnRE;

    if (WARN_ONCE(!PAGE_ALIGNED(md->phys_addr),
                  "UEFI Runtime regions are not aligned to 64 KB -- buggy firmware?"))
        /*
         * If the region is not aligned to the page size of the OS, we
         * can not use strict permissions, since that would also affect
         * the mapping attributes of the adjacent regions.
         */
        return pgprot_val(PAGE_KERNEL_EXEC);

    /* R-- */
    if ((attr & (EFI_MEMORY_XP | EFI_MEMORY_RO)) ==
        (EFI_MEMORY_XP | EFI_MEMORY_RO))
        return pgprot_val(PAGE_KERNEL_RO);

    /* R-X */
    if (attr & EFI_MEMORY_RO)
        return pgprot_val(PAGE_KERNEL_ROX);

    /* RW- */
    if (attr & EFI_MEMORY_XP || type != EFI_RUNTIME_SERVICES_CODE)
        return pgprot_val(PAGE_KERNEL);

    /* RWX */
    return pgprot_val(PAGE_KERNEL_EXEC);
}
void
context_setup(context_t *c, context_func_t func, int arg1, void *arg2,
              void *kstack, size_t kstacksz, pagedir_t *pdptr)
{
    KASSERT(NULL != pdptr);
    KASSERT(PAGE_ALIGNED(kstack));

    c->c_kstack = (uintptr_t)kstack;
    c->c_kstacksz = kstacksz;
    c->c_pdptr = pdptr;

    /* put the arguments for __context_initial_func onto the
     * stack, leave room at the bottom of the stack for a phony
     * return address (we should never return from the lowest
     * function on the stack) */
    c->c_esp = (uintptr_t)kstack + kstacksz;
    c->c_esp -= sizeof(arg2);
    *(void **)c->c_esp = arg2;
    c->c_esp -= sizeof(arg1);
    *(int *)c->c_esp = arg1;
    c->c_esp -= sizeof(context_func_t);
    *(context_func_t *)c->c_esp = func;
    c->c_esp -= sizeof(uintptr_t);

    c->c_ebp = c->c_esp;
    c->c_eip = (uintptr_t)__context_initial_func;
}
int kasan_module_alloc(void *addr, size_t size)
{
    void *ret;
    size_t shadow_size;
    unsigned long shadow_start;

    shadow_start = (unsigned long)kasan_mem_to_shadow(addr);
    shadow_size = round_up(size >> KASAN_SHADOW_SCALE_SHIFT,
                           PAGE_SIZE);

    if (WARN_ON(!PAGE_ALIGNED(shadow_start)))
        return -EINVAL;

    ret = __vmalloc_node_range(shadow_size, 1, shadow_start,
                               shadow_start + shadow_size,
                               GFP_KERNEL | __GFP_ZERO,
                               PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE,
                               __builtin_return_address(0));

    if (ret) {
        find_vm_area(addr)->flags |= VM_KASAN;
        kmemleak_ignore(ret);
        return 0;
    }

    return -ENOMEM;
}
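The shadow sizing above follows from generic KASAN's 8-to-1 mapping of memory to shadow bytes (KASAN_SHADOW_SCALE_SHIFT is 3), with the result rounded up to whole pages for the vmalloc'ed shadow. A minimal user-space sketch of that arithmetic, assuming a 4096-byte page and illustrative module sizes:

/*
 * Sketch only: SHADOW_SCALE_SHIFT and PAGE_SIZE_EX mirror the kernel's
 * constants for illustration; the module sizes are made up.
 */
#include <stdio.h>

#define SHADOW_SCALE_SHIFT 3      /* 8 bytes of memory per shadow byte */
#define PAGE_SIZE_EX       4096UL /* assumed page size */

static unsigned long shadow_size(unsigned long size)
{
    unsigned long s = size >> SHADOW_SCALE_SHIFT;
    /* equivalent of round_up(s, PAGE_SIZE) */
    return (s + PAGE_SIZE_EX - 1) & ~(PAGE_SIZE_EX - 1);
}

int main(void)
{
    /* a 1 MiB module needs 128 KiB of shadow (already page aligned) */
    printf("%lu\n", shadow_size(1UL << 20));    /* 131072 */
    /* a 100 KiB module needs 12800 bytes, rounded up to 16384 */
    printf("%lu\n", shadow_size(100UL * 1024)); /* 16384 */
    return 0;
}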
static int change_memory_common(unsigned long addr, int numpages,
                                pgprot_t set_mask, pgprot_t clear_mask)
{
    unsigned long start = addr;
    unsigned long size = PAGE_SIZE*numpages;
    unsigned long end = start + size;
    int ret;
    struct page_change_data data;

    if (!PAGE_ALIGNED(addr)) {
        start &= PAGE_MASK;
        end = start + size;
        WARN_ON_ONCE(1);
    }

    if (start < MODULES_VADDR || start >= MODULES_END)
        return -EINVAL;

    if (end < MODULES_VADDR || end >= MODULES_END)
        return -EINVAL;

    if (!numpages)
        return 0;

    data.set_mask = set_mask;
    data.clear_mask = clear_mask;

    ret = apply_to_page_range(&init_mm, start, size, change_page_range,
                              &data);

    flush_tlb_kernel_range(start, end);
    return ret;
}
/*
 * Calculate the length sum of direct io vectors that can
 * be combined into one page vector.
 */
static size_t dio_get_pagev_size(const struct iov_iter *it)
{
    const struct iovec *iov = it->iov;
    const struct iovec *iovend = iov + it->nr_segs;
    size_t size;

    size = iov->iov_len - it->iov_offset;
    /*
     * An iov can be page vectored when both the current tail
     * and the next base are page aligned.
     */
    while (PAGE_ALIGNED((iov->iov_base + iov->iov_len)) &&
           (++iov < iovend && PAGE_ALIGNED((iov->iov_base)))) {
        size += iov->iov_len;
    }
    dout("dio_get_pagevlen len = %zu\n", size);
    return size;
}
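The coalescing condition above only inspects iovec boundaries: the tail of the current vector and the base of the next one must both fall on page boundaries. A self-contained sketch of the same test, with an assumed 4096-byte page and made-up buffer addresses (the buffers are never dereferenced):

#include <stdio.h>
#include <stdint.h>
#include <sys/uio.h>

#define PAGE_SIZE_EX 4096UL
#define IS_PAGE_ALIGNED(p) ((((uintptr_t)(p)) & (PAGE_SIZE_EX - 1)) == 0)

/* true when a's tail and b's base are both page aligned */
static int can_coalesce(const struct iovec *a, const struct iovec *b)
{
    return IS_PAGE_ALIGNED((char *)a->iov_base + a->iov_len) &&
           IS_PAGE_ALIGNED(b->iov_base);
}

int main(void)
{
    struct iovec a = { (void *)0x10000, 2 * PAGE_SIZE_EX }; /* tail aligned */
    struct iovec b = { (void *)0x20000, 512 };              /* base aligned */
    struct iovec c = { (void *)0x30100, 512 };              /* base unaligned */

    printf("a+b: %d\n", can_coalesce(&a, &b)); /* 1: one page vector */
    printf("a+c: %d\n", can_coalesce(&a, &c)); /* 0: must be split */
    return 0;
}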
static int change_memory_common(unsigned long addr, int numpages,
                                pgprot_t set_mask, pgprot_t clear_mask)
{
    unsigned long start = addr;
    unsigned long size = PAGE_SIZE*numpages;
    unsigned long end = start + size;
    struct vm_struct *area;
    int i;

    if (!PAGE_ALIGNED(addr)) {
        start &= PAGE_MASK;
        end = start + size;
        WARN_ON_ONCE(1);
    }

    /*
     * Kernel VA mappings are always live, and splitting live section
     * mappings into page mappings may cause TLB conflicts. This means
     * we have to ensure that changing the permission bits of the range
     * we are operating on does not result in such splitting.
     *
     * Let's restrict ourselves to mappings created by vmalloc (or vmap).
     * Those are guaranteed to consist entirely of page mappings, and
     * splitting is never needed.
     *
     * So check whether the [addr, addr + size) interval is entirely
     * covered by precisely one VM area that has the VM_ALLOC flag set.
     */
    area = find_vm_area((void *)addr);
    if (!area ||
        end > (unsigned long)area->addr + area->size ||
        !(area->flags & VM_ALLOC))
        return -EINVAL;

    if (!numpages)
        return 0;

    /*
     * If we are manipulating read-only permissions, apply the same
     * change to the linear mapping of the pages that back this VM area.
     */
    if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
                        pgprot_val(clear_mask) == PTE_RDONLY)) {
        for (i = 0; i < area->nr_pages; i++) {
            __change_memory_common((u64)page_address(area->pages[i]),
                                   PAGE_SIZE, set_mask, clear_mask);
        }
    }

    /*
     * Get rid of potentially aliasing lazily unmapped vm areas that may
     * have permissions set that deviate from the ones we are setting here.
     */
    vm_unmap_aliases();

    return __change_memory_common(start, size, set_mask, clear_mask);
}
void
dma_load(uint8_t channel, void *start, int count)
{
    KASSERT(PAGE_ALIGNED(start));

    prd_t* table = DMA_PRDS[channel];
    memset(table, 0, sizeof(prd_t));

    /* set up the PRD for this operation */
    table->prd_addr = pt_virt_to_phys((uintptr_t) start);
    table->prd_count = count;
    table->prd_last = 0x8000;

    return;
}
/**
 * Core handler for the sbrk system call.
 * @param[in] sbrk The sbrk message
 * @param[in] src  The calling endpoint
 * @retval 0       Heap end updated successfully
 * @retval -ENOMEM Update failed due to insufficient memory
 * @retval -ENOENT The calling thread could not be found
 * @retval -EINVAL The new heap end would fall below the heap start
 */
static int
handle_sbrk(vm_sys_sbrk *sbrk, endpoint src)
{
    int rc;
    thread *thr;
    void *cur_end;
    void *new_end;
    intrflags flags;

    acquire_all_thread_lock( &flags );

    thr = thr_find_thread_by_tid_nolock(src);
    if ( thr == NULL ) {
        rc = -ENOENT;
        goto unlock_out;
    }

    rc = proc_expand_heap(thr->p, NULL, &cur_end);
    if ( rc != 0 )
        goto unlock_out;

    if ( sbrk->inc == 0 ) {
        rc = 0;
        goto success_out;
    }

    new_end = ( PAGE_ALIGNED( (uintptr_t)(cur_end + sbrk->inc) ) ) ?
        ( (void *)( cur_end + sbrk->inc ) ) :
        ( (void *)PAGE_NEXT( (uintptr_t)(cur_end + sbrk->inc) ) );

    if ( new_end < thr->p->heap->start ) {
        rc = -EINVAL;
        goto unlock_out;
    }

    rc = proc_expand_heap(thr->p, new_end, &cur_end);
    if ( rc != 0 )
        goto unlock_out;

success_out:
    release_all_thread_lock(&flags);

    if ( rc == 0 )
        sbrk->old_heap_end = cur_end;

    return 0;

unlock_out:
    release_all_thread_lock(&flags);
    return rc;
}
size_t uvm_pmm_sysmem_mappings_dma_to_virt(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
                                           NvU64 dma_addr,
                                           NvU64 region_size,
                                           uvm_reverse_map_t *out_mappings,
                                           size_t max_out_mappings)
{
    NvU64 key;
    size_t num_mappings = 0;
    const NvU64 base_key = dma_addr / PAGE_SIZE;
    NvU32 num_pages = region_size / PAGE_SIZE;

    UVM_ASSERT(region_size >= PAGE_SIZE);
    UVM_ASSERT(PAGE_ALIGNED(region_size));
    UVM_ASSERT(sysmem_mappings->gpu->access_counters_supported);
    UVM_ASSERT(max_out_mappings > 0);

    uvm_spin_lock(&sysmem_mappings->reverse_map_lock);

    key = base_key;
    do {
        uvm_reverse_map_t *reverse_map = radix_tree_lookup(&sysmem_mappings->reverse_map_tree, key);

        if (reverse_map) {
            size_t num_chunk_pages = uvm_va_block_region_num_pages(reverse_map->region);
            NvU32 page_offset = key & (num_chunk_pages - 1);
            NvU32 num_mapping_pages = min(num_pages, (NvU32)num_chunk_pages - page_offset);

            // Sysmem mappings are removed during VA block destruction.
            // Therefore, we can safely retain the VA blocks as long as they
            // are in the reverse map and we hold the reverse map lock.
            uvm_va_block_retain(reverse_map->va_block);

            out_mappings[num_mappings] = *reverse_map;
            out_mappings[num_mappings].region.first += page_offset;
            out_mappings[num_mappings].region.outer =
                out_mappings[num_mappings].region.first + num_mapping_pages;

            if (++num_mappings == max_out_mappings)
                break;

            num_pages -= num_mapping_pages;
            key += num_mapping_pages;
        }
        else {
            --num_pages;
            ++key;
        }
    } while (num_pages > 0);

    uvm_spin_unlock(&sysmem_mappings->reverse_map_lock);

    return num_mappings;
}
/*
 * Touches (reads/writes) the first word in every page of memory pointed to by
 * buf of len bytes. If buf is not on the page boundary, the word at buf is
 * touched instead. Note that the page is only touched if it's reserved (i.e.
 * neither committed, nor free).
 *
 * This function is used to work around a bug in DosRead that causes it to fail
 * when interrupted by the exception handler, see
 * https://github.com/bitwiseworks/libcx/issues/21.
 */
void touch_pages(void *buf, size_t len)
{
    APIRET arc;
    ULONG dos_len;
    ULONG dos_flags;

    volatile ULONG buf_addr = (ULONG)buf;
    ULONG buf_end = buf_addr + len;

    /*
     * Note: we need to at least perform the write operation when touching so
     * that, in case it's our memory mapped region, it's marked dirty and
     * PAG_WRITE is set (on read PAG_WRITE would have been removed, which would
     * cause DosRead to fail too). And, to make sure that the touched region is
     * not corrupted by touching, we first read the target word and then simply
     * write it back.
     */

    if (!PAGE_ALIGNED(buf_addr))
    {
        dos_len = PAGE_SIZE;
        arc = DosQueryMem((PVOID)PAGE_ALIGN(buf_addr), &dos_len, &dos_flags);
        TRACE_IF(arc, "DosQueryMem = %lu\n", arc);
        if (!arc && !(dos_flags & (PAG_FREE | PAG_COMMIT)))
            *(int *)buf_addr = *(int *)buf_addr;

        buf_addr = PAGE_ALIGN(buf_addr) + PAGE_SIZE;
    }

    while (buf_addr < buf_end)
    {
        dos_len = ~0U;
        arc = DosQueryMem((PVOID)PAGE_ALIGN(buf_addr), &dos_len, &dos_flags);
        TRACE_IF(arc, "DosQueryMem = %lu\n", arc);
        if (!arc && !(dos_flags & (PAG_FREE | PAG_COMMIT)))
        {
            /* touch all pages within the reported range */
            dos_len += buf_addr;
            while (buf_addr < dos_len)
            {
                *(int *)buf_addr = *(int *)buf_addr;
                buf_addr += PAGE_SIZE;
            }
        }
        else
        {
            buf_addr += dos_len;
        }
    }
}
void
hal_page_control_etc(
                     physaddr_t p, void *page_start_addr,
                     page_mapped_t mapped, page_access_t access,
                     u_int32_t flags )
{
    assert(PAGE_ALIGNED(p));
    assert(PAGE_ALIGNED((unsigned)page_start_addr));
    assert((flags & INTEL_PTE_PFN) == 0);

    if(mapped == page_unmap) access = page_noaccess;

    // We need it for V86 mode - REDO IN A MORE SPECIFIC WAY, so that only
    // VM86 pages are user accessible
    int bits = INTEL_PTE_USER | flags;

    if(mapped == page_map)
        bits |= INTEL_PTE_VALID;

    if(mapped == page_map_io)
        bits |= INTEL_PTE_VALID|INTEL_PTE_WTHRU|INTEL_PTE_NCACHE;

    if(access == page_rw)
        bits |= INTEL_PTE_WRITE;

    pt_entry_t pte;

    pte = create_pte(p, bits);

    SHOW_FLOW( 7, "Mapping VA 0x%X to PA 0x%X, pte is 0x%X\n",
               page_start_addr, p, (long)pte );

    if(mapped != page_unmap )
        phantom_map_page( (linaddr_t)page_start_addr, pte );
    else
        phantom_unmap_page( (linaddr_t)page_start_addr );

    ftlbentry((int)page_start_addr);
}
/*
 * This function implements the munmap(2) syscall.
 *
 * As with do_mmap() it should perform the required error checking,
 * before calling upon vmmap_remove() to do most of the work.
 * Remember to clear the TLB.
 */
int
do_munmap(void *addr, size_t len)
{
    /*NOT_YET_IMPLEMENTED("VM: do_munmap");*/
    if ((size_t)addr < USER_MEM_LOW || (size_t)addr >= USER_MEM_HIGH) {
        return -EINVAL;
    }

    if(!PAGE_ALIGNED(addr)){
        dbg(DBG_PRINT,"Error: do_munmap failed because addr is not page aligned!\n");
        return -EINVAL;
    }

    if((len <= 0) || (len >= USER_MEM_HIGH - USER_MEM_LOW)){
        dbg(DBG_PRINT,"Error: do_munmap failed because len is out of range!\n");
        return -EINVAL;
    }

    vmmap_t *map = curproc->p_vmmap;
    uint32_t lopage = ADDR_TO_PN(addr);
    uint32_t hipage = ADDR_TO_PN((size_t)addr + len - 1) + 1;
    uint32_t npages = hipage - lopage;

    int retval = vmmap_remove(map, lopage, npages);
    if(retval < 0){
        dbg(DBG_PRINT,"Error: The unmapping of the vmarea was unsuccessful\n");
        return retval;
    }

    /* clear TLB for this vaddr */
    tlb_flush_range((uintptr_t)addr, npages);

    return 0;
}
/*!
 * Sets up resources for pmem context.
 * Later this will be split into implementation specific code,
 * one for pmem_block, one for pmem_mem.
 * The pmem_block implementation will allocate a double buffer,
 * the pmem_mem implementation will call DAX to retrieve the virtual
 * addresses for data and metadata for "cache_block" and "cloned_cache_block".
 */
int pmem_context_setup(struct bittern_cache *bc,
                       struct kmem_cache *kmem_slab,
                       struct cache_block *cache_block,
                       struct cache_block *cloned_cache_block,
                       struct pmem_context *ctx)
{
    struct data_buffer_info *dbi;

    ASSERT_BITTERN_CACHE(bc);
    ASSERT(kmem_slab == bc->bc_kmem_map || kmem_slab == bc->bc_kmem_threads);
    ASSERT(ctx != NULL);
    M_ASSERT(ctx->magic1 == PMEM_CONTEXT_MAGIC1);
    M_ASSERT(ctx->magic2 == PMEM_CONTEXT_MAGIC2);

    dbi = &ctx->dbi;

    /*
     * this code copied from pagebuf_allocate_dbi()
     * in bittern_cache_main.h
     */
    ASSERT(dbi->di_buffer_vmalloc_buffer == NULL);
    ASSERT(dbi->di_buffer_vmalloc_page == NULL);
    ASSERT(dbi->di_buffer_slab == NULL);
    ASSERT(dbi->di_buffer == NULL);
    ASSERT(dbi->di_page == NULL);
    ASSERT(dbi->di_flags == 0x0);
    ASSERT(atomic_read(&dbi->di_busy) == 0);

    dbi->di_buffer_vmalloc_buffer = kmem_cache_alloc(kmem_slab, GFP_NOIO);
    /*TODO_ADD_ERROR_INJECTION*/
    if (dbi->di_buffer_vmalloc_buffer == NULL) {
        BT_DEV_TRACE(BT_LEVEL_ERROR, bc, NULL, cache_block, NULL, NULL,
                     "kmem_cache_alloc kmem_slab failed");
        printk_err("%s: kmem_cache_alloc kmem_slab failed\n", bc->bc_name);
        return -ENOMEM;
    }

    ASSERT(PAGE_ALIGNED(dbi->di_buffer_vmalloc_buffer));
    dbi->di_buffer_vmalloc_page = virtual_to_page(dbi->di_buffer_vmalloc_buffer);
    ASSERT(dbi->di_buffer_vmalloc_page != NULL);
    dbi->di_buffer_slab = kmem_slab;

    return 0;
}
static int change_memory_common(unsigned long addr, int numpages,
                                pgprot_t set_mask, pgprot_t clear_mask)
{
    unsigned long start = addr;
    unsigned long size = PAGE_SIZE*numpages;
    unsigned long end = start + size;
    struct vm_struct *area;

    if (!PAGE_ALIGNED(addr)) {
        start &= PAGE_MASK;
        end = start + size;
        WARN_ON_ONCE(1);
    }

    /*
     * Kernel VA mappings are always live, and splitting live section
     * mappings into page mappings may cause TLB conflicts. This means
     * we have to ensure that changing the permission bits of the range
     * we are operating on does not result in such splitting.
     *
     * Let's restrict ourselves to mappings created by vmalloc (or vmap).
     * Those are guaranteed to consist entirely of page mappings, and
     * splitting is never needed.
     *
     * So check whether the [addr, addr + size) interval is entirely
     * covered by precisely one VM area that has the VM_ALLOC flag set.
     */
    area = find_vm_area((void *)addr);
    if (!area ||
        end > (unsigned long)area->addr + area->size ||
        !(area->flags & VM_ALLOC))
        return -EINVAL;

    if (!numpages)
        return 0;

    return __change_memory_common(start, size, set_mask, clear_mask);
}
/*
 * This function implements the munmap(2) syscall.
 *
 * As with do_mmap() it should perform the required error checking,
 * before calling upon vmmap_remove() to do most of the work.
 * Remember to clear the TLB.
 */
int
do_munmap(void *addr, size_t len)
{
    if ((uintptr_t) addr < USER_MEM_LOW ||
        USER_MEM_HIGH - (uint32_t) addr < len){
        return -EINVAL;
    }

    if (len == 0){
        return -EINVAL;
    }

    if (!PAGE_ALIGNED(addr)){
        return -EINVAL;
    }

    int ret = vmmap_remove(curproc->p_vmmap, ADDR_TO_PN(addr),
                           (uint32_t) PAGE_ALIGN_UP(len) / PAGE_SIZE);

    /* no need to unmap range or flush the tlb, since this is done in
     * vmmap_remove() */
    return ret;
}
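The page-count expression above, PAGE_ALIGN_UP(len) / PAGE_SIZE, matches the (len - 1) / PAGE_SIZE + 1 form used by the other do_munmap and do_mmap variants in this section for any len > 0. A small self-contained sketch verifying the equivalence, assuming a 4096-byte page and hypothetical *_EX macros standing in for the kernel's:

#include <assert.h>
#include <stddef.h>

#define PAGE_SIZE_EX 4096UL
#define PAGE_ALIGN_UP_EX(x) (((x) + PAGE_SIZE_EX - 1) & ~(PAGE_SIZE_EX - 1))

int main(void)
{
    for (size_t len = 1; len < 5 * PAGE_SIZE_EX; len++) {
        size_t a = PAGE_ALIGN_UP_EX(len) / PAGE_SIZE_EX;
        size_t b = (len - 1) / PAGE_SIZE_EX + 1;
        assert(a == b); /* 1 byte -> 1 page, 4097 bytes -> 2 pages, ... */
    }
    return 0;
}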
/* Helper function for the ELF loader. Maps the specified segment
 * of the program header from the given file in to the given address
 * space with the given memory offset (in pages). On success returns 0,
 * otherwise returns a negative error code for the ELF loader to return.
 * Note that since any error returned by this function should
 * cause the ELF loader to give up, it is acceptable for the
 * address space to be modified after returning an error.
 * Note that memoff can be negative */
static int _elf32_map_segment(vmmap_t *map, vnode_t *file, int32_t memoff,
                              const Elf32_Phdr *segment)
{
    uintptr_t addr;
    if (memoff < 0) {
        KASSERT(ADDR_TO_PN(segment->p_vaddr) > (uint32_t) -memoff);
        addr = (uintptr_t)segment->p_vaddr - (uintptr_t)PN_TO_ADDR(-memoff);
    } else {
        addr = (uintptr_t)segment->p_vaddr + (uintptr_t)PN_TO_ADDR(memoff);
    }
    uint32_t off = segment->p_offset;
    uint32_t memsz = segment->p_memsz;
    uint32_t filesz = segment->p_filesz;

    dbg(DBG_ELF, "Mapping program segment: type %#x, offset %#08x,"
        " vaddr %#08x, filesz %#x, memsz %#x, flags %#x, align %#x\n",
        segment->p_type, segment->p_offset, segment->p_vaddr,
        segment->p_filesz, segment->p_memsz, segment->p_flags,
        segment->p_align);

    /* check for bad data in the segment header */
    if (PAGE_SIZE != segment->p_align) {
        dbg(DBG_ELF, "ERROR: segment does not have correct alignment\n");
        return -ENOEXEC;
    } else if (filesz > memsz) {
        dbg(DBG_ELF, "ERROR: segment file size is greater than memory size\n");
        return -ENOEXEC;
    } else if (PAGE_OFFSET(addr) != PAGE_OFFSET(off)) {
        dbg(DBG_ELF, "ERROR: segment address and offset are not aligned correctly\n");
        return -ENOEXEC;
    }

    int perms = 0;
    if (PF_R & segment->p_flags) {
        perms |= PROT_READ;
    }
    if (PF_W & segment->p_flags) {
        perms |= PROT_WRITE;
    }
    if (PF_X & segment->p_flags) {
        perms |= PROT_EXEC;
    }

    if (0 < filesz) {
        /* something needs to be mapped from the file */
        /* start from the starting address and include enough pages to
         * map all filesz bytes of the file */
        uint32_t lopage = ADDR_TO_PN(addr);
        uint32_t npages = ADDR_TO_PN(addr + filesz - 1) - lopage + 1;
        off_t fileoff = (off_t)PAGE_ALIGN_DOWN(off);

        int ret;
        if (!vmmap_is_range_empty(map, lopage, npages)) {
            dbg(DBG_ELF, "ERROR: ELF file contains overlapping segments\n");
            return -ENOEXEC;
        } else if (0 > (ret = vmmap_map(map, file, lopage, npages, perms,
                                        MAP_PRIVATE | MAP_FIXED, fileoff,
                                        0, NULL))) {
            return ret;
        }
    }

    if (memsz > filesz) {
        /* there is left over memory in the segment which must
         * be initialized to 0 (anonymously mapped) */
        uint32_t lopage = ADDR_TO_PN(addr + filesz);
        uint32_t npages = ADDR_TO_PN(PAGE_ALIGN_UP(addr + memsz)) - lopage;

        int ret;
        if (npages > 1 && !vmmap_is_range_empty(map, lopage + 1, npages - 1)) {
            dbg(DBG_ELF, "ERROR: ELF file contains overlapping segments\n");
            return -ENOEXEC;
        } else if (0 > (ret = vmmap_map(map, NULL, lopage, npages, perms,
                                        MAP_PRIVATE | MAP_FIXED,
                                        0, 0, NULL))) {
            return ret;
        } else if (!PAGE_ALIGNED(addr + filesz) && filesz > 0) {
            /* In this case, we have accidentally zeroed too much of memory, as
             * we zeroed all memory in the page containing addr + filesz.
             * However, the remaining part of the data is not a full page, so we
             * should not just map in another page (as there could be garbage
             * after addr+filesz). For instance, consider the data-bss boundary
             * (c.f. Intel x86 ELF supplement pp. 82).
             * To fix this, we need to read in the contents of the file manually
             * and put them at that user space addr in the anon map we just
             * added. */
            void *buf;
            if (NULL == (buf = page_alloc()))
                return -ENOMEM;
            if (!(0 > (ret = file->vn_ops->read(file,
                                                (off_t) PAGE_ALIGN_DOWN(off + filesz),
                                                buf,
                                                PAGE_OFFSET(addr + filesz))))) {
                ret = vmmap_write(map, PAGE_ALIGN_DOWN(addr + filesz),
                                  buf, PAGE_OFFSET(addr + filesz));
            }
            page_free(buf);
            return ret;
        }
    }
    return 0;
}
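The partial-page fix-up at the end of _elf32_map_segment only ever copies the sub-page tail of the file data back over the freshly zeroed page. A worked example of that arithmetic, using an assumed 4096-byte page and illustrative segment values (not taken from a real binary):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE_EX 4096u
#define PAGE_OFFSET_EX(a)     ((a) & (PAGE_SIZE_EX - 1))
#define PAGE_ALIGN_DOWN_EX(a) ((a) & ~(PAGE_SIZE_EX - 1))

int main(void)
{
    uint32_t addr = 0x0804a000, filesz = 0x1234;

    /* the anon mapping zeroes the whole page containing addr + filesz ... */
    uint32_t boundary = addr + filesz;                           /* 0x0804b234 */
    printf("zeroed page starts at %#x\n", PAGE_ALIGN_DOWN_EX(boundary));
    /* ... so the first PAGE_OFFSET(addr + filesz) bytes of that page
     * must be re-filled from the file by hand */
    printf("bytes to copy back: %#x\n", PAGE_OFFSET_EX(boundary)); /* 0x234 */
    return 0;
}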
/*
 * This function implements the mmap(2) syscall, but only
 * supports the MAP_SHARED, MAP_PRIVATE, MAP_FIXED, and
 * MAP_ANON flags.
 *
 * Add a mapping to the current process's address space.
 * You need to do some error checking; see the ERRORS section
 * of the manpage for the problems you should anticipate.
 * After error checking most of the work of this function is
 * done by vmmap_map(), but remember to clear the TLB.
 */
int
do_mmap(void *addr, size_t len, int prot, int flags,
        int fd, off_t off, void **ret)
{
    /*NOT_YET_IMPLEMENTED("VM: do_mmap");*/
    if(((flags & MAP_FIXED) == MAP_FIXED) &&
       ((size_t)addr < USER_MEM_LOW || (size_t)addr >= USER_MEM_HIGH)){
        return (int)MAP_FAILED;
    }

    if(!PAGE_ALIGNED(addr) || /*!PAGE_ALIGNED(len) ||*/ !PAGE_ALIGNED(off)){
        dbg(DBG_PRINT,"Error: do_mmap failed because addr or off is not page aligned!\n");
        return (int)MAP_FAILED;
    }

    if((len <= 0) || (len >= USER_MEM_HIGH - USER_MEM_LOW)){
        dbg(DBG_PRINT,"Error: do_mmap failed because len is out of range!\n");
        return (int)MAP_FAILED;
    }

    if (!(((flags & MAP_PRIVATE) == MAP_PRIVATE) ||
          ((flags & MAP_SHARED) == MAP_SHARED))) {
        return (int)MAP_FAILED;
    }

    if (((fd >= NFILES) || (fd < 0)) && ((flags & MAP_ANON) != MAP_ANON)) {
        dbg(DBG_PRINT,"ERROR!!! fd = %d is out of range\n", fd);
        return (int)MAP_FAILED;
    }

    file_t *file = NULL;
    if ((flags & MAP_ANON) != MAP_ANON) {
        file = fget(fd);
        if (file == NULL) {
            return (int)MAP_FAILED;
        }
        if (((flags & MAP_PRIVATE) == MAP_PRIVATE) &&
            ((file->f_mode & FMODE_READ) != FMODE_READ)) {
            fput(file);
            return (int)MAP_FAILED;
        }
        if (((flags & MAP_SHARED) == MAP_SHARED) &&
            ((prot & PROT_WRITE) == PROT_WRITE) &&
            /*(((file->f_mode & FMODE_READ) != FMODE_READ) &&*/
            ((file->f_mode & FMODE_WRITE) != FMODE_WRITE)) {
            fput(file);
            return (int)MAP_FAILED;
        }
        if (((prot & PROT_WRITE) == PROT_WRITE) &&
            (file->f_mode == FMODE_APPEND)) {
            fput(file);
            return (int)MAP_FAILED;
        }
        if(file->f_vnode->vn_flags == VN_BUSY){
            fput(file);
            return (int)MAP_FAILED;
        }
    }

    *ret = NULL;

    vmmap_t *map = curproc->p_vmmap;
    uint32_t lopage = ADDR_TO_PN(addr);
    uint32_t hipage = ADDR_TO_PN((size_t)addr + len - 1) + 1;
    /*uint32_t hipage = ADDR_TO_PN((size_t)addr + len) + 1;*/
    uint32_t npages = hipage - lopage;
    vmarea_t *vma;
    int dir = VMMAP_DIR_HILO; /* see elf32.c */

    int retval;
    if ((flags & MAP_ANON) != MAP_ANON) {
        retval = vmmap_map(map, file->f_vnode, lopage, npages, prot, flags,
                           off, dir, &vma);
    } else {
        retval = vmmap_map(map, NULL, lopage, npages, prot, flags,
                           off, dir, &vma);
    }

    if(retval < 0){
        if ((flags & MAP_ANON) != MAP_ANON) {
            fput(file);
        }
        dbg(DBG_PRINT,"Error: The mapping of the vmarea was unsuccessful\n");
        return (int)MAP_FAILED;
    }

    *ret = PN_TO_ADDR(vma->vma_start);

    /* clear TLB for this vaddr */
    tlb_flush_range((uintptr_t)(*ret), npages);

    if ((flags & MAP_ANON) != MAP_ANON) {
        fput(file);
    }

    return 0;
}
/*
 * return values:
 * - negative errno values for unrecoverable data corruption.
 * - 1 for successful restore.
 * - 0 for no restore (crash occurred in the middle of a transaction).
 */
int pmem_block_restore(struct bittern_cache *bc,
                       struct cache_block *cache_block)
{
    struct pmem_block_metadata *pmbm;
    uint128_t hash_metadata, hash_data;
    int ret;
    void *buffer_vaddr;
    struct page *buffer_page;
    struct pmem_api *pa = &bc->bc_papi;
    int block_id;

    ASSERT(bc != NULL);
    ASSERT(pa->papi_bdev_size_bytes > 0);
    ASSERT(pa->papi_bdev != NULL);
    ASSERT(sizeof(struct pmem_header) == PAGE_SIZE);

    block_id = cache_block->bcb_block_id;

    ASSERT(pa->papi_hdr.lm_cache_blocks != 0);
    ASSERT(block_id >= 1 && block_id <= pa->papi_hdr.lm_cache_blocks);
    ASSERT(cache_block != NULL);
    ASSERT(cache_block->bcb_block_id == block_id);

    pmbm = kmem_alloc(sizeof(struct pmem_block_metadata), GFP_NOIO);
    /*TODO_ADD_ERROR_INJECTION*/
    if (pmbm == NULL) {
        BT_DEV_TRACE(BT_LEVEL_ERROR, bc, NULL, cache_block, NULL, NULL,
                     "kmem_alloc pmem_block_metadata failed");
        printk_err("%s: kmem_alloc pmem_block_metadata failed\n",
                   bc->bc_name);
        return -ENOMEM;
    }

    ret = pmem_read_sync(bc,
                         __cache_block_id_2_metadata_pmem_offset(bc, block_id),
                         pmbm,
                         sizeof(struct pmem_block_metadata));
    /*TODO_ADD_ERROR_INJECTION*/
    if (ret != 0) {
        ASSERT(ret < 0);
        BT_DEV_TRACE(BT_LEVEL_ERROR, bc, NULL, NULL, NULL, NULL,
                     "pmem_read_sync failed, ret=%d", ret);
        printk_err("%s: pmem_read_sync failed, ret=%d\n", bc->bc_name, ret);
        kmem_free(pmbm, sizeof(struct pmem_block_metadata));
        return ret;
    }

    /*
     * this can only happen if pmem is corrupt
     */
    if (pmbm->pmbm_magic != MCBM_MAGIC) {
        pa->papi_stats.restore_corrupt_metadata_blocks++;
        printk_err("block id #%u: error: magic number(s) mismatch, magic=0x%x/0x%x\n",
                   block_id, pmbm->pmbm_magic, MCBM_MAGIC);
        kmem_free(pmbm, sizeof(struct pmem_block_metadata));
        return -EHWPOISON;
    }

    hash_metadata = murmurhash3_128(pmbm, PMEM_BLOCK_METADATA_HASHING_SIZE);
    if (uint128_ne(hash_metadata, pmbm->pmbm_hash_metadata)) {
        printk_err("block id #%u: metadata hash mismatch: stored_hash_metadata=" UINT128_FMT ", computed_hash_metadata=" UINT128_FMT "\n",
                   block_id,
                   UINT128_ARG(pmbm->pmbm_hash_metadata),
                   UINT128_ARG(hash_metadata));
        pa->papi_stats.restore_hash_corrupt_metadata_blocks++;
        kmem_free(pmbm, sizeof(struct pmem_block_metadata));
        return -EHWPOISON;
    }

    if (CACHE_STATE_VALID(pmbm->pmbm_status)) {
        printk_info_ratelimited("block id #%u: metadata cache status valid %u(%s)\n",
                                block_id, pmbm->pmbm_status,
                                cache_state_to_str(pmbm->pmbm_status));
    } else {
        /*
         * this can only happen if pmem is corrupt
         */
        pa->papi_stats.restore_corrupt_metadata_blocks++;
        printk_err("block id #%u: error: metadata cache status invalid %u(%s)\n",
                   block_id, pmbm->pmbm_status,
                   cache_state_to_str(pmbm->pmbm_status));
        kmem_free(pmbm, sizeof(struct pmem_block_metadata));
        return -EHWPOISON;
    }

    if (pmbm->pmbm_status == S_INVALID) {
        printk_info_ratelimited("block id #%u: warning: metadata cache status is %u(%s), nothing to restore\n",
                                block_id, pmbm->pmbm_status,
                                cache_state_to_str(pmbm->pmbm_status));
        pa->papi_stats.restore_invalid_metadata_blocks++;
        pa->papi_stats.restore_invalid_data_blocks++;
        kmem_free(pmbm, sizeof(struct pmem_block_metadata));
        /*
         * restore ok
         */
        return 1;
    }

    if (pmbm->pmbm_status != S_CLEAN && pmbm->pmbm_status != S_DIRTY) {
        printk_info_ratelimited("block id #%u: warning: metadata cache status is %u(%s) (transaction in progress), nothing to restore\n",
                                block_id, pmbm->pmbm_status,
                                cache_state_to_str(pmbm->pmbm_status));
        pa->papi_stats.restore_pending_metadata_blocks++;
        kmem_free(pmbm, sizeof(struct pmem_block_metadata));
        /*
         * Intermediate state (crashed during a transaction).
         * Caller will ignore this restore and reinitialize.
         */
        return 0;
    }

    if (pmbm->pmbm_status == S_CLEAN) {
        pa->papi_stats.restore_valid_clean_metadata_blocks++;
    } else {
        ASSERT(pmbm->pmbm_status == S_DIRTY);
        pa->papi_stats.restore_valid_dirty_metadata_blocks++;
    }

    /*
     * if the metadata crc32c is ok, none of this should ever happen.
     */
    ASSERT(block_id == pmbm->pmbm_block_id);
    ASSERT(is_sector_cache_aligned(pmbm->pmbm_device_sector));

    buffer_vaddr = kmem_cache_alloc(bc->bc_kmem_map, GFP_NOIO);
    /*TODO_ADD_ERROR_INJECTION*/
    if (buffer_vaddr == NULL) {
        BT_DEV_TRACE(BT_LEVEL_ERROR, bc, NULL, cache_block, NULL, NULL,
                     "kmem_alloc kmem_map failed");
        printk_err("%s: kmem_alloc kmem_map failed\n", bc->bc_name);
        kmem_free(pmbm, sizeof(struct pmem_block_metadata));
        return -ENOMEM;
    }

    ASSERT(PAGE_ALIGNED(buffer_vaddr));
    buffer_page = virtual_to_page(buffer_vaddr);
    M_ASSERT(buffer_page != NULL);

    ret = pmem_read_sync(bc,
                         __cache_block_id_2_data_pmem_offset(bc, block_id),
                         buffer_vaddr,
                         PAGE_SIZE);
    /*TODO_ADD_ERROR_INJECTION*/
    if (ret != 0) {
        ASSERT(ret < 0);
        BT_DEV_TRACE(BT_LEVEL_ERROR, bc, NULL, NULL, NULL, NULL,
                     "pmem_read_sync failed, ret=%d", ret);
        printk_err("%s: pmem_read_sync failed, ret=%d\n", bc->bc_name, ret);
        kmem_cache_free(bc->bc_kmem_map, buffer_vaddr);
        kmem_free(pmbm, sizeof(struct pmem_block_metadata));
        return ret;
    }

    hash_data = murmurhash3_128(buffer_vaddr, PAGE_SIZE);

    ASSERT(PAGE_ALIGNED(buffer_vaddr));
    ASSERT(buffer_page != NULL);
    ASSERT(buffer_page == virtual_to_page(buffer_vaddr));
    kmem_cache_free(bc->bc_kmem_map, buffer_vaddr);

    if (uint128_ne(hash_data, pmbm->pmbm_hash_data)) {
        printk_err("block id #%u: data hash mismatch: stored_hash_data=" UINT128_FMT ", computed_hash_data=" UINT128_FMT "\n",
                   block_id,
                   UINT128_ARG(pmbm->pmbm_hash_data),
                   UINT128_ARG(hash_data));
        pa->papi_stats.restore_hash_corrupt_data_blocks++;
        kmem_free(pmbm, sizeof(struct pmem_block_metadata));
        return -EHWPOISON;
    }

    if (pmbm->pmbm_status == S_CLEAN) {
        pa->papi_stats.restore_valid_clean_data_blocks++;
    } else {
        ASSERT(pmbm->pmbm_status == S_DIRTY);
        pa->papi_stats.restore_valid_dirty_data_blocks++;
    }

    /*
     * everything checks out, restore metadata info into cache_block descriptor
     */
    cache_block->bcb_sector = pmbm->pmbm_device_sector;
    cache_block->bcb_state = pmbm->pmbm_status;
    cache_block->bcb_xid = pmbm->pmbm_xid;
    cache_block->bcb_hash_data = pmbm->pmbm_hash_data;
    ASSERT(cache_block->bcb_state == S_CLEAN ||
           cache_block->bcb_state == S_DIRTY);
    ASSERT(cache_block->bcb_sector != -1);
    ASSERT(is_sector_number_valid(cache_block->bcb_sector));
    ASSERT(cache_block->bcb_sector >= 0);

    printk_info_ratelimited("block id #%u: status=%u(%s), xid=%llu, sector=%llu, hash_metadata=" UINT128_FMT ", hash_data=" UINT128_FMT ": restore ok\n",
                            pmbm->pmbm_block_id,
                            pmbm->pmbm_status,
                            cache_state_to_str(pmbm->pmbm_status),
                            pmbm->pmbm_xid,
                            pmbm->pmbm_device_sector,
                            UINT128_ARG(pmbm->pmbm_hash_metadata),
                            UINT128_ARG(pmbm->pmbm_hash_data));

    kmem_free(pmbm, sizeof(struct pmem_block_metadata));
    return 1;
}
/*
 * This function implements the mmap(2) syscall, but only
 * supports the MAP_SHARED, MAP_PRIVATE, MAP_FIXED, and
 * MAP_ANON flags.
 *
 * Add a mapping to the current process's address space.
 * You need to do some error checking; see the ERRORS section
 * of the manpage for the problems you should anticipate.
 * After error checking most of the work of this function is
 * done by vmmap_map(), but remember to clear the TLB.
 */
int
do_mmap(void *addr, size_t len, int prot, int flags,
        int fd, off_t off, void **ret)
{
    if (len == 0){
        return -EINVAL;
    }

    if (!valid_map_type(flags)){
        return -EINVAL;
    }

    if (!PAGE_ALIGNED(off)){
        return -EINVAL;
    }

    if (!(flags & MAP_ANON) && (flags & MAP_FIXED) && !PAGE_ALIGNED(addr)){
        return -EINVAL;
    }

    if (addr != NULL && (uint32_t) addr < USER_MEM_LOW){
        return -EINVAL;
    }

    if (len > USER_MEM_HIGH){
        return -EINVAL;
    }

    if (addr != NULL && len > USER_MEM_HIGH - (uint32_t) addr){
        return -EINVAL;
    }

    if (addr == 0 && (flags & MAP_FIXED)){
        return -EINVAL;
    }

    /* if ((!(flags & MAP_PRIVATE) && !(flags & MAP_SHARED))*/
    /*|| ((flags & MAP_PRIVATE) && (flags & MAP_SHARED)))*/
    /*{*/
    /*return -EINVAL;*/
    /*}*/

    vnode_t *vnode;

    if (!(flags & MAP_ANON)){
        if (!valid_fd(fd) || curproc->p_files[fd] == NULL){
            return -EBADF;
        }

        file_t *f = curproc->p_files[fd];
        vnode = f->f_vnode;

        if ((flags & MAP_PRIVATE) && !(f->f_mode & FMODE_READ)){
            return -EACCES;
        }

        if ((flags & MAP_SHARED) && (prot & PROT_WRITE) &&
            !((f->f_mode & FMODE_READ) && (f->f_mode & FMODE_WRITE)))
        {
            return -EACCES;
        }

        /*if ((prot & PROT_WRITE) && (f->f_mode & FMODE_APPEND)){*/
        /*return -EACCES;*/
        /*}*/
    } else {
        vnode = NULL;
    }

    vmarea_t *vma;
    int retval = vmmap_map(curproc->p_vmmap, vnode, ADDR_TO_PN(addr),
                           (uint32_t) PAGE_ALIGN_UP(len) / PAGE_SIZE,
                           prot, flags, off, VMMAP_DIR_HILO, &vma);

    KASSERT(retval == 0 || retval == -ENOMEM);

    if (ret != NULL && retval >= 0){
        *ret = PN_TO_ADDR(vma->vma_start);

        pt_unmap_range(curproc->p_pagedir,
                       (uintptr_t) PN_TO_ADDR(vma->vma_start),
                       (uintptr_t) PN_TO_ADDR(vma->vma_start)
                       + (uintptr_t) PAGE_ALIGN_UP(len));

        tlb_flush_range((uintptr_t) PN_TO_ADDR(vma->vma_start),
                        (uint32_t) PAGE_ALIGN_UP(len) / PAGE_SIZE);
    }

    return retval;
}
/**
 * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
 * @node: pointer to the DT node
 * @ops: address of a pointer to the GICv3 operations
 * @params: address of a pointer to HW-specific parameters
 *
 * Returns 0 if a GICv3 has been found, with the low level operations
 * in *ops and the HW parameters in *params. Returns an error code
 * otherwise.
 */
int vgic_v3_probe(struct device_node *vgic_node,
                  const struct vgic_ops **ops,
                  const struct vgic_params **params)
{
    int ret = 0;
    u32 gicv_idx;
    struct resource vcpu_res;
    struct vgic_params *vgic = &vgic_v3_params;

    vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
    if (!vgic->maint_irq) {
        kvm_err("error getting vgic maintenance irq from DT\n");
        ret = -ENXIO;
        goto out;
    }

    ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);

    /*
     * The ListRegs field is 5 bits, but there is an architectural
     * maximum of 16 list registers. Just ignore bit 4...
     */
    vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1;
    vgic->can_emulate_gicv2 = false;

    if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx))
        gicv_idx = 1;

    gicv_idx += 3; /* Also skip GICD, GICC, GICH */
    if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) {
        kvm_info("GICv3: no GICV resource entry\n");
        vgic->vcpu_base = 0;
    } else if (!PAGE_ALIGNED(vcpu_res.start)) {
        pr_warn("GICV physical address 0x%llx not page aligned\n",
                (unsigned long long)vcpu_res.start);
        vgic->vcpu_base = 0;
    } else if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
        pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n",
                (unsigned long long)resource_size(&vcpu_res),
                PAGE_SIZE);
        vgic->vcpu_base = 0;
    } else {
        vgic->vcpu_base = vcpu_res.start;
        vgic->can_emulate_gicv2 = true;
        kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
                                KVM_DEV_TYPE_ARM_VGIC_V2);
    }
    if (vgic->vcpu_base == 0)
        kvm_info("disabling GICv2 emulation\n");
    kvm_register_device_ops(&kvm_arm_vgic_v3_ops, KVM_DEV_TYPE_ARM_VGIC_V3);

    vgic->vctrl_base = NULL;
    vgic->type = VGIC_V3;
    vgic->max_gic_vcpus = KVM_MAX_VCPUS;

    kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
             vcpu_res.start, vgic->maint_irq);

    *ops = &vgic_v3_ops;
    *params = vgic;

out:
    of_node_put(vgic_node);
    return ret;
}
/*
 * This function implements the brk(2) system call.
 *
 * This routine manages the calling process's "break" -- the ending address
 * of the process's "dynamic" region (often also referred to as the "heap").
 * The current value of a process's break is maintained in the 'p_brk' member
 * of the proc_t structure that represents the process in question.
 *
 * The 'p_brk' and 'p_start_brk' members of a proc_t struct are initialized
 * by the loader. 'p_start_brk' is subsequently never modified; it always
 * holds the initial value of the break. Note that the starting break is
 * not necessarily page aligned!
 *
 * 'p_start_brk' is the lower limit of 'p_brk' (that is, setting the break
 * to any value less than 'p_start_brk' should be disallowed).
 *
 * The upper limit of 'p_brk' is defined by the minimum of (1) the
 * starting address of the next occurring mapping or (2) USER_MEM_HIGH.
 * That is, growth of the process break is limited only in that it cannot
 * overlap with/expand into an existing mapping or beyond the region of
 * the address space allocated for use by userland. (note the presence of
 * the 'vmmap_is_range_empty' function).
 *
 * The dynamic region should always be represented by at most ONE vmarea.
 * Note that vmareas only have page granularity, you will need to take this
 * into account when deciding how to set the mappings if p_brk or p_start_brk
 * is not page aligned.
 *
 * You are guaranteed that the process data/bss region is non-empty.
 * That is, if the starting brk is not page-aligned, its page has
 * read/write permissions.
 *
 * If addr is NULL, you should NOT fail as the man page says. Instead,
 * "return" the current break. We use this to implement sbrk(0) without writing
 * a separate syscall. Look in user/libc/syscall.c if you're curious.
 *
 * Also, despite the statement on the manpage, you MUST support combined use
 * of brk and mmap in the same process.
 *
 * Note that this function "returns" the new break through the "ret" argument.
 * Return 0 on success, -errno on failure.
 */
int
do_brk(void *addr, void **ret)
{
    dbg(DBG_VM, "\n");

    void *cur_sbrk = curproc->p_start_brk;
    vmarea_t *vma;
    vmarea_t *nvma = NULL;

    if (addr == NULL) {
        *ret = curproc->p_brk;
        return 0;
    }

    KASSERT((uintptr_t)curproc->p_brk >= (uintptr_t)curproc->p_start_brk);

    if ((uintptr_t)cur_sbrk > (uintptr_t)addr) {
        return -ENOMEM;
    }

    /* check for upper limits */
    if ((uintptr_t)addr >= USER_MEM_HIGH)
        return -ENOMEM;

    /* round both breaks up to the next page number if not aligned */
    uintptr_t pbrk_pg = ADDR_TO_PN(curproc->p_brk);
    uintptr_t addr_pg = ADDR_TO_PN(addr);
    if (!PAGE_ALIGNED(addr))
        addr_pg++;
    if (!PAGE_ALIGNED(curproc->p_brk))
        pbrk_pg++;

    /* if they reside in the same page, just update p_brk */
    if (pbrk_pg == addr_pg) {
        curproc->p_brk = addr;
        *ret = addr;
        return 0;
    }

    /* Get dynamic region's vmarea */
    vma = vmmap_lookup(curproc->p_vmmap, ADDR_TO_PN(cur_sbrk));
    KASSERT(vma && "vmarea for the dynamic region is not found!");

    /* check to see if vma has a next vma */
    if (vma->vma_plink.l_next != &curproc->p_vmmap->vmm_list) {
        nvma = list_item(vma->vma_plink.l_next, vmarea_t, vma_plink);
        KASSERT(nvma && "next vmarea should exist but could not be found!");
    }

    /* check for upper limits */
    if (nvma && addr_pg > nvma->vma_start)
        return -ENOMEM;

    /* Now, update the vma, and curproc->p_brk */
    if (pbrk_pg > addr_pg) {
        vmmap_remove(curproc->p_vmmap, addr_pg, (pbrk_pg - addr_pg));
        tlb_flush_range((uintptr_t)PN_TO_ADDR(addr_pg), pbrk_pg - addr_pg);
    } else {
        vma->vma_end = addr_pg;
    }

    curproc->p_brk = addr;
    *ret = addr;
    return 0;
}
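Since vmareas have page granularity, do_brk only grows or shrinks the mapping when the old and new break round up to different page numbers; otherwise it just records the new break. A minimal sketch of that rounding, assuming a 4096-byte page and hypothetical *_EX stand-ins for the kernel macros:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE_EX 4096u
#define ADDR_TO_PN_EX(a) ((uint32_t)(a) / PAGE_SIZE_EX)
#define IS_PAGE_ALIGNED_EX(a) (((uint32_t)(a) % PAGE_SIZE_EX) == 0)

/* page number just past the break, as computed in do_brk */
static uint32_t brk_page(uint32_t brk)
{
    uint32_t pg = ADDR_TO_PN_EX(brk);
    return IS_PAGE_ALIGNED_EX(brk) ? pg : pg + 1;
}

int main(void)
{
    /* both breaks fall in the same page: only p_brk is updated */
    printf("%u %u\n", brk_page(0x1000100), brk_page(0x1000f00)); /* 4097 4097 */
    /* the new break needs one more page: the vmarea must grow */
    printf("%u %u\n", brk_page(0x1000f00), brk_page(0x1001f00)); /* 4097 4098 */
    return 0;
}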
/*
 * Returns a copy @out of the UEFI memory descriptor @in if it is covered
 * entirely by a UEFI memory map entry with matching attributes. The virtual
 * address of @out is set according to the matching entry that was found.
 */
static bool entry_is_valid(const efi_memory_desc_t *in, efi_memory_desc_t *out)
{
    u64 in_paddr = in->phys_addr;
    u64 in_size = in->num_pages << EFI_PAGE_SHIFT;
    efi_memory_desc_t *md;

    *out = *in;

    if (in->type != EFI_RUNTIME_SERVICES_CODE &&
        in->type != EFI_RUNTIME_SERVICES_DATA) {
        pr_warn("Entry type should be RuntimeServiceCode/Data\n");
        return false;
    }

    if (!(in->attribute & (EFI_MEMORY_RO | EFI_MEMORY_XP))) {
        pr_warn("Entry attributes invalid: RO and XP bits both cleared\n");
        return false;
    }

    if (PAGE_SIZE > EFI_PAGE_SIZE &&
        (!PAGE_ALIGNED(in->phys_addr) ||
         !PAGE_ALIGNED(in->num_pages << EFI_PAGE_SHIFT))) {
        /*
         * Since arm64 may execute with page sizes of up to 64 KB, the
         * UEFI spec mandates that RuntimeServices memory regions must
         * be 64 KB aligned. We need to validate this here since we will
         * not be able to tighten permissions on such regions without
         * affecting adjacent regions.
         */
        pr_warn("Entry address region misaligned\n");
        return false;
    }

    for_each_efi_memory_desc(md) {
        u64 md_paddr = md->phys_addr;
        u64 md_size = md->num_pages << EFI_PAGE_SHIFT;

        if (!(md->attribute & EFI_MEMORY_RUNTIME))
            continue;
        if (md->virt_addr == 0) {
            /* no virtual mapping has been installed by the stub */
            break;
        }

        if (md_paddr > in_paddr || (in_paddr - md_paddr) >= md_size)
            continue;

        /*
         * This entry covers the start of @in, check whether
         * it covers the end as well.
         */
        if (md_paddr + md_size < in_paddr + in_size) {
            pr_warn("Entry covers multiple EFI memory map regions\n");
            return false;
        }

        if (md->type != in->type) {
            pr_warn("Entry type deviates from EFI memory map region type\n");
            return false;
        }

        out->virt_addr = in_paddr + (md->virt_addr - md_paddr);
        return true;
    }

    pr_warn("No matching entry found in the EFI memory map\n");
    return false;
}
/*
 * This function implements the mmap(2) syscall, but only
 * supports the MAP_SHARED, MAP_PRIVATE, MAP_FIXED, and
 * MAP_ANON flags.
 *
 * Add a mapping to the current process's address space.
 * You need to do some error checking; see the ERRORS section
 * of the manpage for the problems you should anticipate.
 * After error checking most of the work of this function is
 * done by vmmap_map(), but remember to clear the TLB.
 */
int
do_mmap(void *addr, size_t len, int prot, int flags,
        int fd, off_t off, void **ret)
{
    dbg(DBG_PRINT,"go into do_mmap\n");
    file_t* file = NULL;
    if(!PAGE_ALIGNED(off))
    {
        dbg(DBG_PRINT,"(GRADING3C)\n");
        return -EINVAL;
    }
    if(len <= 0||len > 0xc0000000)
    {
        dbg(DBG_PRINT,"(GRADING3C)\n");
        return -EINVAL;
    }
    if (((uint32_t)addr < USER_MEM_LOW || (uint32_t)addr > USER_MEM_HIGH) &&
        (flags & MAP_FIXED))
    {
        dbg(DBG_PRINT,"(GRADING3C)\n");
        return -1;
    }
    if(!(flags & MAP_SHARED || flags & MAP_PRIVATE))
    {
        dbg(DBG_PRINT,"(GRADING3C)\n");
        return -EINVAL;
    }

    vnode_t *vn = NULL;
    int status = 0;
    uint32_t lopages = 0;
    size_t npages = (len - 1)/PAGE_SIZE + 1;
    vmarea_t *newvma = NULL;

    if(flags & MAP_FIXED)
    {
        dbg(DBG_PRINT,"(GRADING3C)\n");
        lopages = ADDR_TO_PN( addr );
    }

    if(!(flags & MAP_ANON))
    {
        dbg(DBG_PRINT,"(GRADING3B)\n");
        if(fd < 0 || fd >= NFILES)
        {
            dbg(DBG_PRINT,"(GRADING3C)\n");
            return -1;
        }
        file = fget(fd);
        /* check for NULL before dereferencing file */
        if(file == NULL)
        {
            dbg(DBG_PRINT,"(GRADING3C)\n");
            return -1;
        }
        if((prot & PROT_WRITE && MAP_SHARED & flags) &&
           (file->f_mode == FMODE_READ))
        {
            dbg(DBG_PRINT,"(GRADING3C)\n");
            fput(file);
            return -1;
        }
        vn = file->f_vnode;
    }

    status = vmmap_map(curproc->p_vmmap, vn, lopages, npages, prot, flags,
                       off, VMMAP_DIR_HILO, &newvma);

    if(file != NULL)
    {
        dbg(DBG_PRINT,"(GRADING3B)\n");
        fput(file);
    }
    if(newvma != NULL)
    {
        dbg(DBG_PRINT,"(GRADING3B)\n");
        *ret = PN_TO_ADDR(newvma->vma_start);
    }
    if(status < 0)
    {
        dbg(DBG_PRINT,"(GRADING3C)\n");
        KASSERT(file == NULL);
        return status;
    }

    tlb_flush_all();
    KASSERT(curproc->p_pagedir != NULL);
    dbg(DBG_VM, "(GRADING3A 2.a)\n");
    return 0;
}