bool FakeStackAllocator::allocate(cuda::GpuMat* mat, int rows, int cols, size_t elemSize)
{
    if (memStack_ == 0)
        return false;

    size_t pitch, memSize;

    if (rows > 1 && cols > 1)
    {
        pitch = alignUp(cols * elemSize, alignment_);
        memSize = pitch * rows;
    }
    else
    {
        // Single row or single column must be continuous
        pitch = elemSize * cols;
        memSize = alignUp(elemSize * cols * rows, 64);
    }

    unsigned char* ptr = memStack_->requestMemory(memSize);
    if (ptr == 0)
        return false;

    mat->data = ptr;
    mat->step = pitch;
    mat->refcount = (int*) cv::fastMalloc(sizeof(int));

    return true;
}
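// The snippets collected here all assume alignUp/alignDown helpers that round a
// value up or down to a multiple of an alignment. Their exact signatures differ
// per project (some take pointers, one takes an implicit page size); the
// definitions below are only a minimal sketch assuming power-of-two alignments,
// not the implementation used by any of the functions above or below.
#include <cassert>
#include <cstdint>

static inline uintptr_t alignUp(uintptr_t value, uintptr_t alignment)
{
    assert(alignment != 0 && (alignment & (alignment - 1)) == 0); // power of two only
    return (value + alignment - 1) & ~(alignment - 1);
}

static inline uintptr_t alignDown(uintptr_t value, uintptr_t alignment)
{
    assert(alignment != 0 && (alignment & (alignment - 1)) == 0); // power of two only
    return value & ~(alignment - 1);
}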
void * get(size_t size, uint32_t alignment = 4)
{
    // Ensure section alignment
    auto alignOffset = alignUp(mPtr, alignment) - mPtr;
    size += alignOffset;

    // Double-check alignment
    void * ptrOut = mPtr + alignOffset;
    assert(alignUp(ptrOut, alignment) == ptrOut);

    // Make sure we have room
    assert(mPtr + size <= mEnd);

    mPtr += size;
    return ptrOut;
}
NIns* Assembler::genPrologue(RegisterMask needSaving)
{
    /**
     * Prologue
     */
    uint32_t stackNeeded = STACK_GRANULARITY * _activation.highwatermark;
    uint32_t savingCount = 0;

    for (Register i = FirstReg; i <= LastReg; i = nextreg(i))
        if (needSaving & rmask(i))
            savingCount++;

    // After forcing alignment, we've pushed the pre-alignment SP
    // and savingCount registers.
    uint32_t stackPushed = STACK_GRANULARITY * (1 + savingCount);
    uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK);
    uint32_t amt = aligned - stackPushed;

    // Reserve stackNeeded bytes, padded
    // to preserve NJ_ALIGN_STACK-byte alignment.
    if (amt)
    {
#if defined NANOJIT_IA32
        SUBi(SP, amt);
#elif defined NANOJIT_AMD64
        SUBQi(SP, amt);
#endif
    }

    verbose_only( verbose_outputf(" %p:",_nIns); )
void allocatePages(struct vma* vmas)
{
    int i = 0;
    struct page* newPage = NULL;
    int perms;
    int numPages = ((uint64_t)alignUp((void *)vmas->end) - (uint64_t)alignDown((void *)vmas->start)) / PAGE_SIZE;

    if (vmas->flags_vma & 0x2)
        perms = BIT_RW | BIT_PRESENT | BIT_USER;
    else
        perms = BIT_PRESENT | BIT_USER;

    for (i = 0; i < numPages; i++) {
        if (!(*pml4Walk(current_pcb->pml4, (uint64_t)(vmas->start + i*PAGE_SIZE)))) {
            newPage = page_alloc();
            page_insert(current_pcb->pml4, (void *)(vmas->start + i*PAGE_SIZE), newPage, perms);
            //printf("Inserted new page PA:%x at faulting addr: %x\n", getPA(newPage), vmas->start+i*PAGE_SIZE);
            //printf("walk: %x\n",*pml4Walk(current_pcb->pml4, vmas->start+i*PAGE_SIZE));
        }
    }
}
void LargeObjectCache::cleanupCacheIfNeededOnRange(uintptr_t range, uintptr_t currTime)
{
    if (range >= cacheCleanupFreq
        || currTime+range < currTime-1 // overflow, 0 is power of 2, do cleanup
        // (prev; prev+range] contains n*cacheCleanupFreq
        || alignUp(currTime, cacheCleanupFreq) <= currTime+range)
        doCleanup(currTime, /*doThreshDecr=*/false);
}
NIns* Assembler::genPrologue(RegisterMask needSaving)
{
    /**
     * Prologue
     */

    // NJ_RESV_OFFSET is space at the top of the stack for us
    // to use for parameter passing (8 bytes at the moment)
    uint32_t stackNeeded = 4 * _activation.highwatermark + NJ_STACK_OFFSET;
    uint32_t savingCount = 0;
    uint32_t savingMask = 0;

#if defined(NJ_THUMB_JIT)
    savingCount = 5; // R4-R7, LR
    savingMask = 0xF0;
    (void)needSaving;
#else
    savingCount = 9; // R4-R10, R11, LR
    savingMask = SavedRegs | rmask(FRAME_PTR);
    (void)needSaving;
#endif

    // so for alignment purposes we've pushed return addr, fp, and savingCount registers
    uint32_t stackPushed = 4 * (2 + savingCount);
    uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK);
    int32_t amt = aligned - stackPushed;

    // Make room on stack for what we are doing
    if (amt)
#ifdef NJ_THUMB_JIT
    {
        // largest value is 508 (7 bits << 2)
        if (amt > 508)
        {
            int size = 508;
            while (size > 0)
            {
                SUBi(SP, size);
                amt -= size;
                size = amt;
                if (size > 508)
                    size = 508;
            }
        }
        else
            SUBi(SP, amt);
    }
#else
    {
        SUBi(SP, amt);
    }
#endif

    verbose_only( verbose_outputf(" %p:",_nIns); )
void AddressSpace::delRMem(VAddr begVAddr, VAddr endVAddr){
  begVAddr = alignDown(begVAddr, getPageSize());
  endVAddr = alignUp(endVAddr, getPageSize());
  RAddr begRAddr = pageTable[getVPage(begVAddr)];
  free((void *)begRAddr);
  for(VAddr pageNum = getVPage(begVAddr); pageNum != getVPage(endVAddr); pageNum++){
    if(pageTable[pageNum] != begRAddr + (pageNum*getPageSize() - begVAddr))
      fatal("AddressSpace::delRMem region not allocated contiguously");
    pageTable[pageNum] = 0;
  }
}
void AddressSpace::newRMem(VAddr begVAddr, VAddr endVAddr){
  begVAddr = alignDown(begVAddr, getPageSize());
  endVAddr = alignUp(endVAddr, getPageSize());
  void *realMem;
  if(posix_memalign(&realMem, getPageSize(), endVAddr - begVAddr))
    fatal("AddressSpace::newRMem could not allocate memory\n");
  for(size_t pageNum = getVPage(begVAddr); pageNum != getVPage(endVAddr); pageNum++){
    if(pageTable[pageNum])
      fatal("AddressSpace::newRMem region overlaps with existing memory");
    pageTable[pageNum] = (RAddr)realMem + (pageNum*getPageSize() - begVAddr);
  }
}
static mps_res_t make(mps_addr_t *p, mps_ap_t ap, size_t size, mps_align_t align)
{
  mps_res_t res;

  size = alignUp(size, align);
  do {
    MPS_RESERVE_BLOCK(res, *p, ap, size);
    if(res != MPS_RES_OK)
      return res;
  } while(!mps_commit(ap, *p, size));

  return MPS_RES_OK;
}
void *ExtMemoryPool::mallocLargeObject(size_t size, size_t alignment)
{
    size_t headersSize = sizeof(LargeMemoryBlock) + sizeof(LargeObjectHdr);
    // TODO: take into account that they are already largeObjectAlignment-aligned
    size_t allocationSize = alignUp(size + headersSize + alignment, largeBlockCacheStep);
    if (allocationSize < size) // allocationSize is wrapped around after alignUp
        return NULL;

    LargeMemoryBlock* lmb = loc.get(this, allocationSize);
    if (!lmb) {
        BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/true);
        if (backRefIdx.isInvalid())
            return NULL;

        // unalignedSize is set in getLargeBlock
        lmb = backend.getLargeBlock(allocationSize);
        if (!lmb) {
            removeBackRef(backRefIdx);
            return NULL;
        }
        lmb->backRefIdx = backRefIdx;
        STAT_increment(getThreadId(), ThreadCommonCounters, allocNewLargeObj);
    }

    void *alignedArea = (void*)alignUp((uintptr_t)lmb + headersSize, alignment);
    LargeObjectHdr *header = (LargeObjectHdr*)alignedArea - 1;
    header->memoryBlock = lmb;
    header->backRefIdx = lmb->backRefIdx;
    setBackRef(header->backRefIdx, header);

    lmb->objectSize = size;

    MALLOC_ASSERT( isLargeObject(alignedArea), ASSERT_TEXT );
    return alignedArea;
}
VAddr AddressSpace::findVMemLow(size_t memSize){
  size_t needPages = alignUp(memSize, getPageSize()) / getPageSize();
  size_t foundPages = 0;
  // Skip the first (zero) page, to avoid making null pointers valid
  size_t pageNum = 1;
  while(foundPages < needPages){
    if(pageTable[pageNum])
      foundPages = 0;
    else
      foundPages++;
    pageNum++;
    if(pageNum == pageTable.size())
      fatal("AddressSpace::findVMemLow not enough available virtual memory\n");
  }
  return (pageNum - needPages) * getPageSize();
}
VAddr AddressSpace::findVMemHigh(size_t memSize){
  size_t needPages = alignUp(memSize, getPageSize()) / getPageSize();
  size_t foundPages = 0;
  // Skip the last page, it creates addressing problems
  // because its upper-bound address is 0 due to wrap-around
  size_t pageNum = pageTable.size() - 1;
  while(foundPages < needPages){
    pageNum--;
    // Cannot use page zero because that would make the null pointer valid
    if(pageNum == 0)
      fatal("AddressSpace::findVMemHigh not enough available virtual memory\n");
    if(pageTable[pageNum]){
      foundPages = 0;
    }else{
      foundPages++;
    }
  }
  return pageNum * getPageSize();
}
void * MEMAllocFromExpHeapEx(ExpandedHeap *heap, uint32_t size, int alignment)
{
   ScopedSpinLock lock(&heap->lock);
   p32<ExpandedHeapBlock> freeBlock = nullptr, usedBlock = nullptr;
   auto direction = HeapDirection::FromBottom;
   uint32_t base;

   if (alignment < 0) {
      alignment = -alignment;
      direction = HeapDirection::FromTop;
   }

   // Add size for block header and alignment
   size += sizeof(ExpandedHeapBlock);
   size += alignment;

   if (heap->mode == HeapMode::FirstFree) {
      if (direction == HeapDirection::FromBottom) {
         // Find first block large enough from bottom of heap
         for (auto block = heap->freeBlockList; block; block = block->next) {
            if (block->size < size) {
               continue;
            }

            freeBlock = block;
            break;
         }
      } else if (direction == HeapDirection::FromTop) {
         // Find first block large enough from top of heap
         for (auto block = getTail(heap->freeBlockList); block; block = block->prev) {
            if (block->size < size) {
               continue;
            }

            freeBlock = block;
            break;
         }
      }
   } else if (heap->mode == HeapMode::NearestSize) {
      uint32_t nearestSize = -1;

      if (direction == HeapDirection::FromBottom) {
         // Find block nearest in size from bottom of heap
         for (auto block = heap->freeBlockList; block; block = block->next) {
            if (block->size < size) {
               continue;
            }

            if (block->size - size < nearestSize) {
               nearestSize = block->size - size;
               freeBlock = block;
            }
         }
      } else if (direction == HeapDirection::FromTop) {
         // Find block nearest in size from top of heap
         for (auto block = getTail(heap->freeBlockList); block; block = block->prev) {
            if (block->size < size) {
               continue;
            }

            if (block->size - size < nearestSize) {
               nearestSize = block->size - size;
               freeBlock = block;
            }
         }
      }
   }

   if (!freeBlock) {
      gLog->error("MEMAllocFromExpHeapEx failed, no free block found");
      MEMiDumpExpHeap(heap);
      return 0;
   }

   if (direction == HeapDirection::FromBottom) {
      // Reduce freeblock size
      base = freeBlock->addr;
      freeBlock->size -= size;

      if (freeBlock->size < minimumBlockSize) {
         // Absorb free block as it is too small
         size += freeBlock->size;
         eraseBlock(heap->freeBlockList, freeBlock);
      } else {
         auto freeSize = freeBlock->size;

         // Replace free block
         auto old = freeBlock;
         freeBlock = make_p32<ExpandedHeapBlock>(base + size);
         freeBlock->addr = base + size;
         freeBlock->size = freeSize;
         replaceBlock(heap->freeBlockList, old, freeBlock);
      }
   } else if (direction == HeapDirection::FromTop) {
      // Reduce freeblock size
      freeBlock->size -= size;
      base = freeBlock->addr + freeBlock->size;

      if (freeBlock->size < minimumBlockSize) {
         // Absorb free block as it is too small
         size += freeBlock->size;
         eraseBlock(heap->freeBlockList, freeBlock);
      }
   }

   // Create a new used block
   auto aligned = alignUp(base + static_cast<uint32_t>(sizeof(ExpandedHeapBlock)), alignment);
   usedBlock = make_p32<ExpandedHeapBlock>(aligned - static_cast<uint32_t>(sizeof(ExpandedHeapBlock)));
   usedBlock->addr = base;
   usedBlock->size = size;
   usedBlock->group = heap->group;
   usedBlock->direction = direction;
   insertBlock(heap->usedBlockList, usedBlock);
   return make_p32<void>(aligned);
}
static mps_res_t stress(mps_arena_t arena, mps_pool_debug_option_s *options,
                        size_t (*size)(size_t i), mps_align_t align,
                        const char *name, mps_pool_class_t pool_class,
                        mps_arg_s *args)
{
  mps_res_t res;
  mps_pool_t pool;
  size_t i, k;
  int *ps[testSetSIZE];
  size_t ss[testSetSIZE];
  size_t allocated = 0;         /* Total allocated memory */
  size_t debugOverhead = options ? 2 * alignUp(options->fence_size, align) : 0;

  printf("Pool class %s, alignment %u\n", name, (unsigned)align);

  res = mps_pool_create_k(&pool, arena, pool_class, args);
  if (res != MPS_RES_OK)
    return res;

  /* allocate a load of objects */
  for (i = 0; i < testSetSIZE; ++i) {
    mps_addr_t obj;
    ss[i] = (*size)(i);
    res = mps_alloc(&obj, pool, ss[i]);
    if (res != MPS_RES_OK)
      return res;
    ps[i] = obj;
    allocated += alignUp(ss[i], align) + debugOverhead;
    if (ss[i] >= sizeof(ps[i]))
      *ps[i] = 1; /* Write something, so it gets swap. */
    check_allocated_size(pool, allocated);
  }

  mps_pool_check_fenceposts(pool);

  for (k = 0; k < testLOOPS; ++k) {
    /* shuffle all the objects */
    for (i = 0; i < testSetSIZE; ++i) {
      size_t j = rnd() % (testSetSIZE - i);
      void *tp;
      size_t ts;
      tp = ps[j]; ts = ss[j];
      ps[j] = ps[i]; ss[j] = ss[i];
      ps[i] = tp; ss[i] = ts;
    }

    /* free half of the objects */
    /* upper half, as when allocating them again we want smaller objects */
    /* see randomSize() */
    for (i = testSetSIZE/2; i < testSetSIZE; ++i) {
      mps_free(pool, (mps_addr_t)ps[i], ss[i]);
      /* if (i == testSetSIZE/2) */
      /*   PoolDescribe((Pool)pool, mps_lib_stdout); */
      Insist(alignUp(ss[i], align) + debugOverhead <= allocated);
      allocated -= alignUp(ss[i], align) + debugOverhead;
    }

    /* allocate some new objects */
    for (i = testSetSIZE/2; i < testSetSIZE; ++i) {
      mps_addr_t obj;
      ss[i] = (*size)(i);
      res = mps_alloc(&obj, pool, ss[i]);
      if (res != MPS_RES_OK)
        return res;
      ps[i] = obj;
      allocated += alignUp(ss[i], align) + debugOverhead;
    }

    check_allocated_size(pool, allocated);
  }

  die(PoolDescribe(pool, mps_lib_get_stdout(), 0), "PoolDescribe");
  mps_pool_destroy(pool);

  return MPS_RES_OK;
}
void TestObjectRecognition()
{
    size_t headersSize = sizeof(LargeMemoryBlock) + sizeof(LargeObjectHdr);
    unsigned falseObjectSize = 113; // unsigned is the type expected by getObjectSize
    size_t obtainedSize;

    ASSERT(sizeof(BackRefIdx)==4, "Unexpected size of BackRefIdx");
    ASSERT(getObjectSize(falseObjectSize)!=falseObjectSize, "Error in test: bad choice for false object size");

    void* mem = scalable_malloc(2*slabSize);
    ASSERT(mem, "Memory was not allocated");
    Block* falseBlock = (Block*)alignUp((uintptr_t)mem, slabSize);
    falseBlock->objectSize = falseObjectSize;
    char* falseSO = (char*)falseBlock + falseObjectSize*7;
    ASSERT(alignDown(falseSO, slabSize)==(void*)falseBlock, "Error in test: false object offset is too big");

    void* bufferLOH = scalable_malloc(2*slabSize + headersSize);
    ASSERT(bufferLOH, "Memory was not allocated");
    LargeObjectHdr* falseLO = (LargeObjectHdr*)alignUp((uintptr_t)bufferLOH + headersSize, slabSize);
    LargeObjectHdr* headerLO = (LargeObjectHdr*)falseLO-1;
    headerLO->memoryBlock = (LargeMemoryBlock*)bufferLOH;
    headerLO->memoryBlock->unalignedSize = 2*slabSize + headersSize;
    headerLO->memoryBlock->objectSize = slabSize + headersSize;
    headerLO->backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/true);
    setBackRef(headerLO->backRefIdx, headerLO);
    ASSERT(scalable_msize(falseLO) == slabSize + headersSize, "Error in test: LOH falsification failed");
    removeBackRef(headerLO->backRefIdx);

    const int NUM_OF_IDX = BR_MAX_CNT+2;
    BackRefIdx idxs[NUM_OF_IDX];
    for (int cnt=0; cnt<2; cnt++) {
        for (int master = -10; master<10; master++) {
            falseBlock->backRefIdx.master = (uint16_t)master;
            headerLO->backRefIdx.master = (uint16_t)master;

            for (int bl = -10; bl<BR_MAX_CNT+10; bl++) {
                falseBlock->backRefIdx.offset = (uint16_t)bl;
                headerLO->backRefIdx.offset = (uint16_t)bl;

                for (int largeObj = 0; largeObj<2; largeObj++) {
                    falseBlock->backRefIdx.largeObj = largeObj;
                    headerLO->backRefIdx.largeObj = largeObj;

                    obtainedSize = safer_scalable_msize(falseSO, NULL);
                    ASSERT(obtainedSize==0, "Incorrect pointer accepted");
                    obtainedSize = safer_scalable_msize(falseLO, NULL);
                    ASSERT(obtainedSize==0, "Incorrect pointer accepted");
                }
            }
        }
        if (cnt == 1) {
            for (int i=0; i<NUM_OF_IDX; i++)
                removeBackRef(idxs[i]);
            break;
        }
        for (int i=0; i<NUM_OF_IDX; i++) {
            idxs[i] = BackRefIdx::newBackRef(/*largeObj=*/false);
            setBackRef(idxs[i], NULL);
        }
    }

    char *smallPtr = (char*)scalable_malloc(falseObjectSize);
    obtainedSize = safer_scalable_msize(smallPtr, NULL);
    ASSERT(obtainedSize==getObjectSize(falseObjectSize), "Correct pointer not accepted?");
    scalable_free(smallPtr);

    obtainedSize = safer_scalable_msize(mem, NULL);
    ASSERT(obtainedSize>=2*slabSize, "Correct pointer not accepted?");
    scalable_free(mem);
    scalable_free(bufferLOH);
}
static void processSections(UserModule &module, std::vector<elf::Section> &sections, const char *strData)
{
   auto dataRange = std::make_pair(0u, 0u);
   auto codeRange = std::make_pair(0u, 0u);

   // Find all code & data sections and their address space
   for (auto i = 0u; i < sections.size(); ++i) {
      auto &rplSection = sections[i];
      auto &header = rplSection.header;

      if (header.type != elf::SHT_PROGBITS && header.type != elf::SHT_NOBITS) {
         continue;
      }

      auto section = new UserModule::Section();
      section->index = i;
      section->address = header.addr;
      section->name = strData + header.name;
      section->size = static_cast<uint32_t>(rplSection.data.size());

      if (header.type == elf::SHT_NOBITS) {
         section->size = header.size;
      }

      auto start = section->address;
      auto end = section->address + section->size;

      if (header.flags & elf::SHF_EXECINSTR) {
         section->type = UserModule::Section::Code;

         if (codeRange.first == 0 || start < codeRange.first) {
            codeRange.first = start;
         }

         if (codeRange.second == 0 || end > codeRange.second) {
            codeRange.second = end;
         }
      } else {
         section->type = UserModule::Section::Data;

         if (dataRange.first == 0 || start < dataRange.first) {
            dataRange.first = start;
         }

         if (dataRange.second == 0 || end > dataRange.second) {
            dataRange.second = end;
         }
      }

      rplSection.section = section;
      module.sections.push_back(section);
      module.sectionMap[section->name] = section;
   }

   // Create thunk sections for SHT_RPL_IMPORTS!
   for (auto i = 0u; i < sections.size(); ++i) {
      auto &rplSection = sections[i];
      auto &header = rplSection.header;

      if (header.type != elf::SHT_RPL_IMPORTS) {
         continue;
      }

      auto section = new UserModule::Section();
      section->index = i;
      section->name = strData + header.name;
      section->library = rplSection.data.data() + 8;
      section->size = static_cast<uint32_t>(rplSection.data.size());

      if (header.type == elf::SHT_NOBITS) {
         section->size = header.size;
      }

      if (header.flags & elf::SHF_EXECINSTR) {
         section->type = UserModule::Section::CodeImports;
         section->address = alignUp(codeRange.second, header.addralign);
         codeRange.second = section->address + section->size;
      } else {
         section->type = UserModule::Section::DataImports;
         section->address = alignUp(dataRange.second, header.addralign);
         dataRange.second = section->address + section->size;
      }

      rplSection.section = section;
      module.sections.push_back(section);
      module.sectionMap[section->name] = section;
   }

   // Allocate code & data sections in memory
   module.codeAddressRange = codeRange;
   module.dataAddressRange = dataRange;
}
bool Loader::loadRPL(UserModule &module, const char *buffer, size_t size)
{
   auto in = BigEndianView { buffer, size };
   auto header = elf::Header { };
   auto info = elf::FileInfo { };
   auto sections = std::vector<elf::Section> { };

   // Read header
   if (!elf::readHeader(in, header)) {
      gLog->error("Failed elf::readHeader");
      return false;
   }

   // Check it is a CAFE abi rpl
   if (header.abi != elf::EABI_CAFE) {
      gLog->error("Unexpected elf abi found {:02x} expected {:02x}", header.abi, elf::EABI_CAFE);
      return false;
   }

   // Read sections
   if (!elf::readSections(in, header, sections)) {
      gLog->error("Failed elf::readSections");
      return false;
   }

   // Process sections, find our data and code sections
   processSections(module, sections, sections[header.shstrndx].data.data());

   // Update EntryInfo
   loadFileInfo(info, sections);
   module.entryPoint = header.entry;
   module.defaultStackSize = info.stackSize;

   // Allocate code & data sections in memory
   auto codeStart = module.codeAddressRange.first;
   auto codeSize = module.maxCodeSize;
   gMemory.alloc(codeStart, codeSize); // TODO: Append code to end of other loaded code sections

   auto dataStart = alignUp(codeStart + codeSize, 4096);
   auto dataSize = alignUp(module.dataAddressRange.second - module.dataAddressRange.first, 4096);
   auto dataEnd = dataStart + dataSize;
   gMemory.alloc(dataStart, dataSize); // TODO: Use OSDynLoad_MemAlloc for data section allocation

   // Update MEM2 memory bounds
   be_val<uint32_t> mem2start, mem2size;
   OSGetMemBound(OSMemoryType::MEM2, &mem2start, &mem2size);
   OSSetMemBound(OSMemoryType::MEM2, dataEnd, mem2size - (dataEnd - mem2start));

   // Relocate sections
   relocateSections(sections, module.codeAddressRange.first, codeStart, module.dataAddressRange.first, dataStart);

   module.codeAddressRange.first = codeStart;
   module.codeAddressRange.second = codeStart + codeSize;

   module.dataAddressRange.first = dataStart;
   module.dataAddressRange.second = dataStart + dataSize;

   // Relocate entry point
   for (auto i = 0u; i < sections.size(); ++i) {
      auto &section = sections[i];

      if (section.header.addr <= header.entry && section.header.addr + section.data.size() > header.entry) {
         auto offset = section.section->address - section.header.addr;
         module.entryPoint = header.entry + offset;
         break;
      }
   }

   // Load sections into memory
   loadSections(sections);

   // Process small data sections
   processSmallDataSections(module);

   // Process symbols
   // TODO: Support more than one symbol section?
   for (auto i = 0u; i < sections.size(); ++i) {
      auto &section = sections[i];

      if (section.header.type != elf::SHT_SYMTAB) {
         continue;
      }

      processSymbols(module, section, sections);
   }

   // Process relocations
   for (auto i = 0u; i < sections.size(); ++i) {
      auto &section = sections[i];

      if (section.header.type != elf::SHT_RELA) {
         continue;
      }

      processRelocations(module, section, sections);
   }

   if (0) {
      // Print address ranges
      gLog->debug("Loaded module!");
      gLog->debug("Code {:08x} -> {:08x}", module.codeAddressRange.first, module.codeAddressRange.second);
      gLog->debug("Data {:08x} -> {:08x}", module.dataAddressRange.first, module.dataAddressRange.second);

      // Print all sections
      gLog->debug("Sections:");

      for (auto i = 0u; i < module.sections.size(); ++i) {
         auto section = module.sections[i];
         gLog->debug("{:08x} {} {:x}", section->address, section->name, section->size);
      }

      // Print all symbols
      gLog->debug("Symbols:");

      for (auto i = 0u; i < module.symbols.size(); ++i) {
         auto symbol = module.symbols[i];

         if (symbol && symbol->name.size()) {
            gLog->debug("{:08x} {}", symbol->address, symbol->name);
         }
      }
   }

   return true;
}
/* stress -- create a pool of the requested type and allocate in it */

static mps_res_t stress(mps_arena_t arena, mps_pool_debug_option_s *options,
                        mps_align_t align,
                        size_t (*size)(size_t i, mps_align_t align),
                        const char *name, mps_class_t class, mps_arg_s args[])
{
  mps_res_t res = MPS_RES_OK;
  mps_pool_t pool;
  mps_ap_t ap;
  size_t i, k;
  int *ps[testSetSIZE];
  size_t ss[testSetSIZE];
  size_t allocated = 0;         /* Total allocated memory */
  size_t debugOverhead = options ? 2 * alignUp(options->fence_size, align) : 0;

  printf("stress %s\n", name);

  die(mps_pool_create_k(&pool, arena, class, args), "pool_create");
  die(mps_ap_create(&ap, pool, mps_rank_exact()), "BufferCreate");

  /* allocate a load of objects */
  for (i = 0; i < testSetSIZE; ++i) {
    ss[i] = (*size)(i, align);

    res = make((mps_addr_t *)&ps[i], ap, ss[i]);
    if (res != MPS_RES_OK)
      goto allocFail;

    allocated += ss[i] + debugOverhead;

    if (ss[i] >= sizeof(ps[i]))