// Walks an LLVM stackmap and registers each patchpoint record as a compiled
// patchpoint, then frees and clears the pending-patchpoint table.
// For each record we recover:
//   - the function's stack size (exactly one stack-size record is expected),
//   - the scratch-space rbp offset, if the patchpoint requested scratch bytes,
//   - the set of live-out registers at the patchpoint.
void processStackmap(StackMap* stackmap) {
    // A null stackmap (no patchpoints emitted) degenerates to just the cleanup below.
    int nrecords = stackmap ? stackmap->records.size() : 0;
    for (int i = 0; i < nrecords; i++) {
        StackMap::Record* r = stackmap->records[i];
        // We only support a single function per stackmap here.
        assert(stackmap->stack_size_records.size() == 1);
        const StackMap::StackSizeRecord& stack_size_record = stackmap->stack_size_records[0];
        int stack_size = stack_size_record.stack_size;
        // Look up the setup info recorded when the patchpoint was created; the id
        // in the stackmap record must match an entry we registered earlier.
        PatchpointSetupInfo* pp = new_patchpoints_by_id[r->id];
        assert(pp);
        bool has_scratch = (pp->numScratchBytes() != 0);
        int scratch_rbp_offset = 0;
        if (has_scratch) {
            // A patchpoint with scratch space carries exactly one location:
            // a "Direct" rbp-relative slot pointing at the scratch area.
            assert(r->locations.size() == 1);
            StackMap::Record::Location l = r->locations[0];
            static const int DWARF_RBP_REGNUM = 6;
            assert(l.type == 2); // "Direct"
            assert(l.regnum == DWARF_RBP_REGNUM);
            scratch_rbp_offset = l.offset;
        } else {
            assert(r->locations.size() == 0);
        }
        uint8_t* func_addr = (uint8_t*)pp->parent_cf->code;
        assert(func_addr);
        // The record's offset is relative to the start of the containing function.
        uint8_t* start_addr = func_addr + r->offset;
        std::unordered_set<int> live_outs;
        for (const auto& live_out : r->live_outs) {
            live_outs.insert(live_out.regnum);
        }
        // llvm doesn't consider callee-save registers to be live
        // if they're never allocated, but I think it makes much more
        // sense to track them as live_outs.
        // Unfortunately this means we need to be conservative about it unless
        // we can change llvm's behavior.
        // (DWARF x86-64 regnums: 3 = rbx, 12-15 = r12-r15 — the callee-save set.)
        live_outs.insert(3);
        live_outs.insert(12);
        live_outs.insert(13);
        live_outs.insert(14);
        live_outs.insert(15);
        registerCompiledPatchpoint(
            start_addr, pp, StackInfo({ stack_size, has_scratch, pp->numScratchBytes(), scratch_rbp_offset }),
            std::move(live_outs));
    }
    // The setup infos were only needed to process this stackmap; free them all.
    for (const std::pair<int64_t, PatchpointSetupInfo*>& p : new_patchpoints_by_id) {
        delete p.second;
    }
    new_patchpoints_by_id.clear();
}
// Allocates and assembles a runtime inline cache with the layout:
//   [prologue][num_slots * slot_size patchable bytes][call][epilogue]
// and registers it as a compiled patchpoint plus an eh_frame entry so the IC
// is unwindable. If runtime ICs are disabled, the IC degenerates to a plain
// pointer at the target function.
//
// func_addr: slowpath target the IC initially calls.
// num_slots/slot_size: geometry of the patchable region.
RuntimeIC::RuntimeIC(void* func_addr, int num_slots, int slot_size) : eh_frame(RUNTIMEICS_OMIT_FRAME_PTR) {
    static StatCounter sc("runtime_ics_num");
    sc.log();

    if (ENABLE_RUNTIME_ICS) {
        assert(SCRATCH_BYTES >= 0);
        assert(SCRATCH_BYTES < 0x80); // This would break both the instruction encoding and the dwarf encoding
        assert(SCRATCH_BYTES % 8 == 0);

#if RUNTIMEICS_OMIT_FRAME_PTR
        /*
         * prologue:
         * sub $0x28, %rsp  # 48 83 ec 28
         *
         * epilogue:
         * add $0x28, %rsp  # 48 83 c4 28
         * retq             # c3
         *
         */
        static const int PROLOGUE_SIZE = 4;
        static const int EPILOGUE_SIZE = 5;
        assert(SCRATCH_BYTES % 16 == 8);
#else
        /*
         * The prologue looks like:
         * push %rbp          # 55
         * mov %rsp, %rbp     # 48 89 e5
         * sub $0x30, %rsp    # 48 83 ec 30
         *
         * The epilogue is:
         * add $0x30, %rsp    # 48 83 c4 30
         * pop %rbp           # 5d
         * retq               # c3
         */
        static const int PROLOGUE_SIZE = 8;
        static const int EPILOGUE_SIZE = 6;
        assert(SCRATCH_BYTES % 16 == 0);
#endif
        static const int CALL_SIZE = 13;

        int patchable_size = num_slots * slot_size;

        // BUGFIX: total_size was declared only inside the NVALGRIND branch; the
        // mmap branch assigned to an undeclared name and would not compile (and
        // eh_frame.writeAndRegister below needs it in both cases). The size
        // expression was identical in both branches, so hoist one declaration.
        int total_size = PROLOGUE_SIZE + patchable_size + CALL_SIZE + EPILOGUE_SIZE;
#ifdef NVALGRIND
        addr = malloc(total_size);
#else
        // Under valgrind, allocate whole pages so the region can be marked executable.
        addr = mmap(NULL, (total_size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1), PROT_READ | PROT_WRITE | PROT_EXEC,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        RELEASE_ASSERT(addr != MAP_FAILED, "");
#endif
        // printf("Allocated runtime IC at %p\n", addr);

        std::unique_ptr<ICSetupInfo> setup_info(
            ICSetupInfo::initialize(true, num_slots, slot_size, ICSetupInfo::Generic, NULL));
        uint8_t* pp_start = (uint8_t*)addr + PROLOGUE_SIZE;
        uint8_t* pp_end = pp_start + patchable_size + CALL_SIZE;

        // Emit the initial slowpath call and verify the layout came out as planned.
        SpillMap _spill_map;
        PatchpointInitializationInfo initialization_info
            = initializePatchpoint3(func_addr, pp_start, pp_end, 0 /* scratch_offset */, 0 /* scratch_size */,
                                    std::unordered_set<int>(), _spill_map);
        assert(_spill_map.size() == 0);
        assert(initialization_info.slowpath_start == pp_start + patchable_size);
        assert(initialization_info.slowpath_rtn_addr == pp_end);
        assert(initialization_info.continue_addr == pp_end);

        StackInfo stack_info(SCRATCH_BYTES, 0);
        icinfo = registerCompiledPatchpoint(pp_start, pp_start + patchable_size, pp_end, pp_end, setup_info.get(),
                                            stack_info, std::unordered_set<int>());

        assembler::Assembler prologue_assem((uint8_t*)addr, PROLOGUE_SIZE);
#if RUNTIMEICS_OMIT_FRAME_PTR
        // If SCRATCH_BYTES is 8 or less, we could use more compact instruction encodings
        // (push instead of sub), but it doesn't seem worth it for now.
        prologue_assem.sub(assembler::Immediate(SCRATCH_BYTES), assembler::RSP);
#else
        prologue_assem.push(assembler::RBP);
        prologue_assem.mov(assembler::RSP, assembler::RBP);
        prologue_assem.sub(assembler::Immediate(SCRATCH_BYTES), assembler::RSP);
#endif
        assert(!prologue_assem.hasFailed());
        assert(prologue_assem.isExactlyFull());

        assembler::Assembler epilogue_assem(pp_end, EPILOGUE_SIZE);
#if RUNTIMEICS_OMIT_FRAME_PTR
        epilogue_assem.add(assembler::Immediate(SCRATCH_BYTES), assembler::RSP);
#else
        epilogue_assem.add(assembler::Immediate(SCRATCH_BYTES), assembler::RSP);
        epilogue_assem.pop(assembler::RBP);
#endif
        epilogue_assem.retq();
        assert(!epilogue_assem.hasFailed());
        assert(epilogue_assem.isExactlyFull());

        // TODO: ideally would be more intelligent about allocation strategies.
        // The code sections should be together and the eh sections together
        eh_frame.writeAndRegister(addr, total_size);
    } else {
        addr = func_addr;
    }
}
// Allocates a runtime inline cache from the fixed-size (512-byte) block
// allocator. The block holds an EH frame template immediately followed by the
// generated code: [eh_frame][prologue][patchable bytes][call][epilogue].
// If runtime ICs are disabled, the IC degenerates to a plain pointer at the
// target function.
//
// func_addr:  slowpath target the IC initially calls.
// total_size: total block size (EH frame + code); must match the allocator.
RuntimeIC::RuntimeIC(void* func_addr, int total_size) {
    static StatCounter sc("num_runtime_ics");
    sc.log();

    if (ENABLE_RUNTIME_ICS) {
        assert(SCRATCH_BYTES >= 0);
        assert(SCRATCH_BYTES < 0x80); // This would break both the instruction encoding and the dwarf encoding
        assert(SCRATCH_BYTES % 8 == 0);

#if RUNTIMEICS_OMIT_FRAME_PTR
        /*
         * prologue:
         * sub $0x28, %rsp  # 48 83 ec 28
         *
         * epilogue:
         * add $0x28, %rsp  # 48 83 c4 28
         * retq             # c3
         *
         */
        static const int PROLOGUE_SIZE = 4;
        static const int EPILOGUE_SIZE = 5;
        assert(SCRATCH_BYTES % 16 == 8);
#else
        /*
         * The prologue looks like:
         * push %rbp          # 55
         * mov %rsp, %rbp     # 48 89 e5
         * sub $0x30, %rsp    # 48 83 ec 30
         *
         * The epilogue is:
         * add $0x30, %rsp    # 48 83 c4 30
         * pop %rbp           # 5d
         * retq               # c3
         */
        static const int PROLOGUE_SIZE = 8;
        static const int EPILOGUE_SIZE = 6;
        assert(SCRATCH_BYTES % 16 == 0);
#endif
        static const int CALL_SIZE = 13;

        int total_code_size = total_size - EH_FRAME_SIZE;
        int patchable_size = total_code_size - (PROLOGUE_SIZE + CALL_SIZE + EPILOGUE_SIZE);
        // CLEANUP: a local `int total_size = total_code_size + EH_FRAME_SIZE;`
        // used to shadow the parameter here with an arithmetically identical
        // value; use the parameter directly instead.
        assert(total_size == 512 && "we currently only have a 512 byte block memory manager");
        addr = memory_manager_512b.alloc();

        // the memory block contains the EH frame directly followed by the generated machine code.
        void* eh_frame_addr = addr;
        addr = (char*)addr + EH_FRAME_SIZE;

        // printf("Allocated runtime IC at %p\n", addr);

        std::unique_ptr<ICSetupInfo> setup_info(ICSetupInfo::initialize(true, patchable_size, ICSetupInfo::Generic));
        uint8_t* pp_start = (uint8_t*)addr + PROLOGUE_SIZE;
        uint8_t* pp_end = pp_start + patchable_size + CALL_SIZE;

        // Emit the initial slowpath call and verify the layout came out as planned.
        SpillMap _spill_map;
        PatchpointInitializationInfo initialization_info = initializePatchpoint3(
            func_addr, pp_start, pp_end, 0 /* scratch_offset */, 0 /* scratch_size */, LiveOutSet(), _spill_map);
        assert(_spill_map.size() == 0);
        assert(initialization_info.slowpath_start == pp_start + patchable_size);
        assert(initialization_info.slowpath_rtn_addr == pp_end);
        assert(initialization_info.continue_addr == pp_end);

        StackInfo stack_info(SCRATCH_BYTES, 0);
        icinfo = registerCompiledPatchpoint(pp_start, pp_start + patchable_size, pp_end, pp_end, setup_info.get(),
                                            stack_info, LiveOutSet());

        assembler::Assembler prologue_assem((uint8_t*)addr, PROLOGUE_SIZE);
#if RUNTIMEICS_OMIT_FRAME_PTR
        // If SCRATCH_BYTES is 8 or less, we could use more compact instruction encodings
        // (push instead of sub), but it doesn't seem worth it for now.
        prologue_assem.sub(assembler::Immediate(SCRATCH_BYTES), assembler::RSP);
#else
        prologue_assem.push(assembler::RBP);
        prologue_assem.mov(assembler::RSP, assembler::RBP);
        prologue_assem.sub(assembler::Immediate(SCRATCH_BYTES), assembler::RSP);
#endif
        assert(!prologue_assem.hasFailed());
        assert(prologue_assem.isExactlyFull());

        assembler::Assembler epilogue_assem(pp_end, EPILOGUE_SIZE);
#if RUNTIMEICS_OMIT_FRAME_PTR
        epilogue_assem.add(assembler::Immediate(SCRATCH_BYTES), assembler::RSP);
#else
        epilogue_assem.add(assembler::Immediate(SCRATCH_BYTES), assembler::RSP);
        epilogue_assem.pop(assembler::RBP);
#endif
        epilogue_assem.retq();
        assert(!epilogue_assem.hasFailed());
        assert(epilogue_assem.isExactlyFull());

        // Copy the matching EH frame template in front of the code and register it.
        if (RUNTIMEICS_OMIT_FRAME_PTR)
            memcpy(eh_frame_addr, _eh_frame_template_ofp, _eh_frame_template_ofp_size);
        else
            memcpy(eh_frame_addr, _eh_frame_template_fp, _eh_frame_template_fp_size);
        register_eh_frame.updateAndRegisterFrameFromTemplate((uint64_t)addr, total_code_size, (uint64_t)eh_frame_addr,
                                                             EH_FRAME_SIZE);
    } else {
        addr = func_addr;
    }
}
// Finalizes the current baseline-JIT fragment: commits the assembled code,
// handles abort/retry on failure, patches earlier side exits that target this
// block, records this fragment's own side exit for future patching, registers
// its patchpoints, and hands the space back to the code block.
// Returns num_bytes_exit on success, 0 on abort/retry.
int JitFragmentWriter::finishCompilation() {
    // The assembler must be clean before commit(); commit() itself may still
    // fail (e.g. run out of space), which is checked below.
    RELEASE_ASSERT(!assembler->hasFailed(), "");

    commit();
    if (failed) {
        // Hard failure: never try to JIT this block again.
        blocks_aborted.insert(block);
        code_block.fragmentAbort(false);
        return 0;
    }

    if (assembler->hasFailed()) {
        int bytes_written = assembler->bytesWritten();

        // don't retry JITing very large blocks
        const auto large_block_threshold = JitCodeBlock::code_size - 4096;
        if (bytes_written > large_block_threshold) {
            static StatCounter num_jit_large_blocks("num_baselinejit_skipped_large_blocks");
            num_jit_large_blocks.log();

            blocks_aborted.insert(block);
            code_block.fragmentAbort(false);
        } else {
            // we ran out of space - we allow a retry and set shouldCreateNewBlock to true in order to allocate a new
            // block for the next attempt.
            code_block.fragmentAbort(true /* not_enough_space */);
        }
        return 0;
    }

    block->code = (void*)((uint64_t)entry_code + code_offset);
    block->entry_code = (decltype(block->entry_code))entry_code;

    // if any side exits point to this block patch them to a direct jump to this block
    auto it = block_patch_locations.find(block);
    if (it != block_patch_locations.end()) {
        for (void* patch_location : it->second) {
            assembler::Assembler patch_asm((uint8_t*)patch_location, min_patch_size);
            int64_t offset = (uint64_t)block->code - (uint64_t)patch_location;
            if (isLargeConstant(offset)) {
                // Destination too far for a rel32 jump: jump through R11.
                patch_asm.mov(assembler::Immediate(block->code), assembler::R11);
                patch_asm.jmpq(assembler::R11);
            } else
                patch_asm.jmp(assembler::JumpDestination::fromStart(offset));
            RELEASE_ASSERT(!patch_asm.hasFailed(), "you may have to increase 'min_patch_size'");
        }
        block_patch_locations.erase(it);
    }

    // if we have a side exit, remember its location for patching
    if (side_exit_patch_location.first) {
        void* patch_location = (uint8_t*)block->code + side_exit_patch_location.second;
        block_patch_locations[side_exit_patch_location.first].push_back(patch_location);
    }

    // Register every patchpoint this fragment emitted. The resulting ICInfo is
    // intentionally leaked via release() — presumably the registration keeps it
    // alive for the lifetime of the code (TODO confirm ownership model).
    for (auto&& pp_info : pp_infos) {
        SpillMap _spill_map;
        uint8_t* start_addr = pp_info.start_addr;
        uint8_t* end_addr = pp_info.end_addr;
        PatchpointInitializationInfo initialization_info
            = initializePatchpoint3(pp_info.func_addr, start_addr, end_addr, 0 /* scratch_offset */,
                                    0 /* scratch_size */, std::unordered_set<int>(), _spill_map);
        uint8_t* slowpath_start = initialization_info.slowpath_start;
        uint8_t* slowpath_rtn_addr = initialization_info.slowpath_rtn_addr;
        std::unique_ptr<ICInfo> pp = registerCompiledPatchpoint(
            start_addr, slowpath_start, initialization_info.continue_addr, slowpath_rtn_addr, pp_info.ic.get(),
            pp_info.stack_info, std::unordered_set<int>());
        pp.release();
    }

    void* next_fragment_start = (uint8_t*)block->code + assembler->bytesWritten();
    code_block.fragmentFinished(assembler->bytesWritten(), num_bytes_overlapping, next_fragment_start);
    return num_bytes_exit;
}