Example #1
void processStackmap(StackMap* stackmap) {
    int nrecords = stackmap ? stackmap->records.size() : 0;

    for (int i = 0; i < nrecords; i++) {
        StackMap::Record* r = stackmap->records[i];

        assert(stackmap->stack_size_records.size() == 1);
        const StackMap::StackSizeRecord& stack_size_record = stackmap->stack_size_records[0];
        int stack_size = stack_size_record.stack_size;

        PatchpointSetupInfo* pp = new_patchpoints_by_id[r->id];
        assert(pp);

        bool has_scratch = (pp->numScratchBytes() != 0);
        int scratch_rbp_offset = 0;
        if (has_scratch) {
            assert(r->locations.size() == 1);

            StackMap::Record::Location l = r->locations[0];

            static const int DWARF_RBP_REGNUM = 6;

            assert(l.type == 2); // "Direct"
            assert(l.regnum == DWARF_RBP_REGNUM);
            scratch_rbp_offset = l.offset;
        } else {
            assert(r->locations.size() == 0);
        }

        uint8_t* func_addr = (uint8_t*)pp->parent_cf->code;
        assert(func_addr);
        uint8_t* start_addr = func_addr + r->offset;

        std::unordered_set<int> live_outs;
        for (const auto& live_out : r->live_outs) {
            live_outs.insert(live_out.regnum);
        }

        // llvm doesn't consider callee-save registers to be live
        // if they're never allocated, but it makes much more sense
        // to track them as live_outs. Until we can change llvm's
        // behavior, we have to be conservative and mark all of them
        // as live (DWARF x86-64 register numbers for the callee-saved GPRs):
        live_outs.insert(3);  // rbx
        live_outs.insert(12); // r12
        live_outs.insert(13); // r13
        live_outs.insert(14); // r14
        live_outs.insert(15); // r15

        registerCompiledPatchpoint(start_addr, pp,
                                   StackInfo({ stack_size, has_scratch, pp->numScratchBytes(), scratch_rbp_offset }),
                                   std::move(live_outs));
    }

    for (const auto& p : new_patchpoints_by_id) {
        delete p.second;
    }
    new_patchpoints_by_id.clear();
}
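For reference, a minimal sketch of the StackMap layout that processStackmap consumes above. The field names are taken from the code itself and mirror LLVM's stackmap section format; the exact integer widths and container types here are assumptions:

#include <cstdint>
#include <vector>

struct StackMap {
    struct StackSizeRecord {
        uint64_t func_addr;
        uint64_t stack_size; // frame size reported by llvm for the function
    };

    struct Record {
        struct Location {
            uint8_t type;    // 1 = Register, 2 = Direct, 3 = Indirect, 4 = Constant
            uint16_t regnum; // DWARF register number
            int32_t offset;  // e.g. the rbp-relative scratch offset used above
        };
        struct LiveOut {
            uint16_t regnum; // DWARF register number of a live-out register
            uint8_t size;
        };

        uint64_t id;     // patchpoint id, the key into new_patchpoints_by_id
        uint32_t offset; // code offset of the patchpoint within the function
        std::vector<Location> locations;
        std::vector<LiveOut> live_outs;
    };

    std::vector<StackSizeRecord> stack_size_records;
    std::vector<Record*> records;
};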
Example #2
RuntimeIC::RuntimeIC(void* func_addr, int num_slots, int slot_size) : eh_frame(RUNTIMEICS_OMIT_FRAME_PTR) {
    static StatCounter sc("runtime_ics_num");
    sc.log();

    if (ENABLE_RUNTIME_ICS) {
        assert(SCRATCH_BYTES >= 0);
        assert(SCRATCH_BYTES < 0x80); // a larger value would break both the instruction encoding (the sub/add immediate below is a signed byte) and the dwarf encoding
        assert(SCRATCH_BYTES % 8 == 0);

#if RUNTIMEICS_OMIT_FRAME_PTR
        /*
         * prologue:
         * sub $0x28, %rsp  # 48 83 ec 28
         *
         * epilogue:
         * add $0x28, %rsp  # 48 83 c4 28
         * retq             # c3
         *
         */
        static const int PROLOGUE_SIZE = 4;
        static const int EPILOGUE_SIZE = 5;
        assert(SCRATCH_BYTES % 16 == 8); // the call pushed an 8-byte return address, so this restores 16-byte stack alignment
#else
        /*
         * The prologue looks like:
         * push %rbp        # 55
         * mov %rsp, %rbp   # 48 89 e5
         * sub $0x30, %rsp  # 48 83 ec 30
         *
         * The epilogue is:
         * add $0x30, %rsp  # 48 83 c4 30
         * pop %rbp         # 5d
         * retq             # c3
         */
        static const int PROLOGUE_SIZE = 8;
        static const int EPILOGUE_SIZE = 6;
        assert(SCRATCH_BYTES % 16 == 0); // return address plus pushed rbp total 16 bytes, so rsp is already 16-byte aligned here
#endif
        static const int CALL_SIZE = 13;

        int patchable_size = num_slots * slot_size;

        int total_size = PROLOGUE_SIZE + patchable_size + CALL_SIZE + EPILOGUE_SIZE;

#ifdef NVALGRIND
        addr = malloc(total_size);
#else
        // With valgrind support compiled in, get executable memory from mmap,
        // rounding the length up to a whole number of pages.
        addr = mmap(NULL, (total_size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1), PROT_READ | PROT_WRITE | PROT_EXEC,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        RELEASE_ASSERT(addr != MAP_FAILED, "");
#endif

        // printf("Allocated runtime IC at %p\n", addr);

        std::unique_ptr<ICSetupInfo> setup_info(
            ICSetupInfo::initialize(true, num_slots, slot_size, ICSetupInfo::Generic, NULL));
        uint8_t* pp_start = (uint8_t*)addr + PROLOGUE_SIZE;
        uint8_t* pp_end = pp_start + patchable_size + CALL_SIZE;

        SpillMap _spill_map;
        PatchpointInitializationInfo initialization_info
            = initializePatchpoint3(func_addr, pp_start, pp_end, 0 /* scratch_offset */, 0 /* scratch_size */,
                                    std::unordered_set<int>(), _spill_map);
        assert(_spill_map.size() == 0);
        assert(initialization_info.slowpath_start == pp_start + patchable_size);
        assert(initialization_info.slowpath_rtn_addr == pp_end);
        assert(initialization_info.continue_addr == pp_end);

        StackInfo stack_info(SCRATCH_BYTES, 0);
        icinfo = registerCompiledPatchpoint(pp_start, pp_start + patchable_size, pp_end, pp_end, setup_info.get(),
                                            stack_info, std::unordered_set<int>());

        assembler::Assembler prologue_assem((uint8_t*)addr, PROLOGUE_SIZE);
#if RUNTIMEICS_OMIT_FRAME_PTR
        // If SCRATCH_BYTES is 8 or less, we could use more compact instruction encodings
        // (push instead of sub), but it doesn't seem worth it for now.
        prologue_assem.sub(assembler::Immediate(SCRATCH_BYTES), assembler::RSP);
#else
        prologue_assem.push(assembler::RBP);
        prologue_assem.mov(assembler::RSP, assembler::RBP);
        prologue_assem.sub(assembler::Immediate(SCRATCH_BYTES), assembler::RSP);
#endif
        assert(!prologue_assem.hasFailed());
        assert(prologue_assem.isExactlyFull());

        assembler::Assembler epilogue_assem(pp_end, EPILOGUE_SIZE);
#if RUNTIMEICS_OMIT_FRAME_PTR
        epilogue_assem.add(assembler::Immediate(SCRATCH_BYTES), assembler::RSP);
#else
        epilogue_assem.add(assembler::Immediate(SCRATCH_BYTES), assembler::RSP);
        epilogue_assem.pop(assembler::RBP);
#endif
        epilogue_assem.retq();
        assert(!epilogue_assem.hasFailed());
        assert(epilogue_assem.isExactlyFull());

        // TODO: ideally we would be more intelligent about allocation strategies:
        // the code sections should be placed together, and the eh sections together.
        eh_frame.writeAndRegister(addr, total_size);
    } else {
        addr = func_addr;
    }
}
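The address arithmetic above carves the allocation into four consecutive regions. A worked example of the layout, using hypothetical values num_slots = 2 and slot_size = 32 in the RUNTIMEICS_OMIT_FRAME_PTR configuration:

#include <cassert>
#include <cstdint>

// [ prologue | patchable slots | call | epilogue ]
// ^addr      ^pp_start                ^pp_end
int main() {
    const int PROLOGUE_SIZE = 4, CALL_SIZE = 13, EPILOGUE_SIZE = 5; // OMIT_FRAME_PTR sizes
    const int num_slots = 2, slot_size = 32; // hypothetical IC configuration

    int patchable_size = num_slots * slot_size;                                  // 64
    int total_size = PROLOGUE_SIZE + patchable_size + CALL_SIZE + EPILOGUE_SIZE; // 86

    uintptr_t addr = 0x1000; // stand-in for the malloc/mmap result
    uintptr_t pp_start = addr + PROLOGUE_SIZE;                // first patchable byte
    uintptr_t pp_end = pp_start + patchable_size + CALL_SIZE; // epilogue starts here

    assert(pp_end + EPILOGUE_SIZE == addr + total_size);
    return 0;
}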
Example #3
RuntimeIC::RuntimeIC(void* func_addr, int total_size) {
    static StatCounter sc("num_runtime_ics");
    sc.log();

    if (ENABLE_RUNTIME_ICS) {
        assert(SCRATCH_BYTES >= 0);
        assert(SCRATCH_BYTES < 0x80); // a larger value would break both the instruction encoding (the sub/add immediate below is a signed byte) and the dwarf encoding
        assert(SCRATCH_BYTES % 8 == 0);

#if RUNTIMEICS_OMIT_FRAME_PTR
        /*
         * prologue:
         * sub $0x28, %rsp  # 48 83 ec 28
         *
         * epilogue:
         * add $0x28, %rsp  # 48 83 c4 28
         * retq             # c3
         *
         */
        static const int PROLOGUE_SIZE = 4;
        static const int EPILOGUE_SIZE = 5;
        assert(SCRATCH_BYTES % 16 == 8); // the call pushed an 8-byte return address, so this restores 16-byte stack alignment
#else
        /*
         * The prologue looks like:
         * push %rbp        # 55
         * mov %rsp, %rbp   # 48 89 e5
         * sub $0x30, %rsp  # 48 83 ec 30
         *
         * The epilogue is:
         * add $0x30, %rsp  # 48 83 c4 30
         * pop %rbp         # 5d
         * retq             # c3
         */
        static const int PROLOGUE_SIZE = 8;
        static const int EPILOGUE_SIZE = 6;
        assert(SCRATCH_BYTES % 16 == 0); // return address plus pushed rbp total 16 bytes, so rsp is already 16-byte aligned here
#endif
        static const int CALL_SIZE = 13;

        int total_code_size = total_size - EH_FRAME_SIZE;
        int patchable_size = total_code_size - (PROLOGUE_SIZE + CALL_SIZE + EPILOGUE_SIZE);

        assert(total_size == 512 && "we currently only have a 512 byte block memory manager");
        addr = memory_manager_512b.alloc();

        // The memory block contains the EH frame, directly followed by the generated machine code.
        void* eh_frame_addr = addr;
        addr = (char*)addr + EH_FRAME_SIZE;

        // printf("Allocated runtime IC at %p\n", addr);

        std::unique_ptr<ICSetupInfo> setup_info(ICSetupInfo::initialize(true, patchable_size, ICSetupInfo::Generic));
        uint8_t* pp_start = (uint8_t*)addr + PROLOGUE_SIZE;
        uint8_t* pp_end = pp_start + patchable_size + CALL_SIZE;

        SpillMap _spill_map;
        PatchpointInitializationInfo initialization_info = initializePatchpoint3(
            func_addr, pp_start, pp_end, 0 /* scratch_offset */, 0 /* scratch_size */, LiveOutSet(), _spill_map);
        assert(_spill_map.size() == 0);
        assert(initialization_info.slowpath_start == pp_start + patchable_size);
        assert(initialization_info.slowpath_rtn_addr == pp_end);
        assert(initialization_info.continue_addr == pp_end);

        StackInfo stack_info(SCRATCH_BYTES, 0);
        icinfo = registerCompiledPatchpoint(pp_start, pp_start + patchable_size, pp_end, pp_end, setup_info.get(),
                                            stack_info, LiveOutSet());

        assembler::Assembler prologue_assem((uint8_t*)addr, PROLOGUE_SIZE);
#if RUNTIMEICS_OMIT_FRAME_PTR
        // If SCRATCH_BYTES is 8 or less, we could use more compact instruction encodings
        // (push instead of sub), but it doesn't seem worth it for now.
        prologue_assem.sub(assembler::Immediate(SCRATCH_BYTES), assembler::RSP);
#else
        prologue_assem.push(assembler::RBP);
        prologue_assem.mov(assembler::RSP, assembler::RBP);
        prologue_assem.sub(assembler::Immediate(SCRATCH_BYTES), assembler::RSP);
#endif
        assert(!prologue_assem.hasFailed());
        assert(prologue_assem.isExactlyFull());

        assembler::Assembler epilogue_assem(pp_end, EPILOGUE_SIZE);
#if RUNTIMEICS_OMIT_FRAME_PTR
        epilogue_assem.add(assembler::Immediate(SCRATCH_BYTES), assembler::RSP);
#else
        epilogue_assem.add(assembler::Immediate(SCRATCH_BYTES), assembler::RSP);
        epilogue_assem.pop(assembler::RBP);
#endif
        epilogue_assem.retq();
        assert(!epilogue_assem.hasFailed());
        assert(epilogue_assem.isExactlyFull());

        if (RUNTIMEICS_OMIT_FRAME_PTR)
            memcpy(eh_frame_addr, _eh_frame_template_ofp, _eh_frame_template_ofp_size);
        else
            memcpy(eh_frame_addr, _eh_frame_template_fp, _eh_frame_template_fp_size);
        register_eh_frame.updateAndRegisterFrameFromTemplate((uint64_t)addr, total_code_size, (uint64_t)eh_frame_addr,
                                                             EH_FRAME_SIZE);
    } else {
        addr = func_addr;
    }
}
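memory_manager_512b is not defined in this example; a minimal sketch of the fixed-size block allocator it presumably is, handing out executable 512-byte blocks. The free-list design and the refill chunk size are assumptions; only alloc() is exercised above:

#include <sys/mman.h>
#include <cassert>
#include <cstddef>

// Hypothetical fixed-size allocator for executable memory blocks.
template <size_t BLOCK_SIZE> class BlockMemoryManager {
    void* free_list = nullptr; // singly-linked free list threaded through the blocks

public:
    void* alloc() {
        if (!free_list)
            refill();
        void* r = free_list;
        free_list = *(void**)r;
        return r;
    }

    void dealloc(void* p) {
        *(void**)p = free_list;
        free_list = p;
    }

private:
    void refill() {
        const size_t CHUNK_SIZE = 64 * 1024; // assumption: carve one mmap'd chunk at a time
        void* chunk = mmap(NULL, CHUNK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        assert(chunk != MAP_FAILED);
        for (size_t off = 0; off + BLOCK_SIZE <= CHUNK_SIZE; off += BLOCK_SIZE)
            dealloc((char*)chunk + off);
    }
};

static BlockMemoryManager<512> memory_manager_512b;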
Example #4
int JitFragmentWriter::finishCompilation() {
    RELEASE_ASSERT(!assembler->hasFailed(), "");

    commit();
    if (failed) {
        blocks_aborted.insert(block);
        code_block.fragmentAbort(false);
        return 0;
    }

    // commit() emits additional code, so the assembler may have failed during it
    // even though the check at the top of this function passed.
    if (assembler->hasFailed()) {
        int bytes_written = assembler->bytesWritten();

        // don't retry JITing very large blocks
        const auto large_block_threshold = JitCodeBlock::code_size - 4096;
        if (bytes_written > large_block_threshold) {
            static StatCounter num_jit_large_blocks("num_baselinejit_skipped_large_blocks");
            num_jit_large_blocks.log();

            blocks_aborted.insert(block);
            code_block.fragmentAbort(false);
        } else {
            // We ran out of space; abort with not_enough_space=true so that a new
            // code block gets allocated for the next attempt.
            code_block.fragmentAbort(true /* not_enough_space */);
        }
        return 0;
    }

    block->code = (void*)((uint64_t)entry_code + code_offset);
    block->entry_code = (decltype(block->entry_code))entry_code;

    // If any side exits point to this block, patch them to jump directly to it.
    auto it = block_patch_locations.find(block);
    if (it != block_patch_locations.end()) {
        for (void* patch_location : it->second) {
            assembler::Assembler patch_asm((uint8_t*)patch_location, min_patch_size);
            int64_t offset = (uint64_t)block->code - (uint64_t)patch_location;
            if (isLargeConstant(offset)) {
                patch_asm.mov(assembler::Immediate(block->code), assembler::R11);
                patch_asm.jmpq(assembler::R11);
            } else
                patch_asm.jmp(assembler::JumpDestination::fromStart(offset));
            RELEASE_ASSERT(!patch_asm.hasFailed(), "you may have to increase 'min_patch_size'");
        }
        block_patch_locations.erase(it);
    }

    // if we have a side exit, remember its location for patching
    if (side_exit_patch_location.first) {
        void* patch_location = (uint8_t*)block->code + side_exit_patch_location.second;
        block_patch_locations[side_exit_patch_location.first].push_back(patch_location);
    }

    for (auto&& pp_info : pp_infos) {
        SpillMap _spill_map;
        uint8_t* start_addr = pp_info.start_addr;
        uint8_t* end_addr = pp_info.end_addr;
        PatchpointInitializationInfo initialization_info
            = initializePatchpoint3(pp_info.func_addr, start_addr, end_addr, 0 /* scratch_offset */,
                                    0 /* scratch_size */, std::unordered_set<int>(), _spill_map);
        uint8_t* slowpath_start = initialization_info.slowpath_start;
        uint8_t* slowpath_rtn_addr = initialization_info.slowpath_rtn_addr;

        std::unique_ptr<ICInfo> pp = registerCompiledPatchpoint(
            start_addr, slowpath_start, initialization_info.continue_addr, slowpath_rtn_addr, pp_info.ic.get(),
            pp_info.stack_info, std::unordered_set<int>());
        pp.release(); // relinquish ownership without deleting; the registered patchpoint outlives this function
    }

    void* next_fragment_start = (uint8_t*)block->code + assembler->bytesWritten();
    code_block.fragmentFinished(assembler->bytesWritten(), num_bytes_overlapping, next_fragment_start);
    return num_bytes_exit;
}
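The patch loop above chooses between a 5-byte rel32 jmp and a movabs-plus-indirect jump depending on the distance to the target. isLargeConstant is not shown in this example; its assumed semantics, and the resulting worst-case patch size, look like this:

#include <cstdint>
#include <limits>

// Assumed semantics: true iff the value does not fit in a sign-extended
// 32-bit immediate, i.e. it cannot be used as a rel32 jump displacement.
inline bool isLargeConstant(int64_t val) {
    return val < std::numeric_limits<int32_t>::min() || val > std::numeric_limits<int32_t>::max();
}

// Worst-case patch: mov $imm64, %r11 (10 bytes) + jmpq *%r11 (3 bytes) = 13 bytes,
// which is what 'min_patch_size' has to cover; the near form is a 5-byte jmp rel32.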