address NativeCall::destination() const {
  // Getting the destination of a call isn't safe because that call can
  // be getting patched while you're calling this. There are only a few
  // special places where this can be called, but that is not automatically
  // verifiable by checking which locks are held. The solution is true
  // atomic patching on x86, not yet implemented.
  return return_address() + displacement();
}
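For context, a `call rel32` on x86 stores its target as a signed 32-bit displacement relative to the return address (the byte after the 5-byte instruction), which is exactly what `return_address() + displacement()` reconstructs. A minimal standalone sketch of that decoding, with a hypothetical helper name and no HotSpot dependencies:

#include <cstdint>
#include <cstring>

// Hypothetical helper: decode the target of a 5-byte x86 call at 'insn'.
// The displacement is relative to the *return address* (the byte after
// the instruction), so destination = insn + 5 + disp.
uint8_t* call_destination(uint8_t* insn) {
  int32_t disp;
  std::memcpy(&disp, insn + 1, sizeof(disp));  // bytes 1..4: rel32, little-endian
  return insn + 5 + disp;                      // return_address + displacement
}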
// We cannot rely on locks here, since the free-running threads must run at
// full speed.
//
// Used in the runtime linkage of calls; see class CompiledIC.
// (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.)
void NativeCall::set_destination_mt_safe(address dest) {
  debug_only(verify());
  // Make sure patching code is locked. No two threads can patch at the same
  // time, but one may be executing this code.
  assert(Patching_lock->is_locked() ||
         SafepointSynchronize::is_at_safepoint(),
         "concurrent code patching");
  // Both C1 and C2 should now be generating code which aligns the patched
  // address to be within a single cache line, except that C1 does not do the
  // alignment on uniprocessor systems.
  assert(!os::is_MP() ||
         ((uintptr_t)displacement_address() / cache_line_size ==
          ((uintptr_t)displacement_address() + 3) / cache_line_size),
         "destination should be aligned");

  if ((uintptr_t)displacement_address() / cache_line_size ==
      ((uintptr_t)displacement_address() + 3) / cache_line_size) {
    // Simple case: The destination lies within a single cache line.
    set_destination(dest);
  } else if ((uintptr_t)instruction_address() / cache_line_size ==
             ((uintptr_t)instruction_address() + 1) / cache_line_size) {
    // Tricky case: The instruction prefix lies within a single cache line.
    int disp = dest - return_address();
    int call_opcode = instruction_address()[0];

    // First patch dummy jump in place:
    {
      unsigned char patch_jump[2];
      patch_jump[0] = 0xEB;  // jmp rel8
      patch_jump[1] = 0xFE;  // jmp to self

      assert(sizeof(patch_jump) == sizeof(short), "sanity check");
      *(short*)instruction_address() = *(short*)patch_jump;
    }
    OrderAccess::fence();
    // (Note: We assume any reader which has already started to read
    // the unpatched call will completely read the whole unpatched call
    // without seeing the next writes we are about to make.)

    // Next, patch the last three bytes:
    unsigned char patch_disp[5];
    patch_disp[0] = call_opcode;
    *(int*)&patch_disp[1] = disp;
    assert(sizeof(patch_disp) == instruction_size, "sanity check");
    for (int i = sizeof(short); i < instruction_size; i++)
      instruction_address()[i] = patch_disp[i];
    OrderAccess::fence();
    // (Note: We assume that any reader which reads the opcode we are
    // about to repatch will also read the writes we just made.)

    // Finally, overwrite the jump:
    *(short*)instruction_address() = *(short*)&patch_disp[0];

    debug_only(verify());
    guarantee(destination() == dest, "patch succeeded");
  } else {
    // Impossible: One or the other must be atomically writable.
    ShouldNotReachHere();
  }
  ICache::invalidate_range(instruction_address(), instruction_size);
}
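The three-step sequence above is easier to see in isolation. Below is a minimal sketch of the same idea against a plain byte buffer: `patch_call_mt_safe_sketch` and all of its details are hypothetical, `std::atomic_thread_fence` stands in for `OrderAccess::fence()`, the aligned 16-bit store stands in for whatever atomic store the platform guarantees, and the instruction-cache flush is omitted:

#include <atomic>
#include <cstdint>
#include <cstring>

// Hypothetical sketch: patch the rel32 of a 5-byte call whose displacement
// straddles a cache line, assuming the first two bytes do not straddle one.
void patch_call_mt_safe_sketch(uint8_t* insn, int32_t new_disp) {
  // 1. Park concurrent executors on a 2-byte self-jump (atomically writable).
  const uint8_t jmp_self[2] = { 0xEB, 0xFE };   // jmp rel8 to itself
  uint16_t first2;
  std::memcpy(&first2, jmp_self, 2);
  *reinterpret_cast<volatile uint16_t*>(insn) = first2;
  std::atomic_thread_fence(std::memory_order_seq_cst);

  // 2. Rewrite the now-unreachable tail: the last three displacement bytes.
  uint8_t patched[5];
  patched[0] = 0xE8;                            // call rel32 opcode
  std::memcpy(&patched[1], &new_disp, 4);
  for (int i = 2; i < 5; i++) insn[i] = patched[i];
  std::atomic_thread_fence(std::memory_order_seq_cst);

  // 3. Atomically restore the first two bytes (opcode + first displacement
  //    byte), releasing any thread spinning on the self-jump.
  std::memcpy(&first2, &patched[0], 2);
  *reinterpret_cast<volatile uint16_t*>(insn) = first2;
}

The self-jump (`EB FE`) is what makes the window safe: any thread that executes the call site mid-patch simply spins in place until the final two-byte store re-exposes a complete, consistent call instruction.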
// The cache_entries parameter is empty (on cold call site) or has entries
// (on cache miss). Called from assembly with the actual return address.
// Compilation of the inline cache may trigger a GC, which may trigger a
// compaction; also, the block containing the return address may now be
// dead. Use a code_root to take care of the details.
// Allocates memory
cell factor_vm::inline_cache_miss(cell return_address_) {
  code_root return_address(return_address_, this);
  bool tail_call_site = tail_call_site_p(return_address.value);

#ifdef PIC_DEBUG
  FACTOR_PRINT("Inline cache miss at "
               << (tail_call_site ? "tail" : "non-tail") << " call site 0x"
               << std::hex << return_address.value << std::dec);
  print_callstack();
#endif

  data_root<array> cache_entries(ctx->pop(), this);
  fixnum index = untag_fixnum(ctx->pop());
  data_root<array> methods(ctx->pop(), this);
  data_root<word> generic_word(ctx->pop(), this);
  data_root<object> object(((cell*)ctx->datastack)[-index], this);

  cell pic_size = array_capacity(cache_entries.untagged()) / 2;

  update_pic_transitions(pic_size);

  cell xt = generic_word->entry_point;
  if (pic_size < max_pic_size) {
    cell klass = object_class(object.value());
    cell method = lookup_method(object.value(), methods.value());

    data_root<array> new_cache_entries(
        add_inline_cache_entry(cache_entries.value(), klass, method), this);

    inline_cache_jit jit(generic_word.value(), this);
    jit.emit_inline_cache(index, generic_word.value(), methods.value(),
                          new_cache_entries.value(), tail_call_site);
    code_block* code = jit.to_code_block(CODE_BLOCK_PIC, JIT_FRAME_SIZE);
    initialize_code_block(code);
    xt = code->entry_point();
  }

  // Install the new stub.
  if (return_address.valid) {
    // Since each PIC is only referenced from a single call site,
    // if the old call target was a PIC, we can deallocate it immediately,
    // instead of leaving dead PICs around until the next GC.
    deallocate_inline_cache(return_address.value);
    set_call_target(return_address.value, xt);

#ifdef PIC_DEBUG
    FACTOR_PRINT("Updated " << (tail_call_site ? "tail" : "non-tail")
                 << " call site 0x" << std::hex << return_address.value
                 << std::dec << " with 0x" << std::hex << (cell)xt
                 << std::dec);
    print_callstack();
#endif
  }

  return xt;
}
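For intuition about what the stub emitted by `inline_cache_jit` buys: a polymorphic inline cache is conceptually a linear scan over (class, method) pairs that falls through to the miss handler. A rough C++ sketch of that dispatch shape, with all names hypothetical and none of Factor's tagging, stack, or JIT machinery:

#include <cstddef>
#include <cstdint>

using cell = std::uintptr_t;

// Hypothetical PIC entry: a class token paired with its compiled method.
struct pic_entry { cell klass; cell method_entry_point; };

// Sketch of the dispatch a PIC stub performs: check the receiver's class
// against each cached entry; on a miss, fall back to the miss handler,
// which grows the cache (up to max_pic_size) and repatches the call site.
cell pic_dispatch(cell receiver_class,
                  const pic_entry* entries, size_t n,
                  cell (*miss_handler)(cell)) {
  for (size_t i = 0; i < n; i++)
    if (entries[i].klass == receiver_class)
      return entries[i].method_entry_point;   // cache hit: direct call
  return miss_handler(receiver_class);        // cache miss: rebuild the PIC
}

The generated stub compiles these checks into straight-line code rather than looping over a runtime table, which is why a miss rebuilds the whole stub and repatches the call site with `set_call_target` instead of mutating an entry in place.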