address NativeCall::destination() const {
  // Getting the destination of a call is not safe, because the call may be
  // patched while this is running.  There are only a few special places from
  // which this can be called safely, but they cannot be verified automatically
  // by checking which locks are held.  The real solution would be true atomic
  // patching on x86, which is not yet implemented.
  return return_address() + displacement();
}
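
// For context, a stand-alone sketch of the arithmetic above (the helper name
// is invented; assumes the standard 5-byte x86 near call: opcode 0xE8
// followed by a little-endian signed 32-bit displacement).  The destination
// is the return address, i.e. the address of the next instruction, plus the
// displacement, which is what return_address() + displacement() computes.
static unsigned char* call_destination_sketch(unsigned char* call_site) {
  int disp = *(int*)(call_site + 1);              // bytes 1..4 hold rel32
  unsigned char* return_address = call_site + 5;  // instruction_size == 5
  return return_address + disp;
}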
// We cannot rely on locks here, since the free-running threads must run at
// full speed.
//
// Used in the runtime linkage of calls; see class CompiledIC.
// (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.)
void NativeCall::set_destination_mt_safe(address dest) {
  debug_only(verify());
  // Make sure patching code is locked.  No two threads can patch at the same
  // time, but another thread may be executing the code being patched.
  assert(Patching_lock->is_locked() ||
         SafepointSynchronize::is_at_safepoint(), "concurrent code patching"); 
  // Both C1 and C2 should now be generating code which aligns the patched address
  // to be within a single cache line except that C1 does not do the alignment on
  // uniprocessor systems.
  bool is_aligned =
      (uintptr_t)displacement_address() / cache_line_size ==
      ((uintptr_t)displacement_address() + 3) / cache_line_size;
  assert(!os::is_MP() || is_aligned, "destination should be aligned");

  if (is_aligned) {
    // Simple case:  The destination lies within a single cache line.
    set_destination(dest);
  } else if ((uintptr_t)instruction_address() / cache_line_size ==
             ((uintptr_t)instruction_address() + 1) / cache_line_size) {
    // Tricky case:  The instruction prefix lies within a single cache line.
    int disp = dest - return_address();
    int call_opcode = instruction_address()[0];

    // First patch dummy jump in place:
    {
      unsigned char patch_jump[2];
      patch_jump[0] = 0xEB;       // jmp rel8
      patch_jump[1] = 0xFE;       // jmp to self
      assert(sizeof(patch_jump)==sizeof(short), "sanity check");
      *(short*)instruction_address() = *(short*)patch_jump;
    }

    OrderAccess::fence();
    // (Note: We assume any reader which has already started to read
    // the unpatched call will completely read the whole unpatched call
    // without seeing the next writes we are about to make.)

    // Next, patch the last three bytes:
    unsigned char patch_disp[5];
    patch_disp[0] = call_opcode;
    *(int*)&patch_disp[1] = disp;
    assert(sizeof(patch_disp)==instruction_size, "sanity check");
    for (int i = sizeof(short); i < instruction_size; i++)
      instruction_address()[i] = patch_disp[i];

    OrderAccess::fence();
    // (Note: We assume that any reader which reads the opcode we are
    // about to repatch will also read the writes we just made.)

    // Finally, overwrite the jump:
    *(short*)instruction_address() = *(short*)&patch_disp[0];
    
    debug_only(verify());
    guarantee(destination() == dest, "patch succeeded");
  } else {
    // Impossible:  One or the other must be atomically writable.
    ShouldNotReachHere();
  }
  ICache::invalidate_range(instruction_address(), instruction_size);
}
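
// A minimal stand-alone model of the protocol above (the function name is
// invented, and std::atomic_thread_fence stands in for OrderAccess::fence()).
// It is only correct when the first two bytes of the call can be rewritten
// with a single atomic 16-bit store, i.e. when they do not straddle a cache
// line -- the precondition checked by the "else if" branch above.
#include <atomic>
static void patch_call_mt_safe_sketch(unsigned char* insn, int new_disp) {
  unsigned char patched[5];
  patched[0] = 0xE8;                             // call rel32 opcode
  *(int*)&patched[1] = new_disp;                 // new displacement

  // Step 1: park any thread reaching the call site in a tight loop.
  unsigned char self_jump[2] = { 0xEB, 0xFE };   // jmp rel8 back to itself
  *(short*)insn = *(short*)self_jump;
  std::atomic_thread_fence(std::memory_order_seq_cst);

  // Step 2: rewrite the last three bytes while readers spin on the jump.
  for (int i = 2; i < 5; i++)
    insn[i] = patched[i];
  std::atomic_thread_fence(std::memory_order_seq_cst);

  // Step 3: restore opcode + first displacement byte in one 16-bit store.
  *(short*)insn = *(short*)patched;
}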
// The cache_entries parameter is empty (on a cold call site) or has entries
// (on a cache miss). Called from assembly with the actual return address.
// Compilation of the inline cache may trigger a GC, which may trigger a
// compaction; also, the block containing the return address may now be dead.
// Use a code_root to take care of the details.
// Allocates memory
cell factor_vm::inline_cache_miss(cell return_address_) {
  code_root return_address(return_address_, this);
  bool tail_call_site = tail_call_site_p(return_address.value);

#ifdef PIC_DEBUG
  FACTOR_PRINT("Inline cache miss at "
               << (tail_call_site ? "tail" : "non-tail")
               << " call site 0x" << std::hex << return_address.value
               << std::dec);
  print_callstack();
#endif

  data_root<array> cache_entries(ctx->pop(), this);
  fixnum index = untag_fixnum(ctx->pop());
  data_root<array> methods(ctx->pop(), this);
  data_root<word> generic_word(ctx->pop(), this);
  // The receiver being dispatched on, `index' cells below the top of the
  // data stack.
  data_root<object> object(((cell*)ctx->datastack)[-index], this);

  // cache_entries stores (class, method) pairs, hence the division by 2.
  cell pic_size = array_capacity(cache_entries.untagged()) / 2;

  update_pic_transitions(pic_size);

  // By default, fall back to the generic word's own entry point; this is
  // what gets installed when the PIC has already reached max_pic_size.
  cell xt = generic_word->entry_point;
  if (pic_size < max_pic_size) {
    cell klass = object_class(object.value());
    cell method = lookup_method(object.value(), methods.value());

    data_root<array> new_cache_entries(
        add_inline_cache_entry(cache_entries.value(), klass, method), this);

    inline_cache_jit jit(generic_word.value(), this);
    jit.emit_inline_cache(index, generic_word.value(), methods.value(),
                          new_cache_entries.value(), tail_call_site);
    code_block* code = jit.to_code_block(CODE_BLOCK_PIC, JIT_FRAME_SIZE);
    initialize_code_block(code);
    xt = code->entry_point();
  }

  // Install the new stub.
  if (return_address.valid) {
    // Since each PIC is only referenced from a single call site,
    // if the old call target was a PIC, we can deallocate it immediately,
    // instead of leaving dead PICs around until the next GC.
    deallocate_inline_cache(return_address.value);
    set_call_target(return_address.value, xt);

#ifdef PIC_DEBUG
    FACTOR_PRINT("Updated " << (tail_call_site ? "tail" : "non-tail")
                 << " call site 0x" << std::hex << return_address.value << std::dec
                 << " with 0x" << std::hex << (cell)xt << std::dec);
    print_callstack();
#endif
  }

  return xt;
}
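
// A rough stand-alone model (all names invented) of what the PIC stub emitted
// by emit_inline_cache above conceptually does at a call site: compare the
// receiver's class against the cached (class, method) pairs and fall back to
// this miss handler when nothing matches.
struct pic_entry {
  cell klass;
  cell method;
};

static cell pic_dispatch_sketch(cell receiver_class, const pic_entry* entries,
                                cell pic_size) {
  for (cell i = 0; i < pic_size; i++) {
    if (entries[i].klass == receiver_class)
      return entries[i].method;  // hit: jump straight to the cached method
  }
  return 0;  // miss: the stub re-enters inline_cache_miss instead
}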