void ArrayCopyStub::emit_code(LIR_Assembler* ce) {
  //---------------slow case: call to native-----------------
  __ bind(_entry);
  // Figure out where the args should go
  // This should really convert the IntrinsicID to the Method* and signature
  // but I don't know how to do that.
  //
  VMRegPair args[5];
  BasicType signature[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT};
  SharedRuntime::java_calling_convention(signature, args, 5, true);

  // push parameters
  // (src, src_pos, dest, destPos, length)
  Register r[5];
  r[0] = src()->as_register();
  r[1] = src_pos()->as_register();
  r[2] = dst()->as_register();
  r[3] = dst_pos()->as_register();
  r[4] = length()->as_register();

  // next registers will get stored on the stack
  for (int i = 0; i < 5 ; i++ ) {
    VMReg r_1 = args[i].first();
    if (r_1->is_stack()) {
      int st_off = r_1->reg2stack() * wordSize;
      __ movptr (Address(rsp, st_off), r[i]);
    } else {
      assert(r[i] == args[i].first()->as_Register(), "Wrong register for arg ");
    }
  }

  ce->align_call(lir_static_call);

  ce->emit_static_call_stub();
  if (ce->compilation()->bailed_out()) {
    return; // CodeCache is full
  }
  AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(),
                         relocInfo::static_call_type);
  __ call(resolve);
  ce->add_call_info_here(info());

#ifndef PRODUCT
  __ incrementl(ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt));
#endif

  __ jmp(_continuation);
}
// These stubs are used by the compiler only.
// Argument registers, which must be preserved:
//   rcx - receiver (always first argument)
//   rdx - second argument (if any)
// Other registers that might be usable:
//   rax - inline cache register (is interface for itable stub)
//   rbx - method (used when calling out to interpreter)
// Available now, but may become callee-save at some point:
//   rsi, rdi
// Note that rax and rdx are also used for return values.
//
VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
  const int i486_code_length = VtableStub::pd_code_size_limit(true);
  VtableStub* s = new(i486_code_length) VtableStub(true, vtable_index);
  // Can be NULL if there is no free space in the code cache.
  if (s == NULL) {
    return NULL;
  }

  ResourceMark rm;
  CodeBuffer cb(s->entry_point(), i486_code_length);
  MacroAssembler* masm = new MacroAssembler(&cb);

#ifndef PRODUCT

  if (CountCompiledCalls) {
    __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
  }
#endif /* PRODUCT */

  // get receiver (need to skip return address on top of stack)
  assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx");

  // get receiver klass
  address npe_addr = __ pc();
  __ movptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));

#ifndef PRODUCT
  if (DebugVtables) {
    Label L;
    // check offset vs vtable length
    __ cmpl(Address(rax, InstanceKlass::vtable_length_offset()*wordSize), vtable_index*vtableEntry::size());
    __ jcc(Assembler::greater, L);
    __ movl(rbx, vtable_index);
    __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), rcx, rbx);
    __ bind(L);
  }
#endif // PRODUCT

  const Register method = rbx;

  // load Method* and target address
  __ lookup_virtual_method(rax, vtable_index, method);

  if (DebugVtables) {
    Label L;
    __ cmpptr(method, (int32_t)NULL_WORD);
    __ jcc(Assembler::equal, L);
    __ cmpptr(Address(method, Method::from_compiled_offset()), (int32_t)NULL_WORD);
    __ jcc(Assembler::notZero, L);
    __ stop("Vtable entry is NULL");
    __ bind(L);
  }

  // rax,: receiver klass
  // method (rbx): Method*
  // rcx: receiver
  address ame_addr = __ pc();
  __ jmp( Address(method, Method::from_compiled_offset()));

  masm->flush();

  if (PrintMiscellaneous && (WizardMode || Verbose)) {
    tty->print_cr("vtable #%d at "PTR_FORMAT"[%d] left over: %d",
                  vtable_index, s->entry_point(),
                  (int)(s->code_end() - s->entry_point()),
                  (int)(s->code_end() - __ pc()));
  }
  guarantee(__ pc() <= s->code_end(), "overflowed buffer");
  // shut the door on sizing bugs
  int slop = 3;  // 32-bit offset is this much larger than an 8-bit one
  assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset");

  s->set_exception_points(npe_addr, ame_addr);
  return s;
}
VtableStub* VtableStubs::create_itable_stub(int itable_index) {
  // Note well: pd_code_size_limit is the absolute minimum we can get away with.  If you
  //            add code here, bump the code stub size returned by pd_code_size_limit!
  const int i486_code_length = VtableStub::pd_code_size_limit(false);
  VtableStub* s = new(i486_code_length) VtableStub(false, itable_index);
  // Can be NULL if there is no free space in the code cache.
  if (s == NULL) {
    return NULL;
  }

  ResourceMark rm;
  CodeBuffer cb(s->entry_point(), i486_code_length);
  MacroAssembler* masm = new MacroAssembler(&cb);

  // Entry arguments:
  //  rax,: Interface
  //  rcx: Receiver

#ifndef PRODUCT
  if (CountCompiledCalls) {
    __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
  }
#endif /* PRODUCT */
  // get receiver (need to skip return address on top of stack)

  assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx");

  // get receiver klass (also an implicit null-check)
  address npe_addr = __ pc();
  __ movptr(rsi, Address(rcx, oopDesc::klass_offset_in_bytes()));

  // Most registers are in use; we'll use rax, rbx, rsi, rdi
  // (If we need to make rsi, rdi callee-save, do a push/pop here.)
  const Register method = rbx;
  Label throw_icce;

  // Get Method* and entrypoint for compiler
  __ lookup_interface_method(// inputs: rec. class, interface, itable index
                             rsi, rax, itable_index,
                             // outputs: method, scan temp. reg
                             method, rdi,
                             throw_icce);

  // method (rbx): Method*
  // rcx: receiver

#ifdef ASSERT
  if (DebugVtables) {
      Label L1;
      __ cmpptr(method, (int32_t)NULL_WORD);
      __ jcc(Assembler::equal, L1);
      __ cmpptr(Address(method, Method::from_compiled_offset()), (int32_t)NULL_WORD);
      __ jcc(Assembler::notZero, L1);
      __ stop("Method* is null");
      __ bind(L1);
    }
#endif // ASSERT

  address ame_addr = __ pc();
  __ jmp(Address(method, Method::from_compiled_offset()));

  __ bind(throw_icce);
  __ jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry()));
  masm->flush();

  if (PrintMiscellaneous && (WizardMode || Verbose)) {
    tty->print_cr("itable #%d at "PTR_FORMAT"[%d] left over: %d",
                  itable_index, s->entry_point(),
                  (int)(s->code_end() - s->entry_point()),
                  (int)(s->code_end() - __ pc()));
  }
  guarantee(__ pc() <= s->code_end(), "overflowed buffer");
  // shut the door on sizing bugs
  int slop = 3;  // 32-bit offset is this much larger than an 8-bit one
  assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset");

  s->set_exception_points(npe_addr, ame_addr);
  return s;
}
VtableStub* VtableStubs::create_itable_stub(int itable_index) {
  // Note well: pd_code_size_limit is the absolute minimum we can get
  // away with.  If you add code here, bump the code stub size
  // returned by pd_code_size_limit!
  const int amd64_code_length = VtableStub::pd_code_size_limit(false);
  VtableStub* s = new(amd64_code_length) VtableStub(false, itable_index);
  ResourceMark rm;
  CodeBuffer cb(s->entry_point(), amd64_code_length);
  MacroAssembler* masm = new MacroAssembler(&cb);

#ifndef PRODUCT
  if (CountCompiledCalls) {
    __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
  }
#endif

  // Entry arguments:
  //  rax: Interface
  //  j_rarg0: Receiver

  // Free registers (non-args) are rax (interface), rbx

  // get receiver (need to skip return address on top of stack)

  assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0");
  // get receiver klass (also an implicit null-check)
  address npe_addr = __ pc();

  // Most registers are in use; we'll use rax, rbx, r10, r11
  // (various calling sequences use r[cd]x, r[sd]i, r[89]; stay away from them)
  __ load_klass(r10, j_rarg0);

  // If we take a trap while this arg is on the stack we will not
  // be able to walk the stack properly. This is not an issue except
  // when there are mistakes in this assembly code that could generate
  // a spurious fault. Ask me how I know...

  const Register method = rbx;
  Label throw_icce;

  // Get methodOop and entrypoint for compiler
  __ lookup_interface_method(// inputs: rec. class, interface, itable index
                             r10, rax, itable_index,
                             // outputs: method, scan temp. reg
                             method, r11,
                             throw_icce);

  // method (rbx): methodOop
  // j_rarg0: receiver

#ifdef ASSERT
  if (DebugVtables) {
    Label L2;
    __ cmpptr(method, (int32_t)NULL_WORD);
    __ jcc(Assembler::equal, L2);
    __ cmpptr(Address(method, methodOopDesc::from_compiled_offset()), (int32_t)NULL_WORD);
    __ jcc(Assembler::notZero, L2);
    __ stop("compiler entrypoint is null");
    __ bind(L2);
  }
#endif // ASSERT

  // rbx: methodOop
  // j_rarg0: receiver
  address ame_addr = __ pc();
  __ jmp(Address(method, methodOopDesc::from_compiled_offset()));

  __ bind(throw_icce);
  __ jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry()));

  __ flush();

  if (PrintMiscellaneous && (WizardMode || Verbose)) {
    tty->print_cr("itable #%d at "PTR_FORMAT"[%d] left over: %d",
                  itable_index, s->entry_point(),
                  (int)(s->code_end() - s->entry_point()),
                  (int)(s->code_end() - __ pc()));
  }
  guarantee(__ pc() <= s->code_end(), "overflowed buffer");
  // shut the door on sizing bugs
  int slop = 3;  // 32-bit offset is this much larger than an 8-bit one
  assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset");

  s->set_exception_points(npe_addr, ame_addr);
  return s;
}