Пример #1
0
void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
    // At this point we know that marking is in progress.
    // If do_load() is true then we have to emit the
    // load of the previous value; otherwise it has already
    // been loaded into _pre_val.

    __ bind(_entry);

    assert(pre_val()->is_register(), "Precondition.");
    Register pre_val_reg = pre_val()->as_register();

    if (do_load()) {
        ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/);
    }

    if (__ is_in_wdisp16_range(_continuation)) {
        __ br_null(pre_val_reg, /*annul*/false, Assembler::pt, _continuation);
    } else {
        __ cmp(pre_val_reg, G0);
        __ brx(Assembler::equal, false, Assembler::pn, _continuation);
    }
    __ delayed()->nop();

    __ call(Runtime1::entry_for(Runtime1::Runtime1::g1_pre_barrier_slow_id));
    __ delayed()->mov(pre_val_reg, G4);
    __ br(Assembler::always, false, Assembler::pt, _continuation);
    __ delayed()->nop();

}
Пример #2
0
void C1_MacroAssembler::unlock_object(Register Rmark, Register Roop, Register Rbox, Label& slow_case) {
    assert_different_registers(Rmark, Roop, Rbox);

    Label done;

    Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
    assert(mark_addr.disp() == 0, "cas must take a zero displacement");

    if (UseBiasedLocking) {
        // load the object out of the BasicObjectLock
        ld_ptr(Rbox, BasicObjectLock::obj_offset_in_bytes(), Roop);
        verify_oop(Roop);
        biased_locking_exit(mark_addr, Rmark, done);
    }
    // Test first it it is a fast recursive unlock
    ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark);
    br_null_short(Rmark, Assembler::pt, done);
    if (!UseBiasedLocking) {
        // load object
        ld_ptr(Rbox, BasicObjectLock::obj_offset_in_bytes(), Roop);
        verify_oop(Roop);
    }

    // Check if it is still a light weight lock, this is is true if we see
    // the stack address of the basicLock in the markOop of the object
    cas_ptr(mark_addr.base(), Rbox, Rmark);
    cmp(Rbox, Rmark);

    brx(Assembler::notEqual, false, Assembler::pn, slow_case);
    delayed()->nop();
    // Done
    bind(done);
}
void InterpreterRuntime::SignatureHandlerGenerator::pass_object() {
  Argument  jni_arg(jni_offset(), false);
  Argument java_arg(    offset(), true);
  Register    Rtmp1 = O0;
  Register    Rtmp2 =  jni_arg.is_register() ?  jni_arg.as_register() : O0;
  Register    Rtmp3 =  G3_scratch;

  // the handle for a receiver will never be null
  bool do_NULL_check = offset() != 0 || is_static();

  Address     h_arg = Address(Llocals, Interpreter::local_offset_in_bytes(offset()));
  __ ld_ptr(h_arg, Rtmp1);
  if (!do_NULL_check) {
    __ add(h_arg.base(), h_arg.disp(), Rtmp2);
  } else {
    if (Rtmp1 == Rtmp2)
          __ tst(Rtmp1);
    else  __ addcc(G0, Rtmp1, Rtmp2); // optimize mov/test pair
    Label L;
    __ brx(Assembler::notZero, true, Assembler::pt, L);
    __ delayed()->add(h_arg.base(), h_arg.disp(), Rtmp2);
    __ bind(L);
  }
  __ store_ptr_argument(Rtmp2, jni_arg);    // this is often a no-op
}
Пример #4
0
OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
  // make a frame and preserve the caller's caller-save registers
  OopMap* oop_map = save_live_registers(sasm);

  // call the runtime patching routine, returns non-zero if nmethod got deopted.
  int call_offset = __ call_RT(noreg, noreg, target);
  OopMapSet* oop_maps = new OopMapSet();
  oop_maps->add_gc_map(call_offset, oop_map);

  // re-execute the patched instruction or, if the nmethod was deoptmized, return to the
  // deoptimization handler entry that will cause re-execution of the current bytecode
  DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
  assert(deopt_blob != NULL, "deoptimization blob must have been created");

  Label no_deopt;
  __ tst(O0);
  __ brx(Assembler::equal, false, Assembler::pt, no_deopt);
  __ delayed()->nop();

  // return to the deoptimization handler entry for unpacking and rexecute
  // if we simply returned the we'd deopt as if any call we patched had just
  // returned.

  restore_live_registers(sasm);
  __ restore();
  __ br(Assembler::always, false, Assembler::pt, deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type);
  __ delayed()->nop();

  __ bind(no_deopt);
  restore_live_registers(sasm);
  __ ret();
  __ delayed()->restore();

  return oop_maps;
}
Пример #5
0
void InterpreterRuntime::SignatureHandlerGenerator::pass_long() {
  Argument  jni_arg(jni_offset(), false);
  Register  Rtmp = O0;

#ifdef ASSERT
  if (TaggedStackInterpreter) {
    // check at least one tag is okay
    Label ok;
    __ ld_ptr(Llocals, Interpreter::local_tag_offset_in_bytes(offset() + 1), Rtmp);
    __ cmp(Rtmp, G0);
    __ brx(Assembler::equal, false, Assembler::pt, ok);
    __ delayed()->nop();
    __ stop("Native object has bad tag value");
    __ bind(ok);
  }
#endif // ASSERT

#ifdef _LP64
  __ ldx(Llocals, Interpreter::local_offset_in_bytes(offset() + 1), Rtmp);
  __ store_long_argument(Rtmp, jni_arg);
#else
  __ ld(Llocals, Interpreter::local_offset_in_bytes(offset() + 1), Rtmp);
  __ store_argument(Rtmp, jni_arg);
  __ ld(Llocals, Interpreter::local_offset_in_bytes(offset() + 0), Rtmp);
  Argument successor(jni_arg.successor());
  __ store_argument(Rtmp, successor);
#endif
}
Пример #6
0
void C1_MacroAssembler::initialize_body(Register base, Register index) {
    assert_different_registers(base, index);
    Label loop;
    bind(loop);
    subcc(index, HeapWordSize, index);
    brx(Assembler::greaterEqual, true, Assembler::pt, loop);
    delayed()->st_ptr(G0, base, index);
}
Пример #7
0
void C1_MacroAssembler::lock_object(Register Rmark, Register Roop, Register Rbox, Register Rscratch, Label& slow_case) {
    assert_different_registers(Rmark, Roop, Rbox, Rscratch);

    Label done;

    Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());

    // The following move must be the first instruction of emitted since debug
    // information may be generated for it.
    // Load object header
    ld_ptr(mark_addr, Rmark);

    verify_oop(Roop);

    // save object being locked into the BasicObjectLock
    st_ptr(Roop, Rbox, BasicObjectLock::obj_offset_in_bytes());

    if (UseBiasedLocking) {
        biased_locking_enter(Roop, Rmark, Rscratch, done, &slow_case);
    }

    // Save Rbox in Rscratch to be used for the cas operation
    mov(Rbox, Rscratch);

    // and mark it unlocked
    or3(Rmark, markOopDesc::unlocked_value, Rmark);

    // save unlocked object header into the displaced header location on the stack
    st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());

    // compare object markOop with Rmark and if equal exchange Rscratch with object markOop
    assert(mark_addr.disp() == 0, "cas must take a zero displacement");
    cas_ptr(mark_addr.base(), Rmark, Rscratch);
    // if compare/exchange succeeded we found an unlocked object and we now have locked it
    // hence we are done
    cmp(Rmark, Rscratch);
    brx(Assembler::equal, false, Assembler::pt, done);
    delayed()->sub(Rscratch, SP, Rscratch);  //pull next instruction into delay slot
    // we did not find an unlocked object so see if this is a recursive case
    // sub(Rscratch, SP, Rscratch);
    assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
    andcc(Rscratch, 0xfffff003, Rscratch);
    brx(Assembler::notZero, false, Assembler::pn, slow_case);
    delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
    bind(done);
}
Пример #8
0
static void verify_argslot(MacroAssembler* _masm, Register argslot_reg, Register temp_reg, const char* error_message) {
  // Verify that argslot lies within (Gargs, FP].
  Label L_ok, L_bad;
#ifdef _LP64
  __ add(FP, STACK_BIAS, temp_reg);
  __ cmp(argslot_reg, temp_reg);
#else
  __ cmp(argslot_reg, FP);
#endif
  __ brx(Assembler::greaterUnsigned, false, Assembler::pn, L_bad);
  __ delayed()->nop();
  __ cmp(Gargs, argslot_reg);
  __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, L_ok);
  __ delayed()->nop();
  __ bind(L_bad);
  __ stop(error_message);
  __ bind(L_ok);
}
Пример #9
0
void InterpreterRuntime::SignatureHandlerGenerator::pass_object() {
  Argument  jni_arg(jni_offset(), false);
  Argument java_arg(    offset(), true);
  Register    Rtmp1 = O0;
  Register    Rtmp2 =  jni_arg.is_register() ?  jni_arg.as_register() : O0;
  Register    Rtmp3 =  G3_scratch;

  // the handle for a receiver will never be null
  bool do_NULL_check = offset() != 0 || is_static();

  Address     h_arg = Address(Llocals, Interpreter::local_offset_in_bytes(offset()));
  __ ld_ptr(h_arg, Rtmp1);
#ifdef ASSERT
  if (TaggedStackInterpreter) {
    // check we have the obj and not the tag
    Label ok;
    __ mov(frame::TagReference, Rtmp3);
    __ cmp(Rtmp1, Rtmp3);
    __ brx(Assembler::notEqual, true, Assembler::pt, ok);
    __ delayed()->nop();
    __ stop("Native object passed tag by mistake");
    __ bind(ok);
  }
#endif // ASSERT
  if (!do_NULL_check) {
    __ add(h_arg.base(), h_arg.disp(), Rtmp2);
  } else {
    if (Rtmp1 == Rtmp2)
          __ tst(Rtmp1);
    else  __ addcc(G0, Rtmp1, Rtmp2); // optimize mov/test pair
    Label L;
    __ brx(Assembler::notZero, true, Assembler::pt, L);
    __ delayed()->add(h_arg.base(), h_arg.disp(), Rtmp2);
    __ bind(L);
  }
  __ store_ptr_argument(Rtmp2, jni_arg);    // this is often a no-op
}
Пример #10
0
void G1PostBarrierStub::emit_code(LIR_Assembler* ce) {
    __ bind(_entry);

    assert(addr()->is_register(), "Precondition.");
    assert(new_val()->is_register(), "Precondition.");
    Register addr_reg = addr()->as_pointer_register();
    Register new_val_reg = new_val()->as_register();

    if (__ is_in_wdisp16_range(_continuation)) {
        __ br_null(new_val_reg, /*annul*/false, Assembler::pt, _continuation);
    } else {
        __ cmp(new_val_reg, G0);
        __ brx(Assembler::equal, false, Assembler::pn, _continuation);
    }
    __ delayed()->nop();

    __ call(Runtime1::entry_for(Runtime1::Runtime1::g1_post_barrier_slow_id));
    __ delayed()->mov(addr_reg, G4);
    __ br(Assembler::always, false, Assembler::pt, _continuation);
    __ delayed()->nop();
}
Пример #11
0
void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
  __ bind(_entry);

  assert(pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = pre_val()->as_register();

  ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/);
  if (__ is_in_wdisp16_range(_continuation)) {
    __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
                      pre_val_reg, _continuation);
  } else {
    __ cmp(pre_val_reg, G0);
    __ brx(Assembler::equal, false, Assembler::pn, _continuation);
  }
  __ delayed()->nop();

  __ call(Runtime1::entry_for(Runtime1::Runtime1::g1_pre_barrier_slow_id));
  __ delayed()->mov(pre_val_reg, G4);
  __ br(Assembler::always, false, Assembler::pt, _continuation);
  __ delayed()->nop();

}
Пример #12
0
void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) {
    // At this point we know that offset == referent_offset.
    //
    // So we might have to emit:
    //   if (src == null) goto continuation.
    //
    // and we definitely have to emit:
    //   if (klass(src).reference_type == REF_NONE) goto continuation
    //   if (!marking_active) goto continuation
    //   if (pre_val == null) goto continuation
    //   call pre_barrier(pre_val)
    //   goto continuation
    //
    __ bind(_entry);

    assert(src()->is_register(), "sanity");
    Register src_reg = src()->as_register();

    if (gen_src_check()) {
        // The original src operand was not a constant.
        // Generate src == null?
        if (__ is_in_wdisp16_range(_continuation)) {
            __ br_null(src_reg, /*annul*/false, Assembler::pt, _continuation);
        } else {
            __ cmp(src_reg, G0);
            __ brx(Assembler::equal, false, Assembler::pt, _continuation);
        }
        __ delayed()->nop();
    }

    // Generate src->_klass->_reference_type() == REF_NONE)?
    assert(tmp()->is_register(), "sanity");
    Register tmp_reg = tmp()->as_register();

    __ load_klass(src_reg, tmp_reg);

    Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset());
    __ ldub(ref_type_adr, tmp_reg);

    // _reference_type field is of type ReferenceType (enum)
    assert(REF_NONE == 0, "check this code");
    __ cmp_zero_and_br(Assembler::equal, tmp_reg, _continuation, /*annul*/false, Assembler::pt);
    __ delayed()->nop();

    // Is marking active?
    assert(thread()->is_register(), "precondition");
    Register thread_reg = thread()->as_pointer_register();

    Address in_progress(thread_reg, in_bytes(JavaThread::satb_mark_queue_offset() +
                        PtrQueue::byte_offset_of_active()));

    if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
        __ ld(in_progress, tmp_reg);
    } else {
        assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
        __ ldsb(in_progress, tmp_reg);
    }

    __ cmp_zero_and_br(Assembler::equal, tmp_reg, _continuation, /*annul*/false, Assembler::pt);
    __ delayed()->nop();

    // val == null?
    assert(val()->is_register(), "Precondition.");
    Register val_reg = val()->as_register();

    if (__ is_in_wdisp16_range(_continuation)) {
        __ br_null(val_reg, /*annul*/false, Assembler::pt, _continuation);
    } else {
        __ cmp(val_reg, G0);
        __ brx(Assembler::equal, false, Assembler::pt, _continuation);
    }
    __ delayed()->nop();

    __ call(Runtime1::entry_for(Runtime1::Runtime1::g1_pre_barrier_slow_id));
    __ delayed()->mov(val_reg, G4);
    __ br(Assembler::always, false, Assembler::pt, _continuation);
    __ delayed()->nop();
}
Пример #13
0
// Helper to insert argument slots into the stack.
// arg_slots must be a multiple of stack_move_unit() and <= 0
void MethodHandles::insert_arg_slots(MacroAssembler* _masm,
                                     RegisterOrConstant arg_slots,
                                     int arg_mask,
                                     Register argslot_reg,
                                     Register temp_reg, Register temp2_reg, Register temp3_reg) {
  assert(temp3_reg != noreg, "temp3 required");
  assert_different_registers(argslot_reg, temp_reg, temp2_reg, temp3_reg,
                             (!arg_slots.is_register() ? Gargs : arg_slots.as_register()));

#ifdef ASSERT
  verify_argslot(_masm, argslot_reg, temp_reg, "insertion point must fall within current frame");
  if (arg_slots.is_register()) {
    Label L_ok, L_bad;
    __ cmp(arg_slots.as_register(), (int32_t) NULL_WORD);
    __ br(Assembler::greater, false, Assembler::pn, L_bad);
    __ delayed()->nop();
    __ btst(-stack_move_unit() - 1, arg_slots.as_register());
    __ br(Assembler::zero, false, Assembler::pt, L_ok);
    __ delayed()->nop();
    __ bind(L_bad);
    __ stop("assert arg_slots <= 0 and clear low bits");
    __ bind(L_ok);
  } else {
    assert(arg_slots.as_constant() <= 0, "");
    assert(arg_slots.as_constant() % -stack_move_unit() == 0, "");
  }
#endif // ASSERT

#ifdef _LP64
  if (arg_slots.is_register()) {
    // Was arg_slots register loaded as signed int?
    Label L_ok;
    __ sll(arg_slots.as_register(), BitsPerInt, temp_reg);
    __ sra(temp_reg, BitsPerInt, temp_reg);
    __ cmp(arg_slots.as_register(), temp_reg);
    __ br(Assembler::equal, false, Assembler::pt, L_ok);
    __ delayed()->nop();
    __ stop("arg_slots register not loaded as signed int");
    __ bind(L_ok);
  }
#endif

  // Make space on the stack for the inserted argument(s).
  // Then pull down everything shallower than argslot_reg.
  // The stacked return address gets pulled down with everything else.
  // That is, copy [sp, argslot) downward by -size words.  In pseudo-code:
  //   sp -= size;
  //   for (temp = sp + size; temp < argslot; temp++)
  //     temp[-size] = temp[0]
  //   argslot -= size;
  RegisterOrConstant offset = __ regcon_sll_ptr(arg_slots, LogBytesPerWord, temp3_reg);

  // Keep the stack pointer 2*wordSize aligned.
  const int TwoWordAlignmentMask = right_n_bits(LogBytesPerWord + 1);
  RegisterOrConstant masked_offset = __ regcon_andn_ptr(offset, TwoWordAlignmentMask, temp_reg);
  __ add(SP, masked_offset, SP);

  __ mov(Gargs, temp_reg);  // source pointer for copy
  __ add(Gargs, offset, Gargs);

  {
    Label loop;
    __ bind(loop);
    // pull one word down each time through the loop
    __ ld_ptr(Address(temp_reg, 0), temp2_reg);
    __ st_ptr(temp2_reg, Address(temp_reg, offset));
    __ add(temp_reg, wordSize, temp_reg);
    __ cmp(temp_reg, argslot_reg);
    __ brx(Assembler::less, false, Assembler::pt, loop);
    __ delayed()->nop();  // FILLME
  }

  // Now move the argslot down, to point to the opened-up space.
  __ add(argslot_reg, offset, argslot_reg);
}
inline void MacroAssembler::brx( Condition c, bool a, Predict p, Label& L ) {
  insert_nop_after_cbcond();
  brx(c, a, p, target(L));
}
Пример #15
0
// LP64 passes floating point arguments in F1, F3, F5, etc. instead of
// O0, O1, O2 etc..
// Doubles are passed in D0, D2, D4
// We store the signature of the first 16 arguments in the first argument
// slot because it will be overwritten prior to calling the native
// function, with the pointer to the JNIEnv.
// If LP64 there can be up to 16 floating point arguments in registers
// or 6 integer registers.
address AbstractInterpreterGenerator::generate_slow_signature_handler() {

  enum {
    non_float  = 0,
    float_sig  = 1,
    double_sig = 2,
    sig_mask   = 3
  };

  address entry = __ pc();
  Argument argv(0, true);

  // We are in the jni transition frame. Save the last_java_frame corresponding to the
  // outer interpreter frame
  //
  __ set_last_Java_frame(FP, noreg);
  // make sure the interpreter frame we've pushed has a valid return pc
  __ mov(O7, I7);
  __ mov(Lmethod, G3_scratch);
  __ mov(Llocals, G4_scratch);
  __ save_frame(0);
  __ mov(G2_thread, L7_thread_cache);
  __ add(argv.address_in_frame(), O3);
  __ mov(G2_thread, O0);
  __ mov(G3_scratch, O1);
  __ call(CAST_FROM_FN_PTR(address, InterpreterRuntime::slow_signature_handler), relocInfo::runtime_call_type);
  __ delayed()->mov(G4_scratch, O2);
  __ mov(L7_thread_cache, G2_thread);
  __ reset_last_Java_frame();


  // load the register arguments (the C code packed them as varargs)
  Address Sig = argv.address_in_frame();        // Argument 0 holds the signature
  __ ld_ptr( Sig, G3_scratch );                   // Get register argument signature word into G3_scratch
  __ mov( G3_scratch, G4_scratch);
  __ srl( G4_scratch, 2, G4_scratch);             // Skip Arg 0
  Label done;
  for (Argument ldarg = argv.successor(); ldarg.is_float_register(); ldarg = ldarg.successor()) {
    Label NonFloatArg;
    Label LoadFloatArg;
    Label LoadDoubleArg;
    Label NextArg;
    Address a = ldarg.address_in_frame();
    __ andcc(G4_scratch, sig_mask, G3_scratch);
    __ br(Assembler::zero, false, Assembler::pt, NonFloatArg);
    __ delayed()->nop();

    __ cmp(G3_scratch, float_sig );
    __ br(Assembler::equal, false, Assembler::pt, LoadFloatArg);
    __ delayed()->nop();

    __ cmp(G3_scratch, double_sig );
    __ br(Assembler::equal, false, Assembler::pt, LoadDoubleArg);
    __ delayed()->nop();

    __ bind(NonFloatArg);
    // There are only 6 integer register arguments!
    if ( ldarg.is_register() )
      __ ld_ptr(ldarg.address_in_frame(), ldarg.as_register());
    else {
    // Optimization, see if there are any more args and get out prior to checking
    // all 16 float registers.  My guess is that this is rare.
    // If is_register is false, then we are done the first six integer args.
      __ tst(G4_scratch);
      __ brx(Assembler::zero, false, Assembler::pt, done);
      __ delayed()->nop();

    }
    __ ba(false, NextArg);
    __ delayed()->srl( G4_scratch, 2, G4_scratch );

    __ bind(LoadFloatArg);
    __ ldf( FloatRegisterImpl::S, a, ldarg.as_float_register(), 4);
    __ ba(false, NextArg);
    __ delayed()->srl( G4_scratch, 2, G4_scratch );

    __ bind(LoadDoubleArg);
    __ ldf( FloatRegisterImpl::D, a, ldarg.as_double_register() );
    __ ba(false, NextArg);
    __ delayed()->srl( G4_scratch, 2, G4_scratch );

    __ bind(NextArg);

  }

  __ bind(done);
  __ ret();
  __ delayed()->
     restore(O0, 0, Lscratch);  // caller's Lscratch gets the result handler
  return entry;
}
Пример #16
0
// Helper to remove argument slots from the stack.
// arg_slots must be a multiple of stack_move_unit() and >= 0
void MethodHandles::remove_arg_slots(MacroAssembler* _masm,
                                     RegisterOrConstant arg_slots,
                                     Register argslot_reg,
                                     Register temp_reg, Register temp2_reg, Register temp3_reg) {
  assert(temp3_reg != noreg, "temp3 required");
  assert_different_registers(argslot_reg, temp_reg, temp2_reg, temp3_reg,
                             (!arg_slots.is_register() ? Gargs : arg_slots.as_register()));

  RegisterOrConstant offset = __ regcon_sll_ptr(arg_slots, LogBytesPerWord, temp3_reg);

#ifdef ASSERT
  // Verify that [argslot..argslot+size) lies within (Gargs, FP).
  __ add(argslot_reg, offset, temp2_reg);
  verify_argslot(_masm, temp2_reg, temp_reg, "deleted argument(s) must fall within current frame");
  if (arg_slots.is_register()) {
    Label L_ok, L_bad;
    __ cmp(arg_slots.as_register(), (int32_t) NULL_WORD);
    __ br(Assembler::less, false, Assembler::pn, L_bad);
    __ delayed()->nop();
    __ btst(-stack_move_unit() - 1, arg_slots.as_register());
    __ br(Assembler::zero, false, Assembler::pt, L_ok);
    __ delayed()->nop();
    __ bind(L_bad);
    __ stop("assert arg_slots >= 0 and clear low bits");
    __ bind(L_ok);
  } else {
    assert(arg_slots.as_constant() >= 0, "");
    assert(arg_slots.as_constant() % -stack_move_unit() == 0, "");
  }
#endif // ASSERT

  // Pull up everything shallower than argslot.
  // Then remove the excess space on the stack.
  // The stacked return address gets pulled up with everything else.
  // That is, copy [sp, argslot) upward by size words.  In pseudo-code:
  //   for (temp = argslot-1; temp >= sp; --temp)
  //     temp[size] = temp[0]
  //   argslot += size;
  //   sp += size;
  __ sub(argslot_reg, wordSize, temp_reg);  // source pointer for copy
  {
    Label loop;
    __ bind(loop);
    // pull one word up each time through the loop
    __ ld_ptr(Address(temp_reg, 0), temp2_reg);
    __ st_ptr(temp2_reg, Address(temp_reg, offset));
    __ sub(temp_reg, wordSize, temp_reg);
    __ cmp(temp_reg, Gargs);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, loop);
    __ delayed()->nop();  // FILLME
  }

  // Now move the argslot up, to point to the just-copied block.
  __ add(Gargs, offset, Gargs);
  // And adjust the argslot address to point at the deletion point.
  __ add(argslot_reg, offset, argslot_reg);

  // Keep the stack pointer 2*wordSize aligned.
  const int TwoWordAlignmentMask = right_n_bits(LogBytesPerWord + 1);
  RegisterOrConstant masked_offset = __ regcon_andn_ptr(offset, TwoWordAlignmentMask, temp_reg);
  __ add(SP, masked_offset, SP);
}
Пример #17
0
void CompactingPermGenGen::generate_vtable_methods(void** vtbl_list,
                                                   void** vtable,
                                                   char** md_top,
                                                   char* md_end,
                                                   char** mc_top,
                                                   char* mc_end) {

  intptr_t vtable_bytes = (num_virtuals * vtbl_list_size) * sizeof(void*);
  *(intptr_t *)(*md_top) = vtable_bytes;
  *md_top += sizeof(intptr_t);
  void** dummy_vtable = (void**)*md_top;
  *vtable = dummy_vtable;
  *md_top += vtable_bytes;

  guarantee(*md_top <= md_end, "Insufficient space for vtables.");

  // Get ready to generate dummy methods.

  CodeBuffer cb((unsigned char*)*mc_top, mc_end - *mc_top);
  MacroAssembler* masm = new MacroAssembler(&cb);

  Label common_code;
  for (int i = 0; i < vtbl_list_size; ++i) {
    for (int j = 0; j < num_virtuals; ++j) {
      dummy_vtable[num_virtuals * i + j] = (void*)masm->pc();
      __ save(SP, -256, SP);
      __ brx(Assembler::always, false, Assembler::pt, common_code);

      // Load L0 with a value indicating vtable/offset pair.
      // -- bits[ 7..0]  (8 bits) which virtual method in table?
      // -- bits[12..8]  (5 bits) which virtual method table?
      // -- must fit in 13-bit instruction immediate field.
      __ delayed()->set((i << 8) + j, L0);
    }
  }

  __ bind(common_code);

  // Expecting to be called with the "this" pointer in O0/I0 (where
  // "this" is a Klass object).  In addition, L0 was set (above) to
  // identify the method and table.

  // Look up the correct vtable pointer.

  __ set((intptr_t)vtbl_list, L2);      // L2 = address of new vtable list.
  __ srl(L0, 8, L3);                    // Isolate L3 = vtable identifier.
  __ sll(L3, LogBytesPerWord, L3);
  __ ld_ptr(L2, L3, L3);                // L3 = new (correct) vtable pointer.
  __ st_ptr(L3, Address(I0, 0));        // Save correct vtable ptr in entry.

  // Restore registers and jump to the correct method;

  __ and3(L0, 255, L4);                 // Isolate L3 = method offset;.
  __ sll(L4, LogBytesPerWord, L4);
  __ ld_ptr(L3, L4, L4);                // Get address of correct virtual method
  __ jmpl(L4, 0, G0);                   // Jump to correct method.
  __ delayed()->restore();              // Restore registers.

  __ flush();
  *mc_top = (char*)__ pc();

  guarantee(*mc_top <= mc_end, "Insufficient space for method wrappers.");
}
Пример #18
0
OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {

  OopMapSet* oop_maps = NULL;
  // for better readability
  const bool must_gc_arguments = true;
  const bool dont_gc_arguments = false;

  // stub code & info for the different stubs
  switch (id) {
    case forward_exception_id:
      {
        // we're handling an exception in the context of a compiled
        // frame.  The registers have been saved in the standard
        // places.  Perform an exception lookup in the caller and
        // dispatch to the handler if found.  Otherwise unwind and
        // dispatch to the callers exception handler.

        oop_maps = new OopMapSet();
        OopMap* oop_map = generate_oop_map(sasm, true);

        // transfer the pending exception to the exception_oop
        __ ld_ptr(G2_thread, in_bytes(JavaThread::pending_exception_offset()), Oexception);
        __ ld_ptr(Oexception, 0, G0);
        __ st_ptr(G0, G2_thread, in_bytes(JavaThread::pending_exception_offset()));
        __ add(I7, frame::pc_return_offset, Oissuing_pc);

        generate_handle_exception(sasm, oop_maps, oop_map);
        __ should_not_reach_here();
      }
      break;

    case new_instance_id:
    case fast_new_instance_id:
    case fast_new_instance_init_check_id:
      {
        Register G5_klass = G5; // Incoming
        Register O0_obj   = O0; // Outgoing

        if (id == new_instance_id) {
          __ set_info("new_instance", dont_gc_arguments);
        } else if (id == fast_new_instance_id) {
          __ set_info("fast new_instance", dont_gc_arguments);
        } else {
          assert(id == fast_new_instance_init_check_id, "bad StubID");
          __ set_info("fast new_instance init check", dont_gc_arguments);
        }

        if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) &&
            UseTLAB && FastTLABRefill) {
          Label slow_path;
          Register G1_obj_size = G1;
          Register G3_t1 = G3;
          Register G4_t2 = G4;
          assert_different_registers(G5_klass, G1_obj_size, G3_t1, G4_t2);

          // Push a frame since we may do dtrace notification for the
          // allocation which requires calling out and we don't want
          // to stomp the real return address.
          __ save_frame(0);

          if (id == fast_new_instance_init_check_id) {
            // make sure the klass is initialized
            __ ld(G5_klass, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc), G3_t1);
            __ cmp(G3_t1, instanceKlass::fully_initialized);
            __ br(Assembler::notEqual, false, Assembler::pn, slow_path);
            __ delayed()->nop();
          }
#ifdef ASSERT
          // assert object can be fast path allocated
          {
            Label ok, not_ok;
          __ ld(G5_klass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc), G1_obj_size);
          __ cmp(G1_obj_size, 0);  // make sure it's an instance (LH > 0)
          __ br(Assembler::lessEqual, false, Assembler::pn, not_ok);
          __ delayed()->nop();
          __ btst(Klass::_lh_instance_slow_path_bit, G1_obj_size);
          __ br(Assembler::zero, false, Assembler::pn, ok);
          __ delayed()->nop();
          __ bind(not_ok);
          __ stop("assert(can be fast path allocated)");
          __ should_not_reach_here();
          __ bind(ok);
          }
#endif // ASSERT
          // if we got here then the TLAB allocation failed, so try
          // refilling the TLAB or allocating directly from eden.
          Label retry_tlab, try_eden;
          __ tlab_refill(retry_tlab, try_eden, slow_path); // preserves G5_klass

          __ bind(retry_tlab);

          // get the instance size
          __ ld(G5_klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), G1_obj_size);
          __ tlab_allocate(O0_obj, G1_obj_size, 0, G3_t1, slow_path);
          __ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2);
          __ verify_oop(O0_obj);
          __ mov(O0, I0);
          __ ret();
          __ delayed()->restore();

          __ bind(try_eden);
          // get the instance size
          __ ld(G5_klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), G1_obj_size);
          __ eden_allocate(O0_obj, G1_obj_size, 0, G3_t1, G4_t2, slow_path);
          __ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2);
          __ verify_oop(O0_obj);
          __ mov(O0, I0);
          __ ret();
          __ delayed()->restore();

          __ bind(slow_path);

          // pop this frame so generate_stub_call can push it's own
          __ restore();
        }

        oop_maps = generate_stub_call(sasm, I0, CAST_FROM_FN_PTR(address, new_instance), G5_klass);
        // I0->O0: new instance
      }

      break;

#ifdef TIERED
    case counter_overflow_id:
        // G4 contains bci
      oop_maps = generate_stub_call(sasm, noreg, CAST_FROM_FN_PTR(address, counter_overflow), G4);
      break;
#endif // TIERED

    case new_type_array_id:
    case new_object_array_id:
      {
        Register G5_klass = G5; // Incoming
        Register G4_length = G4; // Incoming
        Register O0_obj   = O0; // Outgoing

        Address klass_lh(G5_klass, ((klassOopDesc::header_size() * HeapWordSize)
                                    + Klass::layout_helper_offset_in_bytes()));
        assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
        assert(Klass::_lh_header_size_mask == 0xFF, "bytewise");
        // Use this offset to pick out an individual byte of the layout_helper:
        const int klass_lh_header_size_offset = ((BytesPerInt - 1)  // 3 - 2 selects byte {0,1,0,0}
                                                 - Klass::_lh_header_size_shift / BitsPerByte);

        if (id == new_type_array_id) {
          __ set_info("new_type_array", dont_gc_arguments);
        } else {
          __ set_info("new_object_array", dont_gc_arguments);
        }

#ifdef ASSERT
        // assert object type is really an array of the proper kind
        {
          Label ok;
          Register G3_t1 = G3;
          __ ld(klass_lh, G3_t1);
          __ sra(G3_t1, Klass::_lh_array_tag_shift, G3_t1);
          int tag = ((id == new_type_array_id)
                     ? Klass::_lh_array_tag_type_value
                     : Klass::_lh_array_tag_obj_value);
          __ cmp(G3_t1, tag);
          __ brx(Assembler::equal, false, Assembler::pt, ok);
          __ delayed()->nop();
          __ stop("assert(is an array klass)");
          __ should_not_reach_here();
          __ bind(ok);
        }
#endif // ASSERT

        if (UseTLAB && FastTLABRefill) {
          Label slow_path;
          Register G1_arr_size = G1;
          Register G3_t1 = G3;
          Register O1_t2 = O1;
          assert_different_registers(G5_klass, G4_length, G1_arr_size, G3_t1, O1_t2);

          // check that array length is small enough for fast path
          __ set(C1_MacroAssembler::max_array_allocation_length, G3_t1);
          __ cmp(G4_length, G3_t1);
          __ br(Assembler::greaterUnsigned, false, Assembler::pn, slow_path);
          __ delayed()->nop();

          // if we got here then the TLAB allocation failed, so try
          // refilling the TLAB or allocating directly from eden.
          Label retry_tlab, try_eden;
          __ tlab_refill(retry_tlab, try_eden, slow_path); // preserves G4_length and G5_klass

          __ bind(retry_tlab);

          // get the allocation size: (length << (layout_helper & 0x1F)) + header_size
          __ ld(klass_lh, G3_t1);
          __ sll(G4_length, G3_t1, G1_arr_size);
          __ srl(G3_t1, Klass::_lh_header_size_shift, G3_t1);
          __ and3(G3_t1, Klass::_lh_header_size_mask, G3_t1);
          __ add(G1_arr_size, G3_t1, G1_arr_size);
          __ add(G1_arr_size, MinObjAlignmentInBytesMask, G1_arr_size);  // align up
          __ and3(G1_arr_size, ~MinObjAlignmentInBytesMask, G1_arr_size);

          __ tlab_allocate(O0_obj, G1_arr_size, 0, G3_t1, slow_path);  // preserves G1_arr_size

          __ initialize_header(O0_obj, G5_klass, G4_length, G3_t1, O1_t2);
          __ ldub(klass_lh, G3_t1, klass_lh_header_size_offset);
          __ sub(G1_arr_size, G3_t1, O1_t2);  // body length
          __ add(O0_obj, G3_t1, G3_t1);       // body start
          __ initialize_body(G3_t1, O1_t2);
          __ verify_oop(O0_obj);
          __ retl();
          __ delayed()->nop();

          __ bind(try_eden);
          // get the allocation size: (length << (layout_helper & 0x1F)) + header_size
          __ ld(klass_lh, G3_t1);
          __ sll(G4_length, G3_t1, G1_arr_size);
          __ srl(G3_t1, Klass::_lh_header_size_shift, G3_t1);
          __ and3(G3_t1, Klass::_lh_header_size_mask, G3_t1);
          __ add(G1_arr_size, G3_t1, G1_arr_size);
          __ add(G1_arr_size, MinObjAlignmentInBytesMask, G1_arr_size);
          __ and3(G1_arr_size, ~MinObjAlignmentInBytesMask, G1_arr_size);

          __ eden_allocate(O0_obj, G1_arr_size, 0, G3_t1, O1_t2, slow_path);  // preserves G1_arr_size

          __ initialize_header(O0_obj, G5_klass, G4_length, G3_t1, O1_t2);
          __ ldub(klass_lh, G3_t1, klass_lh_header_size_offset);
          __ sub(G1_arr_size, G3_t1, O1_t2);  // body length
          __ add(O0_obj, G3_t1, G3_t1);       // body start
          __ initialize_body(G3_t1, O1_t2);
          __ verify_oop(O0_obj);
          __ retl();
          __ delayed()->nop();

          __ bind(slow_path);
        }

        if (id == new_type_array_id) {
          oop_maps = generate_stub_call(sasm, I0, CAST_FROM_FN_PTR(address, new_type_array), G5_klass, G4_length);
        } else {
          oop_maps = generate_stub_call(sasm, I0, CAST_FROM_FN_PTR(address, new_object_array), G5_klass, G4_length);
        }
        // I0 -> O0: new array
      }
      break;

    case new_multi_array_id:
      { // O0: klass
        // O1: rank
        // O2: address of 1st dimension
        __ set_info("new_multi_array", dont_gc_arguments);
        oop_maps = generate_stub_call(sasm, I0, CAST_FROM_FN_PTR(address, new_multi_array), I0, I1, I2);
        // I0 -> O0: new multi array
      }
      break;

    case register_finalizer_id:
      {
        __ set_info("register_finalizer", dont_gc_arguments);

        // load the klass and check the has finalizer flag
        Label register_finalizer;
        Register t = O1;
        __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), t);
        __ ld(t, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc), t);
        __ set(JVM_ACC_HAS_FINALIZER, G3);
        __ andcc(G3, t, G0);
        __ br(Assembler::notZero, false, Assembler::pt, register_finalizer);
        __ delayed()->nop();

        // do a leaf return
        __ retl();
        __ delayed()->nop();

        __ bind(register_finalizer);
        OopMap* oop_map = save_live_registers(sasm);
        int call_offset = __ call_RT(noreg, noreg,
                                     CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), I0);
        oop_maps = new OopMapSet();
        oop_maps->add_gc_map(call_offset, oop_map);

        // Now restore all the live registers
        restore_live_registers(sasm);

        __ ret();
        __ delayed()->restore();
      }
      break;

    case throw_range_check_failed_id:
      { __ set_info("range_check_failed", dont_gc_arguments); // arguments will be discarded
        // G4: index
        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true);
      }
      break;

    case throw_index_exception_id:
      { __ set_info("index_range_check_failed", dont_gc_arguments); // arguments will be discarded
        // G4: index
        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true);
      }
      break;

    case throw_div0_exception_id:
      { __ set_info("throw_div0_exception", dont_gc_arguments);
        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false);
      }
      break;

    case throw_null_pointer_exception_id:
      { __ set_info("throw_null_pointer_exception", dont_gc_arguments);
        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false);
      }
      break;

    case handle_exception_id:
      {
        __ set_info("handle_exception", dont_gc_arguments);
        // make a frame and preserve the caller's caller-save registers

        oop_maps = new OopMapSet();
        OopMap* oop_map = save_live_registers(sasm);
        __ mov(Oexception->after_save(),  Oexception);
        __ mov(Oissuing_pc->after_save(), Oissuing_pc);
        generate_handle_exception(sasm, oop_maps, oop_map);
      }
      break;

    case unwind_exception_id:
      {
        // O0: exception
        // I7: address of call to this method

        __ set_info("unwind_exception", dont_gc_arguments);
        __ mov(Oexception, Oexception->after_save());
        __ add(I7, frame::pc_return_offset, Oissuing_pc->after_save());

        __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address),
                        Oissuing_pc->after_save());
        __ verify_not_null_oop(Oexception->after_save());
        __ jmp(O0, 0);
        __ delayed()->restore();
      }
      break;

    case throw_array_store_exception_id:
      {
        __ set_info("throw_array_store_exception", dont_gc_arguments);
        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), false);
      }
      break;

    case throw_class_cast_exception_id:
      {
        // G4: object
        __ set_info("throw_class_cast_exception", dont_gc_arguments);
        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true);
      }
      break;

    case throw_incompatible_class_change_error_id:
      {
        __ set_info("throw_incompatible_class_cast_exception", dont_gc_arguments);
        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false);
      }
      break;

    case slow_subtype_check_id:
      { // Support for uint StubRoutine::partial_subtype_check( Klass sub, Klass super );
        // Arguments :
        //
        //      ret  : G3
        //      sub  : G3, argument, destroyed
        //      super: G1, argument, not changed
        //      raddr: O7, blown by call
        Label miss;

        __ save_frame(0);               // Blow no registers!

        __ check_klass_subtype_slow_path(G3, G1, L0, L1, L2, L4, NULL, &miss);

        __ mov(1, G3);
        __ ret();                       // Result in G5 is 'true'
        __ delayed()->restore();        // free copy or add can go here

        __ bind(miss);
        __ mov(0, G3);
        __ ret();                       // Result in G5 is 'false'
        __ delayed()->restore();        // free copy or add can go here
      }

    case monitorenter_nofpu_id:
    case monitorenter_id:
      { // G4: object
        // G5: lock address
        __ set_info("monitorenter", dont_gc_arguments);

        int save_fpu_registers = (id == monitorenter_id);
        // make a frame and preserve the caller's caller-save registers
        OopMap* oop_map = save_live_registers(sasm, save_fpu_registers);

        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), G4, G5);

        oop_maps = new OopMapSet();
        oop_maps->add_gc_map(call_offset, oop_map);
        restore_live_registers(sasm, save_fpu_registers);

        __ ret();
        __ delayed()->restore();
      }
      break;

    case monitorexit_nofpu_id:
    case monitorexit_id:
      { // G4: lock address
        // note: really a leaf routine but must setup last java sp
        //       => use call_RT for now (speed can be improved by
        //       doing last java sp setup manually)
        __ set_info("monitorexit", dont_gc_arguments);

        int save_fpu_registers = (id == monitorexit_id);
        // make a frame and preserve the caller's caller-save registers
        OopMap* oop_map = save_live_registers(sasm, save_fpu_registers);

        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), G4);

        oop_maps = new OopMapSet();
        oop_maps->add_gc_map(call_offset, oop_map);
        restore_live_registers(sasm, save_fpu_registers);

        __ ret();
        __ delayed()->restore();

      }
      break;

    case access_field_patching_id:
      { __ set_info("access_field_patching", dont_gc_arguments);
        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching));
      }
      break;

    case load_klass_patching_id:
      { __ set_info("load_klass_patching", dont_gc_arguments);
        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching));
      }
      break;

    case jvmti_exception_throw_id:
      { // Oexception : exception
        __ set_info("jvmti_exception_throw", dont_gc_arguments);
        oop_maps = generate_stub_call(sasm, noreg, CAST_FROM_FN_PTR(address, Runtime1::post_jvmti_exception_throw), I0);
      }
      break;

    case dtrace_object_alloc_id:
      { // O0: object
        __ set_info("dtrace_object_alloc", dont_gc_arguments);
        // we can't gc here so skip the oopmap but make sure that all
        // the live registers get saved.
        save_live_registers(sasm);

        __ save_thread(L7_thread_cache);
        __ call(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc),
                relocInfo::runtime_call_type);
        __ delayed()->mov(I0, O0);
        __ restore_thread(L7_thread_cache);

        restore_live_registers(sasm);
        __ ret();
        __ delayed()->restore();
      }
      break;

#ifndef SERIALGC
    case g1_pre_barrier_slow_id:
      { // G4: previous value of memory
        BarrierSet* bs = Universe::heap()->barrier_set();
        if (bs->kind() != BarrierSet::G1SATBCTLogging) {
          __ save_frame(0);
          __ set((int)id, O1);
          __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), I0);
          __ should_not_reach_here();
          break;
        }

        __ set_info("g1_pre_barrier_slow_id", dont_gc_arguments);

        Register pre_val = G4;
        Register tmp  = G1_scratch;
        Register tmp2 = G3_scratch;

        Label refill, restart;
        bool with_frame = false; // I don't know if we can do with-frame.
        int satb_q_index_byte_offset =
          in_bytes(JavaThread::satb_mark_queue_offset() +
                   PtrQueue::byte_offset_of_index());
        int satb_q_buf_byte_offset =
          in_bytes(JavaThread::satb_mark_queue_offset() +
                   PtrQueue::byte_offset_of_buf());
        __ bind(restart);
        __ ld_ptr(G2_thread, satb_q_index_byte_offset, tmp);

        __ br_on_reg_cond(Assembler::rc_z, /*annul*/false,
                          Assembler::pn, tmp, refill);

        // If the branch is taken, no harm in executing this in the delay slot.
        __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, tmp2);
        __ sub(tmp, oopSize, tmp);

        __ st_ptr(pre_val, tmp2, tmp);  // [_buf + index] := <address_of_card>
        // Use return-from-leaf
        __ retl();
        __ delayed()->st_ptr(tmp, G2_thread, satb_q_index_byte_offset);

        __ bind(refill);
        __ save_frame(0);

        __ mov(pre_val, L0);
        __ mov(tmp,     L1);
        __ mov(tmp2,    L2);

        __ call_VM_leaf(L7_thread_cache,
                        CAST_FROM_FN_PTR(address,
                                         SATBMarkQueueSet::handle_zero_index_for_thread),
                                         G2_thread);

        __ mov(L0, pre_val);
        __ mov(L1, tmp);
        __ mov(L2, tmp2);

        __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
        __ delayed()->restore();
      }
      break;

    case g1_post_barrier_slow_id:
      {
        BarrierSet* bs = Universe::heap()->barrier_set();
        if (bs->kind() != BarrierSet::G1SATBCTLogging) {
          __ save_frame(0);
          __ set((int)id, O1);
          __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), I0);
          __ should_not_reach_here();
          break;
        }

        __ set_info("g1_post_barrier_slow_id", dont_gc_arguments);

        Register addr = G4;
        Register cardtable = G5;
        Register tmp  = G1_scratch;
        Register tmp2 = G3_scratch;
        jbyte* byte_map_base = ((CardTableModRefBS*)bs)->byte_map_base;

        Label not_already_dirty, restart, refill;

#ifdef _LP64
        __ srlx(addr, CardTableModRefBS::card_shift, addr);
#else
        __ srl(addr, CardTableModRefBS::card_shift, addr);
#endif

        AddressLiteral rs(byte_map_base);
        __ set(rs, cardtable);         // cardtable := <card table base>
        __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable]

        __ br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt,
                          tmp, not_already_dirty);
        // Get cardtable + tmp into a reg by itself -- useful in the take-the-branch
        // case, harmless if not.
        __ delayed()->add(addr, cardtable, tmp2);

        // We didn't take the branch, so we're already dirty: return.
        // Use return-from-leaf
        __ retl();
        __ delayed()->nop();

        // Not dirty.
        __ bind(not_already_dirty);
        // First, dirty it.
        __ stb(G0, tmp2, 0);  // [cardPtr] := 0  (i.e., dirty).

        Register tmp3 = cardtable;
        Register tmp4 = tmp;

        // these registers are now dead
        addr = cardtable = tmp = noreg;

        int dirty_card_q_index_byte_offset =
          in_bytes(JavaThread::dirty_card_queue_offset() +
                   PtrQueue::byte_offset_of_index());
        int dirty_card_q_buf_byte_offset =
          in_bytes(JavaThread::dirty_card_queue_offset() +
                   PtrQueue::byte_offset_of_buf());
        __ bind(restart);
        __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, tmp3);

        __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn,
                          tmp3, refill);
        // If the branch is taken, no harm in executing this in the delay slot.
        __ delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, tmp4);
        __ sub(tmp3, oopSize, tmp3);

        __ st_ptr(tmp2, tmp4, tmp3);  // [_buf + index] := <address_of_card>
        // Use return-from-leaf
        __ retl();
        __ delayed()->st_ptr(tmp3, G2_thread, dirty_card_q_index_byte_offset);

        __ bind(refill);
        __ save_frame(0);

        __ mov(tmp2, L0);
        __ mov(tmp3, L1);
        __ mov(tmp4, L2);

        __ call_VM_leaf(L7_thread_cache,
                        CAST_FROM_FN_PTR(address,
                                         DirtyCardQueueSet::handle_zero_index_for_thread),
                                         G2_thread);

        __ mov(L0, tmp2);
        __ mov(L1, tmp3);
        __ mov(L2, tmp4);

        __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
        __ delayed()->restore();
      }
      break;
#endif // !SERIALGC

    default:
      { __ set_info("unimplemented entry", dont_gc_arguments);
        __ save_frame(0);
        __ set((int)id, O1);
        __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), O1);
        __ should_not_reach_here();
      }
      break;
  }
  return oop_maps;
}