void ConstantPoolCacheEntry::update_pointers() {
  assert(in_words(size()) == 4, "check code below - may need adjustment");
  // field[1] is always oop or NULL
  PSParallelCompact::adjust_pointer((oop*)&_f1);
  if (is_vfinal()) {
    PSParallelCompact::adjust_pointer((oop*)&_f2);
  }
}
void ConstantPoolCacheEntry::oop_iterate(OopClosure* blk) {
  assert(in_words(size()) == 4, "check code below - may need adjustment");
  // field[1] is always oop or NULL
  blk->do_oop((oop*)&_f1);
  if (is_vfinal()) {
    blk->do_oop((oop*)&_f2);
  }
}
void LocalMapping::add(int name, RInfo reg) {
  assert(_free_regs != NULL, "shouldn't be adding things when register state is unknown");
  _free_regs->lock(reg);
  int offset = in_words(_local_name_to_offset_map->at(name));
  LIR_LocalCaching::add_at_all_names(_mapping, offset, reg, _local_name_to_offset_map);
  _offset_to_register_mapping->at_put(offset, reg);
}
void ConstantPoolCacheEntry::follow_contents(ParCompactionManager* cm) {
  assert(in_words(size()) == 4, "check code below - may need adjustment");
  // field[1] is always oop or NULL
  PSParallelCompact::mark_and_push(cm, (oop*)&_f1);
  if (is_vfinal()) {
    PSParallelCompact::mark_and_push(cm, (oop*)&_f2);
  }
}
void ConstantPoolCacheEntry::oop_iterate_m(OopClosure* blk, MemRegion mr) {
  assert(in_words(size()) == 4, "check code below - may need adjustment");
  // field[1] is always oop or NULL
  if (mr.contains((oop*)&_f1)) blk->do_oop((oop*)&_f1);
  if (is_vfinal()) {
    if (mr.contains((oop*)&_f2)) blk->do_oop((oop*)&_f2);
  }
}
void ConstantPoolCacheEntry::adjust_pointers() {
  assert(in_words(size()) == 4, "check code below - may need adjustment");
  // field[1] is always oop or NULL
  MarkSweep::adjust_pointer((oop*)&_f1);
  if (is_vfinal()) {
    MarkSweep::adjust_pointer((oop*)&_f2);
  }
}
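All five ConstantPoolCacheEntry methods above walk the same two slots: _f1 unconditionally (it is always an oop or NULL) and _f2 only when the entry is vfinal. Below is a minimal, self-contained sketch of that shared traversal shape, assuming hypothetical stand-in types (OopFieldVisitor, MockEntry); it is illustrative only, not HotSpot API.

#include <cstdio>

typedef void* oop;

struct OopFieldVisitor {
  virtual void do_oop(oop* p) = 0;
  virtual ~OopFieldVisitor() {}
};

struct MockEntry {
  oop  _f1;      // always an oop or NULL
  oop  _f2;      // holds an oop only for vfinal entries
  bool _vfinal;

  bool is_vfinal() const { return _vfinal; }

  // The shared shape of the five methods above: visit _f1 unconditionally,
  // visit _f2 only when the entry is vfinal.
  void visit_oop_fields(OopFieldVisitor* blk) {
    blk->do_oop(&_f1);
    if (is_vfinal()) {
      blk->do_oop(&_f2);
    }
  }
};

struct CountingVisitor : OopFieldVisitor {
  int count;
  CountingVisitor() : count(0) {}
  virtual void do_oop(oop* p) { if (*p != NULL) count++; }
};

int main() {
  MockEntry e = { (oop)0x1, (oop)0x2, true };
  CountingVisitor v;
  e.visit_oop_fields(&v);
  printf("visited %d non-NULL oop fields\n", v.count); // prints 2
  return 0;
}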
void LocalMapping::merge(LocalMapping* other) {
  for (int i = 0; i < other->length(); i++) {
    RInfo reg = other->get_cache_reg(i);
    if (reg.is_valid()) {
      // if the current mapping doesn't contain anything for this
      // index then use the register from the other mapping
      if (get_cache_reg(i).is_illegal()) {
        int offset = in_words(_local_name_to_offset_map->at(i));
        LIR_LocalCaching::add_at_all_names(_mapping, offset, reg, _local_name_to_offset_map);
        _offset_to_register_mapping->at_put(offset, reg);
      }
    }
  }
}
RInfo LocalMapping::get_cache_reg(int local_index, ValueTag tag) const {
  if (local_index < _mapping->length()) {
    RInfo reg = _mapping->at(local_index);
    if (reg.is_valid()) {
      assert(_offset_to_register_mapping->at(in_words(_local_name_to_offset_map->at(local_index))).is_same(reg),
             "should be in both maps.");
    }
    switch (tag) {
      case intTag:
      case objectTag:
        if (reg.is_word()) {
          return reg;
        }
        break;
      case floatTag:
        if (reg.is_float()) {
          return reg;
        }
        break;
      case doubleTag:
        if (reg.is_double()) {
          return reg;
        }
        break;
      case longTag:
        if (reg.is_long()) {
          return reg;
        }
        break;
      case illegalTag:
        return reg;
      case addressTag:
        return norinfo;
      default:
        ShouldNotReachHere();
    }
  }
  return norinfo;
}
void LocalMapping::init_cached_regs() {
  _cached_regs = new RegisterManager();
  for (int i = 0; i < _mapping->length(); i++) {
    RInfo reg = _mapping->at(i);
    if (reg.is_valid()) {
      _cached_regs->lock(reg);
      assert(_free_regs == NULL || !_free_regs->is_free_reg(reg), "shouldn't be free");
    }
  }

  if (_offset_to_register_mapping == NULL) {
    // cache the computation once
    _offset_to_register_mapping = new RInfoCollection();
    for (int i = 0; i < _local_name_to_offset_map->length(); i++) {
      RInfo r = get_cache_reg(i);
      if (r.is_valid()) {
        _offset_to_register_mapping->at_put(in_words(_local_name_to_offset_map->at(i)), r);
      }
    }
  }
}
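A LocalMapping keeps two views that the assert in get_cache_reg() checks for agreement: local name to register, and stack offset to register, bridged by the name-to-offset table. The sketch below, with hypothetical plain-int containers standing in for RInfoCollection and WordSizeList, shows why add_at_all_names() must record a register under every name sharing an offset (e.g. both halves of a two-slot long).

#include <vector>
#include <cassert>

int main() {
  // name -> offset: names 1 and 2 alias offset 1 (e.g. the two halves of a long)
  std::vector<int> name_to_offset;
  name_to_offset.push_back(0);
  name_to_offset.push_back(1);
  name_to_offset.push_back(1);

  const int NO_REG = -1;
  std::vector<int> name_to_reg(3, NO_REG);    // stands in for _mapping
  std::vector<int> offset_to_reg(2, NO_REG);  // stands in for _offset_to_register_mapping

  // add(name=1, reg=5): record the register under every name sharing the
  // offset, and once under the offset itself -- mirroring add_at_all_names().
  int offset = name_to_offset[1];
  for (int n = 0; n < (int)name_to_offset.size(); n++) {
    if (name_to_offset[n] == offset) name_to_reg[n] = 5;
  }
  offset_to_reg[offset] = 5;

  // The invariant asserted in get_cache_reg(): both views agree for any name.
  assert(name_to_reg[2] == offset_to_reg[name_to_offset[2]]);
  return 0;
}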
address InterpreterGenerator::generate_accessor_entry() {
  if (!UseFastAccessorMethods)
    return NULL;

  Label& slow_path = fast_accessor_slow_entry_path;
  address start = __ pc();

  // Drop into the slow path if we need a safepoint check.
  __ load (r3, (intptr_t) SafepointSynchronize::address_of_state());
  __ load (r0, Address(r3, 0));
  __ compare (r0, SafepointSynchronize::_not_synchronized);
  __ bne (slow_path);

  // Load the object pointer and drop into the slow path
  // if we have a NullPointerException.
  const Register object = r4;

  __ load (object, Address(Rlocals, 0));
  __ compare (object, 0);
  __ beq (slow_path);

  // Read the field index from the bytecode, which looks like this:
  //  0:  0x2a:    aload_0
  //  1:  0xb4:    getfield
  //  2:    index (high byte)
  //  3:    index (low byte)
  //  4:  0xac/b0: ireturn/areturn
  const Register index = r5;

  __ load (index, Address(Rmethod, methodOopDesc::const_offset()));
  __ lwz (index, Address(index, constMethodOopDesc::codes_offset()));
#ifdef ASSERT
  {
    Label ok;
    __ shift_right (r0, index, 16);
    __ compare (r0, (Bytecodes::_aload_0 << 8) | Bytecodes::_getfield);
    __ beq (ok);
    __ should_not_reach_here (__FILE__, __LINE__);
    __ bind (ok);
  }
#endif
  __ andi_ (index, index, 0xffff);

  // Locate the entry in the constant pool cache
  const Register entry = r6;

  __ load (entry, Address(Rmethod, methodOopDesc::constants_offset()));
  __ load (entry, Address(entry, constantPoolOopDesc::cache_offset_in_bytes()));
  __ la (entry, Address(entry, constantPoolCacheOopDesc::base_offset()));
  __ shift_left(r0, index,
                exact_log2(in_words(ConstantPoolCacheEntry::size())) + LogBytesPerWord);
  __ add (entry, entry, r0);

  // Check the validity of the cache entry by testing whether the
  // _indices field contains Bytecode::_getfield in the b1 byte.
  __ load (r0, Address(entry, ConstantPoolCacheEntry::indices_offset()));
  __ shift_right (r0, r0, 16);
  __ andi_ (r0, r0, 0xff);
  __ compare (r0, Bytecodes::_getfield);
  __ bne (slow_path);

  // Calculate the type and offset of the field
  const Register offset = r7;
  const Register type   = r8;

  __ load (offset, Address(entry, ConstantPoolCacheEntry::f2_offset()));
  __ load (type, Address(entry, ConstantPoolCacheEntry::flags_offset()));
  ConstantPoolCacheEntry::verify_tosBits();
  __ shift_right (type, type, ConstantPoolCacheEntry::tosBits);

  // Load the value
  Label is_object, is_int, is_byte, is_short, is_char;
  __ compare (type, atos);
  __ beq (is_object);
  __ compare (type, itos);
  __ beq (is_int);
  __ compare (type, btos);
  __ beq (is_byte);
  __ compare (type, stos);
  __ beq (is_short);
  __ compare (type, ctos);
  __ beq (is_char);

  __ load (r3, (intptr_t) "error: unknown type: %d\n");
  __ mr (r4, type);
  __ call (CAST_FROM_FN_PTR(address, printf));
  __ should_not_reach_here (__FILE__, __LINE__);

  __ bind (is_object);
  __ load_indexed (r3, object, offset);
  __ blr ();

  __ bind (is_int);
  __ lwax (r3, object, offset);
  __ blr ();

  __ bind (is_byte);
  __ lbax (r3, object, offset);
  __ blr ();

  __ bind (is_short);
  __ lhax (r3, object, offset);
  __ blr ();

  __ bind (is_char);
  __ lhzx (r3, object, offset);
  __ blr ();

  return start;
}
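The ASSERT block in the entry generator above verifies that the method body really has the canonical accessor shape described in the comment (aload_0; getfield; two index bytes; ireturn/areturn). Here is a host-side C++ sketch of the same pattern match; the opcode constants are the standard JVM values, while is_fast_accessor() itself is a hypothetical helper for illustration.

#include <cstdint>
#include <cstdio>

static const uint8_t ALOAD_0  = 0x2a;
static const uint8_t GETFIELD = 0xb4;
static const uint8_t IRETURN  = 0xac;
static const uint8_t ARETURN  = 0xb0;

// Matches: aload_0; getfield #index(u2); ireturn/areturn  (5 bytes total)
static bool is_fast_accessor(const uint8_t* code, int len, uint16_t* index) {
  if (len != 5 || code[0] != ALOAD_0 || code[1] != GETFIELD) return false;
  if (code[4] != IRETURN && code[4] != ARETURN) return false;
  *index = (uint16_t)((code[2] << 8) | code[3]); // big-endian u2, as in the class file
  return true;
}

int main() {
  const uint8_t getter[] = { ALOAD_0, GETFIELD, 0x00, 0x07, IRETURN };
  uint16_t idx;
  if (is_fast_accessor(getter, sizeof(getter), &idx)) {
    printf("accessor, cp index %u\n", idx); // cp index 7
  }
  return 0;
}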
void vframeArrayElement::unpack_on_stack(int caller_actual_parameters,
                                         int callee_parameters,
                                         int callee_locals,
                                         frame* caller,
                                         bool is_top_frame,
                                         bool is_bottom_frame,
                                         int exec_mode) {
  JavaThread* thread = (JavaThread*) Thread::current();

  // Look at bci and decide on bcp and continuation pc
  address bcp;
  // C++ interpreter doesn't need a pc since it will figure out what to do when it
  // begins execution
  address pc;
  bool use_next_mdp = false; // true if we should use the mdp associated with the next bci
                             // rather than the one associated with bcp
  if (raw_bci() == SynchronizationEntryBCI) {
    // We are deoptimizing while hanging in prologue code for synchronized method
    bcp = method()->bcp_from(0); // first byte code
    pc  = Interpreter::deopt_entry(vtos, 0); // step = 0 since we don't skip current bytecode
  } else if (should_reexecute()) { // reexecute this bytecode
    assert(is_top_frame, "reexecute allowed only for the top frame");
    bcp = method()->bcp_from(bci());
    pc  = Interpreter::deopt_reexecute_entry(method(), bcp);
  } else {
    bcp = method()->bcp_from(bci());
    pc  = Interpreter::deopt_continue_after_entry(method(), bcp, callee_parameters, is_top_frame);
    use_next_mdp = true;
  }
  assert(Bytecodes::is_defined(*bcp), "must be a valid bytecode");

  // Monitorenter and pending exceptions:
  //
  // For Compiler2, there should be no pending exception when deoptimizing at monitorenter
  // because there is no safepoint at the null pointer check (it is either handled explicitly
  // or prior to the monitorenter) and asynchronous exceptions are not made "pending" by the
  // runtime interface for the slow case (see JRT_ENTRY_FOR_MONITORENTER). If an asynchronous
  // exception was processed, the bytecode pointer would have to be extended one bytecode beyond
  // the monitorenter to place it in the proper exception range.
  //
  // For Compiler1, deoptimization can occur while throwing a NullPointerException at monitorenter,
  // in which case bcp should point to the monitorenter since it is within the exception's range.

  assert(*bcp != Bytecodes::_monitorenter || is_top_frame, "a _monitorenter must be a top frame");
  assert(thread->deopt_nmethod() != NULL, "nmethod should be known");
  guarantee(!(thread->deopt_nmethod()->is_compiled_by_c2() &&
              *bcp == Bytecodes::_monitorenter &&
              exec_mode == Deoptimization::Unpack_exception),
            "shouldn't get exception during monitorenter");

  int popframe_preserved_args_size_in_bytes = 0;
  int popframe_preserved_args_size_in_words = 0;
  if (is_top_frame) {
    JvmtiThreadState *state = thread->jvmti_thread_state();
    if (JvmtiExport::can_pop_frame() &&
        (thread->has_pending_popframe() || thread->popframe_forcing_deopt_reexecution())) {
      if (thread->has_pending_popframe()) {
        // Pop top frame after deoptimization
#ifndef CC_INTERP
        pc = Interpreter::remove_activation_preserving_args_entry();
#else
        // Do an uncommon trap type entry. c++ interpreter will know
        // to pop frame and preserve the args
        pc = Interpreter::deopt_entry(vtos, 0);
        use_next_mdp = false;
#endif
      } else {
        // Reexecute invoke in top frame
        pc = Interpreter::deopt_entry(vtos, 0);
        use_next_mdp = false;
        popframe_preserved_args_size_in_bytes = in_bytes(thread->popframe_preserved_args_size());
        // Note: the PopFrame-related extension of the expression stack size is done in
        // Deoptimization::fetch_unroll_info_helper
        popframe_preserved_args_size_in_words = in_words(thread->popframe_preserved_args_size_in_words());
      }
    } else if (JvmtiExport::can_force_early_return() && state != NULL && state->is_earlyret_pending()) {
      // Force early return from top frame after deoptimization
#ifndef CC_INTERP
      pc = Interpreter::remove_activation_early_entry(state->earlyret_tos());
#else
      // TBD: Need to implement ForceEarlyReturn for CC_INTERP (ia64)
#endif
    } else {
      // Possibly override the previous pc computation of the top (youngest) frame
      switch (exec_mode) {
        case Deoptimization::Unpack_deopt:
          // use what we've got
          break;
        case Deoptimization::Unpack_exception:
          // exception is pending
          pc = SharedRuntime::raw_exception_handler_for_return_address(thread, pc);
          // [phh] We're going to end up in some handler or other, so it doesn't
          // matter what mdp we point to.  See exception_handler_for_exception()
          // in interpreterRuntime.cpp.
          break;
        case Deoptimization::Unpack_uncommon_trap:
        case Deoptimization::Unpack_reexecute:
          // redo last byte code
          pc = Interpreter::deopt_entry(vtos, 0);
          use_next_mdp = false;
          break;
        default:
          ShouldNotReachHere();
      }
    }
  }

  // Setup the interpreter frame
  assert(method() != NULL, "method must exist");
  int temps = expressions()->size();
  int locks = monitors() == NULL ? 0 : monitors()->number_of_monitors();

  Interpreter::layout_activation(method(),
                                 temps + callee_parameters,
                                 popframe_preserved_args_size_in_words,
                                 locks,
                                 caller_actual_parameters,
                                 callee_parameters,
                                 callee_locals,
                                 caller,
                                 iframe(),
                                 is_top_frame,
                                 is_bottom_frame);

  // Update the pc in the frame object and overwrite the temporary pc
  // we placed in the skeletal frame now that we finally know the
  // exact interpreter address we should use.
  _frame.patch_pc(thread, pc);

  assert(!method()->is_synchronized() || locks > 0, "synchronized methods must have monitors");

  BasicObjectLock* top = iframe()->interpreter_frame_monitor_begin();
  for (int index = 0; index < locks; index++) {
    top = iframe()->previous_monitor_in_interpreter_frame(top);
    BasicObjectLock* src = _monitors->at(index);
    top->set_obj(src->obj());
    src->lock()->move_to(src->obj(), top->lock());
  }
  if (ProfileInterpreter) {
    iframe()->interpreter_frame_set_mdx(0); // clear out the mdp.
  }
  iframe()->interpreter_frame_set_bcx((intptr_t)bcp); // cannot use bcp because frame is not initialized yet
  if (ProfileInterpreter) {
    methodDataOop mdo = method()->method_data();
    if (mdo != NULL) {
      int bci = iframe()->interpreter_frame_bci();
      if (use_next_mdp) ++bci;
      address mdp = mdo->bci_to_dp(bci);
      iframe()->interpreter_frame_set_mdp(mdp);
    }
  }

  // Unpack expression stack
  // If this is an intermediate frame (i.e. not top frame) then this
  // only unpacks the part of the expression stack not used by callee
  // as parameters. The callee parameters are unpacked as part of the
  // callee locals.
  int i;
  for (i = 0; i < expressions()->size(); i++) {
    StackValue *value = expressions()->at(i);
    intptr_t*   addr  = iframe()->interpreter_frame_expression_stack_at(i);
    switch (value->type()) {
      case T_INT:
        *addr = value->get_int();
        break;
      case T_OBJECT:
        *addr = value->get_int(T_OBJECT);
        break;
      case T_CONFLICT:
        // A dead stack slot.  Initialize to null in case it is an oop.
        *addr = NULL_WORD;
        break;
      default:
        ShouldNotReachHere();
    }
  }

  // Unpack the locals
  for (i = 0; i < locals()->size(); i++) {
    StackValue *value = locals()->at(i);
    intptr_t*   addr  = iframe()->interpreter_frame_local_at(i);
    switch (value->type()) {
      case T_INT:
        *addr = value->get_int();
        break;
      case T_OBJECT:
        *addr = value->get_int(T_OBJECT);
        break;
      case T_CONFLICT:
        // A dead location. If it is an oop then we need a NULL to prevent GC from following it
        *addr = NULL_WORD;
        break;
      default:
        ShouldNotReachHere();
    }
  }

  if (is_top_frame && JvmtiExport::can_pop_frame() && thread->popframe_forcing_deopt_reexecution()) {
    // An interpreted frame was popped but it returns to a deoptimized
    // frame. The incoming arguments to the interpreted activation
    // were preserved in thread-local storage by the
    // remove_activation_preserving_args_entry in the interpreter; now
    // we put them back into the just-unpacked interpreter frame.
    // Note that this assumes that the locals arena grows toward lower
    // addresses.
    if (popframe_preserved_args_size_in_words != 0) {
      void* saved_args = thread->popframe_preserved_args();
      assert(saved_args != NULL, "must have been saved by interpreter");
#ifdef ASSERT
      assert(popframe_preserved_args_size_in_words <=
             iframe()->interpreter_frame_expression_stack_size()*Interpreter::stackElementWords,
             "expression stack size should have been extended");
#endif // ASSERT
      int top_element = iframe()->interpreter_frame_expression_stack_size()-1;
      intptr_t* base;
      if (frame::interpreter_frame_expression_stack_direction() < 0) {
        base = iframe()->interpreter_frame_expression_stack_at(top_element);
      } else {
        base = iframe()->interpreter_frame_expression_stack();
      }
      Copy::conjoint_jbytes(saved_args,
                            base,
                            popframe_preserved_args_size_in_bytes);
      thread->popframe_free_preserved_args();
    }
  }

#ifndef PRODUCT
  if (TraceDeoptimization && Verbose) {
    ttyLocker ttyl;
    tty->print_cr("[%d Interpreted Frame]", ++unpack_counter);
    iframe()->print_on(tty);
    RegisterMap map(thread);
    vframe* f = vframe::new_vframe(iframe(), &map, thread);
    f->print();

    tty->print_cr("locals size     %d", locals()->size());
    tty->print_cr("expression size %d", expressions()->size());

    method()->print_value();
    tty->cr();
    // method()->print_codes();
  } else if (TraceDeoptimization) {
    tty->print("     ");
    method()->print_value();
    Bytecodes::Code code = Bytecodes::java_code_at(method(), bcp);
    int bci = method()->bci_from(bcp);
    tty->print(" - %s", Bytecodes::name(code));
    tty->print(" @ bci %d ", bci);
    tty->print_cr("sp = " PTR_FORMAT, iframe()->sp());
  }
#endif // PRODUCT

  // The expression stack and locals are in the resource area; don't leave
  // a dangling pointer in the vframeArray we leave around for debug
  // purposes

  _locals = _expressions = NULL;
}
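The two unpacking loops above apply one simple rule per slot: T_INT slots are copied bitwise, T_OBJECT slots are copied as reference bits, and dead T_CONFLICT slots are zeroed so GC never chases stale contents. A self-contained sketch of that rule, with hypothetical Slot/SlotKind types standing in for StackValue:

#include <cstdint>
#include <cassert>

enum SlotKind { SLOT_INT, SLOT_OBJECT, SLOT_CONFLICT };

struct Slot {
  SlotKind kind;
  intptr_t value;
};

static void unpack_slot(const Slot& s, intptr_t* addr) {
  switch (s.kind) {
    case SLOT_INT:      *addr = s.value; break; // raw int bits
    case SLOT_OBJECT:   *addr = s.value; break; // reference bits
    case SLOT_CONFLICT: *addr = 0;       break; // dead slot: NULL so GC is safe
  }
}

int main() {
  intptr_t frame_slot = (intptr_t)0x1234;      // stale frame contents
  Slot dead = { SLOT_CONFLICT, 42 };
  unpack_slot(dead, &frame_slot);
  assert(frame_slot == 0);                     // stale contents scrubbed
  return 0;
}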
// --- build_repack_buffer ---------------------------------------------------
// Build an IFrame structure to help ASM code repack the 1 compiled frame into
// many interpreter (or C1) frames.  Takes in the current thread and a vframe;
// the vframe is pointing at the virtual Java frame needing to be repacked.
// It takes in the callee (which this frame is busy trying to call in its
// inlined code), and an array of IFrames.  It returns the updated IFrame
// buffer filled in for this frame.
void Deoptimization::build_repack_buffer( JavaThread *thread, frame fr, IFrame *buf,
                                          const DebugMap *dm, const DebugScope *ds,
                                          intptr_t *jexstk, objectRef *lckstk,
                                          bool is_deopt, bool is_c1, bool is_youngest) {
  assert( thread->_deopt_buffer->contains((char*)(buf+1)), "over-ran large deopt buffer?" );

  int bci = ds->bci();
  if( bci == InvocationEntryBci ) {
    // We deoptimized while hanging in prologue code for a synchronized
    // method.  We got the lock (after all, deopt happens after returning
    // from the blocking call).  We want to begin execution in the
    // interpreter at BCI 0, and after taking the lock.
    // Also it is possible to enter the deopt code through the br_s on method
    // entry before the first byte code.
    bci = 0;
  }

  const methodOop moop = ds->method().as_methodOop();
  if( ds->caller() ) {          // Do I have a caller?  Am I mid-call?
    // Initialize the constant pool entry for caller-parameter size.  It
    // might be the case that we inlined and compiled a callee, and are busy
    // calling it in the compiled code, and get deoptimized with that callee
    // in-progress AND we've never executed it in the interpreter - which
    // would have filled in the constant pool cache before making the call.
    // Fill it in now.
    const methodOop caller = ds->caller()->method().as_methodOop();
    int index = Bytes::get_native_u2(caller->bcp_from(ds->caller()->bci())+1);
    ConstantPoolCacheEntry *cpe = caller->constants()->cache()->entry_at(index);
    // Since we are setting the constant pool entry here, and another thread
    // could be busy resolving it, we have a race condition setting the
    // flags.  Use a CAS to only set the flags if they are currently 0.
    intx *flags_adr = (intx*)((intptr_t)cpe + in_bytes(ConstantPoolCacheEntry::flags_offset()));
    if( !*flags_adr ) {         // Flags currently 0?
      // Set the flags, because the interpreter-return-entry points need some
      // info from them.  Not all fields are set, because it's too complex to
      // do it here... and not needed.  The cpCacheEntry is left "unresolved"
      // such that the next real use of it from the interpreter will be forced
      // to do a proper resolve, which will fill in the missing fields.

      // Compute new flags needed by the interpreter-return-entry
      intx flags =
        (moop->size_of_parameters() & 0xFF) |
        (1 << ConstantPoolCacheEntry::hotSwapBit) |
        (moop->result_type() << ConstantPoolCacheEntry::tosBits);
      // CAS 'em in, but only if there is currently a 0 flags
      assert0( sizeof(jlong) == sizeof(intx) );
      Atomic::cmpxchg((jlong)flags, (jlong*)flags_adr, 0);
      // We don't care about the result, because the cache is monomorphic.
      // Either our CAS succeeded and jammed the right parameter count, or
      // another thread succeeded and jammed in the right parameter count.
    }
  }

  if (TraceDeoptimization) {
    BufferedLoggerMark m(NOTAG, Log::M_DEOPT, TraceDeoptimization, true);
    m.out("DEOPT REPACK c%d: ", is_c1 ? 1 : 2);
    moop->print_short_name(m.stream());
    m.out(" @ bci %d %s", bci, ds->caller() ? "called by...": " (oldest frame)" );
  }

  // If there was a suitable C1 frame, use it.
  // Otherwise, use an interpreter frame.
  if( 1 ) {
    // Build an interpreter-style IFrame.  Naked oops abound.
    assert0( !objectRef(moop).is_stack() );
    buf->_mref = objectRef(moop);
    buf->_cpc = moop->constants()->cacheRef();

    // Compute monitor list length.  If we have coarsened a lock we will end
    // up unlocking it and the repack buffer will not need to see it.
    uint mons_len = ds->numlocks();
    if( ds->is_extra_lock() ) { mons_len--; assert0( mons_len >= 0 ); }
    assert0( mons_len < (256*sizeof(buf->_numlck)) );
    buf->_numlck = mons_len;

    // Set up the return pc for the next frame: the next frame is a younger
    // frame which will return to this older frame.  All middle frames return
    // back into the interpreter, just after a call with proper TOS state.
    // Youngest frames always start in vtos state because the uncommon-trap
    // blob sets them up that way.
    const address bcp = moop->bcp_from(bci);
    Bytecodes::Code c = Bytecodes::java_code(Bytecodes::cast(*bcp));
    BasicType return_type = T_VOID;

    bool handle_popframe = is_youngest &&
      JvmtiExport::can_pop_frame() &&
      thread->popframe_forcing_deopt_reexecution();

    int bci_bump = 0;
    if( !is_youngest ) {        // Middle-frame?
      bool from_call = (c == Bytecodes::_invokevirtual   ||
                        c == Bytecodes::_invokespecial   ||
                        c == Bytecodes::_invokestatic    ||
                        c == Bytecodes::_invokeinterface );
      assert(from_call, "Middle frame is in the middle of a call");
      bci_bump = Bytecodes::length_at(bcp); // But need to know how much it will be bumped for the return address
      buf->_bci = bci;          // Save bci without bumping it; normal interpreter call returns bump the bci as needed
      buf[-1]._retadr = Interpreter::return_entry(vtos, bci_bump);

    } else if( thread->pending_exception() ) {
      // Deopt-with-pending.  Throw up on return to interpreter, which is
      // handled by unpack_and_go.
      buf->_bci = bci;
      buf[-1]._retadr = Interpreter::unpack_and_go();

    } else if( !is_deopt ) {    // It is a C2-style uncommon-trap.
      // Do NOT increment the BCP!  We are re-executing the current bytecode.
      buf->_bci = bci;
      buf[-1]._retadr = Interpreter::unpack_and_go();

    } else {                    // It is a plain deopt
      // It is a deopt without exception.  See if we are C1 in mid-patch.
      // If so, we always need to re-execute the bytecode.
      bool is_C1_mid_patch = false;
      if( is_c1 ) {             // C1 codeblob?
        address caller_pc = fr.pc();
        if( NativeCall::is_call_before(caller_pc) ) {
          address target = nativeCall_at(caller_pc)->destination();
          is_C1_mid_patch = target == Runtime1::entry_for(Runtime1::load_klass_patching_id);
        }
      }
      if( is_C1_mid_patch ) {
        Untested("");
        // Do NOT increment the BCP!  We are re-executing the current bytecode.
      } else if( ds->bci() == InvocationEntryBci ) {
        // It is deopt while hanging on a method-entry lock.
        // Do not advance BCP, as we have not executed bci 0 yet.

      } else {                  // Else C2 or C1-not-mid-patch
        // It is a deopt.  Whether we re-execute the current bytecode or
        // assume it has completed depends on the bytecode.
        switch( c ) {
          case Bytecodes::_lookupswitch:
          case Bytecodes::_tableswitch:
          case Bytecodes::_fast_binaryswitch:
          case Bytecodes::_fast_linearswitch:
          // recompute conditional expression folded into _if<cond>
          case Bytecodes::_lcmp      :
          case Bytecodes::_fcmpl     :
          case Bytecodes::_fcmpg     :
          case Bytecodes::_dcmpl     :
          case Bytecodes::_dcmpg     :
          case Bytecodes::_ifnull    :
          case Bytecodes::_ifnonnull :
          case Bytecodes::_goto      :
          case Bytecodes::_goto_w    :
          case Bytecodes::_ifeq      :
          case Bytecodes::_ifne      :
          case Bytecodes::_iflt      :
          case Bytecodes::_ifge      :
          case Bytecodes::_ifgt      :
          case Bytecodes::_ifle      :
          case Bytecodes::_if_icmpeq :
          case Bytecodes::_if_icmpne :
          case Bytecodes::_if_icmplt :
          case Bytecodes::_if_icmpge :
          case Bytecodes::_if_icmpgt :
          case Bytecodes::_if_icmple :
          case Bytecodes::_if_acmpeq :
          case Bytecodes::_if_acmpne :
          // special cases
          case Bytecodes::_aastore:
            // We are re-executing the current bytecode.
            Untested("");
            break;

          // special cases
          case Bytecodes::_putstatic:
          case Bytecodes::_getstatic:
          case Bytecodes::_getfield:
          case Bytecodes::_putfield:
            // We are re-executing the current bytecode.
            break;

          case Bytecodes::_athrow :
            break;              // Must be deopt-w-exception

          case Bytecodes::_invokevirtual:
          case Bytecodes::_invokespecial:
          case Bytecodes::_invokestatic: {
            methodHandle mh(thread, moop);
            return_type = Bytecode_invoke_at(mh, bci)->result_type(thread);
            if( !handle_popframe && !ds->should_reexecute() )
              bci_bump = 3;     // Increment the BCP to post-call!!!  See below!
            break;
          }
          case Bytecodes::_invokeinterface: {
            methodHandle mh(thread, moop);
            return_type = Bytecode_invoke_at(mh, bci)->result_type(thread);
            if( !handle_popframe && !ds->should_reexecute() )
              bci_bump = 5;     // Increment the BCP to post-call!!!  See below!
            break;
          }
          case Bytecodes::_ldc :
            Untested("");
            return_type = constant_pool_type( moop, *(bcp+1) );
            if( !ds->should_reexecute() )
              bci_bump = 2;     // Increment the BCP to post-call!!!  See below!
            break;

          case Bytecodes::_ldc_w : // fall through
          case Bytecodes::_ldc2_w:
            return_type = constant_pool_type( moop, Bytes::get_Java_u2(bcp+1) );
            if( !ds->should_reexecute() )
              bci_bump = 3;     // Increment the BCP to post-call!!!  See below!
            break;

          default:
            return_type = Bytecodes::result_type(c);
            if( !ds->should_reexecute() )
              bci_bump = Bytecodes::length_at(bcp); // Increment the BCP to post-call!!!  See below!
            break;
        }
        if (ds->should_reexecute()) return_type = T_VOID;
      }

      // Save (possibly advanced) bci
      buf->_bci = bci + bci_bump;
      buf[-1]._retadr = Interpreter::unpack_and_go(); // Interpreter::return_entry(vtos, bci_bump);
    }

    // ---
    // Now all the Java locals.
    // First set the start of locals for the interpreter frame we are building.
    buf->_loc = (intptr_t)jexstk;
    uint loc_len = moop->max_locals();
    for( uint i = 0; i < loc_len; i++ ) {
      *jexstk++ = dm->get_value(ds->get_local(i), fr);
    }

    // Now that the locals have been unpacked, if we have any deferred local writes
    // added by jvmti then we can free up that structure as the data is now in the
    // buffer
    GrowableArray<jvmtiDeferredLocalVariableSet*>* list = thread->deferred_locals();
    if( list ) {
      // Because of inlining we could have multiple vframes for a single frame
      // and several of the vframes could have deferred writes.  Find them all.
      Unimplemented();
    }

    // ---
    // Now all the Java Expressions
    uint expr_len = ds->numstk();
    for( uint i = 0; i < expr_len; i++ )
      *jexstk++ = dm->get_value(ds->get_expr(i), fr);

    // If returning from a deoptimized call, we will have return values in
    // registers that need to end up on the Java execution stack.  They are
    // not recorded in the debug info, since they did not exist at the time
    // the call began.
    if( is_youngest && is_deopt ) {
      if( type2size[return_type] > 0 ) {
        if( type2size[return_type] == 2 ) {
          *jexstk++ = (intptr_t)frame::double_slot_primitive_type_empty_slot_id << 32;
        }
        *jexstk++ = pd_fetch_return_values( thread, return_type );
        // Need to adjust the final jexstk_top for the youngest frame
        // returning values.  These returned values are not accounted for in
        // the standard debug info.
        thread->_jexstk_top = jexstk;
      }
    }

    // JVMTI PopFrame support
    // Add the number of words of popframe preserved args to expr_len
    int popframe_preserved_args_size_in_bytes = in_bytes(thread->popframe_preserved_args_size());
    int popframe_preserved_args_size_in_words = in_words(thread->popframe_preserved_args_size_in_words());
    if (handle_popframe) {
      Unimplemented();
      expr_len += popframe_preserved_args_size_in_words;
      // An interpreted frame was popped but it returns to a deoptimized
      // frame. The incoming arguments to the interpreted activation
      // were preserved in thread-local storage by the
      // remove_activation_preserving_args_entry in the interpreter; now
      // we put them back into the just-unpacked interpreter frame.
      // Note that this assumes that the locals arena grows toward lower
      // addresses.
    }

    // Set the JEX stk top
    buf->_stk = (intptr_t)jexstk;

    // ---
    // Now move locked objects to the interpreter's lock-stack.
    // No need to inflate anything, as we're moving standard oops.
    int numlcks = ds->numlocks();
    if( ds->is_extra_lock() ) { // coarsened a lock
      Untested("");
      // The last lock is "coarsened" - kept locked when it should have been
      // unlocked and relocked.  With no deopt, keeping it locked saves the 2
      // sets of back-to-back CAS's and fences.  However, here we need to
      // unlock it to match the proper Java state.
      ObjectSynchronizer::unlock(ALWAYS_POISON_OBJECTREF((objectRef)dm->get_value(ds->get_lock(numlcks-1), fr)).as_oop());
      numlcks--;
    }
    for( int i = 0; i < numlcks; i++ ) {
      *lckstk++ = ALWAYS_POISON_OBJECTREF((objectRef)dm->get_value(ds->get_lock(i), fr));
    }

  } else {                      // Make a C1 frame
    Unimplemented();
  }
}
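The flags update in build_repack_buffer() relies on a "CAS from zero" so that racing threads publish at most one value and neither needs to examine the result. A portable sketch of that idiom using std::atomic (the real code CASes a raw word inside the cpCache entry via Atomic::cmpxchg):

#include <atomic>
#include <cstdio>

static std::atomic<long> flags_word(0);

// Returns true if this caller installed the flags, false if another thread
// beat us to it. Either way the word afterwards holds a valid value, which
// is all the interpreter-return-entry needs.
static bool install_flags_once(long flags) {
  long expected = 0;
  return flags_word.compare_exchange_strong(expected, flags);
}

int main() {
  bool first  = install_flags_once(0x1234);
  bool second = install_flags_once(0x5678); // loses the race; word unchanged
  printf("first=%d second=%d flags=0x%lx\n",
         (int)first, (int)second, (unsigned long)flags_word.load());
  return 0;
}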
void LocalMapping::emit_transition(LIR_List* lir, LocalMapping* pred_mapping, LocalMapping* sux_mapping, IR* ir) {
  BitMap offset_bitmap(in_words(ir->highest_used_offset()));
  offset_bitmap.clear();
  WordSizeList* local_name_to_offset_map = ir->local_name_to_offset_map();

  // spill preceding block cached locals
  if (pred_mapping) {
    for (int i = 0; i < pred_mapping->length(); i++) {
      RInfo reg = pred_mapping->get_cache_reg(i);
      if (reg.is_valid()) {
        if (sux_mapping) {
          RInfo current = sux_mapping->get_cache_reg(i);
          if (current.is_same(reg)) {
            continue;
          }
        }

        int offset = in_words(local_name_to_offset_map->at(i));
        if (offset_bitmap.at(offset)) {
          continue;
        } else {
          offset_bitmap.at_put(offset, true);
        }

        if (reg.is_word()) {
          lir->reg2single_stack(reg, i, T_INT);
        } else if (reg.is_long()) {
          lir->reg2double_stack(reg, i, T_LONG);
        } else if (reg.is_float()) {
          lir->reg2single_stack(reg, i, T_FLOAT);
        } else if (reg.is_double()) {
          lir->reg2double_stack(reg, i, T_DOUBLE);
        } else {
          ShouldNotReachHere();
        }
        if (C1InvalidateCachedOopLocation) {
          lir->int2reg(-1, reg);
        }
      }
    }
  }

  offset_bitmap.clear();

  // load successor block cached locals
  if (sux_mapping) {
    for (int i = 0; i < sux_mapping->length(); i++) {
      RInfo reg = sux_mapping->get_cache_reg(i);
      if (reg.is_valid()) {
        if (pred_mapping) {
          RInfo previous = pred_mapping->get_cache_reg(i);
          if (previous.is_same(reg)) {
            continue;
          }
        }

        int offset = in_words(local_name_to_offset_map->at(i));
        if (offset_bitmap.at(offset)) {
          continue;
        } else {
          offset_bitmap.at_put(offset, true);
        }

        if (reg.is_word()) {
          lir->single_stack2reg(i, reg, T_INT);
        } else if (reg.is_long()) {
          lir->double_stack2reg(i, reg, T_LONG);
        } else if (reg.is_float()) {
          lir->single_stack2reg(i, reg, T_FLOAT);
        } else if (reg.is_double()) {
          lir->double_stack2reg(i, reg, T_DOUBLE);
        } else {
          ShouldNotReachHere();
        }
        if (C1InvalidateCachedOopLocation) {
          lir->int2stack(-1, i);
        }
      }
    }

    if (C1InvalidateCachedOopLocation) {
      c1_RegMask free_regs = sux_mapping->free_cpu_registers();
      while (!free_regs.is_empty()) {
        RInfo reg = free_regs.get_first_reg();
        free_regs.remove_reg(reg);
        lir->int2reg(-1, reg);
      }
    }
  }
}
void LocalMapping::remove(int name) {
  int offset = in_words(_local_name_to_offset_map->at(name));
  LIR_LocalCaching::remove_at_all_names(_mapping, offset, _local_name_to_offset_map);
  _offset_to_register_mapping->at_put(offset, norinfo);
}
// Call an accessor method (assuming it is resolved, otherwise drop into
// vanilla (slow path) entry.
address InterpreterGenerator::generate_accessor_entry(void) {
  if (!UseFastAccessorMethods && (!FLAG_IS_ERGO(UseFastAccessorMethods))) {
    return NULL;
  }

  Label Lslow_path, Lacquire;

  const Register
         Rclass_or_obj = R3_ARG1,
         Rconst_method = R4_ARG2,
         Rcodes        = Rconst_method,
         Rcpool_cache  = R5_ARG3,
         Rscratch      = R11_scratch1,
         Rjvmti_mode   = Rscratch,
         Roffset       = R12_scratch2,
         Rflags        = R6_ARG4,
         Rbtable       = R7_ARG5;

  static address branch_table[number_of_states];

  address entry = __ pc();

  // Check for safepoint:
  // Ditch this, real men don't need safepoint checks.

  // Also check for JVMTI mode
  // Check for null obj, take slow path if so.
  __ ld(Rclass_or_obj, Interpreter::stackElementSize, CC_INTERP_ONLY(R17_tos) NOT_CC_INTERP(R15_esp));
  __ lwz(Rjvmti_mode, thread_(interp_only_mode));
  __ cmpdi(CCR1, Rclass_or_obj, 0);
  __ cmpwi(CCR0, Rjvmti_mode, 0);
  __ crorc(/*CCR0 eq*/2, /*CCR1 eq*/4+2, /*CCR0 eq*/2);
  __ beq(CCR0, Lslow_path); // this==null or jvmti_mode!=0

  // Do 2 things in parallel:
  // 1. Load the index out of the first instruction word, which looks like this:
  //    <0x2a><0xb4><index (2 byte, native endianness)>.
  // 2. Load constant pool cache base.
  __ ld(Rconst_method, in_bytes(Method::const_offset()), R19_method);
  __ ld(Rcpool_cache, in_bytes(ConstMethod::constants_offset()), Rconst_method);

  __ lhz(Rcodes, in_bytes(ConstMethod::codes_offset()) + 2, Rconst_method); // Lower half of 32 bit field.
  __ ld(Rcpool_cache, ConstantPool::cache_offset_in_bytes(), Rcpool_cache);

  // Get the const pool entry by means of <index>.
  const int codes_shift = exact_log2(in_words(ConstantPoolCacheEntry::size()) * BytesPerWord);
  __ slwi(Rscratch, Rcodes, codes_shift); // (codes&0xFFFF)<<codes_shift
  __ add(Rcpool_cache, Rscratch, Rcpool_cache);

  // Check if cpool cache entry is resolved.
  // We are resolved if the indices offset contains the current bytecode.
  ByteSize cp_base_offset = ConstantPoolCache::base_offset();
  // Big Endian:
  __ lbz(Rscratch, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::indices_offset()) + 7 - 2, Rcpool_cache);
  __ cmpwi(CCR0, Rscratch, Bytecodes::_getfield);
  __ bne(CCR0, Lslow_path);
  __ isync(); // Order succeeding loads wrt. load of _indices field from cpool_cache.

  // Finally, start loading the value: Get cp cache entry into regs.
  __ ld(Rflags, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::flags_offset()), Rcpool_cache);
  __ ld(Roffset, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::f2_offset()), Rcpool_cache);

  // Following code is from templateTable::getfield_or_static
  // Load pointer to branch table
  __ load_const_optimized(Rbtable, (address)branch_table, Rscratch);

  // Get volatile flag
  __ rldicl(Rscratch, Rflags, 64-ConstantPoolCacheEntry::is_volatile_shift, 63); // extract volatile bit
  // note: sync is needed before volatile load on PPC64

  // Check field type
  __ rldicl(Rflags, Rflags, 64-ConstantPoolCacheEntry::tos_state_shift, 64-ConstantPoolCacheEntry::tos_state_bits);

#ifdef ASSERT
  Label LFlagInvalid;
  __ cmpldi(CCR0, Rflags, number_of_states);
  __ bge(CCR0, LFlagInvalid);

  __ ld(R9_ARG7, 0, R1_SP);
  __ ld(R10_ARG8, 0, R21_sender_SP);
  __ cmpd(CCR0, R9_ARG7, R10_ARG8);
  __ asm_assert_eq("backlink", 0x543);
#endif // ASSERT
  __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.

  // Load from branch table and dispatch (volatile case: one instruction ahead)
  __ sldi(Rflags, Rflags, LogBytesPerWord);
  __ cmpwi(CCR6, Rscratch, 1); // volatile?
  if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
    __ sldi(Rscratch, Rscratch, exact_log2(BytesPerInstWord)); // volatile ? size of 1 instruction : 0
  }
  __ ldx(Rbtable, Rbtable, Rflags);

  if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
    __ subf(Rbtable, Rscratch, Rbtable); // point to volatile/non-volatile entry point
  }
  __ mtctr(Rbtable);
  __ bctr();

#ifdef ASSERT
  __ bind(LFlagInvalid);
  __ stop("got invalid flag", 0x6541);

  bool all_uninitialized = true, all_initialized = true;
  for (int i = 0; i < number_of_states; ++i) {
    all_uninitialized = all_uninitialized && (branch_table[i] == NULL);
    all_initialized   = all_initialized   && (branch_table[i] != NULL);
  }
  assert(all_uninitialized != all_initialized, "consistency"); // either or

  __ fence(); // volatile entry point (one instruction before non-volatile_entry point)
  if (branch_table[vtos] == 0) branch_table[vtos] = __ pc(); // non-volatile_entry point
  if (branch_table[dtos] == 0) branch_table[dtos] = __ pc(); // non-volatile_entry point
  if (branch_table[ftos] == 0) branch_table[ftos] = __ pc(); // non-volatile_entry point
  __ stop("unexpected type", 0x6551);
#endif

  if (branch_table[itos] == 0) { // generate only once
    __ align(32, 28, 28); // align load
    __ fence(); // volatile entry point (one instruction before non-volatile_entry point)
    branch_table[itos] = __ pc(); // non-volatile_entry point
    __ lwax(R3_RET, Rclass_or_obj, Roffset);
    __ beq(CCR6, Lacquire);
    __ blr();
  }

  if (branch_table[ltos] == 0) { // generate only once
    __ align(32, 28, 28); // align load
    __ fence(); // volatile entry point (one instruction before non-volatile_entry point)
    branch_table[ltos] = __ pc(); // non-volatile_entry point
    __ ldx(R3_RET, Rclass_or_obj, Roffset);
    __ beq(CCR6, Lacquire);
    __ blr();
  }

  if (branch_table[btos] == 0) { // generate only once
    __ align(32, 28, 28); // align load
    __ fence(); // volatile entry point (one instruction before non-volatile_entry point)
    branch_table[btos] = __ pc(); // non-volatile_entry point
    __ lbzx(R3_RET, Rclass_or_obj, Roffset);
    __ extsb(R3_RET, R3_RET);
    __ beq(CCR6, Lacquire);
    __ blr();
  }

  if (branch_table[ctos] == 0) { // generate only once
    __ align(32, 28, 28); // align load
    __ fence(); // volatile entry point (one instruction before non-volatile_entry point)
    branch_table[ctos] = __ pc(); // non-volatile_entry point
    __ lhzx(R3_RET, Rclass_or_obj, Roffset);
    __ beq(CCR6, Lacquire);
    __ blr();
  }

  if (branch_table[stos] == 0) { // generate only once
    __ align(32, 28, 28); // align load
    __ fence(); // volatile entry point (one instruction before non-volatile_entry point)
    branch_table[stos] = __ pc(); // non-volatile_entry point
    __ lhax(R3_RET, Rclass_or_obj, Roffset);
    __ beq(CCR6, Lacquire);
    __ blr();
  }

  if (branch_table[atos] == 0) { // generate only once
    __ align(32, 28, 28); // align load
    __ fence(); // volatile entry point (one instruction before non-volatile_entry point)
    branch_table[atos] = __ pc(); // non-volatile_entry point
    __ load_heap_oop(R3_RET, (RegisterOrConstant)Roffset, Rclass_or_obj);
    __ verify_oop(R3_RET);
    //__ dcbt(R3_RET); // prefetch
    __ beq(CCR6, Lacquire);
    __ blr();
  }

  __ align(32, 12);
  __ bind(Lacquire);
  __ twi_0(R3_RET);
  __ isync(); // acquire
  __ blr();

#ifdef ASSERT
  for (int i = 0; i < number_of_states; ++i) {
    assert(branch_table[i], "accessor_entry initialization");
    //tty->print_cr("accessor_entry: branch_table[%d] = 0x%llx (opcode 0x%llx)", i, branch_table[i], *((unsigned int*)branch_table[i]));
  }
#endif

  __ bind(Lslow_path);
  __ branch_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), Rscratch);
  __ flush();

  return entry;
}
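The accessor entry above dispatches through a lazily filled branch table indexed by TOS state, with the volatile entry point sitting one instruction before the non-volatile one. The C++ sketch below shows just the table-indexed dispatch idea with hypothetical states and handlers; it does not model the volatile-offset trick.

#include <cstdio>

enum TosState { btos_, ctos_, stos_, itos_, ltos_, num_states };

typedef long (*LoadFn)(const char* base, long offset);

static long load_int(const char* base, long offset) {
  return *(const int*)(base + offset);         // 32-bit, sign-extended by return
}
static long load_byte(const char* base, long offset) {
  return *(const signed char*)(base + offset); // 8-bit, sign-extended
}

int main() {
  // One slot per TOS state, filled once, then dispatched through blindly.
  LoadFn branch_table[num_states] = { load_byte, 0, 0, load_int, 0 };
  int field = 42;
  long v = branch_table[itos_]((const char*)&field, 0);
  printf("loaded %ld\n", v); // 42
  return 0;
}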
LocalMapping* LIR_LocalCaching::compute_caching(ALocalList* locals, RegisterManager* registers) {
  int i;
  int num_free_cpu_regs = registers->num_free_cpu();
  ALocalList* reg_locals = new ALocalList(num_free_cpu_regs);

  // insert all the loop locals first
  for (i = 0; i < locals->length(); i++) {
    ALocal* local = locals->at(i);
    int index = local->index();
    int size = 1;
    switch (local->type()) {
      case longTag:
        if (!CacheDoubleWord) {
          break;
        }
        size = 2;
        // fall through: a cached long consumes two word registers
      case objectTag:
      case intTag:
        if (num_free_cpu_regs >= size) {
          assert(!reg_locals->contains(local), "shouldn't be in there yet");
          reg_locals->append(local);
          num_free_cpu_regs -= size;
        }
        break;
      case doubleTag:
        if (!CacheDoubleWord) {
          break;
        }
        // fall through
      case floatTag:
        reg_locals->append(local);
        break;
    }
  }

  reg_locals->sort(ALocal::sort_by_index);

  RInfoCollection* mapping = new RInfoCollection();
  WordSizeList* local_name_to_offset_map = ir()->local_name_to_offset_map();

  // first allocate all locations which have preferred registers
  for (i = 0; i < reg_locals->length(); i++) {
    ALocal* local = reg_locals->at(i);
    int index = local->index();
    RInfo reg = preferred()->get_cache_reg(index, local->type());
    // if there's no preferred mapping or the register is unavailable, skip it
    if (reg.is_illegal() || !registers->is_free_reg(reg)) {
      continue;
    }

    assert(!reg.is_illegal(), "we should always have something");
    assert(is_illegal_at_all_names(mapping, index, local_name_to_offset_map), "shouldn't be mapped yet");

    registers->lock(reg);
    assert(!registers->is_free_reg(reg), "must be locked");

    // Must translate this index back into all local names which map to it
    add_at_all_names(mapping, index, reg, local_name_to_offset_map);
  }

  // allocate everything else to available registers
  for (i = reg_locals->length() - 1; i >= 0; i--) {
    ALocal* local = reg_locals->at(i);
    int index = local->index();
    {
      debug_only(bool found_one = false;);
      int first_index;
      for (int j = 0; j < local_name_to_offset_map->length(); j++) {
        if (index == in_words(local_name_to_offset_map->at(j))) {
          first_index = j;
          debug_only(found_one = true;)
          break;
        }
      }
// Scan a block and set the descriptor flags that summarize which kinds of
// instructions it contains.
void ScanBlocks::scan_block(BlockBegin* block, ScanResult* desc, bool live_only) {
  for (Instruction* n = block; n != NULL; n = n->next()) {
    if (live_only && !n->is_pinned() && (n->use_count() == 0)) {
      // don't look at unused instructions because no code is emitted for them
      continue;
    }
    ValueTag tag = n->type()->tag();
    if (tag == floatTag) {
      desc->set_has_floats(true);
    } else if (tag == doubleTag) {
      desc->set_has_doubles(true);
    }

    if (n->as_StateSplit() != NULL) {
      if (n->as_Invoke() != NULL) {
        desc->set_has_calls(true);
      } else if (n->as_NewArray() || n->as_NewInstance() || n->as_AccessMonitor()) {
        desc->set_has_slow_cases(true);
      } else if (n->as_Intrinsic() != NULL) {
        Intrinsic* i = n->as_Intrinsic();
        if (i->id() == methodOopDesc::_arraycopy) {
          desc->set_has_slow_cases(true);
        }
        if (!i->preserves_state()) {
          desc->set_has_calls(true);
        }
      }
    } else if (n->as_AccessField() != NULL) {
      AccessField* af = n->as_AccessField();
      if (!af->is_initialized() || !af->is_loaded()) {
        desc->set_has_class_init(true);
      }
    } else if (n->as_AccessLocal() != NULL) {
      AccessLocal* local = n->as_AccessLocal();
      StoreLocal* store = n->as_StoreLocal();
      int use_count = 0;
      if (store != NULL) {
        if (!store->is_eliminated()) {
          use_count = 1;
        }
      } else {
        use_count = n->use_count();
      }
      if (use_count > 0) {
        ValueType* type = local->type();
        assert(local->has_offset(), "must have had offset allocated");
        accumulate_access(in_words(local->offset()), tag, use_count);
      }
    }
#ifdef SPARC
    else {
      if (n->as_Convert() != NULL) {
        Convert* conv = n->as_Convert();
        switch (conv->op()) {
          case Bytecodes::_l2f:
          case Bytecodes::_l2d:
          case Bytecodes::_f2l:
          case Bytecodes::_d2l:
          case Bytecodes::_d2i: {
            desc->set_has_calls(true);
            break;
          }
        }
      } else if (n->as_ArithmeticOp() != NULL) {
        ArithmeticOp* arith = n->as_ArithmeticOp();
        switch (arith->op()) {
          case Bytecodes::_lrem:
          case Bytecodes::_ldiv:
          case Bytecodes::_lmul:
          case Bytecodes::_drem:
          case Bytecodes::_frem: {
            desc->set_has_calls(true);
            break;
          }
        }
      }
    }
#endif
  }
}
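accumulate_access() (not shown here) tallies how often each stack offset is touched, with a non-eliminated store counting as a single use. A simplified sketch of that weighting; the real call also passes the value tag, which this hypothetical two-argument version omits:

#include <map>
#include <cstdio>

static std::map<int, int> access_counts;

static void accumulate_access(int offset, int use_count) {
  access_counts[offset] += use_count; // hot offsets become caching candidates
}

int main() {
  accumulate_access(0, 3); // a load whose value has three uses
  accumulate_access(0, 1); // a non-eliminated store counts once
  accumulate_access(2, 1);
  printf("offset 0 score: %d\n", access_counts[0]); // 4
  return 0;
}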