// Try to produce an inlining CallGenerator for an invokedynamic call site.
//
// The call site's target MethodHandle is given the callee, caller and call
// profile, then asked for its invokedynamic adapter method.  If an adapter
// exists and the compiler can build an *inline* generator for it, that
// generator is returned; otherwise NULL is returned.
CallGenerator* CallGenerator::for_invokedynamic_inline(ciCallSite* call_site, JVMState* jvms,
                                                       ciMethod* caller, ciMethod* callee, ciCallProfile profile) {
  ciMethodHandle* mh = call_site->get_target();

  // The MethodHandleCompiler needs the callee for access to the class and
  // signature; the caller and profile are handed over alongside it.
  mh->set_callee(callee);
  mh->set_caller(caller);
  mh->set_call_profile(profile);

  // Ask the MethodHandle for an adapter; without one there is nothing to inline.
  ciMethod* adapter = mh->get_invokedynamic_adapter();
  if (adapter == NULL) {
    return NULL;
  }

  Compile* comp = Compile::current();
  CallGenerator* inline_cg = comp->call_generator(adapter, -1, false, jvms, true, PROB_ALWAYS);
  if (inline_cg == NULL || !inline_cg->is_inline()) {
    return NULL;
  }

  // A non-constant call site needs a dependence on its current target so
  // the optimization can be invalidated.
  if (!call_site->is_constant_call_site()) {
    comp->dependencies()->assert_call_site_target_value(call_site, mh);
  }
  return inline_cg;
}
// Parse this generator's method in the context of 'jvms' and return the
// parser's exit state (with any exception states folded in).  Returns NULL
// if the compilation is already failing before or after the parse.
JVMState* ParseGenerator::generate(JVMState* jvms, Parse* parent_parser) {
  Compile* comp = Compile::current();

  if (is_osr()) {
    // The JVMS for a OSR has a single argument (see its TypeFunc).
    assert(jvms->depth() == 1, "no inline OSR");
  }

  // Bailing out of the compile; do not try to parse.
  if (comp->failing()) {
    return NULL;
  }

  // Constructing the Parse object performs the parse.
  Parse parse(jvms, method(), _expected_uses, parent_parser);

  // Grab signature for matching/allocation
#ifdef ASSERT
  if (parse.tf() != (parse.depth() == 1 ? comp->tf() : tf())) {
    MutexLockerEx ml(Compile_lock, Mutex::_no_safepoint_check_flag);
    assert(comp->env()->system_dictionary_modification_counter_changed(),
           "Must invalidate if TypeFuncs differ");
  }
#endif

  GraphKit& exit_kit = parse.exits();

  if (comp->failing()) {
    // Drain every pending exception state before giving up.
    while (exit_kit.pop_exception_state() != NULL) {
    }
    return NULL;
  }

  assert(exit_kit.jvms()->same_calls_as(jvms), "sanity");

  // The result is the parser's exit state, augmented by any exceptional states.
  return exit_kit.transfer_exceptions_into_jvms();
}
void IndexSet::populate_free_list() { Compile *compile = Compile::current(); BitBlock *free = (BitBlock*)compile->indexSet_free_block_list(); char *mem = (char*)arena()->Amalloc_4(sizeof(BitBlock) * bitblock_alloc_chunk_size + 32); // Align the pointer to a 32 bit boundary. BitBlock *new_blocks = (BitBlock*)(((uintptr_t)mem + 32) & ~0x001F); // Add the new blocks to the free list. for (int i = 0; i < bitblock_alloc_chunk_size; i++) { new_blocks->set_next(free); free = new_blocks; new_blocks++; } compile->set_indexSet_free_block_list(free); #ifdef ASSERT if (CollectIndexSetStatistics) { _alloc_new += bitblock_alloc_chunk_size; } #endif }
inline void* operator new( size_t x ) { Compile* compile = Compile::current(); compile->set_type_last_size(x); void *temp = compile->type_arena()->Amalloc_D(x); compile->set_type_hwm(temp); return temp; }
// Choose a CallGenerator for a method handle call.
//
// Preference order:
//   1. an inline generator from for_method_handle_inline(), wrapped in a
//      late-inline generator when AlwaysIncrementalInline is on and delaying
//      is not forbidden;
//   2. a method-handle late-inline generator when incremental inlining is
//      enabled and the call site has a positive scaled count;
//   3. a plain out-of-line direct call.
CallGenerator* CallGenerator::for_method_handle_call(JVMState* jvms, ciMethod* caller, ciMethod* callee, bool delayed_forbidden) {
  assert(callee->is_method_handle_intrinsic() ||
         callee->is_compiled_lambda_form(), "for_method_handle_call mismatch");

  // Passed by reference to for_method_handle_inline().
  // NOTE(review): presumably always assigned by the callee - confirm, since
  // it is read below without being initialized here.
  bool input_not_const;
  CallGenerator* inline_cg = CallGenerator::for_method_handle_inline(jvms, caller, callee, input_not_const);
  Compile* C = Compile::current();

  if (inline_cg != NULL) {
    const bool wrap_late = !delayed_forbidden && AlwaysIncrementalInline;
    if (wrap_late) {
      return CallGenerator::for_late_inline(callee, inline_cg);
    }
    return inline_cg;
  }

  const int bci = jvms->bci();
  ciCallProfile profile = caller->call_profile_at_bci(bci);
  const int call_site_count = caller->scale_count(profile.count());

  const bool try_late_inline =
      IncrementalInline && call_site_count > 0 &&
      (input_not_const || !C->inlining_incrementally() || C->over_inlining_cutoff());
  if (try_late_inline) {
    return CallGenerator::for_mh_late_inline(caller, callee, input_not_const);
  }

  // Out-of-line call.
  return CallGenerator::for_direct_call(callee);
}
virtual void print_inlining_late(const char* msg) { CallNode* call = call_node(); Compile* C = Compile::current(); C->print_inlining_assert_ready(); C->print_inlining(method(), call->jvms()->depth()-1, call->jvms()->bci(), msg); C->print_inlining_move_to(this); C->print_inlining_update_delayed(this); }
// Register this generator as a boxing late-inline candidate (and have the
// inlining printer skip it), then emit the call through
// DirectCallGenerator::generate().
virtual JVMState* generate(JVMState* jvms, Parse* parent_parser) {
  Compile* const comp = Compile::current();

  comp->print_inlining_skip(this);
  comp->add_boxing_late_inline(this);

  return DirectCallGenerator::generate(jvms, parent_parser);
}
// Emit an out-of-line invokedynamic call.
//
// The CallSite object is loaded from the caller's constant pool cache, its
// target MethodHandle is loaded from the CallSite, and a CallStaticJavaNode
// to the resolve-opt-virtual-call stub is built with the MethodHandle as the
// first argument followed by the original arguments.  The call's result is
// pushed and the resulting JVMState (with exceptions transferred) is returned.
JVMState* DynamicCallGenerator::generate(JVMState* jvms) {
  GraphKit kit(jvms);
  Compile* C = kit.C;

  if (C->log() != NULL) {
    C->log()->elem("dynamic_call bci='%d'", jvms->bci());
  }

  // Get the constant pool cache from the caller class.
  ciMethod* caller_method = jvms->method();
  ciBytecodeStream str(caller_method);
  str.force_bci(jvms->bci());  // Set the stream to the invokedynamic bci.
  assert(str.cur_bc() == Bytecodes::_invokedynamic, "wrong place to issue a dynamic call!");
  ciCPCache* cpcache = str.get_cpcache();

  // Get the offset of the CallSite from the constant pool cache
  // pointer.
  int index = str.get_method_index();
  size_t call_site_offset = cpcache->get_f1_offset(index);

  // Load the CallSite object from the constant pool cache.
  const TypeOopPtr* cpcache_type   = TypeOopPtr::make_from_constant(cpcache);  // returns TypeAryPtr of type T_OBJECT
  const TypeOopPtr* call_site_type = TypeOopPtr::make_from_klass(C->env()->CallSite_klass());
  Node* cpcache_adr   = kit.makecon(cpcache_type);
  Node* call_site_adr = kit.basic_plus_adr(cpcache_adr, call_site_offset);
  // The oops in the constant pool cache are not compressed; load them as raw pointers.
  Node* call_site     = kit.make_load(kit.control(), call_site_adr, call_site_type, T_ADDRESS, Compile::AliasIdxRaw);

  // Load the target MethodHandle from the CallSite object.
  const TypeOopPtr* target_type = TypeOopPtr::make_from_klass(C->env()->MethodHandle_klass());
  Node* target_mh_adr = kit.basic_plus_adr(call_site, java_lang_invoke_CallSite::target_offset_in_bytes());
  Node* target_mh     = kit.make_load(kit.control(), target_mh_adr, target_type, T_OBJECT);

  address resolve_stub = SharedRuntime::get_resolve_opt_virtual_call_stub();

  CallStaticJavaNode* call = new (C, tf()->domain()->cnt()) CallStaticJavaNode(tf(), resolve_stub, method(), kit.bci());
  // invokedynamic is treated as an optimized invokevirtual.
  call->set_optimized_virtual(true);
  // Take extra care (in the presence of argument motion) not to trash the SP:
  call->set_method_handle_invoke(true);

  // Pass the target MethodHandle as first argument and shift the
  // other arguments.
  call->init_req(0 + TypeFunc::Parms, target_mh);
  uint nargs = call->method()->arg_size();
  for (uint i = 1; i < nargs; i++) {
    Node* arg = kit.argument(i - 1);
    call->init_req(i + TypeFunc::Parms, arg);
  }

  kit.set_edges_for_java_call(call);
  Node* ret = kit.set_results_for_java_call(call);
  kit.push_node(method()->return_type()->basic_type(), ret);
  return kit.transfer_exceptions_into_jvms();
}
// Log this generator's inline id, register it as a boxing late-inline
// candidate, then emit the call through DirectCallGenerator::generate().
virtual JVMState* generate(JVMState* jvms) {
  Compile* const comp = Compile::current();

  comp->log_inline_id(this);
  comp->add_boxing_late_inline(this);

  return DirectCallGenerator::generate(jvms);
}
//------------------------------dump_spec-------------------------------------- // Print any per-operand special info void MachNode::dump_spec(outputStream *st) const { uint cnt = num_opnds(); for( uint i=0; i<cnt; i++ ) _opnds[i]->dump_spec(st); const TypePtr *t = adr_type(); if( t ) { Compile* C = Compile::current(); if( C->alias_type(t)->is_volatile() ) st->print(" Volatile!"); } }
// Emit the call via LateInlineCallGenerator::generate(), then either queue
// this generator for late inlining or, when the input is not constant,
// just attach it to the call node.
virtual JVMState* generate(JVMState* jvms) {
  JVMState* result = LateInlineCallGenerator::generate(jvms);

  Compile* comp = Compile::current();
  if (!_input_not_const) {
    comp->add_late_inline(this);
  } else {
    // inlining won't be possible so no need to enqueue right now.
    call_node()->set_generator(this);
  }
  return result;
}
// Generate the cold version of the call and, if the resulting control is a
// CatchProj -> Catch -> Proj -> CallJava chain, record that call in
// _call_info (with the hot generator and a computed heat) and insert it into
// the compilation's warm call list.  Returns the cold generator's JVMState.
JVMState* WarmCallGenerator::generate(JVMState* jvms) {
  Compile* C = Compile::current();
  if (C->log() != NULL) {
    C->log()->elem("warm_call bci='%d'", jvms->bci());
  }

  jvms = _if_cold->generate(jvms);
  if (jvms == NULL) {
    return jvms;
  }

  // Walk up from the control projection to the call; any step that does not
  // match the expected shape replaces the cursor with top.
  Node* cursor = jvms->map()->control();
  cursor = cursor->is_CatchProj() ? cursor->in(0) : C->top();
  cursor = cursor->is_Catch()     ? cursor->in(0) : C->top();
  cursor = cursor->is_Proj()      ? cursor->in(0) : C->top();

  if (cursor->is_CallJava()) {
    _call_info->set_call(cursor->as_Call());
    _call_info->set_hot_cg(_if_hot);
#ifndef PRODUCT
    if (PrintOpto || PrintOptoInlining) {
      tty->print_cr("Queueing for warm inlining at bci %d:", jvms->bci());
      tty->print("WCI: ");
      _call_info->print();
    }
#endif
    _call_info->set_heat(_call_info->compute_heat());
    C->set_warm_calls(_call_info->insert_into(C->warm_calls()));
  }
  return jvms;
}
// Emit an out-of-line call now and, unless this is a method-handle late
// inline, queue this generator so the call site is revisited once the main
// parse is finished.
virtual JVMState* generate(JVMState* jvms, Parse* parent_parser) {
  Compile* const comp = Compile::current();

  comp->print_inlining_skip(this);

  const bool enqueue_now = !is_mh_late_inline();
  if (enqueue_now) {
    comp->add_late_inline(this);
  }

  // Emit the CallStaticJava and request separate projections so
  // that the late inlining logic can distinguish between fall
  // through and exceptional uses of the memory and io projections
  // as is done for allocations and macro expansion.
  return DirectCallGenerator::generate(jvms, parent_parser);
}
// Pop one BitBlock off the free block list held by the current Compile
// object, refilling the list first if it is empty.  The block is cleared
// before it is returned.
IndexSet::BitBlock *IndexSet::alloc_block() {
#ifdef ASSERT
  if (CollectIndexSetStatistics) {
    _alloc_total++;
  }
#endif
  Compile* comp = Compile::current();

  BitBlock* head = (BitBlock*)comp->indexSet_free_block_list();
  if (head == NULL) {
    // Nothing available: allocate a new chunk and re-read the list head.
    populate_free_list();
    head = (BitBlock*)comp->indexSet_free_block_list();
  }

  // Unlink the head block and hand it out zeroed.
  comp->set_indexSet_free_block_list(head->next());
  head->clear();
  return head;
}
// Attempt to inline the call that was emitted out-of-line for this generator.
//
// The routine gives up silently (returns without changing the graph) when the
// call node is missing, has no uses, has no/top control, has a top argument
// (other than a Type::HALF slot) or top memory, when do_late_inline_check()
// rejects the site, when the inline generator produces no JVMState, or when
// the compilation is failing.  Otherwise a fresh JVMState and SafePoint map
// are synthesized from the call's inputs, the inline generator is run on
// them, and the call is replaced by the inlined result.
void LateInlineCallGenerator::do_late_inline() {
  // Can't inline it
  CallStaticJavaNode* call = call_node();
  if (call == NULL || call->outcnt() == 0 ||
      call->in(0) == NULL || call->in(0)->is_top()) {
    return;
  }

  // Bail out if any argument input is top, ignoring slots whose domain
  // type is Type::HALF.
  const TypeTuple *r = call->tf()->domain();
  for (int i1 = 0; i1 < method()->arg_size(); i1++) {
    if (call->in(TypeFunc::Parms + i1)->is_top() && r->field_at(TypeFunc::Parms + i1) != Type::HALF) {
      assert(Compile::current()->inlining_incrementally(), "shouldn't happen during parsing");
      return;
    }
  }

  // Likewise bail out if the memory input is top.
  if (call->in(TypeFunc::Memory)->is_top()) {
    assert(Compile::current()->inlining_incrementally(), "shouldn't happen during parsing");
    return;
  }

  Compile* C = Compile::current();
  // Remove inlined methods from Compiler's lists.
  if (call->is_macro()) {
    C->remove_macro_node(call);
  }

  // Make a clone of the JVMState that is appropriate to use for driving a parse
  JVMState* old_jvms = call->jvms();
  JVMState* jvms = old_jvms->clone_shallow(C);
  uint size = call->req();
  // The new map starts out with exactly the call's inputs.
  SafePointNode* map = new (C) SafePointNode(size, jvms);
  for (uint i1 = 0; i1 < size; i1++) {
    map->init_req(i1, call->in(i1));
  }

  // Make sure the state is a MergeMem for parsing.
  if (!map->in(TypeFunc::Memory)->is_MergeMem()) {
    Node* mem = MergeMemNode::make(C, map->in(TypeFunc::Memory));
    C->initial_gvn()->set_type_bottom(mem);
    map->set_req(TypeFunc::Memory, mem);
  }

  uint nargs = method()->arg_size();
  // blow away old call arguments
  Node* top = C->top();
  for (uint i1 = 0; i1 < nargs; i1++) {
    map->set_req(TypeFunc::Parms + i1, top);
  }
  jvms->set_map(map);

  // Make enough space in the expression stack to transfer
  // the incoming arguments and return value.
  map->ensure_stack(jvms, jvms->method()->max_stack());
  for (uint i1 = 0; i1 < nargs; i1++) {
    map->set_argument(jvms, i1, call->in(TypeFunc::Parms + i1));
  }

  // This check is done here because for_method_handle_inline() method
  // needs jvms for inlined state.
  if (!do_late_inline_check(jvms)) {
    // Rejected: drop the synthesized map's inputs before returning.
    map->disconnect_inputs(NULL, C);
    return;
  }

  C->print_inlining_insert(this);

  // Log the late inline together with the chain of caller JVMStates.
  CompileLog* log = C->log();
  if (log != NULL) {
    log->head("late_inline method='%d'", log->identify(method()));
    JVMState* p = jvms;
    while (p != NULL) {
      log->elem("jvms bci='%d' method='%d'", p->bci(), log->identify(p->method()));
      p = p->caller();
    }
    log->tail("late_inline");
  }

  // Setup default node notes to be picked up by the inlining
  Node_Notes* old_nn = C->default_node_notes();
  if (old_nn != NULL) {
    Node_Notes* entry_nn = old_nn->clone(C);
    entry_nn->set_jvms(jvms);
    C->set_default_node_notes(entry_nn);
  }

  // Now perform the inlining using the synthesized JVMState
  JVMState* new_jvms = _inline_cg->generate(jvms, NULL);
  if (new_jvms == NULL)  return;  // no change
  if (C->failing())      return;

  // Capture any exceptional control flow
  GraphKit kit(new_jvms);

  // Find the result object: one stack slot, a pair, or top when the method
  // returns nothing or the kit is stopped.
  Node* result = C->top();
  int result_size = method()->return_type()->size();
  if (result_size != 0 && !kit.stopped()) {
    result = (result_size == 1) ? kit.pop() : kit.pop_pair();
  }

  // Record the effects of the inlining on the compilation.
  C->set_has_loops(C->has_loops() || _inline_cg->method()->has_loops());
  C->env()->notice_inlined_method(_inline_cg->method());
  C->set_inlining_progress(true);

  kit.replace_call(call, result);
}
virtual void print_inlining_late(const char* msg) { CallNode* call = call_node(); Compile* C = Compile::current(); C->print_inlining_insert(this); C->print_inlining(method(), call->jvms()->depth()-1, call->jvms()->bci(), msg); }
//------------------------------schedule_local---------------------------------
// Topological sort within a block.  Someday become a real scheduler.
//
// matcher      - supplies the Compile object and is forwarded to sched_call()
// bbs          - indexed by node _idx; used to test whether a node belongs
//                to this block
// ready_cnt    - indexed by node _idx; filled here with the number of
//                block-local inputs and decremented as inputs get scheduled
// next_call    - state for the 'next_call' heuristic, forwarded to
//                needed_for_next_call(), select() and sched_call()
// node_latency - forwarded to select() and printed in trace output
//
// Returns true when every node up to end_idx() was scheduled.  Otherwise a
// result code is recorded on the Compile object and false is returned.
bool Block::schedule_local(Matcher &matcher, Block_Array &bbs,int *ready_cnt, VectorSet &next_call, GrowableArray<uint> &node_latency) {
  // Already "sorted" are the block start Node (as the first entry), and
  // the block-ending Node and any trailing control projections.  We leave
  // these alone.  PhiNodes and ParmNodes are made to follow the block start
  // Node.  Everything else gets topo-sorted.

#ifndef PRODUCT
  if (TraceOptoPipelining) {
    tty->print("# before schedule_local\n");
    for (uint i = 0;i < _nodes.size();i++) {
      tty->print("# ");
      _nodes[i]->fast_dump();
    }
    tty->print("\n");
  }
#endif

  // RootNode is already sorted
  if( _nodes.size() == 1 ) return true;

  // Move PhiNodes and ParmNodes from 1 to cnt up to the start
  uint node_cnt = end_idx();
  uint phi_cnt = 1;
  uint i;
  for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
    Node *n = _nodes[i];
    if( n->is_Phi() || // Found a PhiNode or ParmNode
        (n->is_Proj() && n->in(0) == head()) ) {
      // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt
      _nodes.map(i,_nodes[phi_cnt]);
      _nodes.map(phi_cnt++,n); // swap Phi/Parm up front
    } else { // All others
      // Count block-local inputs to 'n'
      uint cnt = n->len(); // Input count
      uint local = 0;
      for( uint j=0; j<cnt; j++ ) {
        Node *m = n->in(j);
        if( m && bbs[m->_idx] == this && !m->is_top() )
          local++; // One more block-local input
      }
      ready_cnt[n->_idx] = local; // Count em up

      // A few node types require changing a required edge to a precedence edge
      // before allocation.
      MachNode *m = n->is_Mach();
      if( UseConcMarkSweepGC ) {
        if( m && m->ideal_Opcode() == Op_StoreCM ) {
          // Note: Required edges with an index greater than oper_input_base
          // are not supported by the allocator.
          // Note2: Can only depend on unmatched edge being last,
          // can not depend on its absolute position.
          Node *oop_store = n->in(n->req() - 1);
          n->del_req(n->req() - 1);
          n->add_prec(oop_store);
          assert(bbs[oop_store->_idx]->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark");
        }
      }
      // For MemBarAcquire the TypeFunc::Parms input is likewise turned
      // into a precedence edge.
      if( m && m->ideal_Opcode() == Op_MemBarAcquire ) {
        Node *x = n->in(TypeFunc::Parms);
        n->del_req(TypeFunc::Parms);
        n->add_prec(x);
      }
    }
  }
  for(uint i2=i; i2<_nodes.size(); i2++ ) // Trailing guys get zapped count
    ready_cnt[_nodes[i2]->_idx] = 0;

  // All the prescheduled guys do not hold back internal nodes
  uint i3;
  for(i3 = 0; i3<phi_cnt; i3++ ) { // For all pre-scheduled
    Node *n = _nodes[i3]; // Get pre-scheduled
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j);
      if( bbs[m->_idx] ==this ) // Local-block user
        ready_cnt[m->_idx]--; // Fix ready count
    }
  }

  // Make a worklist
  Node_List worklist;
  for(uint i4=i3; i4<node_cnt; i4++ ) { // Put ready guys on worklist
    Node *m = _nodes[i4];
    if( !ready_cnt[m->_idx] ) // Zero ready count?
      worklist.push(m); // Then on to worklist!
  }

  // Warm up the 'next_call' heuristic bits
  needed_for_next_call(_nodes[0], next_call, bbs);

#ifndef PRODUCT
  if (TraceOptoPipelining) {
    for (uint j=0; j<_nodes.size(); j++) {
      Node *n = _nodes[j];
      int idx = n->_idx;
      tty->print("# ready cnt:%3d ", ready_cnt[idx]);
      tty->print("latency:%3d ", node_latency.at_grow(idx));
      tty->print("%4d: %s\n", idx, n->Name());
    }
  }
#endif

  // Pull from worklist and schedule
  while( worklist.size() ) { // Worklist is not ready

#ifndef PRODUCT
    uint before_size = worklist.size();
    if (TraceOptoPipelining && before_size > 1) {
      tty->print("# before select:");
      for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
        Node *n = worklist[i]; // Get Node on worklist
        tty->print(" %3d", n->_idx);
      }
      tty->print("\n");
    }
#endif

    // Select and pop a ready guy from worklist
    Node* n = select(worklist, bbs, ready_cnt, next_call, phi_cnt, node_latency);
    _nodes.map(phi_cnt++,n); // Schedule him next
    MachNode *m = n->is_Mach();

#ifndef PRODUCT
    if (TraceOptoPipelining && before_size > 1) {
      tty->print("# select %d: %s", n->_idx, n->Name());
      tty->print(", latency:%d", node_latency.at_grow(n->_idx));
      n->dump();
      tty->print("# after select:");
      for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
        Node *n = worklist[i]; // Get Node on worklist
        tty->print(" %4d", n->_idx);
      }
      tty->print("\n");
    }
#endif
    // Calls are handed to sched_call(), which returns the updated
    // scheduling position; the generic user update below is skipped.
    if( m ) {
      MachCallNode *mcall = m->is_MachCall();
      if( mcall ) {
        phi_cnt = sched_call(matcher, bbs, phi_cnt, worklist, ready_cnt, mcall, next_call);
        continue;
      }
    }
    // Children are now all ready
    for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) {
      Node* m = n->fast_out(i5); // Get user
      if( bbs[m->_idx] != this ) continue;
      if( m->is_Phi() ) continue;
      if( !--ready_cnt[m->_idx] )
        worklist.push(m);
    }
  }

  if( phi_cnt != end_idx() ) {
    // did not schedule all.  Retry, Bailout, or Die
    Compile* C = matcher.C;
    if (C->subsume_loads() == true) {
      // Retry with subsume_loads == false
      C->set_result(Compile::Comp_subsumed_load_conflict);
    } else {
      // Bailout without retry
      C->set_result(Compile::Comp_no_retry);
    }
    // assert( phi_cnt == end_idx(), "did not schedule all" );
    return false;
  }

#ifndef PRODUCT
  if (TraceOptoPipelining) {
    tty->print("# after schedule_local\n");
    for (uint i = 0;i < _nodes.size();i++) {
      tty->print("# ");
      _nodes[i]->fast_dump();
    }
    tty->print("\n");
  }
#endif

  return true;
}
inline void operator delete( void* ptr ) { Compile* compile = Compile::current(); compile->type_arena()->Afree(ptr,compile->type_last_size()); }
void LateInlineCallGenerator::do_late_inline() { // Can't inline it if (call_node() == NULL || call_node()->outcnt() == 0 || call_node()->in(0) == NULL || call_node()->in(0)->is_top()) return; CallStaticJavaNode* call = call_node(); // Make a clone of the JVMState that appropriate to use for driving a parse Compile* C = Compile::current(); JVMState* jvms = call->jvms()->clone_shallow(C); uint size = call->req(); SafePointNode* map = new (C, size) SafePointNode(size, jvms); for (uint i1 = 0; i1 < size; i1++) { map->init_req(i1, call->in(i1)); } // Make sure the state is a MergeMem for parsing. if (!map->in(TypeFunc::Memory)->is_MergeMem()) { map->set_req(TypeFunc::Memory, MergeMemNode::make(C, map->in(TypeFunc::Memory))); } // Make enough space for the expression stack and transfer the incoming arguments int nargs = method()->arg_size(); jvms->set_map(map); map->ensure_stack(jvms, jvms->method()->max_stack()); if (nargs > 0) { for (int i1 = 0; i1 < nargs; i1++) { map->set_req(i1 + jvms->argoff(), call->in(TypeFunc::Parms + i1)); } } CompileLog* log = C->log(); if (log != NULL) { log->head("late_inline method='%d'", log->identify(method())); JVMState* p = jvms; while (p != NULL) { log->elem("jvms bci='%d' method='%d'", p->bci(), log->identify(p->method())); p = p->caller(); } log->tail("late_inline"); } // Setup default node notes to be picked up by the inlining Node_Notes* old_nn = C->default_node_notes(); if (old_nn != NULL) { Node_Notes* entry_nn = old_nn->clone(C); entry_nn->set_jvms(jvms); C->set_default_node_notes(entry_nn); } // Now perform the inling using the synthesized JVMState JVMState* new_jvms = _inline_cg->generate(jvms); if (new_jvms == NULL) return; // no change if (C->failing()) return; // Capture any exceptional control flow GraphKit kit(new_jvms); // Find the result object Node* result = C->top(); int result_size = method()->return_type()->size(); if (result_size != 0 && !kit.stopped()) { result = (result_size == 1) ? 
kit.pop() : kit.pop_pair(); } kit.replace_call(call, result); }
// Try to turn a method handle intrinsic call into an inlined call.
//
// For _invokeBasic the MethodHandle receiver must be a constant; for the
// _linkTo* intrinsics the trailing MemberName argument must be a constant.
// In both cases the constant's vmtarget is looked up and an inline
// CallGenerator for it is returned if the compiler can build one.  Returns
// NULL when no inline generator is available.  Any other intrinsic id is a
// fatal error.
CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee) {
  GraphKit kit(jvms);
  PhaseGVN& gvn = kit.gvn();
  Compile* C = kit.C;
  vmIntrinsics::ID iid = callee->intrinsic_id();

  switch (iid) {
  case vmIntrinsics::_invokeBasic:
    {
      // get MethodHandle receiver
      Node* mh_node = kit.argument(0);
      if (mh_node->Opcode() != Op_ConP) {
        if (PrintInlining) {
          CompileTask::print_inlining(callee, jvms->depth() - 1, jvms->bci(), "receiver not constant");
        }
        break;
      }

      const TypeOopPtr* mh_type = mh_node->bottom_type()->is_oopptr();
      ciMethod* target = mh_type->const_oop()->as_method_handle()->get_vmtarget();
      guarantee(!target->is_method_handle_intrinsic(), "should not happen");  // XXX remove
      const int vtable_index = methodOopDesc::invalid_vtable_index;
      CallGenerator* cg = C->call_generator(target, vtable_index, false, jvms, true, PROB_ALWAYS);
      if (cg != NULL && cg->is_inline()) {
        return cg;
      }
    }
    break;

  case vmIntrinsics::_linkToVirtual:
  case vmIntrinsics::_linkToStatic:
  case vmIntrinsics::_linkToSpecial:
  case vmIntrinsics::_linkToInterface:
    {
      // pop MemberName argument
      Node* mn_node = kit.argument(callee->arg_size() - 1);
      if (mn_node->Opcode() != Op_ConP) {
        break;
      }

      const TypeOopPtr* mn_type = mn_node->bottom_type()->is_oopptr();
      ciMethod* target = mn_type->const_oop()->as_member_name()->get_vmtarget();

      // In lambda forms we erase signature types to avoid resolving issues
      // involving class loaders.  When we optimize a method handle invoke
      // to a direct call we must cast the receiver and arguments to its
      // actual types.
      ciSignature* signature = target->signature();
      const int receiver_skip = target->is_static() ? 0 : 1;

      // Cast receiver to its type.
      if (!target->is_static()) {
        Node* recv = kit.argument(0);
        const TypeOopPtr* recv_type = recv->bottom_type()->isa_oopptr();
        const Type* sig_type = TypeOopPtr::make_from_klass(signature->accessing_klass());
        const bool needs_cast = (recv_type != NULL) && !recv_type->higher_equal(sig_type);
        if (needs_cast) {
          Node* cast_obj = gvn.transform(new (C) CheckCastPPNode(kit.control(), recv, sig_type));
          kit.set_argument(0, cast_obj);
        }
      }

      // Cast reference arguments to its type.
      for (int i = 0; i < signature->count(); i++) {
        ciType* t = signature->type_at(i);
        if (!t->is_klass()) {
          continue;
        }
        const int slot = receiver_skip + i;
        Node* arg = kit.argument(slot);
        const TypeOopPtr* arg_type = arg->bottom_type()->isa_oopptr();
        const Type* sig_type = TypeOopPtr::make_from_klass(t->as_klass());
        const bool needs_cast = (arg_type != NULL) && !arg_type->higher_equal(sig_type);
        if (needs_cast) {
          Node* cast_obj = gvn.transform(new (C) CheckCastPPNode(kit.control(), arg, sig_type));
          kit.set_argument(slot, cast_obj);
        }
      }

      const int vtable_index = methodOopDesc::invalid_vtable_index;
      const bool call_is_virtual = target->is_abstract();  // FIXME workaround
      CallGenerator* cg = C->call_generator(target, vtable_index, call_is_virtual, jvms, true, PROB_ALWAYS);
      if (cg != NULL && cg->is_inline()) {
        return cg;
      }
    }
    break;

  default:
    fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)));
    break;
  }
  return NULL;
}
//------------------------------schedule_local---------------------------------
// Topological sort within a block.  Someday become a real scheduler.
//
// cfg       - supplies the node-to-block map (_bbs), the node latency table
//             used in trace output, and the tracing switch
// matcher   - supplies the Compile object, the C frame pointer and the
//             register save policy, and is forwarded to sched_call()
// ready_cnt - indexed by node _idx; filled here with the number of
//             block-local inputs and decremented as inputs get scheduled
// next_call - state for the 'next_call' heuristic, forwarded to
//             needed_for_next_call(), select() and sched_call()
//
// Returns true when every node up to end_idx() was scheduled.  Otherwise
// returns false, after possibly recording a retry-without-subsuming-loads
// failure on the Compile object.
bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, GrowableArray<int> &ready_cnt, VectorSet &next_call) {
  // Already "sorted" are the block start Node (as the first entry), and
  // the block-ending Node and any trailing control projections.  We leave
  // these alone.  PhiNodes and ParmNodes are made to follow the block start
  // Node.  Everything else gets topo-sorted.

#ifndef PRODUCT
  if (cfg->trace_opto_pipelining()) {
    tty->print_cr("# --- schedule_local B%d, before: ---", _pre_order);
    for (uint i = 0;i < _nodes.size();i++) {
      tty->print("# ");
      _nodes[i]->fast_dump();
    }
    tty->print_cr("#");
  }
#endif

  // RootNode is already sorted
  if( _nodes.size() == 1 ) return true;

  // Move PhiNodes and ParmNodes from 1 to cnt up to the start
  uint node_cnt = end_idx();
  uint phi_cnt = 1;
  uint i;
  for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
    Node *n = _nodes[i];
    if( n->is_Phi() || // Found a PhiNode or ParmNode
        (n->is_Proj() && n->in(0) == head()) ) {
      // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt
      _nodes.map(i,_nodes[phi_cnt]);
      _nodes.map(phi_cnt++,n); // swap Phi/Parm up front
    } else { // All others
      // Count block-local inputs to 'n'
      uint cnt = n->len(); // Input count
      uint local = 0;
      for( uint j=0; j<cnt; j++ ) {
        Node *m = n->in(j);
        if( m && cfg->_bbs[m->_idx] == this && !m->is_top() )
          local++; // One more block-local input
      }
      ready_cnt.at_put(n->_idx, local); // Count em up

#ifdef ASSERT
      if( UseConcMarkSweepGC || UseG1GC ) {
        if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) {
          // Check the precedence edges
          for (uint prec = n->req(); prec < n->len(); prec++) {
            Node* oop_store = n->in(prec);
            if (oop_store != NULL) {
              assert(cfg->_bbs[oop_store->_idx]->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark");
            }
          }
        }
      }
#endif

      // A few node types require changing a required edge to a precedence edge
      // before allocation.
      if( n->is_Mach() && n->req() > TypeFunc::Parms &&
          (n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire ||
           n->as_Mach()->ideal_Opcode() == Op_MemBarVolatile) ) {
        // MemBarAcquire could be created without Precedent edge.
        // del_req() replaces the specified edge with the last input edge
        // and then removes the last edge. If the specified edge > number of
        // edges the last edge will be moved outside of the input edges array
        // and the edge will be lost. This is why this code should be
        // executed only when Precedent (== TypeFunc::Parms) edge is present.
        Node *x = n->in(TypeFunc::Parms);
        n->del_req(TypeFunc::Parms);
        n->add_prec(x);
      }
    }
  }
  for(uint i2=i; i2<_nodes.size(); i2++ ) // Trailing guys get zapped count
    ready_cnt.at_put(_nodes[i2]->_idx, 0);

  // All the prescheduled guys do not hold back internal nodes
  uint i3;
  for(i3 = 0; i3<phi_cnt; i3++ ) { // For all pre-scheduled
    Node *n = _nodes[i3]; // Get pre-scheduled
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j);
      if( cfg->_bbs[m->_idx] ==this ) { // Local-block user
        int m_cnt = ready_cnt.at(m->_idx)-1;
        ready_cnt.at_put(m->_idx, m_cnt); // Fix ready count
      }
    }
  }

  // Ready nodes that should be appended to the worklist after all others.
  Node_List delay;
  // Make a worklist
  Node_List worklist;
  for(uint i4=i3; i4<node_cnt; i4++ ) { // Put ready guys on worklist
    Node *m = _nodes[i4];
    if( !ready_cnt.at(m->_idx) ) { // Zero ready count?
      if (m->is_iteratively_computed()) {
        // Push induction variable increments last to allow other uses
        // of the phi to be scheduled first. The select() method breaks
        // ties in scheduling by worklist order.
        delay.push(m);
      } else if (m->is_Mach() && m->as_Mach()->ideal_Opcode() == Op_CreateEx) {
        // Force the CreateEx to the top of the list so it's processed
        // first and ends up at the start of the block.
        worklist.insert(0, m);
      } else {
        worklist.push(m); // Then on to worklist!
      }
    }
  }
  // Move the delayed nodes onto the end of the worklist.
  while (delay.size()) {
    Node* d = delay.pop();
    worklist.push(d);
  }

  // Warm up the 'next_call' heuristic bits
  needed_for_next_call(_nodes[0], next_call, cfg->_bbs);

#ifndef PRODUCT
  if (cfg->trace_opto_pipelining()) {
    for (uint j=0; j<_nodes.size(); j++) {
      Node *n = _nodes[j];
      int idx = n->_idx;
      tty->print("# ready cnt:%3d ", ready_cnt.at(idx));
      tty->print("latency:%3d ", cfg->_node_latency->at_grow(idx));
      tty->print("%4d: %s\n", idx, n->Name());
    }
  }
#endif

  // Nodes with an index at or above this bound have no ready_cnt entry;
  // the user loop below skips them as newly created nodes.
  uint max_idx = (uint)ready_cnt.length();
  // Pull from worklist and schedule
  while( worklist.size() ) { // Worklist is not ready

#ifndef PRODUCT
    if (cfg->trace_opto_pipelining()) {
      tty->print("# ready list:");
      for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
        Node *n = worklist[i]; // Get Node on worklist
        tty->print(" %d", n->_idx);
      }
      tty->cr();
    }
#endif

    // Select and pop a ready guy from worklist
    Node* n = select(cfg, worklist, ready_cnt, next_call, phi_cnt);
    _nodes.map(phi_cnt++,n); // Schedule him next

#ifndef PRODUCT
    if (cfg->trace_opto_pipelining()) {
      tty->print("# select %d: %s", n->_idx, n->Name());
      tty->print(", latency:%d", cfg->_node_latency->at_grow(n->_idx));
      n->dump();
      if (Verbose) {
        tty->print("# ready list:");
        for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
          Node *n = worklist[i]; // Get Node on worklist
          tty->print(" %d", n->_idx);
        }
        tty->cr();
      }
    }

#endif
    // Calls are handed to sched_call(), which returns the updated
    // scheduling position; the generic user update below is skipped.
    if( n->is_MachCall() ) {
      MachCallNode *mcall = n->as_MachCall();
      phi_cnt = sched_call(matcher, cfg->_bbs, phi_cnt, worklist, ready_cnt, mcall, next_call);
      continue;
    }

    // A non-call Mach node that reports has_call() gets a fat projection
    // inserted right after it, with kills added for the C frame pointer and
    // the node's own output register mask.
    if (n->is_Mach() && n->as_Mach()->has_call()) {
      RegMask regs;
      regs.Insert(matcher.c_frame_pointer());
      regs.OR(n->out_RegMask());

      MachProjNode *proj = new (matcher.C, 1) MachProjNode( n, 1, RegMask::Empty, MachProjNode::fat_proj );
      cfg->_bbs.map(proj->_idx,this);
      _nodes.insert(phi_cnt++, proj);

      add_call_kills(proj, regs, matcher._c_reg_save_policy, false);
    }

    // Children are now all ready
    for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) {
      Node* m = n->fast_out(i5); // Get user
      if( cfg->_bbs[m->_idx] != this ) continue;
      if( m->is_Phi() ) continue;
      if (m->_idx >= max_idx) { // new node, skip it
        assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types");
        continue;
      }
      int m_cnt = ready_cnt.at(m->_idx)-1;
      ready_cnt.at_put(m->_idx, m_cnt);
      if( m_cnt == 0 )
        worklist.push(m);
    }
  }

  if( phi_cnt != end_idx() ) {
    // did not schedule all.  Retry, Bailout, or Die
    Compile* C = matcher.C;
    if (C->subsume_loads() == true && !C->failing()) {
      // Retry with subsume_loads == false
      // If this is the first failure, the sentinel string will "stick"
      // to the Compile object, and the C2Compiler will see it and retry.
      C->record_failure(C2Compiler::retry_no_subsuming_loads());
    }
    // assert( phi_cnt == end_idx(), "did not schedule all" );
    return false;
  }

#ifndef PRODUCT
  if (cfg->trace_opto_pipelining()) {
    tty->print_cr("#");
    tty->print_cr("# after schedule_local");
    for (uint i = 0;i < _nodes.size();i++) {
      tty->print("# ");
      _nodes[i]->fast_dump();
    }
    tty->cr();
  }
#endif

  return true;
}
//------------------------------implicit_null_check---------------------------- // Detect implicit-null-check opportunities. Basically, find NULL checks // with suitable memory ops nearby. Use the memory op to do the NULL check. // I can generate a memory op if there is not one nearby. // The proj is the control projection for the not-null case. // The val is the pointer being checked for nullness or // decodeHeapOop_not_null node if it did not fold into address. void Block::implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowed_reasons) { // Assume if null check need for 0 offset then always needed // Intel solaris doesn't support any null checks yet and no // mechanism exists (yet) to set the switches at an os_cpu level if( !ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(0)) return; // Make sure the ptr-is-null path appears to be uncommon! float f = end()->as_MachIf()->_prob; if( proj->Opcode() == Op_IfTrue ) f = 1.0f - f; if( f > PROB_UNLIKELY_MAG(4) ) return; uint bidx = 0; // Capture index of value into memop bool was_store; // Memory op is a store op // Get the successor block for if the test ptr is non-null Block* not_null_block; // this one goes with the proj Block* null_block; if (_nodes[_nodes.size()-1] == proj) { null_block = _succs[0]; not_null_block = _succs[1]; } else { assert(_nodes[_nodes.size()-2] == proj, "proj is one or the other"); not_null_block = _succs[0]; null_block = _succs[1]; } while (null_block->is_Empty() == Block::empty_with_goto) { null_block = null_block->_succs[0]; } // Search the exception block for an uncommon trap. // (See Parse::do_if and Parse::do_ifnull for the reason // we need an uncommon trap. Briefly, we need a way to // detect failure of this optimization, as in 6366351.) 
{ bool found_trap = false; for (uint i1 = 0; i1 < null_block->_nodes.size(); i1++) { Node* nn = null_block->_nodes[i1]; if (nn->is_MachCall() && nn->as_MachCall()->entry_point() == SharedRuntime::uncommon_trap_blob()->entry_point()) { const Type* trtype = nn->in(TypeFunc::Parms)->bottom_type(); if (trtype->isa_int() && trtype->is_int()->is_con()) { jint tr_con = trtype->is_int()->get_con(); Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(tr_con); Deoptimization::DeoptAction action = Deoptimization::trap_request_action(tr_con); assert((int)reason < (int)BitsPerInt, "recode bit map"); if (is_set_nth_bit(allowed_reasons, (int) reason) && action != Deoptimization::Action_none) { // This uncommon trap is sure to recompile, eventually. // When that happens, C->too_many_traps will prevent // this transformation from happening again. found_trap = true; } } break; } } if (!found_trap) { // We did not find an uncommon trap. return; } } // Check for decodeHeapOop_not_null node which did not fold into address bool is_decoden = ((intptr_t)val) & 1; val = (Node*)(((intptr_t)val) & ~1); assert(!is_decoden || (val->in(0) == NULL) && val->is_Mach() && (val->as_Mach()->ideal_Opcode() == Op_DecodeN), "sanity"); // Search the successor block for a load or store who's base value is also // the tested value. There may be several. 
Node_List *out = new Node_List(Thread::current()->resource_area()); MachNode *best = NULL; // Best found so far for (DUIterator i = val->outs(); val->has_out(i); i++) { Node *m = val->out(i); if( !m->is_Mach() ) continue; MachNode *mach = m->as_Mach(); was_store = false; int iop = mach->ideal_Opcode(); switch( iop ) { case Op_LoadB: case Op_LoadUS: case Op_LoadD: case Op_LoadF: case Op_LoadI: case Op_LoadL: case Op_LoadP: case Op_LoadN: case Op_LoadS: case Op_LoadKlass: case Op_LoadNKlass: case Op_LoadRange: case Op_LoadD_unaligned: case Op_LoadL_unaligned: assert(mach->in(2) == val, "should be address"); break; case Op_StoreB: case Op_StoreC: case Op_StoreCM: case Op_StoreD: case Op_StoreF: case Op_StoreI: case Op_StoreL: case Op_StoreP: case Op_StoreN: was_store = true; // Memory op is a store op // Stores will have their address in slot 2 (memory in slot 1). // If the value being nul-checked is in another slot, it means we // are storing the checked value, which does NOT check the value! if( mach->in(2) != val ) continue; break; // Found a memory op? case Op_StrComp: case Op_StrEquals: case Op_StrIndexOf: case Op_AryEq: // Not a legit memory op for implicit null check regardless of // embedded loads continue; default: // Also check for embedded loads if( !mach->needs_anti_dependence_check() ) continue; // Not an memory op; skip it if( must_clone[iop] ) { // Do not move nodes which produce flags because // RA will try to clone it to place near branch and // it will cause recompilation, see clone_node(). continue; } { // Check that value is used in memory address in // instructions with embedded load (CmpP val1,(val2+off)). 
Node* base; Node* index; const MachOper* oper = mach->memory_inputs(base, index); if (oper == NULL || oper == (MachOper*)-1) { continue; // Not an memory op; skip it } if (val == base || val == index && val->bottom_type()->isa_narrowoop()) { break; // Found it } else { continue; // Skip it } } break; } // check if the offset is not too high for implicit exception { intptr_t offset = 0; const TypePtr *adr_type = NULL; // Do not need this return value here const Node* base = mach->get_base_and_disp(offset, adr_type); if (base == NULL || base == NodeSentinel) { // Narrow oop address doesn't have base, only index if( val->bottom_type()->isa_narrowoop() && MacroAssembler::needs_explicit_null_check(offset) ) continue; // Give up if offset is beyond page size // cannot reason about it; is probably not implicit null exception } else { const TypePtr* tptr; if (UseCompressedOops && Universe::narrow_oop_shift() == 0) { // 32-bits narrow oop can be the base of address expressions tptr = base->bottom_type()->make_ptr(); } else { // only regular oops are expected here tptr = base->bottom_type()->is_ptr(); } // Give up if offset is not a compile-time constant if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot ) continue; offset += tptr->_offset; // correct if base is offseted if( MacroAssembler::needs_explicit_null_check(offset) ) continue; // Give up is reference is beyond 4K page size } } // Check ctrl input to see if the null-check dominates the memory op Block *cb = cfg->_bbs[mach->_idx]; cb = cb->_idom; // Always hoist at least 1 block if( !was_store ) { // Stores can be hoisted only one block while( cb->_dom_depth > (_dom_depth + 1)) cb = cb->_idom; // Hoist loads as far as we want // The non-null-block should dominate the memory op, too. Live // range spilling will insert a spill in the non-null-block if it is // needs to spill the memory op for an implicit null check. 
if (cb->_dom_depth == (_dom_depth + 1)) { if (cb != not_null_block) continue; cb = cb->_idom; } } if( cb != this ) continue; // Found a memory user; see if it can be hoisted to check-block uint vidx = 0; // Capture index of value into memop uint j; for( j = mach->req()-1; j > 0; j-- ) { if( mach->in(j) == val ) { vidx = j; // Ignore DecodeN val which could be hoisted to where needed. if( is_decoden ) continue; } // Block of memory-op input Block *inb = cfg->_bbs[mach->in(j)->_idx]; Block *b = this; // Start from nul check while( b != inb && b->_dom_depth > inb->_dom_depth ) b = b->_idom; // search upwards for input // See if input dominates null check if( b != inb ) break; } if( j > 0 ) continue; Block *mb = cfg->_bbs[mach->_idx]; // Hoisting stores requires more checks for the anti-dependence case. // Give up hoisting if we have to move the store past any load. if( was_store ) { Block *b = mb; // Start searching here for a local load // mach use (faulting) trying to hoist // n might be blocker to hoisting while( b != this ) { uint k; for( k = 1; k < b->_nodes.size(); k++ ) { Node *n = b->_nodes[k]; if( n->needs_anti_dependence_check() && n->in(LoadNode::Memory) == mach->in(StoreNode::Memory) ) break; // Found anti-dependent load } if( k < b->_nodes.size() ) break; // Found anti-dependent load // Make sure control does not do a merge (would have to check allpaths) if( b->num_preds() != 2 ) break; b = cfg->_bbs[b->pred(1)->_idx]; // Move up to predecessor block } if( b != this ) continue; } // Make sure this memory op is not already being used for a NullCheck Node *e = mb->end(); if( e->is_MachNullCheck() && e->in(1) == mach ) continue; // Already being used as a NULL check // Found a candidate! Pick one with least dom depth - the highest // in the dom tree should be closest to the null check. if( !best || cfg->_bbs[mach->_idx]->_dom_depth < cfg->_bbs[best->_idx]->_dom_depth ) { best = mach; bidx = vidx; } } // No candidate! 
if( !best ) return; // ---- Found an implicit null check extern int implicit_null_checks; implicit_null_checks++; if( is_decoden ) { // Check if we need to hoist decodeHeapOop_not_null first. Block *valb = cfg->_bbs[val->_idx]; if( this != valb && this->_dom_depth < valb->_dom_depth ) { // Hoist it up to the end of the test block. valb->find_remove(val); this->add_inst(val); cfg->_bbs.map(val->_idx,this); // DecodeN on x86 may kill flags. Check for flag-killing projections // that also need to be hoisted. for (DUIterator_Fast jmax, j = val->fast_outs(jmax); j < jmax; j++) { Node* n = val->fast_out(j); if( n->is_MachProj() ) { cfg->_bbs[n->_idx]->find_remove(n); this->add_inst(n); cfg->_bbs.map(n->_idx,this); } } } } // Hoist the memory candidate up to the end of the test block. Block *old_block = cfg->_bbs[best->_idx]; old_block->find_remove(best); add_inst(best); cfg->_bbs.map(best->_idx,this); // Move the control dependence if (best->in(0) && best->in(0) == old_block->_nodes[0]) best->set_req(0, _nodes[0]); // Check for flag-killing projections that also need to be hoisted // Should be DU safe because no edge updates. for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) { Node* n = best->fast_out(j); if( n->is_MachProj() ) { cfg->_bbs[n->_idx]->find_remove(n); add_inst(n); cfg->_bbs.map(n->_idx,this); } } Compile *C = cfg->C; // proj==Op_True --> ne test; proj==Op_False --> eq test. // One of two graph shapes got matched: // (IfTrue (If (Bool NE (CmpP ptr NULL)))) // (IfFalse (If (Bool EQ (CmpP ptr NULL)))) // NULL checks are always branch-if-eq. If we see a IfTrue projection // then we are replacing a 'ne' test with a 'eq' NULL check test. // We need to flip the projections to keep the same semantics. 
if( proj->Opcode() == Op_IfTrue ) { // Swap order of projections in basic block to swap branch targets Node *tmp1 = _nodes[end_idx()+1]; Node *tmp2 = _nodes[end_idx()+2]; _nodes.map(end_idx()+1, tmp2); _nodes.map(end_idx()+2, tmp1); Node *tmp = new (C, 1) Node(C->top()); // Use not NULL input tmp1->replace_by(tmp); tmp2->replace_by(tmp1); tmp->replace_by(tmp2); tmp->destruct(); } // Remove the existing null check; use a new implicit null check instead. // Since schedule-local needs precise def-use info, we need to correct // it as well. Node *old_tst = proj->in(0); MachNode *nul_chk = new (C) MachNullCheckNode(old_tst->in(0),best,bidx); _nodes.map(end_idx(),nul_chk); cfg->_bbs.map(nul_chk->_idx,this); // Redirect users of old_test to nul_chk for (DUIterator_Last i2min, i2 = old_tst->last_outs(i2min); i2 >= i2min; --i2) old_tst->last_out(i2)->set_req(0, nul_chk); // Clean-up any dead code for (uint i3 = 0; i3 < old_tst->req(); i3++) old_tst->set_req(i3, NULL); cfg->latency_from_uses(nul_chk); cfg->latency_from_uses(best); }
// Emit a guarded call: compare the method handle seen at run time against
// _predicted_method_handle, run _if_hit on a match and _if_missed otherwise,
// then merge the two resulting states (control, i_o, memory and the JVM state
// slots) if both paths are still live.
//
//   jvms - the caller's JVM state at the call site.
//
// Returns the resulting JVM state, or NULL if the compilation is failing
// after generating the slow path.
JVMState* PredictedDynamicCallGenerator::generate(JVMState* jvms) {
  GraphKit kit(jvms);
  Compile* C = kit.C;
  PhaseGVN& gvn = kit.gvn();

  CompileLog* log = C->log();
  if (log != NULL) {
    log->elem("predicted_dynamic_call bci='%d'", jvms->bci());
  }

  const TypeOopPtr* predicted_mh_ptr = TypeOopPtr::make_from_constant(_predicted_method_handle, true);
  Node* predicted_mh = kit.makecon(predicted_mh_ptr);

  Node* bol = NULL;
  int bc = jvms->method()->java_code_at_bci(jvms->bci());
  if (bc != Bytecodes::_invokedynamic) {
    // This is the selectAlternative idiom for guardWithTest or
    // similar idioms.
    Node* receiver = kit.argument(0);

    // Check if the MethodHandle is the expected one
    Node* cmp = gvn.transform(new (C, 3) CmpPNode(receiver, predicted_mh));
    bol = gvn.transform(new (C, 2) BoolNode(cmp, BoolTest::eq) );
  } else {
    // Get the constant pool cache from the caller class.
    ciMethod* caller_method = jvms->method();
    ciBytecodeStream str(caller_method);
    str.force_bci(jvms->bci());  // Set the stream to the invokedynamic bci.
    ciCPCache* cpcache = str.get_cpcache();

    // Get the offset of the CallSite from the constant pool cache
    // pointer.
    int index = str.get_method_index();
    size_t call_site_offset = cpcache->get_f1_offset(index);

    // Load the CallSite object from the constant pool cache.
    const TypeOopPtr* cpcache_type   = TypeOopPtr::make_from_constant(cpcache);  // returns TypeAryPtr of type T_OBJECT
    const TypeOopPtr* call_site_type = TypeOopPtr::make_from_klass(C->env()->CallSite_klass());
    Node* cpcache_adr   = kit.makecon(cpcache_type);
    Node* call_site_adr = kit.basic_plus_adr(cpcache_adr, call_site_offset);
    // The oops in the constant pool cache are not compressed; load them as raw pointers.
    Node* call_site     = kit.make_load(kit.control(), call_site_adr, call_site_type, T_ADDRESS, Compile::AliasIdxRaw);

    // Load the target MethodHandle from the CallSite object.
    const TypeOopPtr* target_type = TypeOopPtr::make_from_klass(C->env()->MethodHandle_klass());
    Node* target_adr = kit.basic_plus_adr(call_site, call_site, java_lang_invoke_CallSite::target_offset_in_bytes());
    Node* target_mh  = kit.make_load(kit.control(), target_adr, target_type, T_OBJECT);

    // Check if the MethodHandle is still the same.
    Node* cmp = gvn.transform(new (C, 3) CmpPNode(target_mh, predicted_mh));
    bol = gvn.transform(new (C, 2) BoolNode(cmp, BoolTest::eq) );
  }
  IfNode* iff = kit.create_and_xform_if(kit.control(), bol, _hit_prob, COUNT_UNKNOWN);
  kit.set_control( gvn.transform(new (C, 1) IfTrueNode (iff)));
  Node* slow_ctl = gvn.transform(new (C, 1) IfFalseNode(iff));

  SafePointNode* slow_map = NULL;
  // Initialized so that the reads further down never see an indeterminate
  // pointer when the slow path below is skipped because it is already stopped.
  JVMState* slow_jvms = NULL;
  { PreserveJVMState pjvms(&kit);
    kit.set_control(slow_ctl);
    if (!kit.stopped()) {
      slow_jvms = _if_missed->generate(kit.sync_jvms());
      if (kit.failing())
        return NULL;  // might happen because of NodeCountInliningCutoff
      assert(slow_jvms != NULL, "must be");
      kit.add_exception_states_from(slow_jvms);
      kit.set_map(slow_jvms->map());
      if (!kit.stopped())
        slow_map = kit.stop();
    }
  }

  if (kit.stopped()) {
    // The kit is stopped on the hit path, so continue with the slow path's state.
    kit.set_jvms(slow_jvms);
    return kit.transfer_exceptions_into_jvms();
  }

  // Make the hot call:
  JVMState* new_jvms = _if_hit->generate(kit.sync_jvms());
  if (new_jvms == NULL) {
    // Inline failed, so make a direct call.
    assert(_if_hit->is_inline(), "must have been a failed inline");
    CallGenerator* cg = CallGenerator::for_direct_call(_if_hit->method());
    new_jvms = cg->generate(kit.sync_jvms());
  }
  kit.add_exception_states_from(new_jvms);
  kit.set_jvms(new_jvms);

  // Need to merge slow and fast?
  if (slow_map == NULL) {
    // The fast path is the only path remaining.
    return kit.transfer_exceptions_into_jvms();
  }

  if (kit.stopped()) {
    // Inlined method threw an exception, so it's just the slow path after all.
    kit.set_jvms(slow_jvms);
    return kit.transfer_exceptions_into_jvms();
  }

  // Finish the diamond.
  kit.C->set_has_split_ifs(true); // Has chance for split-if optimization
  RegionNode* region = new (C, 3) RegionNode(3);
  region->init_req(1, kit.control());
  region->init_req(2, slow_map->control());
  kit.set_control(gvn.transform(region));
  Node* iophi = PhiNode::make(region, kit.i_o(), Type::ABIO);
  iophi->set_req(2, slow_map->i_o());
  kit.set_i_o(gvn.transform(iophi));
  kit.merge_memory(slow_map->merged_memory(), region, 2);
  uint tos = kit.jvms()->stkoff() + kit.sp();
  uint limit = slow_map->req();
  for (uint i = TypeFunc::Parms; i < limit; i++) {
    // Skip unused stack slots; fast forward to monoff();
    if (i == tos) {
      i = kit.jvms()->monoff();
      if( i >= limit ) break;
    }
    Node* m = kit.map()->in(i);
    Node* n = slow_map->in(i);
    if (m != n) {
      // The two paths disagree on this slot: merge them with a Phi whose
      // type is the meet of both inputs.
      const Type* t = gvn.type(m)->meet(gvn.type(n));
      Node* phi = PhiNode::make(region, m, t);
      phi->set_req(2, n);
      kit.map()->set_req(i, gvn.transform(phi));
    }
  }
  return kit.transfer_exceptions_into_jvms();
}
// Notify optimizer that a node has been modified // Node: This assumes that escape analysis is run before // PhaseIterGVN creation void record_for_optimizer(Node *n) { _compile->record_for_igvn(n); }
// Try to turn a method handle intrinsic call (invokeBasic or one of the
// linkTo* intrinsics) into an inlinable call to the real target.
//
//   jvms            - JVM state at the call site.
//   caller          - the calling method (used for virtual call optimization).
//   callee          - the method handle intrinsic being called.
//   input_not_const - out-parameter: set to true on entry and cleared to false
//                     once the MethodHandle receiver / MemberName argument is
//                     found to be a constant (Op_ConP).
//
// Returns an inlining CallGenerator for the target, or NULL if the relevant
// argument is not constant or no inline generator could be produced.
CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee, bool& input_not_const) {
  GraphKit kit(jvms);
  PhaseGVN& gvn = kit.gvn();
  Compile* C = kit.C;
  vmIntrinsics::ID iid = callee->intrinsic_id();
  input_not_const = true;
  switch (iid) {
  case vmIntrinsics::_invokeBasic:
    {
      // Get MethodHandle receiver:
      Node* receiver = kit.argument(0);
      if (receiver->Opcode() == Op_ConP) {
        input_not_const = false;
        const TypeOopPtr* oop_ptr = receiver->bottom_type()->is_oopptr();
        ciMethod* target = oop_ptr->const_oop()->as_method_handle()->get_vmtarget();
        guarantee(!target->is_method_handle_intrinsic(), "should not happen");  // XXX remove
        const int vtable_index = Method::invalid_vtable_index;
        CallGenerator* cg = C->call_generator(target, vtable_index, false, jvms, true, PROB_ALWAYS, NULL, true, true);
        assert(cg == NULL || !cg->is_late_inline() || cg->is_mh_late_inline(), "no late inline here");
        if (cg != NULL && cg->is_inline())
          return cg;
      }
    }
    break;

  case vmIntrinsics::_linkToVirtual:
  case vmIntrinsics::_linkToStatic:
  case vmIntrinsics::_linkToSpecial:
  case vmIntrinsics::_linkToInterface:
    {
      // Get MemberName argument:
      Node* member_name = kit.argument(callee->arg_size() - 1);
      if (member_name->Opcode() == Op_ConP) {
        input_not_const = false;
        const TypeOopPtr* oop_ptr = member_name->bottom_type()->is_oopptr();
        ciMethod* target = oop_ptr->const_oop()->as_member_name()->get_vmtarget();

        // In lambda forms we erase signature types to avoid resolving issues
        // involving class loaders.  When we optimize a method handle invoke
        // to a direct call we must cast the receiver and arguments to its
        // actual types.
        ciSignature* signature = target->signature();
        const int receiver_skip = target->is_static() ? 0 : 1;
        // Cast receiver to its type.
        if (!target->is_static()) {
          Node* arg = kit.argument(0);
          const TypeOopPtr* arg_type = arg->bottom_type()->isa_oopptr();
          const Type*       sig_type = TypeOopPtr::make_from_klass(signature->accessing_klass());
          if (arg_type != NULL && !arg_type->higher_equal(sig_type)) {
            Node* cast_obj = gvn.transform(new (C) CheckCastPPNode(kit.control(), arg, sig_type));
            kit.set_argument(0, cast_obj);
          }
        }
        // Cast reference arguments to its type.
        for (int i = 0; i < signature->count(); i++) {
          ciType* t = signature->type_at(i);
          if (t->is_klass()) {
            Node* arg = kit.argument(receiver_skip + i);
            const TypeOopPtr* arg_type = arg->bottom_type()->isa_oopptr();
            const Type*       sig_type = TypeOopPtr::make_from_klass(t->as_klass());
            if (arg_type != NULL && !arg_type->higher_equal(sig_type)) {
              Node* cast_obj = gvn.transform(new (C) CheckCastPPNode(kit.control(), arg, sig_type));
              kit.set_argument(receiver_skip + i, cast_obj);
            }
          }
        }

        // Try to get the most accurate receiver type
        const bool is_virtual              = (iid == vmIntrinsics::_linkToVirtual);
        const bool is_virtual_or_interface = (is_virtual || iid == vmIntrinsics::_linkToInterface);
        int  vtable_index       = Method::invalid_vtable_index;
        bool call_does_dispatch = false;

        ciKlass* speculative_receiver_type = NULL;
        if (is_virtual_or_interface) {
          ciInstanceKlass* klass = target->holder();
          Node*             receiver_node = kit.argument(0);
          const TypeOopPtr* receiver_type = gvn.type(receiver_node)->isa_oopptr();
          // call_does_dispatch and vtable_index are out-parameters.  They might be changed.
          target = C->optimize_virtual_call(caller, jvms->bci(), klass, target, receiver_type,
                                            is_virtual,
                                            call_does_dispatch, vtable_index);  // out-parameters
          // We lack profiling at this call but type speculation may
          // provide us with a type.  isa_oopptr() yields NULL when the
          // receiver's type is not an oop pointer, so guard the dereference
          // the same way the argument casts above do.
          speculative_receiver_type = (receiver_type != NULL) ? receiver_type->speculative_type() : NULL;
        }

        CallGenerator* cg = C->call_generator(target, vtable_index, call_does_dispatch, jvms, true, PROB_ALWAYS, speculative_receiver_type, true, true);
        assert(cg == NULL || !cg->is_late_inline() || cg->is_mh_late_inline(), "no late inline here");
        if (cg != NULL && cg->is_inline())
          return cg;
      }
    }
    break;

  default:
    fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)));
    break;
  }
  return NULL;
}
//------------------------------do_call----------------------------------------
// Handle your basic call.  Inline if we can & want to, else just setup call.
//
// Outline, as visible in the body below:
//   1. Resolve the target method and its holder from the bytecode stream.
//   2. Throw immediately if a virtual/interface receiver is known to be NULL.
//   3. Return early if the call will not link or the call site cannot be
//      compiled.
//   4. For virtual/interface calls, try to narrow the call to a single target.
//   5. Pick the callee's CodeProfile, pop the arguments, and ask the Compile
//      object for a CallGenerator.
//   6. Run the generator; if it yields no JVM state, retry once with inlining
//      disabled.
//   7. Fold the resulting JVM state and exception states back into the parser
//      and post-process the receiver and the return value.
void Parse::do_call() {
  // It's likely we are going to add debug info soon.
  // Also, if we inline a guy who eventually needs debug info for this JVMS,
  // our contribution to it is cleaned up right here.
  kill_dead_locals();

  // Set frequently used booleans
  bool is_virtual = bc() == Bytecodes::_invokevirtual;
  bool is_virtual_or_interface = is_virtual || bc() == Bytecodes::_invokeinterface;
  bool has_receiver = is_virtual_or_interface || bc() == Bytecodes::_invokespecial;

  // Find target being called
  bool will_link;
  ciMethod* dest_method = iter().get_method(will_link);
  ciInstanceKlass* holder_klass = dest_method->holder();
  ciKlass* holder = iter().get_declared_method_holder();
  ciInstanceKlass* klass = ciEnv::get_instance_klass_for_declared_method_holder(holder);

  int nargs = dest_method->arg_size();

  // See if the receiver (if any) is NULL, hence we always throw BEFORE
  // attempting to resolve the call or initialize the holder class.  Doing so
  // out of order opens a window where we can endlessly deopt because the call
  // holder is not initialized, but the call never actually happens (forcing
  // class initialization) because we only see NULL receivers.
  CPData_Invoke *caller_cpdi = cpdata()->as_Invoke(bc());
  debug_only( assert(caller_cpdi->is_Invoke(), "Not invoke!") );
  if( is_virtual_or_interface &&
      _gvn.type(stack(sp() - nargs))->higher_equal(TypePtr::NULL_PTR) ) {
    builtin_throw( Deoptimization::Reason_null_check, "null receiver", caller_cpdi, caller_cpdi->saw_null(), /*must_throw=*/true );
    return;
  }

  // uncommon-trap when callee is unloaded, uninitialized or will not link
  // bailout when too many arguments for register representation
  if (!will_link || can_not_compile_call_site(dest_method, klass)) {
    return;
  }
  assert(FAM||holder_klass->is_loaded(),"");
  assert(dest_method->is_static() == !has_receiver, "must match bc");
  // Note: this takes into account invokeinterface of methods declared in java/lang/Object,
  // which should be invokevirtuals but according to the VM spec may be invokeinterfaces
  assert(holder_klass->is_interface() || holder_klass->super() == NULL || (bc() != Bytecodes::_invokeinterface), "must match bc");
  // Note:  In the absence of miranda methods, an abstract class K can perform
  // an invokevirtual directly on an interface method I.m if K implements I.

  // ---------------------
  // Does Class Hierarchy Analysis reveal only a single target of a v-call?
  // Then we may inline or make a static call, but become dependent on there being only 1 target.
  // Does the call-site type profile reveal only one receiver?
  // Then we may introduce a run-time check and inline on the path where it succeeds.
  // The other path may uncommon_trap, check for another receiver, or do a v-call.

  // Choose call strategy.
  bool call_is_virtual = is_virtual_or_interface;
  int vtable_index = methodOopDesc::invalid_vtable_index;
  ciMethod* call_method = dest_method;

  // Try to get the most accurate receiver type
  if (is_virtual_or_interface) {
    Node* receiver_node = stack(sp() - nargs);
    const TypeInstPtr*inst_type=_gvn.type(receiver_node)->isa_instptr();
    if( inst_type ) {
      ciInstanceKlass*ikl=inst_type->klass()->as_instance_klass();
      // If the receiver is not yet linked then: (1) we never can make this
      // call because no objects can be created until linkage, and (2) CHA
      // reports incorrect answers... so do not bother with making the call
      // until after the klass gets linked.
      // NOTE(review): ikl2 is computed here but never referenced again in
      // this function.
      ciInstanceKlass *ikl2 = ikl->is_subtype_of(klass) ? ikl : klass;
      if(!ikl->is_linked()){
        uncommon_trap(Deoptimization::Reason_uninitialized,klass,"call site where receiver is not linked",false);
        return;
      }
    }
    const TypeOopPtr* receiver_type = _gvn.type(receiver_node)->isa_oopptr();
    ciMethod* optimized_virtual_method = optimize_inlining(method(), bci(), klass, dest_method, receiver_type);

    // Have the call been sufficiently improved such that it is no longer a virtual?
    if (optimized_virtual_method != NULL) {
      call_method     = optimized_virtual_method;
      call_is_virtual = false;
    } else if (false) {
      // NOTE(review): this branch is guarded by a literal `false`, so it never
      // runs and vtable_index keeps the invalid_vtable_index value set above.
      // We can make a vtable call at this site
      vtable_index = call_method->resolve_vtable_index(method()->holder(), klass);
    }
  }

  // Note:  It's OK to try to inline a virtual call.
  // The call generator will not attempt to inline a polymorphic call
  // unless it knows how to optimize the receiver dispatch.
  bool try_inline=(C->do_inlining()||InlineAccessors)&&
    (!C->method()->should_disable_inlining()) &&
    (call_method->number_of_breakpoints() == 0);

  // Get profile data for the *callee*.  First see if we have precise
  // CodeProfile for this exact inline because C1 inlined it already.
  CodeProfile *callee_cp;
  int callee_cp_inloff;

  if( caller_cpdi->inlined_method_oid() == call_method->objectId() ) {
    callee_cp = c1_cp();        // Use same CodeProfile as current
    callee_cp_inloff = caller_cpdi->cpd_offset(); // But use inlined portion
  } else {
    // If callee has a cp, clone it and use
    callee_cp = call_method->codeprofile(true);
    callee_cp_inloff = 0;

    if (callee_cp || FAM) {
      // The cloned cp needs to be freed later
      // NOTE(review): this local `C` shadows the `C` used elsewhere in this
      // function.
      Compile* C = Compile::current();
      C->record_cloned_cp(callee_cp);
    } else { // Had profile info at top level, but not for this call site?
      // callee_cp will hold the just created cp, or whatever cp allocated by
      // other thread which wins the race in set_codeprofile
      callee_cp = call_method->set_codeprofile(CodeProfile::make(call_method));
    }
  }

  CPData_Invoke *c2_caller_cpdi = UseC1 ? c2cpdata()->as_Invoke(bc()) : NULL;

  // ---------------------
  inc_sp(- nargs);              // Temporarily pop args for JVM state of call
  JVMState* jvms = sync_jvms();

  // ---------------------
  // Decide call tactic.
  // This call checks with CHA, the interpreter profile, intrinsics table, etc.
  // It decides whether inlining is desirable or not.
  CallGenerator*cg=C->call_generator(call_method,vtable_index,call_is_virtual,jvms,try_inline,prof_factor(),callee_cp,callee_cp_inloff,c2_caller_cpdi,caller_cpdi);

  // ---------------------
  // Round double arguments before call
  round_double_arguments(dest_method);

#ifndef PRODUCT
  // Record first part of parsing work for this call
  parse_histogram()->record_change();
#endif // not PRODUCT

  assert(jvms == this->jvms(), "still operating on the right JVMS");
  assert(jvms_in_sync(),       "jvms must carry full info into CG");

  // save across call, for a subsequent cast_not_null.
  Node* receiver = has_receiver ? argument(0) : NULL;

  JVMState* new_jvms = cg->generate(jvms, caller_cpdi, is_private_copy());
  if( new_jvms == NULL ) {      // Did it work?
    // When inlining attempt fails (e.g., too many arguments),
    // it may contaminate the current compile state, making it
    // impossible to pull back and try again.  Once we call
    // cg->generate(), we are committed.  If it fails, the whole
    // compilation task is compromised.
    if (failing())  return;
    if (PrintOpto || PrintInlining || PrintC2Inlining) {
      // Only one fall-back, so if an intrinsic fails, ignore any bytecodes.
      if (cg->is_intrinsic() && call_method->code_size() > 0) {
        C2OUT->print("Bailed out of intrinsic, will not inline: ");
        call_method->print_name(C2OUT); C2OUT->cr();
      }
    }
    // This can happen if a library intrinsic is available, but refuses
    // the call site, perhaps because it did not match a pattern the
    // intrinsic was expecting to optimize.  The fallback position is
    // to call out-of-line.
    try_inline = false;  // Inline tactic bailed out.
    // The retry passes c1_cp()/c1_cp_inloff() rather than the callee profile
    // chosen above.
    cg=C->call_generator(call_method,vtable_index,call_is_virtual,jvms,try_inline,prof_factor(),c1_cp(),c1_cp_inloff(),c2_caller_cpdi,caller_cpdi);
    new_jvms=cg->generate(jvms,caller_cpdi,is_private_copy());
    assert(new_jvms!=NULL,"call failed to generate:  calls should work");
    if (c2_caller_cpdi) c2_caller_cpdi->_inlining_failure_id = IF_GENERALFAILURE;
  }

  if (cg->is_inline()) {
    C->env()->notice_inlined_method(call_method);
  }

  // Reset parser state from [new_]jvms, which now carries results of the call.
  // Return value (if any) is already pushed on the stack by the cg.
  add_exception_states_from(new_jvms);
  if (new_jvms->map()->control() == top()) {
    stop_and_kill_map();
  } else {
    assert(new_jvms->same_calls_as(jvms), "method/bci left unchanged");
    set_jvms(new_jvms);
  }

  if (!stopped()) {
    // This was some sort of virtual call, which did a null check for us.
    // Now we can assert receiver-not-null, on the normal return path.
    if (receiver != NULL && cg->is_virtual()) {
      // The result is only stored, not used; the check on it below is
      // commented out.
      Node*cast=cast_not_null(receiver,true);
      // %%% assert(receiver == cast, "should already have cast the receiver");
    }

    // Round double result after a call from strict to non-strict code
    round_double_result(dest_method);

    // If the return type of the method is not loaded, assert that the
    // value we got is a null.  Otherwise, we need to recompile.
    if (!dest_method->return_type()->is_loaded()) {
      // If there is going to be a trap, put it at the next bytecode:
      set_bci(iter().next_bci());
      do_null_assert(peek(), T_OBJECT);
      set_bci(iter().cur_bci()); // put it back
    } else {
      assert0( call_method->return_type()->is_loaded() );
      BasicType result_type = dest_method->return_type()->basic_type();
      if(result_type==T_OBJECT||result_type==T_ARRAY){
        const Type *t = peek()->bottom_type();
        assert0( t == TypePtr::NULL_PTR || t->is_oopptr()->klass()->is_loaded() );
      }
    }
  }

  // Restart record of parsing work after possible inlining of call
#ifndef PRODUCT
  parse_histogram()->set_initial_state(bc());
#endif
}