void MethodHandles::verify_klass(MacroAssembler* _masm, Register obj, SystemDictionary::WKID klass_id, const char* error_message) { Klass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id); KlassHandle klass = SystemDictionary::well_known_klass(klass_id); Register temp = rdi; Register temp2 = noreg; LP64_ONLY(temp2 = rscratch1); // used by MacroAssembler::cmpptr Label L_ok, L_bad; BLOCK_COMMENT("verify_klass {"); __ verify_oop(obj); __ testptr(obj, obj); __ jcc(Assembler::zero, L_bad); __ push(temp); if (temp2 != noreg) __ push(temp2); #define UNPUSH { if (temp2 != noreg) __ pop(temp2); __ pop(temp); } __ load_klass(temp, obj); __ cmpptr(temp, ExternalAddress((address) klass_addr)); __ jcc(Assembler::equal, L_ok); intptr_t super_check_offset = klass->super_check_offset(); __ movptr(temp, Address(temp, super_check_offset)); __ cmpptr(temp, ExternalAddress((address) klass_addr)); __ jcc(Assembler::equal, L_ok); UNPUSH; __ bind(L_bad); __ STOP(error_message); __ BIND(L_ok); UNPUSH; BLOCK_COMMENT("} verify_klass"); }
void MethodHandles::verify_klass(MacroAssembler* _masm, Register obj, SystemDictionary::WKID klass_id, const char* error_message) { InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id); KlassHandle klass = SystemDictionary::well_known_klass(klass_id); Register temp = rscratch2; Register temp2 = rscratch1; // used by MacroAssembler::cmpptr Label L_ok, L_bad; BLOCK_COMMENT("verify_klass {"); __ verify_oop(obj); __ cbz(obj, L_bad); __ push(RegSet::of(temp, temp2), sp); __ load_klass(temp, obj); __ cmpptr(temp, ExternalAddress((address) klass_addr)); __ br(Assembler::EQ, L_ok); intptr_t super_check_offset = klass->super_check_offset(); __ ldr(temp, Address(temp, super_check_offset)); __ cmpptr(temp, ExternalAddress((address) klass_addr)); __ br(Assembler::EQ, L_ok); __ pop(RegSet::of(temp, temp2), sp); __ bind(L_bad); __ stop(error_message); __ BIND(L_ok); __ pop(RegSet::of(temp, temp2), sp); BLOCK_COMMENT("} verify_klass"); }
/** * Method entry for static native methods: * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) */ address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { if (UseCRC32Intrinsics) { address entry = __ pc(); // rbx,: Method* // rsi: senderSP must preserved for slow path, set SP to it on fast path // rdx: scratch // rdi: scratch Label slow_path; // If we need a safepoint check, generate full interpreter entry. ExternalAddress state(SafepointSynchronize::address_of_state()); __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()), SafepointSynchronize::_not_synchronized); __ jcc(Assembler::notEqual, slow_path); // We don't generate local frame and don't align stack because // we call stub code and there is no safepoint on this path. // Load parameters const Register crc = rax; // crc const Register buf = rdx; // source java byte array address const Register len = rdi; // length // value x86_32 // interp. arg ptr ESP + 4 // int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) // 3 2 1 0 // int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) // 4 2,3 1 0 // Arguments are reversed on java expression stack __ movl(len, Address(rsp, 4 + 0)); // Length // Calculate address of start element if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // long buf __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset __ movl(crc, Address(rsp, 4 + 4 * wordSize)); // Initial CRC } else { __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // byte[] array __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset __ movl(crc, Address(rsp, 4 + 3 * wordSize)); // Initial CRC } __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len); // result in rax // _areturn __ pop(rdi); // get return address __ mov(rsp, rsi); // set sp to sender sp __ jmp(rdi); // generate a vanilla native entry as the slow path __ bind(slow_path); __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); return entry; } return NULL; }
address InterpreterGenerator::generate_empty_entry(void) { // rbx,: methodOop // rcx: receiver (unused) // rsi: previous interpreter state (C++ interpreter) must preserve // rsi: sender sp must set sp to this value on return if (!UseFastEmptyMethods) return NULL; address entry_point = __ pc(); // If we need a safepoint check, generate full interpreter entry. Label slow_path; ExternalAddress state(SafepointSynchronize::address_of_state()); __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()), SafepointSynchronize::_not_synchronized); __ jcc(Assembler::notEqual, slow_path); // do nothing for empty methods (do not even increment invocation counter) // Code: _return // _return // return w/o popping parameters __ pop(rax); __ mov(rsp, rsi); __ jmp(rax); __ bind(slow_path); (void) generate_normal_entry(false); return entry_point; }
void C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { const int hdr_offset = oopDesc::mark_offset_in_bytes(); assert_different_registers(hdr, obj, disp_hdr); NearLabel done; verify_oop(obj); // Load object header. z_lg(hdr, Address(obj, hdr_offset)); // Save object being locked into the BasicObjectLock... z_stg(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); if (UseBiasedLocking) { biased_locking_enter(obj, hdr, Z_R1_scratch, Z_R0_scratch, done, &slow_case); } // and mark it as unlocked. z_oill(hdr, markOopDesc::unlocked_value); // Save unlocked object header into the displaced header location on the stack. z_stg(hdr, Address(disp_hdr, (intptr_t)0)); // Test if object header is still the same (i.e. unlocked), and if so, store the // displaced header address in the object header. If it is not the same, get the // object header instead. z_csg(hdr, disp_hdr, hdr_offset, obj); // If the object header was the same, we're done. if (PrintBiasedLockingStatistics) { Unimplemented(); #if 0 cond_inc32(Assembler::equal, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); #endif } branch_optimized(Assembler::bcondEqual, done); // If the object header was not the same, it is now in the hdr register. // => Test if it is a stack pointer into the same stack (recursive locking), i.e.: // // 1) (hdr & markOopDesc::lock_mask_in_place) == 0 // 2) rsp <= hdr // 3) hdr <= rsp + page_size // // These 3 tests can be done by evaluating the following expression: // // (hdr - Z_SP) & (~(page_size-1) | markOopDesc::lock_mask_in_place) // // assuming both the stack pointer and page_size have their least // significant 2 bits cleared and page_size is a power of 2 z_sgr(hdr, Z_SP); load_const_optimized(Z_R0_scratch, (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place)); z_ngr(hdr, Z_R0_scratch); // AND sets CC (result eq/ne 0). // For recursive locking, the result is zero. => Save it in the displaced header // location (NULL in the displaced hdr location indicates recursive locking). z_stg(hdr, Address(disp_hdr, (intptr_t)0)); // Otherwise we don't care about the result and handle locking via runtime call. branch_optimized(Assembler::bcondNotZero, slow_case); // done bind(done); }
/** * Method entry for static native methods: * int java.util.zip.CRC32.update(int crc, int b) */ address TemplateInterpreterGenerator::generate_CRC32_update_entry() { if (UseCRC32Intrinsics) { address entry = __ pc(); // rbx,: Method* // r13: senderSP must preserved for slow path, set SP to it on fast path // c_rarg0: scratch (rdi on non-Win64, rcx on Win64) // c_rarg1: scratch (rsi on non-Win64, rdx on Win64) Label slow_path; // If we need a safepoint check, generate full interpreter entry. ExternalAddress state(SafepointSynchronize::address_of_state()); __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()), SafepointSynchronize::_not_synchronized); __ jcc(Assembler::notEqual, slow_path); // We don't generate local frame and don't align stack because // we call stub code and there is no safepoint on this path. // Load parameters const Register crc = rax; // crc const Register val = c_rarg0; // source java byte value const Register tbl = c_rarg1; // scratch // Arguments are reversed on java expression stack __ movl(val, Address(rsp, wordSize)); // byte value __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr())); __ notl(crc); // ~crc __ update_byte_crc32(crc, val, tbl); __ notl(crc); // ~crc // result in rax // _areturn __ pop(rdi); // get return address __ mov(rsp, r13); // set sp to sender sp __ jmp(rdi); // generate a vanilla native entry as the slow path __ bind(slow_path); __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); return entry; } return NULL; }
void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { // generate code to handle arguments iterate(fingerprint); // return result handler __ lea(rax, ExternalAddress(Interpreter::result_handler(method()->result_type()))); __ ret(0); __ flush(); }
/** * Method entry for static native methods: * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) */ address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { if (UseCRC32Intrinsics) { address entry = __ pc(); // rbx,: Method* // r13: senderSP must preserved for slow path, set SP to it on fast path Label slow_path; // If we need a safepoint check, generate full interpreter entry. ExternalAddress state(SafepointSynchronize::address_of_state()); __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()), SafepointSynchronize::_not_synchronized); __ jcc(Assembler::notEqual, slow_path); // We don't generate local frame and don't align stack because // we call stub code and there is no safepoint on this path. // Load parameters const Register crc = c_rarg0; // crc const Register buf = c_rarg1; // source java byte array address const Register len = c_rarg2; // length const Register off = len; // offset (never overlaps with 'len') // Arguments are reversed on java expression stack // Calculate address of start element if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { __ movptr(buf, Address(rsp, 3*wordSize)); // long buf __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset __ addq(buf, off); // + offset __ movl(crc, Address(rsp, 5*wordSize)); // Initial CRC } else { __ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset __ addq(buf, off); // + offset __ movl(crc, Address(rsp, 4*wordSize)); // Initial CRC } // Can now load 'len' since we're finished with 'off' __ movl(len, Address(rsp, wordSize)); // Length __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len); // result in rax // _areturn __ pop(rdi); // get return address __ mov(rsp, r13); // set sp to sender sp __ jmp(rdi); // generate a vanilla native entry as the slow path __ bind(slow_path); __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); return entry; } return NULL; }
void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, oop cached_oop, address entry_point) { ResourceMark rm; CodeBuffer code(code_begin, ic_stub_code_size()); MacroAssembler* masm = new MacroAssembler(&code); // note: even though the code contains an embedded oop, we do not need reloc info // because // (1) the oop is old (i.e., doesn't matter for scavenges) // (2) these ICStubs are removed *before* a GC happens, so the roots disappear assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop"); masm->lea(rax, OopAddress((address) cached_oop)); masm->jump(ExternalAddress(entry_point)); }
void ArrayCopyStub::emit_code(LIR_Assembler* ce) { //---------------slow case: call to native----------------- __ bind(_entry); // Figure out where the args should go // This should really convert the IntrinsicID to the Method* and signature // but I don't know how to do that. // VMRegPair args[5]; BasicType signature[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT}; SharedRuntime::java_calling_convention(signature, args, 5, true); // push parameters // (src, src_pos, dest, destPos, length) Register r[5]; r[0] = src()->as_register(); r[1] = src_pos()->as_register(); r[2] = dst()->as_register(); r[3] = dst_pos()->as_register(); r[4] = length()->as_register(); // next registers will get stored on the stack for (int i = 0; i < 5 ; i++ ) { VMReg r_1 = args[i].first(); if (r_1->is_stack()) { int st_off = r_1->reg2stack() * wordSize; __ str (r[i], Address(sp, st_off)); } else { assert(r[i] == args[i].first()->as_Register(), "Wrong register for arg "); } } ce->align_call(lir_static_call); ce->emit_static_call_stub(); if (ce->compilation()->bailed_out()) { return; // CodeCache is full } Address resolve(SharedRuntime::get_resolve_static_call_stub(), relocInfo::static_call_type); address call = __ trampoline_call(resolve); if (call == NULL) { ce->bailout("trampoline stub overflow"); return; } ce->add_call_info_here(info()); #ifndef PRODUCT __ lea(rscratch2, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); __ incrementw(Address(rscratch2)); #endif __ b(_continuation); }
void ConversionStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); assert(bytecode() == Bytecodes::_f2i || bytecode() == Bytecodes::_d2i, "other conversions do not require stub"); if (input()->is_single_xmm()) { __ comiss(input()->as_xmm_float_reg(), ExternalAddress((address)&float_zero)); } else if (input()->is_double_xmm()) { __ comisd(input()->as_xmm_double_reg(), ExternalAddress((address)&double_zero)); } else { LP64_ONLY(ShouldNotReachHere()); __ push(rax); __ ftst(); __ fnstsw_ax(); __ sahf(); __ pop(rax); } Label NaN, do_return; __ jccb(Assembler::parity, NaN); __ jccb(Assembler::below, do_return); // input is > 0 -> return maxInt // result register already contains 0x80000000, so subtracting 1 gives 0x7fffffff __ decrement(result()->as_register()); __ jmpb(do_return); // input is NaN -> return 0 __ bind(NaN); __ xorptr(result()->as_register(), result()->as_register()); __ bind(do_return); __ jmp(_continuation); }
address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { const char *name; switch (type) { case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; case T_BYTE: name = "jni_fast_GetByteField"; break; case T_CHAR: name = "jni_fast_GetCharField"; break; case T_SHORT: name = "jni_fast_GetShortField"; break; case T_INT: name = "jni_fast_GetIntField"; break; case T_LONG: name = "jni_fast_GetLongField"; break; case T_FLOAT: name = "jni_fast_GetFloatField"; break; case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; default: ShouldNotReachHere(); } ResourceMark rm; BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); CodeBuffer cbuf(blob); MacroAssembler* masm = new MacroAssembler(&cbuf); address fast_entry = __ pc(); Label slow; unsigned long offset; __ adrp(rcounter_addr, SafepointSynchronize::safepoint_counter_addr(), offset); Address safepoint_counter_addr(rcounter_addr, offset); __ ldrw(rcounter, safepoint_counter_addr); __ andw(rscratch1, rcounter, 1); __ cbnzw(rscratch1, slow); __ eor(robj, c_rarg1, rcounter); __ eor(robj, robj, rcounter); // obj, since // robj ^ rcounter ^ rcounter == robj // robj is address dependent on rcounter. __ ldr(robj, Address(robj, 0)); // *obj __ lsr(roffset, c_rarg2, 2); // offset assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); speculative_load_pclist[count] = __ pc(); // Used by the segfault handler switch (type) { case T_BOOLEAN: __ ldrb (result, Address(robj, roffset)); break; case T_BYTE: __ ldrsb (result, Address(robj, roffset)); break; case T_CHAR: __ ldrh (result, Address(robj, roffset)); break; case T_SHORT: __ ldrsh (result, Address(robj, roffset)); break; case T_FLOAT: __ ldrw (result, Address(robj, roffset)); break; case T_INT: __ ldrsw (result, Address(robj, roffset)); break; case T_DOUBLE: case T_LONG: __ ldr (result, Address(robj, roffset)); break; default: ShouldNotReachHere(); } // counter_addr is address dependent on result. __ eor(rcounter_addr, rcounter_addr, result); __ eor(rcounter_addr, rcounter_addr, result); __ ldrw(rscratch1, safepoint_counter_addr); __ cmpw(rcounter, rscratch1); __ br (Assembler::NE, slow); switch (type) { case T_FLOAT: __ fmovs(v0, result); break; case T_DOUBLE: __ fmovd(v0, result); break; default: __ mov(r0, result); break; } __ ret(lr); slowcase_entry_pclist[count++] = __ pc(); __ bind(slow); address slow_case_addr; switch (type) { case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; case T_INT: slow_case_addr = jni_GetIntField_addr(); break; case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; default: ShouldNotReachHere(); } { __ enter(); __ lea(rscratch1, ExternalAddress(slow_case_addr)); __ blr(rscratch1); __ maybe_isb(); __ leave(); __ ret(lr); } __ flush (); return fast_entry; }
int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { const int aligned_mask = BytesPerWord -1; const int hdr_offset = oopDesc::mark_offset_in_bytes(); assert(hdr == rax, "hdr must be rax, for the cmpxchg instruction"); assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); Label done; int null_check_offset = -1; verify_oop(obj); // save object being locked into the BasicObjectLock movptr(Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()), obj); if (UseBiasedLocking) { assert(scratch != noreg, "should have scratch register at this point"); null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); } else { null_check_offset = offset(); } // Load object header movptr(hdr, Address(obj, hdr_offset)); // and mark it as unlocked orptr(hdr, markOopDesc::unlocked_value); // save unlocked object header into the displaced header location on the stack movptr(Address(disp_hdr, 0), hdr); // test if object header is still the same (i.e. unlocked), and if so, store the // displaced header address in the object header - if it is not the same, get the // object header instead if (os::is_MP()) MacroAssembler::lock(); // must be immediately before cmpxchg! cmpxchgptr(disp_hdr, Address(obj, hdr_offset)); // if the object header was the same, we're done if (PrintBiasedLockingStatistics) { cond_inc32(Assembler::equal, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); } jcc(Assembler::equal, done); // if the object header was not the same, it is now in the hdr register // => test if it is a stack pointer into the same stack (recursive locking), i.e.: // // 1) (hdr & aligned_mask) == 0 // 2) rsp <= hdr // 3) hdr <= rsp + page_size // // these 3 tests can be done by evaluating the following expression: // // (hdr - rsp) & (aligned_mask - page_size) // // assuming both the stack pointer and page_size have their least // significant 2 bits cleared and page_size is a power of 2 subptr(hdr, rsp); andptr(hdr, aligned_mask - os::vm_page_size()); // for recursive locking, the result is zero => save it in the displaced header // location (NULL in the displaced hdr location indicates recursive locking) movptr(Address(disp_hdr, 0), hdr); // otherwise we don't care about the result and handle locking via runtime call jcc(Assembler::notZero, slow_case); // done bind(done); return null_check_offset; }
address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) { const char *name; switch (type) { case T_FLOAT: name = "jni_fast_GetFloatField"; break; case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; default: ShouldNotReachHere(); } ResourceMark rm; BufferBlob* b = BufferBlob::create(name, BUFFER_SIZE*wordSize); address fast_entry = b->instructions_begin(); CodeBuffer cbuf(fast_entry, b->instructions_size()); MacroAssembler* masm = new MacroAssembler(&cbuf); Label slow_with_pop, slow; // stack layout: offset from rsp (in words): // return pc 0 // jni env 1 // obj 2 // jfieldID 3 ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr()); __ mov32 (rcx, counter); __ testb (rcx, 1); __ jcc (Assembler::notZero, slow); if (os::is_MP()) { __ mov(rax, rcx); __ andptr(rax, 1); // rax, must end up 0 __ movptr(rdx, Address(rsp, rax, Address::times_1, 2*wordSize)); // obj, notice rax, is 0. // rdx is data dependent on rcx. } else { __ movptr(rdx, Address(rsp, 2*wordSize)); // obj } __ movptr(rax, Address(rsp, 3*wordSize)); // jfieldID __ movptr(rdx, Address(rdx, 0)); // *obj __ shrptr(rax, 2); // offset assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); speculative_load_pclist[count] = __ pc(); switch (type) { #ifndef _LP64 case T_FLOAT: __ fld_s (Address(rdx, rax, Address::times_1)); break; case T_DOUBLE: __ fld_d (Address(rdx, rax, Address::times_1)); break; #else case T_FLOAT: __ movflt (xmm0, Address(robj, roffset, Address::times_1)); break; case T_DOUBLE: __ movdbl (xmm0, Address(robj, roffset, Address::times_1)); break; #endif // _LP64 default: ShouldNotReachHere(); } Address ca1; if (os::is_MP()) { __ fst_s (Address(rsp, -4)); __ lea(rdx, counter); __ movl (rax, Address(rsp, -4)); // garbage hi-order bits on 64bit are harmless. __ xorptr(rdx, rax); __ xorptr(rdx, rax); __ cmp32(rcx, Address(rdx, 0)); // rax, ^ counter_addr ^ rax, = address // ca1 is data dependent on the field // access. } else { __ cmp32(rcx, counter); } __ jcc (Assembler::notEqual, slow_with_pop); #ifndef _WINDOWS __ ret (0); #else // __stdcall calling convention __ ret (3*wordSize); #endif __ bind (slow_with_pop); // invalid load. pop FPU stack. __ fstp_d (0); slowcase_entry_pclist[count++] = __ pc(); __ bind (slow); address slow_case_addr; switch (type) { case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; default: ShouldNotReachHere(); } // tail call __ jump (ExternalAddress(slow_case_addr)); __ flush (); #ifndef _WINDOWS return fast_entry; #else switch (type) { case T_FLOAT: jni_fast_GetFloatField_fp = (GetFloatField_t)fast_entry; break; case T_DOUBLE: jni_fast_GetDoubleField_fp = (GetDoubleField_t)fast_entry; } return os::win32::fast_jni_accessor_wrapper(type); #endif }
address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { const char *name; switch (type) { case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; case T_BYTE: name = "jni_fast_GetByteField"; break; case T_CHAR: name = "jni_fast_GetCharField"; break; case T_SHORT: name = "jni_fast_GetShortField"; break; case T_INT: name = "jni_fast_GetIntField"; break; default: ShouldNotReachHere(); } ResourceMark rm; BufferBlob* b = BufferBlob::create(name, BUFFER_SIZE*wordSize); address fast_entry = b->instructions_begin(); CodeBuffer cbuf(fast_entry, b->instructions_size()); MacroAssembler* masm = new MacroAssembler(&cbuf); Label slow; // stack layout: offset from rsp (in words): // return pc 0 // jni env 1 // obj 2 // jfieldID 3 ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr()); __ mov32 (rcx, counter); __ testb (rcx, 1); __ jcc (Assembler::notZero, slow); if (os::is_MP()) { __ mov(rax, rcx); __ andptr(rax, 1); // rax, must end up 0 __ movptr(rdx, Address(rsp, rax, Address::times_1, 2*wordSize)); // obj, notice rax, is 0. // rdx is data dependent on rcx. } else { __ movptr (rdx, Address(rsp, 2*wordSize)); // obj } __ movptr(rax, Address(rsp, 3*wordSize)); // jfieldID __ movptr(rdx, Address(rdx, 0)); // *obj __ shrptr (rax, 2); // offset assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); speculative_load_pclist[count] = __ pc(); switch (type) { case T_BOOLEAN: __ movzbl (rax, Address(rdx, rax, Address::times_1)); break; case T_BYTE: __ movsbl (rax, Address(rdx, rax, Address::times_1)); break; case T_CHAR: __ movzwl (rax, Address(rdx, rax, Address::times_1)); break; case T_SHORT: __ movswl (rax, Address(rdx, rax, Address::times_1)); break; case T_INT: __ movl (rax, Address(rdx, rax, Address::times_1)); break; default: ShouldNotReachHere(); } Address ca1; if (os::is_MP()) { __ lea(rdx, counter); __ xorptr(rdx, rax); __ xorptr(rdx, rax); __ cmp32(rcx, Address(rdx, 0)); // ca1 is the same as ca because // rax, ^ counter_addr ^ rax, = address // ca1 is data dependent on rax,. } else { __ cmp32(rcx, counter); } __ jcc (Assembler::notEqual, slow); #ifndef _WINDOWS __ ret (0); #else // __stdcall calling convention __ ret (3*wordSize); #endif slowcase_entry_pclist[count++] = __ pc(); __ bind (slow); address slow_case_addr; switch (type) { case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; case T_INT: slow_case_addr = jni_GetIntField_addr(); } // tail call __ jump (ExternalAddress(slow_case_addr)); __ flush (); #ifndef _WINDOWS return fast_entry; #else switch (type) { case T_BOOLEAN: jni_fast_GetBooleanField_fp = (GetBooleanField_t)fast_entry; break; case T_BYTE: jni_fast_GetByteField_fp = (GetByteField_t)fast_entry; break; case T_CHAR: jni_fast_GetCharField_fp = (GetCharField_t)fast_entry; break; case T_SHORT: jni_fast_GetShortField_fp = (GetShortField_t)fast_entry; break; case T_INT: jni_fast_GetIntField_fp = (GetIntField_t)fast_entry; } return os::win32::fast_jni_accessor_wrapper(type); #endif }
// Code generation address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm) { // rbx: methodOop // rcx: receiver method handle (must load from sp[MethodTypeForm.vmslots]) // rsi/r13: sender SP (must preserve; see prepare_to_jump_from_interpreted) // rdx, rdi: garbage temp, blown away Register rbx_method = rbx; Register rcx_recv = rcx; Register rax_mtype = rax; Register rdx_temp = rdx; Register rdi_temp = rdi; // emit WrongMethodType path first, to enable jccb back-branch from main path Label wrong_method_type; __ bind(wrong_method_type); Label invoke_generic_slow_path; assert(methodOopDesc::intrinsic_id_size_in_bytes() == sizeof(u1), "");; __ cmpb(Address(rbx_method, methodOopDesc::intrinsic_id_offset_in_bytes()), (int) vmIntrinsics::_invokeExact); __ jcc(Assembler::notEqual, invoke_generic_slow_path); __ push(rax_mtype); // required mtype __ push(rcx_recv); // bad mh (1st stacked argument) __ jump(ExternalAddress(Interpreter::throw_WrongMethodType_entry())); // here's where control starts out: __ align(CodeEntryAlignment); address entry_point = __ pc(); // fetch the MethodType from the method handle into rax (the 'check' register) { Register tem = rbx_method; for (jint* pchase = methodOopDesc::method_type_offsets_chain(); (*pchase) != -1; pchase++) { __ movptr(rax_mtype, Address(tem, *pchase)); tem = rax_mtype; // in case there is another indirection } } // given the MethodType, find out where the MH argument is buried __ load_heap_oop(rdx_temp, Address(rax_mtype, __ delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, rdi_temp))); Register rdx_vmslots = rdx_temp; __ movl(rdx_vmslots, Address(rdx_temp, __ delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, rdi_temp))); __ movptr(rcx_recv, __ argument_address(rdx_vmslots)); trace_method_handle(_masm, "invokeExact"); __ check_method_handle_type(rax_mtype, rcx_recv, rdi_temp, wrong_method_type); __ jump_to_method_handle_entry(rcx_recv, rdi_temp); // for invokeGeneric (only), apply argument and result conversions on the fly __ bind(invoke_generic_slow_path); #ifdef ASSERT { Label L; __ cmpb(Address(rbx_method, methodOopDesc::intrinsic_id_offset_in_bytes()), (int) vmIntrinsics::_invokeGeneric); __ jcc(Assembler::equal, L); __ stop("bad methodOop::intrinsic_id"); __ bind(L); } #endif //ASSERT Register rbx_temp = rbx_method; // don't need it now // make room on the stack for another pointer: Register rcx_argslot = rcx_recv; __ lea(rcx_argslot, __ argument_address(rdx_vmslots, 1)); insert_arg_slots(_masm, 2 * stack_move_unit(), _INSERT_REF_MASK, rcx_argslot, rbx_temp, rdx_temp); // load up an adapter from the calling type (Java weaves this) __ load_heap_oop(rdx_temp, Address(rax_mtype, __ delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, rdi_temp))); Register rdx_adapter = rdx_temp; // __ load_heap_oop(rdx_adapter, Address(rdx_temp, java_lang_invoke_MethodTypeForm::genericInvoker_offset_in_bytes())); // deal with old JDK versions: __ lea(rdi_temp, Address(rdx_temp, __ delayed_value(java_lang_invoke_MethodTypeForm::genericInvoker_offset_in_bytes, rdi_temp))); __ cmpptr(rdi_temp, rdx_temp); Label sorry_no_invoke_generic; __ jcc(Assembler::below, sorry_no_invoke_generic); __ load_heap_oop(rdx_adapter, Address(rdi_temp, 0)); __ testptr(rdx_adapter, rdx_adapter); __ jcc(Assembler::zero, sorry_no_invoke_generic); __ movptr(Address(rcx_argslot, 1 * Interpreter::stackElementSize), rdx_adapter); // As a trusted first argument, pass the type being called, so the adapter knows // the actual types of the arguments and return values. // (Generic invokers are shared among form-families of method-type.) __ movptr(Address(rcx_argslot, 0 * Interpreter::stackElementSize), rax_mtype); // FIXME: assert that rdx_adapter is of the right method-type. __ mov(rcx, rdx_adapter); trace_method_handle(_masm, "invokeGeneric"); __ jump_to_method_handle_entry(rcx, rdi_temp); __ bind(sorry_no_invoke_generic); // no invokeGeneric implementation available! __ movptr(rcx_recv, Address(rcx_argslot, -1 * Interpreter::stackElementSize)); // recover original MH __ push(rax_mtype); // required mtype __ push(rcx_recv); // bad mh (1st stacked argument) __ jump(ExternalAddress(Interpreter::throw_WrongMethodType_entry())); return entry_point; }
address JNI_FastGetField::generate_fast_get_long_field() { const char *name = "jni_fast_GetLongField"; ResourceMark rm; BufferBlob* b = BufferBlob::create(name, BUFFER_SIZE*wordSize); address fast_entry = b->instructions_begin(); CodeBuffer cbuf(fast_entry, b->instructions_size()); MacroAssembler* masm = new MacroAssembler(&cbuf); Label slow; // stack layout: offset from rsp (in words): // old rsi 0 // return pc 1 // jni env 2 // obj 3 // jfieldID 4 ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr()); __ push (rsi); __ mov32 (rcx, counter); __ testb (rcx, 1); __ jcc (Assembler::notZero, slow); if (os::is_MP()) { __ mov(rax, rcx); __ andptr(rax, 1); // rax, must end up 0 __ movptr(rdx, Address(rsp, rax, Address::times_1, 3*wordSize)); // obj, notice rax, is 0. // rdx is data dependent on rcx. } else { __ movptr(rdx, Address(rsp, 3*wordSize)); // obj } __ movptr(rsi, Address(rsp, 4*wordSize)); // jfieldID __ movptr(rdx, Address(rdx, 0)); // *obj __ shrptr(rsi, 2); // offset assert(count < LIST_CAPACITY-1, "LIST_CAPACITY too small"); speculative_load_pclist[count++] = __ pc(); __ movptr(rax, Address(rdx, rsi, Address::times_1)); #ifndef _LP64 speculative_load_pclist[count] = __ pc(); __ movl(rdx, Address(rdx, rsi, Address::times_1, 4)); #endif // _LP64 if (os::is_MP()) { __ lea(rsi, counter); __ xorptr(rsi, rdx); __ xorptr(rsi, rax); __ xorptr(rsi, rdx); __ xorptr(rsi, rax); __ cmp32(rcx, Address(rsi, 0)); // ca1 is the same as ca because // rax, ^ rdx ^ counter_addr ^ rax, ^ rdx = address // ca1 is data dependent on both rax, and rdx. } else { __ cmp32(rcx, counter); } __ jcc (Assembler::notEqual, slow); __ pop (rsi); #ifndef _WINDOWS __ ret (0); #else // __stdcall calling convention __ ret (3*wordSize); #endif slowcase_entry_pclist[count-1] = __ pc(); slowcase_entry_pclist[count++] = __ pc(); __ bind (slow); __ pop (rsi); address slow_case_addr = jni_GetLongField_addr();; // tail call __ jump (ExternalAddress(slow_case_addr)); __ flush (); #ifndef _WINDOWS return fast_entry; #else jni_fast_GetLongField_fp = (GetLongField_t)fast_entry; return os::win32::fast_jni_accessor_wrapper(T_LONG); #endif }
//------------------------------------------------------------------------------ // MethodHandles::generate_method_handle_stub // // Generate an "entry" field for a method handle. // This determines how the method handle will respond to calls. void MethodHandles::generate_method_handle_stub(MacroAssembler* _masm, MethodHandles::EntryKind ek) { // Here is the register state during an interpreted call, // as set up by generate_method_handle_interpreter_entry(): // - rbx: garbage temp (was MethodHandle.invoke methodOop, unused) // - rcx: receiver method handle // - rax: method handle type (only used by the check_mtype entry point) // - rsi/r13: sender SP (must preserve; see prepare_to_jump_from_interpreted) // - rdx: garbage temp, can blow away const Register rcx_recv = rcx; const Register rax_argslot = rax; const Register rbx_temp = rbx; const Register rdx_temp = rdx; // This guy is set up by prepare_to_jump_from_interpreted (from interpreted calls) // and gen_c2i_adapter (from compiled calls): const Register saved_last_sp = LP64_ONLY(r13) NOT_LP64(rsi); // Argument registers for _raise_exception. // 32-bit: Pass first two oop/int args in registers ECX and EDX. const Register rarg0_code = LP64_ONLY(j_rarg0) NOT_LP64(rcx); const Register rarg1_actual = LP64_ONLY(j_rarg1) NOT_LP64(rdx); const Register rarg2_required = LP64_ONLY(j_rarg2) NOT_LP64(rdi); assert_different_registers(rarg0_code, rarg1_actual, rarg2_required, saved_last_sp); guarantee(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes() != 0, "must have offsets"); // some handy addresses Address rbx_method_fie( rbx, methodOopDesc::from_interpreted_offset() ); Address rbx_method_fce( rbx, methodOopDesc::from_compiled_offset() ); Address rcx_mh_vmtarget( rcx_recv, java_lang_invoke_MethodHandle::vmtarget_offset_in_bytes() ); Address rcx_dmh_vmindex( rcx_recv, java_lang_invoke_DirectMethodHandle::vmindex_offset_in_bytes() ); Address rcx_bmh_vmargslot( rcx_recv, java_lang_invoke_BoundMethodHandle::vmargslot_offset_in_bytes() ); Address rcx_bmh_argument( rcx_recv, java_lang_invoke_BoundMethodHandle::argument_offset_in_bytes() ); Address rcx_amh_vmargslot( rcx_recv, java_lang_invoke_AdapterMethodHandle::vmargslot_offset_in_bytes() ); Address rcx_amh_argument( rcx_recv, java_lang_invoke_AdapterMethodHandle::argument_offset_in_bytes() ); Address rcx_amh_conversion( rcx_recv, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes() ); Address vmarg; // __ argument_address(vmargslot) const int java_mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); if (have_entry(ek)) { __ nop(); // empty stubs make SG sick return; } address interp_entry = __ pc(); trace_method_handle(_masm, entry_name(ek)); BLOCK_COMMENT(entry_name(ek)); switch ((int) ek) { case _raise_exception: { // Not a real MH entry, but rather shared code for raising an // exception. Since we use the compiled entry, arguments are // expected in compiler argument registers. assert(raise_exception_method(), "must be set"); assert(raise_exception_method()->from_compiled_entry(), "method must be linked"); const Register rdi_pc = rax; __ pop(rdi_pc); // caller PC __ mov(rsp, saved_last_sp); // cut the stack back to where the caller started Register rbx_method = rbx_temp; Label L_no_method; // FIXME: fill in _raise_exception_method with a suitable java.lang.invoke method __ movptr(rbx_method, ExternalAddress((address) &_raise_exception_method)); __ testptr(rbx_method, rbx_method); __ jccb(Assembler::zero, L_no_method); const int jobject_oop_offset = 0; __ movptr(rbx_method, Address(rbx_method, jobject_oop_offset)); // dereference the jobject __ testptr(rbx_method, rbx_method); __ jccb(Assembler::zero, L_no_method); __ verify_oop(rbx_method); NOT_LP64(__ push(rarg2_required)); __ push(rdi_pc); // restore caller PC __ jmp(rbx_method_fce); // jump to compiled entry // Do something that is at least causes a valid throw from the interpreter. __ bind(L_no_method); __ push(rarg2_required); __ push(rarg1_actual); __ jump(ExternalAddress(Interpreter::throw_WrongMethodType_entry())); } break; case _invokestatic_mh: case _invokespecial_mh: { Register rbx_method = rbx_temp; __ load_heap_oop(rbx_method, rcx_mh_vmtarget); // target is a methodOop __ verify_oop(rbx_method); // same as TemplateTable::invokestatic or invokespecial, // minus the CP setup and profiling: if (ek == _invokespecial_mh) { // Must load & check the first argument before entering the target method. __ load_method_handle_vmslots(rax_argslot, rcx_recv, rdx_temp); __ movptr(rcx_recv, __ argument_address(rax_argslot, -1)); __ null_check(rcx_recv); __ verify_oop(rcx_recv); } __ jmp(rbx_method_fie); } break; case _invokevirtual_mh: { // same as TemplateTable::invokevirtual, // minus the CP setup and profiling: // pick out the vtable index and receiver offset from the MH, // and then we can discard it: __ load_method_handle_vmslots(rax_argslot, rcx_recv, rdx_temp); Register rbx_index = rbx_temp; __ movl(rbx_index, rcx_dmh_vmindex); // Note: The verifier allows us to ignore rcx_mh_vmtarget. __ movptr(rcx_recv, __ argument_address(rax_argslot, -1)); __ null_check(rcx_recv, oopDesc::klass_offset_in_bytes()); // get receiver klass Register rax_klass = rax_argslot; __ load_klass(rax_klass, rcx_recv); __ verify_oop(rax_klass); // get target methodOop & entry point const int base = instanceKlass::vtable_start_offset() * wordSize; assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); Address vtable_entry_addr(rax_klass, rbx_index, Address::times_ptr, base + vtableEntry::method_offset_in_bytes()); Register rbx_method = rbx_temp; __ movptr(rbx_method, vtable_entry_addr); __ verify_oop(rbx_method); __ jmp(rbx_method_fie); } break; case _invokeinterface_mh: { // same as TemplateTable::invokeinterface, // minus the CP setup and profiling: // pick out the interface and itable index from the MH. __ load_method_handle_vmslots(rax_argslot, rcx_recv, rdx_temp); Register rdx_intf = rdx_temp; Register rbx_index = rbx_temp; __ load_heap_oop(rdx_intf, rcx_mh_vmtarget); __ movl(rbx_index, rcx_dmh_vmindex); __ movptr(rcx_recv, __ argument_address(rax_argslot, -1)); __ null_check(rcx_recv, oopDesc::klass_offset_in_bytes()); // get receiver klass Register rax_klass = rax_argslot; __ load_klass(rax_klass, rcx_recv); __ verify_oop(rax_klass); Register rdi_temp = rdi; Register rbx_method = rbx_index; // get interface klass Label no_such_interface; __ verify_oop(rdx_intf); __ lookup_interface_method(rax_klass, rdx_intf, // note: next two args must be the same: rbx_index, rbx_method, rdi_temp, no_such_interface); __ verify_oop(rbx_method); __ jmp(rbx_method_fie); __ hlt(); __ bind(no_such_interface); // Throw an exception. // For historical reasons, it will be IncompatibleClassChangeError. __ mov(rbx_temp, rcx_recv); // rarg2_required might be RCX assert_different_registers(rarg2_required, rbx_temp); __ movptr(rarg2_required, Address(rdx_intf, java_mirror_offset)); // required interface __ mov( rarg1_actual, rbx_temp); // bad receiver __ movl( rarg0_code, (int) Bytecodes::_invokeinterface); // who is complaining? __ jump(ExternalAddress(from_interpreted_entry(_raise_exception))); } break; case _bound_ref_mh: case _bound_int_mh: case _bound_long_mh: case _bound_ref_direct_mh: case _bound_int_direct_mh: case _bound_long_direct_mh: { bool direct_to_method = (ek >= _bound_ref_direct_mh); BasicType arg_type = T_ILLEGAL; int arg_mask = _INSERT_NO_MASK; int arg_slots = -1; get_ek_bound_mh_info(ek, arg_type, arg_mask, arg_slots); // make room for the new argument: __ movl(rax_argslot, rcx_bmh_vmargslot); __ lea(rax_argslot, __ argument_address(rax_argslot)); insert_arg_slots(_masm, arg_slots * stack_move_unit(), arg_mask, rax_argslot, rbx_temp, rdx_temp); // store bound argument into the new stack slot: __ load_heap_oop(rbx_temp, rcx_bmh_argument); if (arg_type == T_OBJECT) { __ movptr(Address(rax_argslot, 0), rbx_temp); } else { Address prim_value_addr(rbx_temp, java_lang_boxing_object::value_offset_in_bytes(arg_type)); const int arg_size = type2aelembytes(arg_type); __ load_sized_value(rdx_temp, prim_value_addr, arg_size, is_signed_subword_type(arg_type), rbx_temp); __ store_sized_value(Address(rax_argslot, 0), rdx_temp, arg_size, rbx_temp); } if (direct_to_method) { Register rbx_method = rbx_temp; __ load_heap_oop(rbx_method, rcx_mh_vmtarget); __ verify_oop(rbx_method); __ jmp(rbx_method_fie); } else { __ load_heap_oop(rcx_recv, rcx_mh_vmtarget); __ verify_oop(rcx_recv); __ jump_to_method_handle_entry(rcx_recv, rdx_temp); } } break; case _adapter_retype_only: case _adapter_retype_raw: // immediately jump to the next MH layer: __ load_heap_oop(rcx_recv, rcx_mh_vmtarget); __ verify_oop(rcx_recv); __ jump_to_method_handle_entry(rcx_recv, rdx_temp); // This is OK when all parameter types widen. // It is also OK when a return type narrows. break; case _adapter_check_cast: { // temps: Register rbx_klass = rbx_temp; // interesting AMH data // check a reference argument before jumping to the next layer of MH: __ movl(rax_argslot, rcx_amh_vmargslot); vmarg = __ argument_address(rax_argslot); // What class are we casting to? __ load_heap_oop(rbx_klass, rcx_amh_argument); // this is a Class object! __ load_heap_oop(rbx_klass, Address(rbx_klass, java_lang_Class::klass_offset_in_bytes())); Label done; __ movptr(rdx_temp, vmarg); __ testptr(rdx_temp, rdx_temp); __ jcc(Assembler::zero, done); // no cast if null __ load_klass(rdx_temp, rdx_temp); // live at this point: // - rbx_klass: klass required by the target method // - rdx_temp: argument klass to test // - rcx_recv: adapter method handle __ check_klass_subtype(rdx_temp, rbx_klass, rax_argslot, done); // If we get here, the type check failed! // Call the wrong_method_type stub, passing the failing argument type in rax. Register rax_mtype = rax_argslot; __ movl(rax_argslot, rcx_amh_vmargslot); // reload argslot field __ movptr(rdx_temp, vmarg); assert_different_registers(rarg2_required, rdx_temp); __ load_heap_oop(rarg2_required, rcx_amh_argument); // required class __ mov( rarg1_actual, rdx_temp); // bad object __ movl( rarg0_code, (int) Bytecodes::_checkcast); // who is complaining? __ jump(ExternalAddress(from_interpreted_entry(_raise_exception))); __ bind(done); // get the new MH: __ load_heap_oop(rcx_recv, rcx_mh_vmtarget); __ jump_to_method_handle_entry(rcx_recv, rdx_temp); } break; case _adapter_prim_to_prim: case _adapter_ref_to_prim: // handled completely by optimized cases __ stop("init_AdapterMethodHandle should not issue this"); break; case _adapter_opt_i2i: // optimized subcase of adapt_prim_to_prim //case _adapter_opt_f2i: // optimized subcase of adapt_prim_to_prim case _adapter_opt_l2i: // optimized subcase of adapt_prim_to_prim case _adapter_opt_unboxi: // optimized subcase of adapt_ref_to_prim { // perform an in-place conversion to int or an int subword __ movl(rax_argslot, rcx_amh_vmargslot); vmarg = __ argument_address(rax_argslot); switch (ek) { case _adapter_opt_i2i: __ movl(rdx_temp, vmarg); break; case _adapter_opt_l2i: { // just delete the extra slot; on a little-endian machine we keep the first __ lea(rax_argslot, __ argument_address(rax_argslot, 1)); remove_arg_slots(_masm, -stack_move_unit(), rax_argslot, rbx_temp, rdx_temp); vmarg = Address(rax_argslot, -Interpreter::stackElementSize); __ movl(rdx_temp, vmarg); } break; case _adapter_opt_unboxi: { // Load the value up from the heap. __ movptr(rdx_temp, vmarg); int value_offset = java_lang_boxing_object::value_offset_in_bytes(T_INT); #ifdef ASSERT for (int bt = T_BOOLEAN; bt < T_INT; bt++) { if (is_subword_type(BasicType(bt))) assert(value_offset == java_lang_boxing_object::value_offset_in_bytes(BasicType(bt)), ""); } #endif __ null_check(rdx_temp, value_offset); __ movl(rdx_temp, Address(rdx_temp, value_offset)); // We load this as a word. Because we are little-endian, // the low bits will be correct, but the high bits may need cleaning. // The vminfo will guide us to clean those bits. } break; default: ShouldNotReachHere(); } // Do the requested conversion and store the value. Register rbx_vminfo = rbx_temp; __ movl(rbx_vminfo, rcx_amh_conversion); assert(CONV_VMINFO_SHIFT == 0, "preshifted"); // get the new MH: __ load_heap_oop(rcx_recv, rcx_mh_vmtarget); // (now we are done with the old MH) // original 32-bit vmdata word must be of this form: // | MBZ:6 | signBitCount:8 | srcDstTypes:8 | conversionOp:8 | __ xchgptr(rcx, rbx_vminfo); // free rcx for shifts __ shll(rdx_temp /*, rcx*/); Label zero_extend, done; __ testl(rcx, CONV_VMINFO_SIGN_FLAG); __ jccb(Assembler::zero, zero_extend); // this path is taken for int->byte, int->short __ sarl(rdx_temp /*, rcx*/); __ jmpb(done); __ bind(zero_extend); // this is taken for int->char __ shrl(rdx_temp /*, rcx*/); __ bind(done); __ movl(vmarg, rdx_temp); // Store the value. __ xchgptr(rcx, rbx_vminfo); // restore rcx_recv __ jump_to_method_handle_entry(rcx_recv, rdx_temp); } break; case _adapter_opt_i2l: // optimized subcase of adapt_prim_to_prim case _adapter_opt_unboxl: // optimized subcase of adapt_ref_to_prim { // perform an in-place int-to-long or ref-to-long conversion __ movl(rax_argslot, rcx_amh_vmargslot); // on a little-endian machine we keep the first slot and add another after __ lea(rax_argslot, __ argument_address(rax_argslot, 1)); insert_arg_slots(_masm, stack_move_unit(), _INSERT_INT_MASK, rax_argslot, rbx_temp, rdx_temp); Address vmarg1(rax_argslot, -Interpreter::stackElementSize); Address vmarg2 = vmarg1.plus_disp(Interpreter::stackElementSize); switch (ek) { case _adapter_opt_i2l: { #ifdef _LP64 __ movslq(rdx_temp, vmarg1); // Load sign-extended __ movq(vmarg1, rdx_temp); // Store into first slot #else __ movl(rdx_temp, vmarg1); __ sarl(rdx_temp, BitsPerInt - 1); // __ extend_sign() __ movl(vmarg2, rdx_temp); // store second word #endif } break; case _adapter_opt_unboxl: { // Load the value up from the heap. __ movptr(rdx_temp, vmarg1); int value_offset = java_lang_boxing_object::value_offset_in_bytes(T_LONG); assert(value_offset == java_lang_boxing_object::value_offset_in_bytes(T_DOUBLE), ""); __ null_check(rdx_temp, value_offset); #ifdef _LP64 __ movq(rbx_temp, Address(rdx_temp, value_offset)); __ movq(vmarg1, rbx_temp); #else __ movl(rbx_temp, Address(rdx_temp, value_offset + 0*BytesPerInt)); __ movl(rdx_temp, Address(rdx_temp, value_offset + 1*BytesPerInt)); __ movl(vmarg1, rbx_temp); __ movl(vmarg2, rdx_temp); #endif } break; default: ShouldNotReachHere(); } __ load_heap_oop(rcx_recv, rcx_mh_vmtarget); __ jump_to_method_handle_entry(rcx_recv, rdx_temp); } break; case _adapter_opt_f2d: // optimized subcase of adapt_prim_to_prim case _adapter_opt_d2f: // optimized subcase of adapt_prim_to_prim { // perform an in-place floating primitive conversion __ movl(rax_argslot, rcx_amh_vmargslot); __ lea(rax_argslot, __ argument_address(rax_argslot, 1)); if (ek == _adapter_opt_f2d) { insert_arg_slots(_masm, stack_move_unit(), _INSERT_INT_MASK, rax_argslot, rbx_temp, rdx_temp); } Address vmarg(rax_argslot, -Interpreter::stackElementSize); #ifdef _LP64 if (ek == _adapter_opt_f2d) { __ movflt(xmm0, vmarg); __ cvtss2sd(xmm0, xmm0); __ movdbl(vmarg, xmm0); } else { __ movdbl(xmm0, vmarg); __ cvtsd2ss(xmm0, xmm0); __ movflt(vmarg, xmm0); } #else //_LP64 if (ek == _adapter_opt_f2d) { __ fld_s(vmarg); // load float to ST0 __ fstp_s(vmarg); // store single } else { __ fld_d(vmarg); // load double to ST0 __ fstp_s(vmarg); // store single } #endif //_LP64 if (ek == _adapter_opt_d2f) { remove_arg_slots(_masm, -stack_move_unit(), rax_argslot, rbx_temp, rdx_temp); } __ load_heap_oop(rcx_recv, rcx_mh_vmtarget); __ jump_to_method_handle_entry(rcx_recv, rdx_temp); } break; case _adapter_prim_to_ref: __ unimplemented(entry_name(ek)); // %%% FIXME: NYI break; case _adapter_swap_args: case _adapter_rot_args: // handled completely by optimized cases __ stop("init_AdapterMethodHandle should not issue this"); break; case _adapter_opt_swap_1: case _adapter_opt_swap_2: case _adapter_opt_rot_1_up: case _adapter_opt_rot_1_down: case _adapter_opt_rot_2_up: case _adapter_opt_rot_2_down: { int swap_bytes = 0, rotate = 0; get_ek_adapter_opt_swap_rot_info(ek, swap_bytes, rotate); // 'argslot' is the position of the first argument to swap __ movl(rax_argslot, rcx_amh_vmargslot); __ lea(rax_argslot, __ argument_address(rax_argslot)); // 'vminfo' is the second Register rbx_destslot = rbx_temp; __ movl(rbx_destslot, rcx_amh_conversion); assert(CONV_VMINFO_SHIFT == 0, "preshifted"); __ andl(rbx_destslot, CONV_VMINFO_MASK); __ lea(rbx_destslot, __ argument_address(rbx_destslot)); DEBUG_ONLY(verify_argslot(_masm, rbx_destslot, "swap point must fall within current frame")); if (!rotate) { for (int i = 0; i < swap_bytes; i += wordSize) { __ movptr(rdx_temp, Address(rax_argslot , i)); __ push(rdx_temp); __ movptr(rdx_temp, Address(rbx_destslot, i)); __ movptr(Address(rax_argslot, i), rdx_temp); __ pop(rdx_temp); __ movptr(Address(rbx_destslot, i), rdx_temp); } } else { // push the first chunk, which is going to get overwritten for (int i = swap_bytes; (i -= wordSize) >= 0; ) { __ movptr(rdx_temp, Address(rax_argslot, i)); __ push(rdx_temp); } if (rotate > 0) { // rotate upward __ subptr(rax_argslot, swap_bytes); #ifdef ASSERT { // Verify that argslot > destslot, by at least swap_bytes. Label L_ok; __ cmpptr(rax_argslot, rbx_destslot); __ jccb(Assembler::aboveEqual, L_ok); __ stop("source must be above destination (upward rotation)"); __ bind(L_ok); } #endif // work argslot down to destslot, copying contiguous data upwards // pseudo-code: // rax = src_addr - swap_bytes // rbx = dest_addr // while (rax >= rbx) *(rax + swap_bytes) = *(rax + 0), rax--; Label loop; __ bind(loop); __ movptr(rdx_temp, Address(rax_argslot, 0)); __ movptr(Address(rax_argslot, swap_bytes), rdx_temp); __ addptr(rax_argslot, -wordSize); __ cmpptr(rax_argslot, rbx_destslot); __ jccb(Assembler::aboveEqual, loop); } else { __ addptr(rax_argslot, swap_bytes); #ifdef ASSERT { // Verify that argslot < destslot, by at least swap_bytes. Label L_ok; __ cmpptr(rax_argslot, rbx_destslot); __ jccb(Assembler::belowEqual, L_ok); __ stop("source must be below destination (downward rotation)"); __ bind(L_ok); } #endif // work argslot up to destslot, copying contiguous data downwards // pseudo-code: // rax = src_addr + swap_bytes // rbx = dest_addr // while (rax <= rbx) *(rax - swap_bytes) = *(rax + 0), rax++; Label loop; __ bind(loop); __ movptr(rdx_temp, Address(rax_argslot, 0)); __ movptr(Address(rax_argslot, -swap_bytes), rdx_temp); __ addptr(rax_argslot, wordSize); __ cmpptr(rax_argslot, rbx_destslot); __ jccb(Assembler::belowEqual, loop); } // pop the original first chunk into the destination slot, now free for (int i = 0; i < swap_bytes; i += wordSize) { __ pop(rdx_temp); __ movptr(Address(rbx_destslot, i), rdx_temp); } } __ load_heap_oop(rcx_recv, rcx_mh_vmtarget); __ jump_to_method_handle_entry(rcx_recv, rdx_temp); } break; case _adapter_dup_args: { // 'argslot' is the position of the first argument to duplicate __ movl(rax_argslot, rcx_amh_vmargslot); __ lea(rax_argslot, __ argument_address(rax_argslot)); // 'stack_move' is negative number of words to duplicate Register rdx_stack_move = rdx_temp; __ movl2ptr(rdx_stack_move, rcx_amh_conversion); __ sarptr(rdx_stack_move, CONV_STACK_MOVE_SHIFT); int argslot0_num = 0; Address argslot0 = __ argument_address(RegisterOrConstant(argslot0_num)); assert(argslot0.base() == rsp, ""); int pre_arg_size = argslot0.disp(); assert(pre_arg_size % wordSize == 0, ""); assert(pre_arg_size > 0, "must include PC"); // remember the old rsp+1 (argslot[0]) Register rbx_oldarg = rbx_temp; __ lea(rbx_oldarg, argslot0); // move rsp down to make room for dups __ lea(rsp, Address(rsp, rdx_stack_move, Address::times_ptr)); // compute the new rsp+1 (argslot[0]) Register rdx_newarg = rdx_temp; __ lea(rdx_newarg, argslot0); __ push(rdi); // need a temp // (preceding push must be done after arg addresses are taken!) // pull down the pre_arg_size data (PC) for (int i = -pre_arg_size; i < 0; i += wordSize) { __ movptr(rdi, Address(rbx_oldarg, i)); __ movptr(Address(rdx_newarg, i), rdi); } // copy from rax_argslot[0...] down to new_rsp[1...] // pseudo-code: // rbx = old_rsp+1 // rdx = new_rsp+1 // rax = argslot // while (rdx < rbx) *rdx++ = *rax++ Label loop; __ bind(loop); __ movptr(rdi, Address(rax_argslot, 0)); __ movptr(Address(rdx_newarg, 0), rdi); __ addptr(rax_argslot, wordSize); __ addptr(rdx_newarg, wordSize); __ cmpptr(rdx_newarg, rbx_oldarg); __ jccb(Assembler::less, loop); __ pop(rdi); // restore temp __ load_heap_oop(rcx_recv, rcx_mh_vmtarget); __ jump_to_method_handle_entry(rcx_recv, rdx_temp); } break; case _adapter_drop_args: { // 'argslot' is the position of the first argument to nuke __ movl(rax_argslot, rcx_amh_vmargslot); __ lea(rax_argslot, __ argument_address(rax_argslot)); __ push(rdi); // need a temp // (must do previous push after argslot address is taken) // 'stack_move' is number of words to drop Register rdi_stack_move = rdi; __ movl2ptr(rdi_stack_move, rcx_amh_conversion); __ sarptr(rdi_stack_move, CONV_STACK_MOVE_SHIFT); remove_arg_slots(_masm, rdi_stack_move, rax_argslot, rbx_temp, rdx_temp); __ pop(rdi); // restore temp __ load_heap_oop(rcx_recv, rcx_mh_vmtarget); __ jump_to_method_handle_entry(rcx_recv, rdx_temp); } break; case _adapter_collect_args: __ unimplemented(entry_name(ek)); // %%% FIXME: NYI break; case _adapter_spread_args: // handled completely by optimized cases __ stop("init_AdapterMethodHandle should not issue this"); break; case _adapter_opt_spread_0: case _adapter_opt_spread_1: case _adapter_opt_spread_more: { // spread an array out into a group of arguments int length_constant = get_ek_adapter_opt_spread_info(ek); // find the address of the array argument __ movl(rax_argslot, rcx_amh_vmargslot); __ lea(rax_argslot, __ argument_address(rax_argslot)); // grab some temps { __ push(rsi); __ push(rdi); } // (preceding pushes must be done after argslot address is taken!) #define UNPUSH_RSI_RDI \ { __ pop(rdi); __ pop(rsi); } // arx_argslot points both to the array and to the first output arg vmarg = Address(rax_argslot, 0); // Get the array value. Register rsi_array = rsi; Register rdx_array_klass = rdx_temp; BasicType elem_type = T_OBJECT; int length_offset = arrayOopDesc::length_offset_in_bytes(); int elem0_offset = arrayOopDesc::base_offset_in_bytes(elem_type); __ movptr(rsi_array, vmarg); Label skip_array_check; if (length_constant == 0) { __ testptr(rsi_array, rsi_array); __ jcc(Assembler::zero, skip_array_check); } __ null_check(rsi_array, oopDesc::klass_offset_in_bytes()); __ load_klass(rdx_array_klass, rsi_array); // Check the array type. Register rbx_klass = rbx_temp; __ load_heap_oop(rbx_klass, rcx_amh_argument); // this is a Class object! __ load_heap_oop(rbx_klass, Address(rbx_klass, java_lang_Class::klass_offset_in_bytes())); Label ok_array_klass, bad_array_klass, bad_array_length; __ check_klass_subtype(rdx_array_klass, rbx_klass, rdi, ok_array_klass); // If we get here, the type check failed! __ jmp(bad_array_klass); __ bind(ok_array_klass); // Check length. if (length_constant >= 0) { __ cmpl(Address(rsi_array, length_offset), length_constant); } else { Register rbx_vminfo = rbx_temp; __ movl(rbx_vminfo, rcx_amh_conversion); assert(CONV_VMINFO_SHIFT == 0, "preshifted"); __ andl(rbx_vminfo, CONV_VMINFO_MASK); __ cmpl(rbx_vminfo, Address(rsi_array, length_offset)); } __ jcc(Assembler::notEqual, bad_array_length); Register rdx_argslot_limit = rdx_temp; // Array length checks out. Now insert any required stack slots. if (length_constant == -1) { // Form a pointer to the end of the affected region. __ lea(rdx_argslot_limit, Address(rax_argslot, Interpreter::stackElementSize)); // 'stack_move' is negative number of words to insert Register rdi_stack_move = rdi; __ movl2ptr(rdi_stack_move, rcx_amh_conversion); __ sarptr(rdi_stack_move, CONV_STACK_MOVE_SHIFT); Register rsi_temp = rsi_array; // spill this insert_arg_slots(_masm, rdi_stack_move, -1, rax_argslot, rbx_temp, rsi_temp); // reload the array (since rsi was killed) __ movptr(rsi_array, vmarg); } else if (length_constant > 1) { int arg_mask = 0; int new_slots = (length_constant - 1); for (int i = 0; i < new_slots; i++) { arg_mask <<= 1; arg_mask |= _INSERT_REF_MASK; } insert_arg_slots(_masm, new_slots * stack_move_unit(), arg_mask, rax_argslot, rbx_temp, rdx_temp); } else if (length_constant == 1) { // no stack resizing required } else if (length_constant == 0) { remove_arg_slots(_masm, -stack_move_unit(), rax_argslot, rbx_temp, rdx_temp); } // Copy from the array to the new slots. // Note: Stack change code preserves integrity of rax_argslot pointer. // So even after slot insertions, rax_argslot still points to first argument. if (length_constant == -1) { // [rax_argslot, rdx_argslot_limit) is the area we are inserting into. Register rsi_source = rsi_array; __ lea(rsi_source, Address(rsi_array, elem0_offset)); Label loop; __ bind(loop); __ movptr(rbx_temp, Address(rsi_source, 0)); __ movptr(Address(rax_argslot, 0), rbx_temp); __ addptr(rsi_source, type2aelembytes(elem_type)); __ addptr(rax_argslot, Interpreter::stackElementSize); __ cmpptr(rax_argslot, rdx_argslot_limit); __ jccb(Assembler::less, loop); } else if (length_constant == 0) { __ bind(skip_array_check); // nothing to copy } else { int elem_offset = elem0_offset; int slot_offset = 0; for (int index = 0; index < length_constant; index++) { __ movptr(rbx_temp, Address(rsi_array, elem_offset)); __ movptr(Address(rax_argslot, slot_offset), rbx_temp); elem_offset += type2aelembytes(elem_type); slot_offset += Interpreter::stackElementSize; } } // Arguments are spread. Move to next method handle. UNPUSH_RSI_RDI; __ load_heap_oop(rcx_recv, rcx_mh_vmtarget); __ jump_to_method_handle_entry(rcx_recv, rdx_temp); __ bind(bad_array_klass); UNPUSH_RSI_RDI; assert(!vmarg.uses(rarg2_required), "must be different registers"); __ movptr(rarg2_required, Address(rdx_array_klass, java_mirror_offset)); // required type __ movptr(rarg1_actual, vmarg); // bad array __ movl( rarg0_code, (int) Bytecodes::_aaload); // who is complaining? __ jump(ExternalAddress(from_interpreted_entry(_raise_exception))); __ bind(bad_array_length); UNPUSH_RSI_RDI; assert(!vmarg.uses(rarg2_required), "must be different registers"); __ mov (rarg2_required, rcx_recv); // AMH requiring a certain length __ movptr(rarg1_actual, vmarg); // bad array __ movl( rarg0_code, (int) Bytecodes::_arraylength); // who is complaining? __ jump(ExternalAddress(from_interpreted_entry(_raise_exception))); #undef UNPUSH_RSI_RDI } break; case _adapter_flyby: case _adapter_ricochet: __ unimplemented(entry_name(ek)); // %%% FIXME: NYI break; default: ShouldNotReachHere(); } __ hlt(); address me_cookie = MethodHandleEntry::start_compiled_entry(_masm, interp_entry); __ unimplemented(entry_name(ek)); // %%% FIXME: NYI init_entry(ek, MethodHandleEntry::finish_compiled_entry(_masm, me_cookie)); }
void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2; Label L_2TAG_PACKET_10_0_2, start; assert_different_registers(tmp, eax, ecx, edx); jmp(start); address static_const_table = (address)_static_const_table_log; bind(start); subl(rsp, 104); movl(Address(rsp, 40), tmp); lea(tmp, ExternalAddress(static_const_table)); xorpd(xmm2, xmm2); movl(eax, 16368); pinsrw(xmm2, eax, 3); xorpd(xmm3, xmm3); movl(edx, 30704); pinsrw(xmm3, edx, 3); movsd(xmm0, Address(rsp, 112)); movapd(xmm1, xmm0); movl(ecx, 32768); movdl(xmm4, ecx); movsd(xmm5, Address(tmp, 2128)); // 0x00000000UL, 0xffffe000UL pextrw(eax, xmm0, 3); por(xmm0, xmm2); psllq(xmm0, 5); movl(ecx, 16352); psrlq(xmm0, 34); rcpss(xmm0, xmm0); psllq(xmm1, 12); pshufd(xmm6, xmm5, 228); psrlq(xmm1, 12); subl(eax, 16); cmpl(eax, 32736); jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); bind(L_2TAG_PACKET_1_0_2); paddd(xmm0, xmm4); por(xmm1, xmm3); movdl(edx, xmm0); psllq(xmm0, 29); pand(xmm5, xmm1); pand(xmm0, xmm6); subsd(xmm1, xmm5); mulpd(xmm5, xmm0); andl(eax, 32752); subl(eax, ecx); cvtsi2sdl(xmm7, eax); mulsd(xmm1, xmm0); movsd(xmm6, Address(tmp, 2064)); // 0xfefa3800UL, 0x3fa62e42UL movdqu(xmm3, Address(tmp, 2080)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL subsd(xmm5, xmm2); andl(edx, 16711680); shrl(edx, 12); movdqu(xmm0, Address(tmp, edx)); movdqu(xmm4, Address(tmp, 2096)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL addsd(xmm1, xmm5); movdqu(xmm2, Address(tmp, 2112)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL mulsd(xmm6, xmm7); pshufd(xmm5, xmm1, 68); mulsd(xmm7, Address(tmp, 2072)); // 0x93c76730UL, 0x3ceef357UL, 0x92492492UL, 0x3fc24924UL mulsd(xmm3, xmm1); addsd(xmm0, xmm6); mulpd(xmm4, xmm5); mulpd(xmm5, xmm5); pshufd(xmm6, xmm0, 228); addsd(xmm0, xmm1); addpd(xmm4, xmm2); mulpd(xmm3, xmm5); subsd(xmm6, xmm0); mulsd(xmm4, xmm1); pshufd(xmm2, xmm0, 238); addsd(xmm1, xmm6); mulsd(xmm5, xmm5); addsd(xmm7, xmm2); addpd(xmm4, xmm3); addsd(xmm1, xmm7); mulpd(xmm4, xmm5); addsd(xmm1, xmm4); pshufd(xmm5, xmm4, 238); addsd(xmm1, xmm5); addsd(xmm0, xmm1); jmp(L_2TAG_PACKET_2_0_2); bind(L_2TAG_PACKET_0_0_2); movsd(xmm0, Address(rsp, 112)); movdqu(xmm1, xmm0); addl(eax, 16); cmpl(eax, 32768); jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2); cmpl(eax, 16); jcc(Assembler::below, L_2TAG_PACKET_4_0_2); bind(L_2TAG_PACKET_5_0_2); addsd(xmm0, xmm0); jmp(L_2TAG_PACKET_2_0_2); bind(L_2TAG_PACKET_6_0_2); jcc(Assembler::above, L_2TAG_PACKET_5_0_2); cmpl(edx, 0); jcc(Assembler::above, L_2TAG_PACKET_5_0_2); jmp(L_2TAG_PACKET_7_0_2); bind(L_2TAG_PACKET_3_0_2); movdl(edx, xmm1); psrlq(xmm1, 32); movdl(ecx, xmm1); addl(ecx, ecx); cmpl(ecx, -2097152); jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2); orl(edx, ecx); cmpl(edx, 0); jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); bind(L_2TAG_PACKET_7_0_2); xorpd(xmm1, xmm1); xorpd(xmm0, xmm0); movl(eax, 32752); pinsrw(xmm1, eax, 3); movl(edx, 3); mulsd(xmm0, xmm1); bind(L_2TAG_PACKET_9_0_2); movsd(Address(rsp, 0), xmm0); movsd(xmm0, Address(rsp, 112)); fld_d(Address(rsp, 0)); jmp(L_2TAG_PACKET_10_0_2); bind(L_2TAG_PACKET_8_0_2); xorpd(xmm1, xmm1); xorpd(xmm0, xmm0); movl(eax, 49136); pinsrw(xmm0, eax, 3); divsd(xmm0, xmm1); movl(edx, 2); jmp(L_2TAG_PACKET_9_0_2); bind(L_2TAG_PACKET_4_0_2); movdl(edx, xmm1); psrlq(xmm1, 32); movdl(ecx, xmm1); orl(edx, ecx); cmpl(edx, 0); jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); xorpd(xmm1, xmm1); movl(eax, 18416); pinsrw(xmm1, eax, 3); mulsd(xmm0, xmm1); movapd(xmm1, xmm0); pextrw(eax, xmm0, 3); por(xmm0, xmm2); psllq(xmm0, 5); movl(ecx, 18416); psrlq(xmm0, 34); rcpss(xmm0, xmm0); psllq(xmm1, 12); pshufd(xmm6, xmm5, 228); psrlq(xmm1, 12); jmp(L_2TAG_PACKET_1_0_2); bind(L_2TAG_PACKET_2_0_2); movsd(Address(rsp, 24), xmm0); fld_d(Address(rsp, 24)); bind(L_2TAG_PACKET_10_0_2); movl(tmp, Address(rsp, 40)); }
address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) { const char *name = NULL; switch (type) { case T_FLOAT: name = "jni_fast_GetFloatField"; break; case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; default: ShouldNotReachHere(); } ResourceMark rm; BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); CodeBuffer cbuf(blob); MacroAssembler* masm = new MacroAssembler(&cbuf); address fast_entry = __ pc(); Label slow; ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr()); __ mov32 (rcounter, counter); __ mov (robj, c_rarg1); __ testb (rcounter, 1); __ jcc (Assembler::notZero, slow); if (os::is_MP()) { __ xorptr(robj, rcounter); __ xorptr(robj, rcounter); // obj, since // robj ^ rcounter ^ rcounter == robj // robj is data dependent on rcounter. } __ clear_jweak_tag(robj); __ movptr(robj, Address(robj, 0)); // *obj __ mov (roffset, c_rarg2); __ shrptr(roffset, 2); // offset assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); speculative_load_pclist[count] = __ pc(); switch (type) { case T_FLOAT: __ movflt (xmm0, Address(robj, roffset, Address::times_1)); break; case T_DOUBLE: __ movdbl (xmm0, Address(robj, roffset, Address::times_1)); break; default: ShouldNotReachHere(); } if (os::is_MP()) { __ lea(rcounter_addr, counter); __ movdq (rax, xmm0); // counter address is data dependent on xmm0. __ xorptr(rcounter_addr, rax); __ xorptr(rcounter_addr, rax); __ cmpl (rcounter, Address(rcounter_addr, 0)); } else { __ cmp32 (rcounter, counter); } __ jcc (Assembler::notEqual, slow); __ ret (0); slowcase_entry_pclist[count++] = __ pc(); __ bind (slow); address slow_case_addr = NULL; switch (type) { case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); } // tail call __ jump (ExternalAddress(slow_case_addr)); __ flush (); return fast_entry; }
VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { const int aarch64_code_length = VtableStub::pd_code_size_limit(true); VtableStub* s = new(aarch64_code_length) VtableStub(true, vtable_index); ResourceMark rm; CodeBuffer cb(s->entry_point(), aarch64_code_length); MacroAssembler* masm = new MacroAssembler(&cb); #ifndef PRODUCT if (CountCompiledCalls) { __ lea(r19, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); __ incrementw(Address(r19)); } #endif // get receiver (need to skip return address on top of stack) assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); // get receiver klass address npe_addr = __ pc(); __ load_klass(r19, j_rarg0); #ifndef PRODUCT if (DebugVtables) { Label L; // check offset vs vtable length __ ldrw(rscratch1, Address(r19, Klass::vtable_length_offset())); __ cmpw(rscratch1, vtable_index * vtableEntry::size()); __ br(Assembler::GT, L); __ enter(); __ mov(r2, vtable_index); __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, r2); __ leave(); __ bind(L); } #endif // PRODUCT __ lookup_virtual_method(r19, vtable_index, rmethod); if (DebugVtables) { Label L; __ cbz(rmethod, L); __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset())); __ cbnz(rscratch1, L); __ stop("Vtable entry is NULL"); __ bind(L); } // r0: receiver klass // rmethod: Method* // r2: receiver address ame_addr = __ pc(); __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset())); __ br(rscratch1); __ flush(); if (PrintMiscellaneous && (WizardMode || Verbose)) { tty->print_cr("vtable #%d at " PTR_FORMAT "[%d] left over: %d", vtable_index, p2i(s->entry_point()), (int)(s->code_end() - s->entry_point()), (int)(s->code_end() - __ pc())); } guarantee(__ pc() <= s->code_end(), "overflowed buffer"); s->set_exception_points(npe_addr, ame_addr); return s; }
VtableStub* VtableStubs::create_itable_stub(int itable_index) { // Note well: pd_code_size_limit is the absolute minimum we can get away with. If you // add code here, bump the code stub size returned by pd_code_size_limit! const int i486_code_length = VtableStub::pd_code_size_limit(false); VtableStub* s = new(i486_code_length) VtableStub(false, itable_index); // Can be NULL if there is no free space in the code cache. if (s == NULL) { return NULL; } ResourceMark rm; CodeBuffer cb(s->entry_point(), i486_code_length); MacroAssembler* masm = new MacroAssembler(&cb); // Entry arguments: // rax,: Interface // rcx: Receiver #ifndef PRODUCT if (CountCompiledCalls) { __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); } #endif /* PRODUCT */ // get receiver (need to skip return address on top of stack) assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx"); // get receiver klass (also an implicit null-check) address npe_addr = __ pc(); __ movptr(rsi, Address(rcx, oopDesc::klass_offset_in_bytes())); // Most registers are in use; we'll use rax, rbx, rsi, rdi // (If we need to make rsi, rdi callee-save, do a push/pop here.) const Register method = rbx; Label throw_icce; // Get Method* and entrypoint for compiler __ lookup_interface_method(// inputs: rec. class, interface, itable index rsi, rax, itable_index, // outputs: method, scan temp. reg method, rdi, throw_icce); // method (rbx): Method* // rcx: receiver #ifdef ASSERT if (DebugVtables) { Label L1; __ cmpptr(method, (int32_t)NULL_WORD); __ jcc(Assembler::equal, L1); __ cmpptr(Address(method, Method::from_compiled_offset()), (int32_t)NULL_WORD); __ jcc(Assembler::notZero, L1); __ stop("Method* is null"); __ bind(L1); } #endif // ASSERT address ame_addr = __ pc(); __ jmp(Address(method, Method::from_compiled_offset())); __ bind(throw_icce); __ jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); masm->flush(); if (PrintMiscellaneous && (WizardMode || Verbose)) { tty->print_cr("itable #%d at "PTR_FORMAT"[%d] left over: %d", itable_index, s->entry_point(), (int)(s->code_end() - s->entry_point()), (int)(s->code_end() - __ pc())); } guarantee(__ pc() <= s->code_end(), "overflowed buffer"); // shut the door on sizing bugs int slop = 3; // 32-bit offset is this much larger than an 8-bit one assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset"); s->set_exception_points(npe_addr, ame_addr); return s; }
void CompactingPermGenGen::generate_vtable_methods(void** vtbl_list, void** vtable, char** md_top, char* md_end, char** mc_top, char* mc_end) { intptr_t vtable_bytes = (num_virtuals * vtbl_list_size) * sizeof(void*); *(intptr_t *)(*md_top) = vtable_bytes; *md_top += sizeof(intptr_t); void** dummy_vtable = (void**)*md_top; *vtable = dummy_vtable; *md_top += vtable_bytes; // Get ready to generate dummy methods. CodeBuffer cb((unsigned char*)*mc_top, mc_end - *mc_top); MacroAssembler* masm = new MacroAssembler(&cb); Label common_code; for (int i = 0; i < vtbl_list_size; ++i) { for (int j = 0; j < num_virtuals; ++j) { dummy_vtable[num_virtuals * i + j] = (void*)masm->pc(); // Load eax with a value indicating vtable/offset pair. // -- bits[ 7..0] (8 bits) which virtual method in table? // -- bits[12..8] (5 bits) which virtual method table? // -- must fit in 13-bit instruction immediate field. __ movl(rax, (i << 8) + j); __ jmp(common_code); } } __ bind(common_code); // Expecting to be called with "thiscall" convections -- the arguments // are on the stack and the "this" pointer is in c_rarg0. In addition, rax // was set (above) to the offset of the method in the table. __ push(c_rarg1); // save & free register __ push(c_rarg0); // save "this" __ mov(c_rarg0, rax); __ shrptr(c_rarg0, 8); // isolate vtable identifier. __ shlptr(c_rarg0, LogBytesPerWord); __ lea(c_rarg1, ExternalAddress((address)vtbl_list)); // ptr to correct vtable list. __ addptr(c_rarg1, c_rarg0); // ptr to list entry. __ movptr(c_rarg1, Address(c_rarg1, 0)); // get correct vtable address. __ pop(c_rarg0); // restore "this" __ movptr(Address(c_rarg0, 0), c_rarg1); // update vtable pointer. __ andptr(rax, 0x00ff); // isolate vtable method index __ shlptr(rax, LogBytesPerWord); __ addptr(rax, c_rarg1); // address of real method pointer. __ pop(c_rarg1); // restore register. __ movptr(rax, Address(rax, 0)); // get real method pointer. __ jmp(rax); // jump to the real method. __ flush(); *mc_top = (char*)__ pc(); }
void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp1, Register tmp2) { Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; Label L_2TAG_PACKET_8_0_2; Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start; assert_different_registers(tmp1, tmp2, eax, ecx, edx); jmp(start); address L_tbl = (address)_L_tbl; address log2 = (address)_log2; address coeff = (address)_coeff; bind(start); subq(rsp, 24); movsd(Address(rsp, 0), xmm0); mov64(rax, 0x3ff0000000000000); movdq(xmm2, rax); mov64(rdx, 0x77f0000000000000); movdq(xmm3, rdx); movl(ecx, 32768); movdl(xmm4, rcx); mov64(tmp1, 0xffffe00000000000); movdq(xmm5, tmp1); movdqu(xmm1, xmm0); pextrw(eax, xmm0, 3); por(xmm0, xmm2); movl(ecx, 16352); psrlq(xmm0, 27); lea(tmp2, ExternalAddress(L_tbl)); psrld(xmm0, 2); rcpps(xmm0, xmm0); psllq(xmm1, 12); pshufd(xmm6, xmm5, 228); psrlq(xmm1, 12); subl(eax, 16); cmpl(eax, 32736); jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); bind(L_2TAG_PACKET_1_0_2); paddd(xmm0, xmm4); por(xmm1, xmm3); movdl(edx, xmm0); psllq(xmm0, 29); pand(xmm5, xmm1); pand(xmm0, xmm6); subsd(xmm1, xmm5); mulpd(xmm5, xmm0); andl(eax, 32752); subl(eax, ecx); cvtsi2sdl(xmm7, eax); mulsd(xmm1, xmm0); movq(xmm6, ExternalAddress(log2)); // 0xfefa3800UL, 0x3fa62e42UL movdqu(xmm3, ExternalAddress(coeff)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL subsd(xmm5, xmm2); andl(edx, 16711680); shrl(edx, 12); movdqu(xmm0, Address(tmp2, edx)); movdqu(xmm4, ExternalAddress(16 + coeff)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL addsd(xmm1, xmm5); movdqu(xmm2, ExternalAddress(32 + coeff)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL mulsd(xmm6, xmm7); movddup(xmm5, xmm1); mulsd(xmm7, ExternalAddress(8 + log2)); // 0x93c76730UL, 0x3ceef357UL mulsd(xmm3, xmm1); addsd(xmm0, xmm6); mulpd(xmm4, xmm5); mulpd(xmm5, xmm5); movddup(xmm6, xmm0); addsd(xmm0, xmm1); addpd(xmm4, xmm2); mulpd(xmm3, xmm5); subsd(xmm6, xmm0); mulsd(xmm4, xmm1); pshufd(xmm2, xmm0, 238); addsd(xmm1, xmm6); mulsd(xmm5, xmm5); addsd(xmm7, xmm2); addpd(xmm4, xmm3); addsd(xmm1, xmm7); mulpd(xmm4, xmm5); addsd(xmm1, xmm4); pshufd(xmm5, xmm4, 238); addsd(xmm1, xmm5); addsd(xmm0, xmm1); jmp(B1_5); bind(L_2TAG_PACKET_0_0_2); movq(xmm0, Address(rsp, 0)); movq(xmm1, Address(rsp, 0)); addl(eax, 16); cmpl(eax, 32768); jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2); cmpl(eax, 16); jcc(Assembler::below, L_2TAG_PACKET_3_0_2); bind(L_2TAG_PACKET_4_0_2); addsd(xmm0, xmm0); jmp(B1_5); bind(L_2TAG_PACKET_5_0_2); jcc(Assembler::above, L_2TAG_PACKET_4_0_2); cmpl(edx, 0); jcc(Assembler::above, L_2TAG_PACKET_4_0_2); jmp(L_2TAG_PACKET_6_0_2); bind(L_2TAG_PACKET_3_0_2); xorpd(xmm1, xmm1); addsd(xmm1, xmm0); movdl(edx, xmm1); psrlq(xmm1, 32); movdl(ecx, xmm1); orl(edx, ecx); cmpl(edx, 0); jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); xorpd(xmm1, xmm1); movl(eax, 18416); pinsrw(xmm1, eax, 3); mulsd(xmm0, xmm1); movdqu(xmm1, xmm0); pextrw(eax, xmm0, 3); por(xmm0, xmm2); psrlq(xmm0, 27); movl(ecx, 18416); psrld(xmm0, 2); rcpps(xmm0, xmm0); psllq(xmm1, 12); pshufd(xmm6, xmm5, 228); psrlq(xmm1, 12); jmp(L_2TAG_PACKET_1_0_2); bind(L_2TAG_PACKET_2_0_2); movdl(edx, xmm1); psrlq(xmm1, 32); movdl(ecx, xmm1); addl(ecx, ecx); cmpl(ecx, -2097152); jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2); orl(edx, ecx); cmpl(edx, 0); jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); bind(L_2TAG_PACKET_6_0_2); xorpd(xmm1, xmm1); xorpd(xmm0, xmm0); movl(eax, 32752); pinsrw(xmm1, eax, 3); mulsd(xmm0, xmm1); movl(Address(rsp, 16), 3); jmp(L_2TAG_PACKET_8_0_2); bind(L_2TAG_PACKET_7_0_2); xorpd(xmm1, xmm1); xorpd(xmm0, xmm0); movl(eax, 49136); pinsrw(xmm0, eax, 3); divsd(xmm0, xmm1); movl(Address(rsp, 16), 2); bind(L_2TAG_PACKET_8_0_2); movq(Address(rsp, 8), xmm0); bind(B1_3); movq(xmm0, Address(rsp, 8)); bind(B1_5); addq(rsp, 24); }
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2; Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start; assert_different_registers(tmp, eax, ecx, edx); jmp(start); address static_const_table = (address)_static_const_table; bind(start); subl(rsp, 120); movl(Address(rsp, 64), tmp); lea(tmp, ExternalAddress(static_const_table)); movdqu(xmm0, Address(rsp, 128)); unpcklpd(xmm0, xmm0); movdqu(xmm1, Address(tmp, 64)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL movdqu(xmm6, Address(tmp, 48)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL movdqu(xmm2, Address(tmp, 80)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL movdqu(xmm3, Address(tmp, 96)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL pextrw(eax, xmm0, 3); andl(eax, 32767); movl(edx, 16527); subl(edx, eax); subl(eax, 15504); orl(edx, eax); cmpl(edx, INT_MIN); jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); mulpd(xmm1, xmm0); addpd(xmm1, xmm6); movapd(xmm7, xmm1); subpd(xmm1, xmm6); mulpd(xmm2, xmm1); movdqu(xmm4, Address(tmp, 128)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL mulpd(xmm3, xmm1); movdqu(xmm5, Address(tmp, 144)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL subpd(xmm0, xmm2); movdl(eax, xmm7); movl(ecx, eax); andl(ecx, 63); shll(ecx, 4); sarl(eax, 6); movl(edx, eax); movdqu(xmm6, Address(tmp, 16)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL pand(xmm7, xmm6); movdqu(xmm6, Address(tmp, 32)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL paddq(xmm7, xmm6); psllq(xmm7, 46); subpd(xmm0, xmm3); movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160)); mulpd(xmm4, xmm0); movapd(xmm6, xmm0); movapd(xmm1, xmm0); mulpd(xmm6, xmm6); mulpd(xmm0, xmm6); addpd(xmm5, xmm4); mulsd(xmm0, xmm6); mulpd(xmm6, Address(tmp, 112)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL addsd(xmm1, xmm2); unpckhpd(xmm2, xmm2); mulpd(xmm0, xmm5); addsd(xmm1, xmm0); por(xmm2, xmm7); unpckhpd(xmm0, xmm0); addsd(xmm0, xmm1); addsd(xmm0, xmm6); addl(edx, 894); cmpl(edx, 1916); jcc (Assembler::above, L_2TAG_PACKET_1_0_2); mulsd(xmm0, xmm2); addsd(xmm0, xmm2); jmp(L_2TAG_PACKET_2_0_2); bind(L_2TAG_PACKET_1_0_2); fnstcw(Address(rsp, 24)); movzwl(edx, Address(rsp, 24)); orl(edx, 768); movw(Address(rsp, 28), edx); fldcw(Address(rsp, 28)); movl(edx, eax); sarl(eax, 1); subl(edx, eax); movdqu(xmm6, Address(tmp, 0)); // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL pandn(xmm6, xmm2); addl(eax, 1023); movdl(xmm3, eax); psllq(xmm3, 52); por(xmm6, xmm3); addl(edx, 1023); movdl(xmm4, edx); psllq(xmm4, 52); movsd(Address(rsp, 8), xmm0); fld_d(Address(rsp, 8)); movsd(Address(rsp, 16), xmm6); fld_d(Address(rsp, 16)); fmula(1); faddp(1); movsd(Address(rsp, 8), xmm4); fld_d(Address(rsp, 8)); fmulp(1); fstp_d(Address(rsp, 8)); movsd(xmm0,Address(rsp, 8)); fldcw(Address(rsp, 24)); pextrw(ecx, xmm0, 3); andl(ecx, 32752); cmpl(ecx, 32752); jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2); cmpl(ecx, 0); jcc(Assembler::equal, L_2TAG_PACKET_4_0_2); jmp(L_2TAG_PACKET_2_0_2); cmpl(ecx, INT_MIN); jcc(Assembler::less, L_2TAG_PACKET_3_0_2); cmpl(ecx, -1064950997); jcc(Assembler::less, L_2TAG_PACKET_2_0_2); jcc(Assembler::greater, L_2TAG_PACKET_4_0_2); movl(edx, Address(rsp, 128)); cmpl(edx ,-17155601); jcc(Assembler::less, L_2TAG_PACKET_2_0_2); jmp(L_2TAG_PACKET_4_0_2); bind(L_2TAG_PACKET_3_0_2); movl(edx, 14); jmp(L_2TAG_PACKET_5_0_2); bind(L_2TAG_PACKET_4_0_2); movl(edx, 15); bind(L_2TAG_PACKET_5_0_2); movsd(Address(rsp, 0), xmm0); movsd(xmm0, Address(rsp, 128)); fld_d(Address(rsp, 0)); jmp(L_2TAG_PACKET_6_0_2); bind(L_2TAG_PACKET_7_0_2); cmpl(eax, 2146435072); jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2); movl(eax, Address(rsp, 132)); cmpl(eax, INT_MIN); jcc(Assembler::greaterEqual, L_2TAG_PACKET_9_0_2); movsd(xmm0, Address(tmp, 1208)); // 0xffffffffUL, 0x7fefffffUL mulsd(xmm0, xmm0); movl(edx, 14); jmp(L_2TAG_PACKET_5_0_2); bind(L_2TAG_PACKET_9_0_2); movsd(xmm0, Address(tmp, 1216)); mulsd(xmm0, xmm0); movl(edx, 15); jmp(L_2TAG_PACKET_5_0_2); bind(L_2TAG_PACKET_8_0_2); movl(edx, Address(rsp, 128)); cmpl(eax, 2146435072); jcc(Assembler::above, L_2TAG_PACKET_10_0_2); cmpl(edx, 0); jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2); movl(eax, Address(rsp, 132)); cmpl(eax, 2146435072); jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2); movsd(xmm0, Address(tmp, 1192)); // 0x00000000UL, 0x7ff00000UL jmp(L_2TAG_PACKET_2_0_2); bind(L_2TAG_PACKET_11_0_2); movsd(xmm0, Address(tmp, 1200)); // 0x00000000UL, 0x00000000UL jmp(L_2TAG_PACKET_2_0_2); bind(L_2TAG_PACKET_10_0_2); movsd(xmm0, Address(rsp, 128)); addsd(xmm0, xmm0); jmp(L_2TAG_PACKET_2_0_2); bind(L_2TAG_PACKET_0_0_2); movl(eax, Address(rsp, 132)); andl(eax, 2147483647); cmpl(eax, 1083179008); jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2); movsd(xmm0, Address(rsp, 128)); addsd(xmm0, Address(tmp, 1184)); // 0x00000000UL, 0x3ff00000UL jmp(L_2TAG_PACKET_2_0_2); bind(L_2TAG_PACKET_2_0_2); movsd(Address(rsp, 48), xmm0); fld_d(Address(rsp, 48)); bind(L_2TAG_PACKET_6_0_2); movl(tmp, Address(rsp, 64)); }
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2; Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start; assert_different_registers(tmp, eax, ecx, edx); jmp(start); address cv = (address)_cv; address Shifter = (address)_shifter; address mmask = (address)_mmask; address bias = (address)_bias; address Tbl_addr = (address)_Tbl_addr; address ALLONES = (address)_ALLONES; address ebias = (address)_ebias; address XMAX = (address)_XMAX; address XMIN = (address)_XMIN; address INF = (address)_INF; address ZERO = (address)_ZERO; address ONE_val = (address)_ONE_val; bind(start); subq(rsp, 24); movsd(Address(rsp, 8), xmm0); unpcklpd(xmm0, xmm0); movdqu(xmm1, ExternalAddress(cv)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL movdqu(xmm6, ExternalAddress(Shifter)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL movdqu(xmm2, ExternalAddress(16+cv)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL movdqu(xmm3, ExternalAddress(32+cv)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL pextrw(eax, xmm0, 3); andl(eax, 32767); movl(edx, 16527); subl(edx, eax); subl(eax, 15504); orl(edx, eax); cmpl(edx, INT_MIN); jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); mulpd(xmm1, xmm0); addpd(xmm1, xmm6); movapd(xmm7, xmm1); subpd(xmm1, xmm6); mulpd(xmm2, xmm1); movdqu(xmm4, ExternalAddress(64+cv)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL mulpd(xmm3, xmm1); movdqu(xmm5, ExternalAddress(80+cv)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL subpd(xmm0, xmm2); movdl(eax, xmm7); movl(ecx, eax); andl(ecx, 63); shll(ecx, 4); sarl(eax, 6); movl(edx, eax); movdqu(xmm6, ExternalAddress(mmask)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL pand(xmm7, xmm6); movdqu(xmm6, ExternalAddress(bias)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL paddq(xmm7, xmm6); psllq(xmm7, 46); subpd(xmm0, xmm3); lea(tmp, ExternalAddress(Tbl_addr)); movdqu(xmm2, Address(ecx,tmp)); mulpd(xmm4, xmm0); movapd(xmm6, xmm0); movapd(xmm1, xmm0); mulpd(xmm6, xmm6); mulpd(xmm0, xmm6); addpd(xmm5, xmm4); mulsd(xmm0, xmm6); mulpd(xmm6, ExternalAddress(48+cv)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL addsd(xmm1, xmm2); unpckhpd(xmm2, xmm2); mulpd(xmm0, xmm5); addsd(xmm1, xmm0); por(xmm2, xmm7); unpckhpd(xmm0, xmm0); addsd(xmm0, xmm1); addsd(xmm0, xmm6); addl(edx, 894); cmpl(edx, 1916); jcc (Assembler::above, L_2TAG_PACKET_1_0_2); mulsd(xmm0, xmm2); addsd(xmm0, xmm2); jmp (B1_5); bind(L_2TAG_PACKET_1_0_2); xorpd(xmm3, xmm3); movdqu(xmm4, ExternalAddress(ALLONES)); // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL movl(edx, -1022); subl(edx, eax); movdl(xmm5, edx); psllq(xmm4, xmm5); movl(ecx, eax); sarl(eax, 1); pinsrw(xmm3, eax, 3); movdqu(xmm6, ExternalAddress(ebias)); // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL psllq(xmm3, 4); psubd(xmm2, xmm3); mulsd(xmm0, xmm2); cmpl(edx, 52); jcc(Assembler::greater, L_2TAG_PACKET_2_0_2); pand(xmm4, xmm2); paddd(xmm3, xmm6); subsd(xmm2, xmm4); addsd(xmm0, xmm2); cmpl(ecx, 1023); jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2); pextrw(ecx, xmm0, 3); andl(ecx, 32768); orl(edx, ecx); cmpl(edx, 0); jcc(Assembler::equal, L_2TAG_PACKET_4_0_2); movapd(xmm6, xmm0); addsd(xmm0, xmm4); mulsd(xmm0, xmm3); pextrw(ecx, xmm0, 3); andl(ecx, 32752); cmpl(ecx, 0); jcc(Assembler::equal, L_2TAG_PACKET_5_0_2); jmp(B1_5); bind(L_2TAG_PACKET_5_0_2); mulsd(xmm6, xmm3); mulsd(xmm4, xmm3); movdqu(xmm0, xmm6); pxor(xmm6, xmm4); psrad(xmm6, 31); pshufd(xmm6, xmm6, 85); psllq(xmm0, 1); psrlq(xmm0, 1); pxor(xmm0, xmm6); psrlq(xmm6, 63); paddq(xmm0, xmm6); paddq(xmm0, xmm4); movl(Address(rsp,0), 15); jmp(L_2TAG_PACKET_6_0_2); bind(L_2TAG_PACKET_4_0_2); addsd(xmm0, xmm4); mulsd(xmm0, xmm3); jmp(B1_5); bind(L_2TAG_PACKET_3_0_2); addsd(xmm0, xmm4); mulsd(xmm0, xmm3); pextrw(ecx, xmm0, 3); andl(ecx, 32752); cmpl(ecx, 32752); jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2); jmp(B1_5); bind(L_2TAG_PACKET_2_0_2); paddd(xmm3, xmm6); addpd(xmm0, xmm2); mulsd(xmm0, xmm3); movl(Address(rsp,0), 15); jmp(L_2TAG_PACKET_6_0_2); bind(L_2TAG_PACKET_8_0_2); cmpl(eax, 2146435072); jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2); movl(eax, Address(rsp,12)); cmpl(eax, INT_MIN); jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2); movsd(xmm0, ExternalAddress(XMAX)); // 0xffffffffUL, 0x7fefffffUL mulsd(xmm0, xmm0); bind(L_2TAG_PACKET_7_0_2); movl(Address(rsp,0), 14); jmp(L_2TAG_PACKET_6_0_2); bind(L_2TAG_PACKET_10_0_2); movsd(xmm0, ExternalAddress(XMIN)); // 0x00000000UL, 0x00100000UL mulsd(xmm0, xmm0); movl(Address(rsp,0), 15); jmp(L_2TAG_PACKET_6_0_2); bind(L_2TAG_PACKET_9_0_2); movl(edx, Address(rsp,8)); cmpl(eax, 2146435072); jcc(Assembler::above, L_2TAG_PACKET_11_0_2); cmpl(edx, 0); jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2); movl(eax, Address(rsp,12)); cmpl(eax, 2146435072); jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2); movsd(xmm0, ExternalAddress(INF)); // 0x00000000UL, 0x7ff00000UL jmp(B1_5); bind(L_2TAG_PACKET_12_0_2); movsd(xmm0, ExternalAddress(ZERO)); // 0x00000000UL, 0x00000000UL jmp(B1_5); bind(L_2TAG_PACKET_11_0_2); movsd(xmm0, Address(rsp, 8)); addsd(xmm0, xmm0); jmp(B1_5); bind(L_2TAG_PACKET_0_0_2); movl(eax, Address(rsp, 12)); andl(eax, 2147483647); cmpl(eax, 1083179008); jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2); movsd(Address(rsp, 8), xmm0); addsd(xmm0, ExternalAddress(ONE_val)); // 0x00000000UL, 0x3ff00000UL jmp(B1_5); bind(L_2TAG_PACKET_6_0_2); movq(Address(rsp, 16), xmm0); bind(B1_3); movq(xmm0, Address(rsp, 16)); bind(B1_5); addq(rsp, 24); }
VtableStub* VtableStubs::create_itable_stub(int itable_index) { // Note well: pd_code_size_limit is the absolute minimum we can get // away with. If you add code here, bump the code stub size // returned by pd_code_size_limit! const int amd64_code_length = VtableStub::pd_code_size_limit(false); VtableStub* s = new(amd64_code_length) VtableStub(false, itable_index); ResourceMark rm; CodeBuffer cb(s->entry_point(), amd64_code_length); MacroAssembler* masm = new MacroAssembler(&cb); #ifndef PRODUCT if (CountCompiledCalls) { __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); } #endif // Entry arguments: // rax: Interface // j_rarg0: Receiver // Free registers (non-args) are rax (interface), rbx // get receiver (need to skip return address on top of stack) assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); // get receiver klass (also an implicit null-check) address npe_addr = __ pc(); // Most registers are in use; we'll use rax, rbx, r10, r11 // (various calling sequences use r[cd]x, r[sd]i, r[89]; stay away from them) __ load_klass(r10, j_rarg0); // If we take a trap while this arg is on the stack we will not // be able to walk the stack properly. This is not an issue except // when there are mistakes in this assembly code that could generate // a spurious fault. Ask me how I know... const Register method = rbx; Label throw_icce; // Get methodOop and entrypoint for compiler __ lookup_interface_method(// inputs: rec. class, interface, itable index r10, rax, itable_index, // outputs: method, scan temp. reg method, r11, throw_icce); // method (rbx): methodOop // j_rarg0: receiver #ifdef ASSERT if (DebugVtables) { Label L2; __ cmpptr(method, (int32_t)NULL_WORD); __ jcc(Assembler::equal, L2); __ cmpptr(Address(method, methodOopDesc::from_compiled_offset()), (int32_t)NULL_WORD); __ jcc(Assembler::notZero, L2); __ stop("compiler entrypoint is null"); __ bind(L2); } #endif // ASSERT // rbx: methodOop // j_rarg0: receiver address ame_addr = __ pc(); __ jmp(Address(method, methodOopDesc::from_compiled_offset())); __ bind(throw_icce); __ jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); __ flush(); if (PrintMiscellaneous && (WizardMode || Verbose)) { tty->print_cr("itable #%d at "PTR_FORMAT"[%d] left over: %d", itable_index, s->entry_point(), (int)(s->code_end() - s->entry_point()), (int)(s->code_end() - __ pc())); } guarantee(__ pc() <= s->code_end(), "overflowed buffer"); // shut the door on sizing bugs int slop = 3; // 32-bit offset is this much larger than an 8-bit one assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset"); s->set_exception_points(npe_addr, ame_addr); return s; }
address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { const char *name; switch (type) { case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; case T_BYTE: name = "jni_fast_GetByteField"; break; case T_CHAR: name = "jni_fast_GetCharField"; break; case T_SHORT: name = "jni_fast_GetShortField"; break; case T_INT: name = "jni_fast_GetIntField"; break; case T_LONG: name = "jni_fast_GetLongField"; break; default: ShouldNotReachHere(); } ResourceMark rm; BufferBlob* b = BufferBlob::create(name, BUFFER_SIZE); address fast_entry = b->instructions_begin(); CodeBuffer cbuf(fast_entry, b->instructions_size()); MacroAssembler* masm = new MacroAssembler(&cbuf); Label slow; ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr()); __ mov32 (rcounter, counter); __ mov (robj, c_rarg1); __ testb (rcounter, 1); __ jcc (Assembler::notZero, slow); if (os::is_MP()) { __ xorptr(robj, rcounter); __ xorptr(robj, rcounter); // obj, since // robj ^ rcounter ^ rcounter == robj // robj is data dependent on rcounter. } __ movptr(robj, Address(robj, 0)); // *obj __ mov (roffset, c_rarg2); __ shrptr(roffset, 2); // offset assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); speculative_load_pclist[count] = __ pc(); switch (type) { case T_BOOLEAN: __ movzbl (rax, Address(robj, roffset, Address::times_1)); break; case T_BYTE: __ movsbl (rax, Address(robj, roffset, Address::times_1)); break; case T_CHAR: __ movzwl (rax, Address(robj, roffset, Address::times_1)); break; case T_SHORT: __ movswl (rax, Address(robj, roffset, Address::times_1)); break; case T_INT: __ movl (rax, Address(robj, roffset, Address::times_1)); break; case T_LONG: __ movq (rax, Address(robj, roffset, Address::times_1)); break; default: ShouldNotReachHere(); } if (os::is_MP()) { __ lea(rcounter_addr, counter); // ca is data dependent on rax. __ xorptr(rcounter_addr, rax); __ xorptr(rcounter_addr, rax); __ cmpl (rcounter, Address(rcounter_addr, 0)); } else { __ cmp32 (rcounter, counter); } __ jcc (Assembler::notEqual, slow); __ ret (0); slowcase_entry_pclist[count++] = __ pc(); __ bind (slow); address slow_case_addr; switch (type) { case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; case T_INT: slow_case_addr = jni_GetIntField_addr(); break; case T_LONG: slow_case_addr = jni_GetLongField_addr(); } // tail call __ jump (ExternalAddress(slow_case_addr)); __ flush (); return fast_entry; }
// These stubs are used by the compiler only. // Argument registers, which must be preserved: // rcx - receiver (always first argument) // rdx - second argument (if any) // Other registers that might be usable: // rax - inline cache register (is interface for itable stub) // rbx - method (used when calling out to interpreter) // Available now, but may become callee-save at some point: // rsi, rdi // Note that rax and rdx are also used for return values. // VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { const int i486_code_length = VtableStub::pd_code_size_limit(true); VtableStub* s = new(i486_code_length) VtableStub(true, vtable_index); // Can be NULL if there is no free space in the code cache. if (s == NULL) { return NULL; } ResourceMark rm; CodeBuffer cb(s->entry_point(), i486_code_length); MacroAssembler* masm = new MacroAssembler(&cb); #ifndef PRODUCT if (CountCompiledCalls) { __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); } #endif /* PRODUCT */ // get receiver (need to skip return address on top of stack) assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx"); // get receiver klass address npe_addr = __ pc(); __ movptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); #ifndef PRODUCT if (DebugVtables) { Label L; // check offset vs vtable length __ cmpl(Address(rax, InstanceKlass::vtable_length_offset()*wordSize), vtable_index*vtableEntry::size()); __ jcc(Assembler::greater, L); __ movl(rbx, vtable_index); __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), rcx, rbx); __ bind(L); } #endif // PRODUCT const Register method = rbx; // load Method* and target address __ lookup_virtual_method(rax, vtable_index, method); if (DebugVtables) { Label L; __ cmpptr(method, (int32_t)NULL_WORD); __ jcc(Assembler::equal, L); __ cmpptr(Address(method, Method::from_compiled_offset()), (int32_t)NULL_WORD); __ jcc(Assembler::notZero, L); __ stop("Vtable entry is NULL"); __ bind(L); } // rax,: receiver klass // method (rbx): Method* // rcx: receiver address ame_addr = __ pc(); __ jmp( Address(method, Method::from_compiled_offset())); masm->flush(); if (PrintMiscellaneous && (WizardMode || Verbose)) { tty->print_cr("vtable #%d at "PTR_FORMAT"[%d] left over: %d", vtable_index, s->entry_point(), (int)(s->code_end() - s->entry_point()), (int)(s->code_end() - __ pc())); } guarantee(__ pc() <= s->code_end(), "overflowed buffer"); // shut the door on sizing bugs int slop = 3; // 32-bit offset is this much larger than an 8-bit one assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset"); s->set_exception_points(npe_addr, ame_addr); return s; }
VtableStub* VtableStubs::create_itable_stub(int itable_index) { // Note well: pd_code_size_limit is the absolute minimum we can get // away with. If you add code here, bump the code stub size // returned by pd_code_size_limit! const int code_length = VtableStub::pd_code_size_limit(false); VtableStub* s = new(code_length) VtableStub(false, itable_index); ResourceMark rm; CodeBuffer cb(s->entry_point(), code_length); MacroAssembler* masm = new MacroAssembler(&cb); #ifndef PRODUCT if (CountCompiledCalls) { __ lea(r10, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); __ incrementw(Address(r10)); } #endif // Entry arguments: // rscratch2: Interface // j_rarg0: Receiver // Free registers (non-args) are r0 (interface), rmethod // get receiver (need to skip return address on top of stack) assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); // get receiver klass (also an implicit null-check) address npe_addr = __ pc(); // Most registers are in use; we'll use r0, rmethod, r10, r11 __ load_klass(r10, j_rarg0); Label throw_icce; // Get Method* and entrypoint for compiler __ lookup_interface_method(// inputs: rec. class, interface, itable index r10, rscratch2, itable_index, // outputs: method, scan temp. reg rmethod, r11, throw_icce); // method (rmethod): Method* // j_rarg0: receiver #ifdef ASSERT if (DebugVtables) { Label L2; __ cbz(rmethod, L2); __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset())); __ cbnz(rscratch1, L2); __ stop("compiler entrypoint is null"); __ bind(L2); } #endif // ASSERT // rmethod: Method* // j_rarg0: receiver address ame_addr = __ pc(); __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset())); __ br(rscratch1); __ bind(throw_icce); __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); __ flush(); if (PrintMiscellaneous && (WizardMode || Verbose)) { tty->print_cr("itable #%d at " PTR_FORMAT "[%d] left over: %d", itable_index, p2i(s->entry_point()), (int)(s->code_end() - s->entry_point()), (int)(s->code_end() - __ pc())); } guarantee(__ pc() <= s->code_end(), "overflowed buffer"); s->set_exception_points(npe_addr, ame_addr); return s; }