Exemple #1
0
void cmp_tests() {
	int a = 1, b = 2;
	
	/* greater tests */
	gtl((char *)&a, (char *)&b); assert(*(int32_t *)ecx == 0);
	gtl((char *)&b, (char *)&a); assert(*(int32_t *)ecx == 1);
	gel((char *)&a, (char *)&b); assert(*(int32_t *)ecx == 0);
	gel((char *)&a, (char *)&a); assert(*(int32_t *)ecx == 1);
	gel((char *)&b, (char *)&a); assert(*(int32_t *)ecx == 1);

	/* less tests */
	ltl((char *)&a, (char *)&b); assert(*(int32_t *)ecx == 1);
	ltl((char *)&b, (char *)&a); assert(*(int32_t *)ecx == 0);
	lel((char *)&a, (char *)&b); assert(*(int32_t *)ecx == 1);
	lel((char *)&a, (char *)&a); assert(*(int32_t *)ecx == 1);
	lel((char *)&b, (char *)&a); assert(*(int32_t *)ecx == 0);

	/* (not) equal tests */
	eql((char *)&a, (char *)&b); assert(*(int32_t *)ecx == 0);
	eql((char *)&b, (char *)&b); assert(*(int32_t *)ecx == 1);
	nel((char *)&a, (char *)&b); assert(*(int32_t *)ecx == 1);
	nel((char *)&a, (char *)&a); assert(*(int32_t *)ecx == 0);

	/* and/or tests */
	a = 0, b = 1;
	andl((char *)&a, (char *)&a); assert(*(int32_t *)ecx == 0);
	andl((char *)&a, (char *)&b); assert(*(int32_t *)ecx == 0);
	andl((char *)&b, (char *)&b); assert(*(int32_t *)ecx == 1);
	orl((char *)&a, (char *)&a); assert(*(int32_t *)ecx == 0);
	orl((char *)&a, (char *)&b); assert(*(int32_t *)ecx == 1);
	orl((char *)&b, (char *)&b); assert(*(int32_t *)ecx == 1);
}
Exemple #2
0
static void set_channel_end(void)
{
	/* Manipulate CCA threshold to resume transmission */
	set(AR9170_PHY_REG_CCA_THRESHOLD, 0x0);
	/* Disable Virtual CCA */
	andl(AR9170_MAC_REG_QOS_PRIORITY_VIRTUAL_CCA,
	     ~AR9170_MAC_VIRTUAL_CCA_ALL);

	fw.phy.state = CARL9170_PHY_ON;
}
Exemple #3
0
void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {
  Label L;
  BLOCK_COMMENT("verify_ref_kind {");
  __ movl(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes())));
  __ shrl(temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT);
  __ andl(temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK);
  __ cmpl(temp, ref_kind);
  __ jcc(Assembler::equal, L);
  { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal);
    jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind);
    if (ref_kind == JVM_REF_invokeVirtual ||
        ref_kind == JVM_REF_invokeSpecial)
      // could do this for all ref_kinds, but would explode assembly code size
      trace_method_handle(_masm, buf);
    __ STOP(buf);
  }
  BLOCK_COMMENT("} verify_ref_kind");
  __ bind(L);
}
void InterpreterStubs::generate_interpreter_call_vm_redo() {
#if ENABLE_JAVA_DEBUGGER
  Label check_breakpoint, no_breakpoint;
#endif
  comment_section("Interpreter call VM - and repeat current bytecode upon termination");
  entry("interpreter_call_vm_redo");

  comment("Save bytecode pointer");
  movl(Address(ebp, Constant(JavaFrame::bcp_store_offset())), esi);

  comment("Call the shared call vm and disregard any return value");
  call_shared_call_vm(T_VOID);
 
  comment("Restore bytecode pointer");
  movl(esi, Address(ebp, Constant(JavaFrame::bcp_store_offset())));

  comment("Restore locals pointer");
  movl(edi, Address(ebp, Constant(JavaFrame::locals_pointer_offset())));

#if ENABLE_JAVA_DEBUGGER
  comment("Check to see if we are connected to a debugger");
  cmpb(Address(Constant("_debugger_active")), Constant(0));
  jcc(not_zero, Constant(check_breakpoint));
  bind(no_breakpoint);
  comment("Not debugging, so just dispatch");
  dispatch_next(0);
  bind(check_breakpoint);
  comment("We are debugging, so let's see if we just replaced a breakpoint opcode");
  cmpb(Address(esi), Constant(Bytecodes::_breakpoint));
  jcc(not_zero, Constant(no_breakpoint));
  comment("There is a breakpoint in the code, so that means that eax has the correct opcode");
  comment("So just jmp directly without using esi");
  andl(eax, Constant(0xFF));
  movl(ebx, eax);
  jmp(Address(no_reg, ebx, times_4, Constant("interpreter_dispatch_table")));
#else
  dispatch_next(0);
#endif
  entry_end(); // interpreter_call_vm_redo
}
Exemple #5
0
void InterpreterMacroAssembler::narrow(Register result) {

  // Get method->_constMethod->_result_type
  movptr(rcx, Address(rbp, frame::interpreter_frame_method_offset * wordSize));
  movptr(rcx, Address(rcx, Method::const_offset()));
  load_unsigned_byte(rcx, Address(rcx, ConstMethod::result_type_offset()));

  Label done, notBool, notByte, notChar;

  // common case first
  cmpl(rcx, T_INT);
  jcc(Assembler::equal, done);

  // mask integer result to narrower return type.
  cmpl(rcx, T_BOOLEAN);
  jcc(Assembler::notEqual, notBool);
  andl(result, 0x1);
  jmp(done);

  bind(notBool);
  cmpl(rcx, T_BYTE);
  jcc(Assembler::notEqual, notByte);
  LP64_ONLY(movsbl(result, result);)
Exemple #6
0
void __section(boot) __noreturn __visible start(void)
{
    clock_set(AHB_40MHZ_OSC, true);

    /* watchdog magic pattern check */
    if ((get(AR9170_PWR_REG_WATCH_DOG_MAGIC) & 0xffff0000) == 0x12340000) {
        /* watch dog warm start */
        incl(AR9170_PWR_REG_WATCH_DOG_MAGIC);
        usb_trigger_out();
    } else if ((get(AR9170_PWR_REG_WATCH_DOG_MAGIC) & 0xffff0000) == 0x98760000) {
        /* suspend/resume */
    }

    /* write the magic pattern for watch dog */
    andl(AR9170_PWR_REG_WATCH_DOG_MAGIC, 0xFFFF);
    orl(AR9170_PWR_REG_WATCH_DOG_MAGIC, 0x12340000);

    init();

#ifdef CONFIG_CARL9170FW_DEBUG

    BUG("TEST BUG");
    BUG_ON(0x2b || !0x2b);
    INFO("INFO MESSAGE");

    /* a set of unique characters to detect transfer data corruptions */
    DBG("AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz"
        " ~`!1@2#3$4%%5^6&7*8(9)0_-+={[}]|\\:;\"'<,>.?/");
#endif /* CONFIG_CARL9170FW_DEBUG */

    /*
     * Tell the host, that the firmware has booted and is
     * now ready to process requests.
     */
    send_cmd_to_host(0, CARL9170_RSP_BOOT, 0x00, NULL);
    main_loop();
}
  address generate_verify_oop() {
    StubCodeMark mark(this, "StubRoutines", "verify_oop");
    address start = __ pc();
    
    // Incoming arguments on stack after saving eax:
    //
    // [tos    ]: saved edx
    // [tos + 1]: saved EFLAGS
    // [tos + 2]: return address
    // [tos + 3]: char* error message
    // [tos + 4]: oop   object to verify
    // [tos + 5]: saved eax - saved by caller and bashed
    
    Label exit, error;
    __ pushfd();
    __ incl(Address((int)StubRoutines::verify_oop_count_addr(), relocInfo::none));
    __ pushl(edx);                               // save edx
    // make sure object is 'reasonable'
    __ movl(eax, Address(esp, 4 * wordSize));    // get object
    __ testl(eax, eax);
    __ jcc(Assembler::zero, exit);               // if obj is NULL it is ok
    
    // Check if the oop is in the right area of memory
    const int oop_mask = Universe::verify_oop_mask();
    const int oop_bits = Universe::verify_oop_bits();
    __ movl(edx, eax);
    __ andl(edx, oop_mask);
    __ cmpl(edx, oop_bits);
    __ jcc(Assembler::notZero, error);

    // make sure klass is 'reasonable'
    __ movl(eax, Address(eax, oopDesc::klass_offset_in_bytes())); // get klass
    __ testl(eax, eax);
    __ jcc(Assembler::zero, error);              // if klass is NULL it is broken

    // Check if the klass is in the right area of memory
    const int klass_mask = Universe::verify_klass_mask();
    const int klass_bits = Universe::verify_klass_bits();
    __ movl(edx, eax);
    __ andl(edx, klass_mask);
    __ cmpl(edx, klass_bits);
    __ jcc(Assembler::notZero, error);

    // make sure klass' klass is 'reasonable'
    __ movl(eax, Address(eax, oopDesc::klass_offset_in_bytes())); // get klass' klass
    __ testl(eax, eax);
    __ jcc(Assembler::zero, error);              // if klass' klass is NULL it is broken

    __ movl(edx, eax);
    __ andl(edx, klass_mask);
    __ cmpl(edx, klass_bits);
    __ jcc(Assembler::notZero, error);           // if klass not in right area
                                                 // of memory it is broken too.

    // return if everything seems ok
    __ bind(exit);
    __ movl(eax, Address(esp, 5 * wordSize));    // get saved eax back
    __ popl(edx);                                // restore edx
    __ popfd();                                  // restore EFLAGS
    __ ret(3 * wordSize);                        // pop arguments

    // handle errors
    __ bind(error);
    __ movl(eax, Address(esp, 5 * wordSize));    // get saved eax back
    __ popl(edx);                                // get saved edx back
    __ popfd();                                  // get saved EFLAGS off stack -- will be ignored
    __ pushad();                                 // push registers (eip = return address & msg are already pushed)
    __ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
    __ popad();
    __ ret(3 * wordSize);                        // pop arguments
    return start;
  }
void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp1, Register tmp2) {
  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
  Label L_2TAG_PACKET_8_0_2;
  Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;

  assert_different_registers(tmp1, tmp2, eax, ecx, edx);
  jmp(start);
  address L_tbl = (address)_L_tbl;
  address log2 = (address)_log2;
  address coeff = (address)_coeff;

  bind(start);
  subq(rsp, 24);
  movsd(Address(rsp, 0), xmm0);
  mov64(rax, 0x3ff0000000000000);
  movdq(xmm2, rax);
  mov64(rdx, 0x77f0000000000000);
  movdq(xmm3, rdx);
  movl(ecx, 32768);
  movdl(xmm4, rcx);
  mov64(tmp1, 0xffffe00000000000);
  movdq(xmm5, tmp1);
  movdqu(xmm1, xmm0);
  pextrw(eax, xmm0, 3);
  por(xmm0, xmm2);
  movl(ecx, 16352);
  psrlq(xmm0, 27);
  lea(tmp2, ExternalAddress(L_tbl));
  psrld(xmm0, 2);
  rcpps(xmm0, xmm0);
  psllq(xmm1, 12);
  pshufd(xmm6, xmm5, 228);
  psrlq(xmm1, 12);
  subl(eax, 16);
  cmpl(eax, 32736);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);

  bind(L_2TAG_PACKET_1_0_2);
  paddd(xmm0, xmm4);
  por(xmm1, xmm3);
  movdl(edx, xmm0);
  psllq(xmm0, 29);
  pand(xmm5, xmm1);
  pand(xmm0, xmm6);
  subsd(xmm1, xmm5);
  mulpd(xmm5, xmm0);
  andl(eax, 32752);
  subl(eax, ecx);
  cvtsi2sdl(xmm7, eax);
  mulsd(xmm1, xmm0);
  movq(xmm6, ExternalAddress(log2));       // 0xfefa3800UL, 0x3fa62e42UL
  movdqu(xmm3, ExternalAddress(coeff));    // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
  subsd(xmm5, xmm2);
  andl(edx, 16711680);
  shrl(edx, 12);
  movdqu(xmm0, Address(tmp2, edx));
  movdqu(xmm4, ExternalAddress(16 + coeff)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
  addsd(xmm1, xmm5);
  movdqu(xmm2, ExternalAddress(32 + coeff)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
  mulsd(xmm6, xmm7);
  movddup(xmm5, xmm1);
  mulsd(xmm7, ExternalAddress(8 + log2));    // 0x93c76730UL, 0x3ceef357UL
  mulsd(xmm3, xmm1);
  addsd(xmm0, xmm6);
  mulpd(xmm4, xmm5);
  mulpd(xmm5, xmm5);
  movddup(xmm6, xmm0);
  addsd(xmm0, xmm1);
  addpd(xmm4, xmm2);
  mulpd(xmm3, xmm5);
  subsd(xmm6, xmm0);
  mulsd(xmm4, xmm1);
  pshufd(xmm2, xmm0, 238);
  addsd(xmm1, xmm6);
  mulsd(xmm5, xmm5);
  addsd(xmm7, xmm2);
  addpd(xmm4, xmm3);
  addsd(xmm1, xmm7);
  mulpd(xmm4, xmm5);
  addsd(xmm1, xmm4);
  pshufd(xmm5, xmm4, 238);
  addsd(xmm1, xmm5);
  addsd(xmm0, xmm1);
  jmp(B1_5);

  bind(L_2TAG_PACKET_0_0_2);
  movq(xmm0, Address(rsp, 0));
  movq(xmm1, Address(rsp, 0));
  addl(eax, 16);
  cmpl(eax, 32768);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2);
  cmpl(eax, 16);
  jcc(Assembler::below, L_2TAG_PACKET_3_0_2);

  bind(L_2TAG_PACKET_4_0_2);
  addsd(xmm0, xmm0);
  jmp(B1_5);

  bind(L_2TAG_PACKET_5_0_2);
  jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
  cmpl(edx, 0);
  jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
  jmp(L_2TAG_PACKET_6_0_2);

  bind(L_2TAG_PACKET_3_0_2);
  xorpd(xmm1, xmm1);
  addsd(xmm1, xmm0);
  movdl(edx, xmm1);
  psrlq(xmm1, 32);
  movdl(ecx, xmm1);
  orl(edx, ecx);
  cmpl(edx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
  xorpd(xmm1, xmm1);
  movl(eax, 18416);
  pinsrw(xmm1, eax, 3);
  mulsd(xmm0, xmm1);
  movdqu(xmm1, xmm0);
  pextrw(eax, xmm0, 3);
  por(xmm0, xmm2);
  psrlq(xmm0, 27);
  movl(ecx, 18416);
  psrld(xmm0, 2);
  rcpps(xmm0, xmm0);
  psllq(xmm1, 12);
  pshufd(xmm6, xmm5, 228);
  psrlq(xmm1, 12);
  jmp(L_2TAG_PACKET_1_0_2);

  bind(L_2TAG_PACKET_2_0_2);
  movdl(edx, xmm1);
  psrlq(xmm1, 32);
  movdl(ecx, xmm1);
  addl(ecx, ecx);
  cmpl(ecx, -2097152);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2);
  orl(edx, ecx);
  cmpl(edx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);

  bind(L_2TAG_PACKET_6_0_2);
  xorpd(xmm1, xmm1);
  xorpd(xmm0, xmm0);
  movl(eax, 32752);
  pinsrw(xmm1, eax, 3);
  mulsd(xmm0, xmm1);
  movl(Address(rsp, 16), 3);
  jmp(L_2TAG_PACKET_8_0_2);
  bind(L_2TAG_PACKET_7_0_2);
  xorpd(xmm1, xmm1);
  xorpd(xmm0, xmm0);
  movl(eax, 49136);
  pinsrw(xmm0, eax, 3);
  divsd(xmm0, xmm1);
  movl(Address(rsp, 16), 2);

  bind(L_2TAG_PACKET_8_0_2);
  movq(Address(rsp, 8), xmm0);

  bind(B1_3);
  movq(xmm0, Address(rsp, 8));

  bind(B1_5);
  addq(rsp, 24);
}
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
  Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;

  assert_different_registers(tmp, eax, ecx, edx);
  jmp(start);
  address static_const_table = (address)_static_const_table;

  bind(start);
  subl(rsp, 120);
  movl(Address(rsp, 64), tmp);
  lea(tmp, ExternalAddress(static_const_table));
  movdqu(xmm0, Address(rsp, 128));
  unpcklpd(xmm0, xmm0);
  movdqu(xmm1, Address(tmp, 64));          // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
  movdqu(xmm6, Address(tmp, 48));          // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
  movdqu(xmm2, Address(tmp, 80));          // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
  movdqu(xmm3, Address(tmp, 96));          // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
  pextrw(eax, xmm0, 3);
  andl(eax, 32767);
  movl(edx, 16527);
  subl(edx, eax);
  subl(eax, 15504);
  orl(edx, eax);
  cmpl(edx, INT_MIN);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
  mulpd(xmm1, xmm0);
  addpd(xmm1, xmm6);
  movapd(xmm7, xmm1);
  subpd(xmm1, xmm6);
  mulpd(xmm2, xmm1);
  movdqu(xmm4, Address(tmp, 128));         // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
  mulpd(xmm3, xmm1);
  movdqu(xmm5, Address(tmp, 144));         // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
  subpd(xmm0, xmm2);
  movdl(eax, xmm7);
  movl(ecx, eax);
  andl(ecx, 63);
  shll(ecx, 4);
  sarl(eax, 6);
  movl(edx, eax);
  movdqu(xmm6, Address(tmp, 16));          // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
  pand(xmm7, xmm6);
  movdqu(xmm6, Address(tmp, 32));          // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
  paddq(xmm7, xmm6);
  psllq(xmm7, 46);
  subpd(xmm0, xmm3);
  movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160));
  mulpd(xmm4, xmm0);
  movapd(xmm6, xmm0);
  movapd(xmm1, xmm0);
  mulpd(xmm6, xmm6);
  mulpd(xmm0, xmm6);
  addpd(xmm5, xmm4);
  mulsd(xmm0, xmm6);
  mulpd(xmm6, Address(tmp, 112));          // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
  addsd(xmm1, xmm2);
  unpckhpd(xmm2, xmm2);
  mulpd(xmm0, xmm5);
  addsd(xmm1, xmm0);
  por(xmm2, xmm7);
  unpckhpd(xmm0, xmm0);
  addsd(xmm0, xmm1);
  addsd(xmm0, xmm6);
  addl(edx, 894);
  cmpl(edx, 1916);
  jcc (Assembler::above, L_2TAG_PACKET_1_0_2);
  mulsd(xmm0, xmm2);
  addsd(xmm0, xmm2);
  jmp(L_2TAG_PACKET_2_0_2);

  bind(L_2TAG_PACKET_1_0_2);
  fnstcw(Address(rsp, 24));
  movzwl(edx, Address(rsp, 24));
  orl(edx, 768);
  movw(Address(rsp, 28), edx);
  fldcw(Address(rsp, 28));
  movl(edx, eax);
  sarl(eax, 1);
  subl(edx, eax);
  movdqu(xmm6, Address(tmp, 0));           // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL
  pandn(xmm6, xmm2);
  addl(eax, 1023);
  movdl(xmm3, eax);
  psllq(xmm3, 52);
  por(xmm6, xmm3);
  addl(edx, 1023);
  movdl(xmm4, edx);
  psllq(xmm4, 52);
  movsd(Address(rsp, 8), xmm0);
  fld_d(Address(rsp, 8));
  movsd(Address(rsp, 16), xmm6);
  fld_d(Address(rsp, 16));
  fmula(1);
  faddp(1);
  movsd(Address(rsp, 8), xmm4);
  fld_d(Address(rsp, 8));
  fmulp(1);
  fstp_d(Address(rsp, 8));
  movsd(xmm0,Address(rsp, 8));
  fldcw(Address(rsp, 24));
  pextrw(ecx, xmm0, 3);
  andl(ecx, 32752);
  cmpl(ecx, 32752);
  jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
  cmpl(ecx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
  jmp(L_2TAG_PACKET_2_0_2);
  cmpl(ecx, INT_MIN);
  jcc(Assembler::less, L_2TAG_PACKET_3_0_2);
  cmpl(ecx, -1064950997);
  jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
  jcc(Assembler::greater, L_2TAG_PACKET_4_0_2);
  movl(edx, Address(rsp, 128));
  cmpl(edx ,-17155601);
  jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
  jmp(L_2TAG_PACKET_4_0_2);

  bind(L_2TAG_PACKET_3_0_2);
  movl(edx, 14);
  jmp(L_2TAG_PACKET_5_0_2);

  bind(L_2TAG_PACKET_4_0_2);
  movl(edx, 15);

  bind(L_2TAG_PACKET_5_0_2);
  movsd(Address(rsp, 0), xmm0);
  movsd(xmm0, Address(rsp, 128));
  fld_d(Address(rsp, 0));
  jmp(L_2TAG_PACKET_6_0_2);

  bind(L_2TAG_PACKET_7_0_2);
  cmpl(eax, 2146435072);
  jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2);
  movl(eax, Address(rsp, 132));
  cmpl(eax, INT_MIN);
  jcc(Assembler::greaterEqual, L_2TAG_PACKET_9_0_2);
  movsd(xmm0, Address(tmp, 1208));         // 0xffffffffUL, 0x7fefffffUL
  mulsd(xmm0, xmm0);
  movl(edx, 14);
  jmp(L_2TAG_PACKET_5_0_2);

  bind(L_2TAG_PACKET_9_0_2);
  movsd(xmm0, Address(tmp, 1216));
  mulsd(xmm0, xmm0);
  movl(edx, 15);
  jmp(L_2TAG_PACKET_5_0_2);

  bind(L_2TAG_PACKET_8_0_2);
  movl(edx, Address(rsp, 128));
  cmpl(eax, 2146435072);
  jcc(Assembler::above, L_2TAG_PACKET_10_0_2);
  cmpl(edx, 0);
  jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2);
  movl(eax, Address(rsp, 132));
  cmpl(eax, 2146435072);
  jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
  movsd(xmm0, Address(tmp, 1192));         // 0x00000000UL, 0x7ff00000UL
  jmp(L_2TAG_PACKET_2_0_2);

  bind(L_2TAG_PACKET_11_0_2);
  movsd(xmm0, Address(tmp, 1200));         // 0x00000000UL, 0x00000000UL
  jmp(L_2TAG_PACKET_2_0_2);

  bind(L_2TAG_PACKET_10_0_2);
  movsd(xmm0, Address(rsp, 128));
  addsd(xmm0, xmm0);
  jmp(L_2TAG_PACKET_2_0_2);

  bind(L_2TAG_PACKET_0_0_2);
  movl(eax, Address(rsp, 132));
  andl(eax, 2147483647);
  cmpl(eax, 1083179008);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
  movsd(xmm0, Address(rsp, 128));
  addsd(xmm0, Address(tmp, 1184));         // 0x00000000UL, 0x3ff00000UL
  jmp(L_2TAG_PACKET_2_0_2);

  bind(L_2TAG_PACKET_2_0_2);
  movsd(Address(rsp, 48), xmm0);
  fld_d(Address(rsp, 48));

  bind(L_2TAG_PACKET_6_0_2);
  movl(tmp, Address(rsp, 64));
}
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
  Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start;

  assert_different_registers(tmp, eax, ecx, edx);
  jmp(start);
  address cv = (address)_cv;
  address Shifter = (address)_shifter;
  address mmask = (address)_mmask;
  address bias = (address)_bias;
  address Tbl_addr = (address)_Tbl_addr;
  address ALLONES = (address)_ALLONES;
  address ebias = (address)_ebias;
  address XMAX = (address)_XMAX;
  address XMIN = (address)_XMIN;
  address INF = (address)_INF;
  address ZERO = (address)_ZERO;
  address ONE_val = (address)_ONE_val;

  bind(start);
  subq(rsp, 24);
  movsd(Address(rsp, 8), xmm0);
  unpcklpd(xmm0, xmm0);
  movdqu(xmm1, ExternalAddress(cv));       // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
  movdqu(xmm6, ExternalAddress(Shifter));  // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
  movdqu(xmm2, ExternalAddress(16+cv));    // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
  movdqu(xmm3, ExternalAddress(32+cv));    // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
  pextrw(eax, xmm0, 3);
  andl(eax, 32767);
  movl(edx, 16527);
  subl(edx, eax);
  subl(eax, 15504);
  orl(edx, eax);
  cmpl(edx, INT_MIN);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
  mulpd(xmm1, xmm0);
  addpd(xmm1, xmm6);
  movapd(xmm7, xmm1);
  subpd(xmm1, xmm6);
  mulpd(xmm2, xmm1);
  movdqu(xmm4, ExternalAddress(64+cv));    // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
  mulpd(xmm3, xmm1);
  movdqu(xmm5, ExternalAddress(80+cv));    // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
  subpd(xmm0, xmm2);
  movdl(eax, xmm7);
  movl(ecx, eax);
  andl(ecx, 63);
  shll(ecx, 4);
  sarl(eax, 6);
  movl(edx, eax);
  movdqu(xmm6, ExternalAddress(mmask));    // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
  pand(xmm7, xmm6);
  movdqu(xmm6, ExternalAddress(bias));     // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
  paddq(xmm7, xmm6);
  psllq(xmm7, 46);
  subpd(xmm0, xmm3);
  lea(tmp, ExternalAddress(Tbl_addr));
  movdqu(xmm2, Address(ecx,tmp));
  mulpd(xmm4, xmm0);
  movapd(xmm6, xmm0);
  movapd(xmm1, xmm0);
  mulpd(xmm6, xmm6);
  mulpd(xmm0, xmm6);
  addpd(xmm5, xmm4);
  mulsd(xmm0, xmm6);
  mulpd(xmm6, ExternalAddress(48+cv));     // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
  addsd(xmm1, xmm2);
  unpckhpd(xmm2, xmm2);
  mulpd(xmm0, xmm5);
  addsd(xmm1, xmm0);
  por(xmm2, xmm7);
  unpckhpd(xmm0, xmm0);
  addsd(xmm0, xmm1);
  addsd(xmm0, xmm6);
  addl(edx, 894);
  cmpl(edx, 1916);
  jcc (Assembler::above, L_2TAG_PACKET_1_0_2);
  mulsd(xmm0, xmm2);
  addsd(xmm0, xmm2);
  jmp (B1_5);

  bind(L_2TAG_PACKET_1_0_2);
  xorpd(xmm3, xmm3);
  movdqu(xmm4, ExternalAddress(ALLONES));  // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
  movl(edx, -1022);
  subl(edx, eax);
  movdl(xmm5, edx);
  psllq(xmm4, xmm5);
  movl(ecx, eax);
  sarl(eax, 1);
  pinsrw(xmm3, eax, 3);
  movdqu(xmm6, ExternalAddress(ebias));    // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
  psllq(xmm3, 4);
  psubd(xmm2, xmm3);
  mulsd(xmm0, xmm2);
  cmpl(edx, 52);
  jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
  pand(xmm4, xmm2);
  paddd(xmm3, xmm6);
  subsd(xmm2, xmm4);
  addsd(xmm0, xmm2);
  cmpl(ecx, 1023);
  jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
  pextrw(ecx, xmm0, 3);
  andl(ecx, 32768);
  orl(edx, ecx);
  cmpl(edx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
  movapd(xmm6, xmm0);
  addsd(xmm0, xmm4);
  mulsd(xmm0, xmm3);
  pextrw(ecx, xmm0, 3);
  andl(ecx, 32752);
  cmpl(ecx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_5_0_2);
  jmp(B1_5);

  bind(L_2TAG_PACKET_5_0_2);
  mulsd(xmm6, xmm3);
  mulsd(xmm4, xmm3);
  movdqu(xmm0, xmm6);
  pxor(xmm6, xmm4);
  psrad(xmm6, 31);
  pshufd(xmm6, xmm6, 85);
  psllq(xmm0, 1);
  psrlq(xmm0, 1);
  pxor(xmm0, xmm6);
  psrlq(xmm6, 63);
  paddq(xmm0, xmm6);
  paddq(xmm0, xmm4);
  movl(Address(rsp,0), 15);
  jmp(L_2TAG_PACKET_6_0_2);

  bind(L_2TAG_PACKET_4_0_2);
  addsd(xmm0, xmm4);
  mulsd(xmm0, xmm3);
  jmp(B1_5);

  bind(L_2TAG_PACKET_3_0_2);
  addsd(xmm0, xmm4);
  mulsd(xmm0, xmm3);
  pextrw(ecx, xmm0, 3);
  andl(ecx, 32752);
  cmpl(ecx, 32752);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
  jmp(B1_5);

  bind(L_2TAG_PACKET_2_0_2);
  paddd(xmm3, xmm6);
  addpd(xmm0, xmm2);
  mulsd(xmm0, xmm3);
  movl(Address(rsp,0), 15);
  jmp(L_2TAG_PACKET_6_0_2);

  bind(L_2TAG_PACKET_8_0_2);
  cmpl(eax, 2146435072);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
  movl(eax, Address(rsp,12));
  cmpl(eax, INT_MIN);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2);
  movsd(xmm0, ExternalAddress(XMAX));      // 0xffffffffUL, 0x7fefffffUL
  mulsd(xmm0, xmm0);

  bind(L_2TAG_PACKET_7_0_2);
  movl(Address(rsp,0), 14);
  jmp(L_2TAG_PACKET_6_0_2);

  bind(L_2TAG_PACKET_10_0_2);
  movsd(xmm0, ExternalAddress(XMIN));      // 0x00000000UL, 0x00100000UL
  mulsd(xmm0, xmm0);
  movl(Address(rsp,0), 15);
  jmp(L_2TAG_PACKET_6_0_2);

  bind(L_2TAG_PACKET_9_0_2);
  movl(edx, Address(rsp,8));
  cmpl(eax, 2146435072);
  jcc(Assembler::above, L_2TAG_PACKET_11_0_2);
  cmpl(edx, 0);
  jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
  movl(eax, Address(rsp,12));
  cmpl(eax, 2146435072);
  jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2);
  movsd(xmm0, ExternalAddress(INF));       // 0x00000000UL, 0x7ff00000UL
  jmp(B1_5);

  bind(L_2TAG_PACKET_12_0_2);
  movsd(xmm0, ExternalAddress(ZERO));      // 0x00000000UL, 0x00000000UL
  jmp(B1_5);

  bind(L_2TAG_PACKET_11_0_2);
  movsd(xmm0, Address(rsp, 8));
  addsd(xmm0, xmm0);
  jmp(B1_5);

  bind(L_2TAG_PACKET_0_0_2);
  movl(eax, Address(rsp, 12));
  andl(eax, 2147483647);
  cmpl(eax, 1083179008);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
  movsd(Address(rsp, 8), xmm0);
  addsd(xmm0, ExternalAddress(ONE_val));   // 0x00000000UL, 0x3ff00000UL
  jmp(B1_5);

  bind(L_2TAG_PACKET_6_0_2);
  movq(Address(rsp, 16), xmm0);

  bind(B1_3);
  movq(xmm0, Address(rsp, 16));

  bind(B1_5);
  addq(rsp, 24);
}
void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2;
  Label L_2TAG_PACKET_10_0_2, start;

  assert_different_registers(tmp, eax, ecx, edx);
  jmp(start);
  address static_const_table = (address)_static_const_table_log;

  bind(start);
  subl(rsp, 104);
  movl(Address(rsp, 40), tmp);
  lea(tmp, ExternalAddress(static_const_table));
  xorpd(xmm2, xmm2);
  movl(eax, 16368);
  pinsrw(xmm2, eax, 3);
  xorpd(xmm3, xmm3);
  movl(edx, 30704);
  pinsrw(xmm3, edx, 3);
  movsd(xmm0, Address(rsp, 112));
  movapd(xmm1, xmm0);
  movl(ecx, 32768);
  movdl(xmm4, ecx);
  movsd(xmm5, Address(tmp, 2128));         // 0x00000000UL, 0xffffe000UL
  pextrw(eax, xmm0, 3);
  por(xmm0, xmm2);
  psllq(xmm0, 5);
  movl(ecx, 16352);
  psrlq(xmm0, 34);
  rcpss(xmm0, xmm0);
  psllq(xmm1, 12);
  pshufd(xmm6, xmm5, 228);
  psrlq(xmm1, 12);
  subl(eax, 16);
  cmpl(eax, 32736);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);

  bind(L_2TAG_PACKET_1_0_2);
  paddd(xmm0, xmm4);
  por(xmm1, xmm3);
  movdl(edx, xmm0);
  psllq(xmm0, 29);
  pand(xmm5, xmm1);
  pand(xmm0, xmm6);
  subsd(xmm1, xmm5);
  mulpd(xmm5, xmm0);
  andl(eax, 32752);
  subl(eax, ecx);
  cvtsi2sdl(xmm7, eax);
  mulsd(xmm1, xmm0);
  movsd(xmm6, Address(tmp, 2064));         // 0xfefa3800UL, 0x3fa62e42UL
  movdqu(xmm3, Address(tmp, 2080));        // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
  subsd(xmm5, xmm2);
  andl(edx, 16711680);
  shrl(edx, 12);
  movdqu(xmm0, Address(tmp, edx));
  movdqu(xmm4, Address(tmp, 2096));        // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
  addsd(xmm1, xmm5);
  movdqu(xmm2, Address(tmp, 2112));        // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
  mulsd(xmm6, xmm7);
  pshufd(xmm5, xmm1, 68);
  mulsd(xmm7, Address(tmp, 2072));         // 0x93c76730UL, 0x3ceef357UL, 0x92492492UL, 0x3fc24924UL
  mulsd(xmm3, xmm1);
  addsd(xmm0, xmm6);
  mulpd(xmm4, xmm5);
  mulpd(xmm5, xmm5);
  pshufd(xmm6, xmm0, 228);
  addsd(xmm0, xmm1);
  addpd(xmm4, xmm2);
  mulpd(xmm3, xmm5);
  subsd(xmm6, xmm0);
  mulsd(xmm4, xmm1);
  pshufd(xmm2, xmm0, 238);
  addsd(xmm1, xmm6);
  mulsd(xmm5, xmm5);
  addsd(xmm7, xmm2);
  addpd(xmm4, xmm3);
  addsd(xmm1, xmm7);
  mulpd(xmm4, xmm5);
  addsd(xmm1, xmm4);
  pshufd(xmm5, xmm4, 238);
  addsd(xmm1, xmm5);
  addsd(xmm0, xmm1);
  jmp(L_2TAG_PACKET_2_0_2);

  bind(L_2TAG_PACKET_0_0_2);
  movsd(xmm0, Address(rsp, 112));
  movdqu(xmm1, xmm0);
  addl(eax, 16);
  cmpl(eax, 32768);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
  cmpl(eax, 16);
  jcc(Assembler::below, L_2TAG_PACKET_4_0_2);

  bind(L_2TAG_PACKET_5_0_2);
  addsd(xmm0, xmm0);
  jmp(L_2TAG_PACKET_2_0_2);

  bind(L_2TAG_PACKET_6_0_2);
  jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
  cmpl(edx, 0);
  jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
  jmp(L_2TAG_PACKET_7_0_2);

  bind(L_2TAG_PACKET_3_0_2);
  movdl(edx, xmm1);
  psrlq(xmm1, 32);
  movdl(ecx, xmm1);
  addl(ecx, ecx);
  cmpl(ecx, -2097152);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2);
  orl(edx, ecx);
  cmpl(edx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);

  bind(L_2TAG_PACKET_7_0_2);
  xorpd(xmm1, xmm1);
  xorpd(xmm0, xmm0);
  movl(eax, 32752);
  pinsrw(xmm1, eax, 3);
  movl(edx, 3);
  mulsd(xmm0, xmm1);

  bind(L_2TAG_PACKET_9_0_2);
  movsd(Address(rsp, 0), xmm0);
  movsd(xmm0, Address(rsp, 112));
  fld_d(Address(rsp, 0));
  jmp(L_2TAG_PACKET_10_0_2);

  bind(L_2TAG_PACKET_8_0_2);
  xorpd(xmm1, xmm1);
  xorpd(xmm0, xmm0);
  movl(eax, 49136);
  pinsrw(xmm0, eax, 3);
  divsd(xmm0, xmm1);
  movl(edx, 2);
  jmp(L_2TAG_PACKET_9_0_2);

  bind(L_2TAG_PACKET_4_0_2);
  movdl(edx, xmm1);
  psrlq(xmm1, 32);
  movdl(ecx, xmm1);
  orl(edx, ecx);
  cmpl(edx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
  xorpd(xmm1, xmm1);
  movl(eax, 18416);
  pinsrw(xmm1, eax, 3);
  mulsd(xmm0, xmm1);
  movapd(xmm1, xmm0);
  pextrw(eax, xmm0, 3);
  por(xmm0, xmm2);
  psllq(xmm0, 5);
  movl(ecx, 18416);
  psrlq(xmm0, 34);
  rcpss(xmm0, xmm0);
  psllq(xmm1, 12);
  pshufd(xmm6, xmm5, 228);
  psrlq(xmm1, 12);
  jmp(L_2TAG_PACKET_1_0_2);

  bind(L_2TAG_PACKET_2_0_2);
  movsd(Address(rsp, 24), xmm0);
  fld_d(Address(rsp, 24));

  bind(L_2TAG_PACKET_10_0_2);
  movl(tmp, Address(rsp, 40));
}
void InterpreterStubs::generate_interpreter_fill_in_tags() {
  comment_section("Interpreter fill in tags");
  entry("interpreter_fill_in_tags");
  comment("eax: return address of method");
  comment("ebx: method");
  comment("ecx: size of parameters.  Guaranteed to be >= 1");
  comment("edx: call info from call site");
  comment("Must preserve eax, ebx, ecx");

  // stack layout:
  //   sp return address of caller
  //      --> argument n
  //      -->    ...
  //      --> argument 0

  Label extended_call_info;

  comment("Compact call info or normal call info?");
  testl(edx, edx); 
  jcc(positive, Constant(extended_call_info));

  Label loop_entry, loop_condition;
  comment("We have a compact call info");
  movl(edi, ecx);

bind(loop_entry);
  decl(edi);
  comment("Store int tag");
  movl(Address(esp, edi, times_8, Constant(BytesPerWord)), Constant(int_tag));
  comment("Test the bit in the call info");
  GUARANTEE(CallInfo::format1_tag_start == 0, "Tag must start at bit position 0 for this code to work");
  btl(edx, edi);
  jcc(carry_clear, Constant(loop_condition));
  comment("Store obj tag");
  movl(Address(esp, edi, times_8, Constant(BytesPerWord)), Constant(obj_tag));
  bind(loop_condition);
  testl(edi, edi);
  jcc(not_zero, Constant(loop_entry));
  ret();

bind(extended_call_info);
  comment("Normal call info");
  // The following code is slightly complicated.  "Bit offset" below
  // pretends like the callinfo's are in a bit array, as follows:
  //     Callinfo describing bci and offset
  //     Size [16 bits] and stack info 0-3
  //     Stack info 4-11
  // We ignore the fact that each of these words is preceded by a byte
  // that makes it look like an instruction.
  pushl(ecx); 
  pushl(ebx);
  Label loopx_entry, loopx_done;
 
  comment("Bit offset of first argument in CallInfo array");
  movzxw(edx, Address(eax, Constant(5 + 1)));  // total number of locals/expr
  subl(edx, ecx);               // number of locals/expr belonging to callee
  shll(edx, Constant(2));       // number of bits per nybble
  addl(edx, Constant(32 + 16)); // 48 bits is the 32 bit callinfo and 16bit size info

  comment("Decrement argument count; move to more convenient register");
  leal(esi, Address(ecx, Constant(-1)));

  comment("Location of tag of esi-th local");
  leal(ebx, Address(esp, Constant(3 * BytesPerWord)));

bind(loopx_entry);
  comment("eax holds the return address");
  comment("ebx holds address of the esi-th tag");
  comment("esi is the local whose tag we are setting");
  comment("edx contains the bit offset of Local 0 in the CallInfo array");
  comment("Get bit offset of esi-th local");
  leal(ecx, Address(edx, esi, times_4));

  comment("From bit offset, get word offset, then multiply by 5");
  movl(edi, ecx);
  shrl(edi, Constant(5));
  leal(edi, Address(edi, edi, times_4));

  comment("Get the appropriate CallInfo word; extract the nybble");
  movl(edi, Address(eax, edi, times_1, Constant(1)));
  shrl(edi);
  andl(edi, Constant(0xF));

  comment("Tag is (1 << value) >> 1.  This is 0 when value == 0");
  movl(ecx, edi);
  movl(edi, Constant(1));
  shll(edi);
  shrl(edi, Constant(1));

  comment("Store the tag");
  movl(Address(ebx), edi);

  comment("Are we done?");
  decl(esi); 
  addl(ebx, Constant(8));
  testl(esi, esi);
  jcc(greater_equal, Constant(loopx_entry));
bind(loopx_done);
  popl(ebx); 
  popl(ecx);
  ret();

  entry_end(); // interpreter_fill_in_tags
}
Exemple #13
0
//------------------------------------------------------------------------------
// MethodHandles::generate_method_handle_stub
//
// Generate an "entry" field for a method handle.
// This determines how the method handle will respond to calls.
void MethodHandles::generate_method_handle_stub(MacroAssembler* _masm, MethodHandles::EntryKind ek) {
  // Here is the register state during an interpreted call,
  // as set up by generate_method_handle_interpreter_entry():
  // - rbx: garbage temp (was MethodHandle.invoke methodOop, unused)
  // - rcx: receiver method handle
  // - rax: method handle type (only used by the check_mtype entry point)
  // - rsi/r13: sender SP (must preserve; see prepare_to_jump_from_interpreted)
  // - rdx: garbage temp, can blow away

  const Register rcx_recv    = rcx;
  const Register rax_argslot = rax;
  const Register rbx_temp    = rbx;
  const Register rdx_temp    = rdx;

  // This guy is set up by prepare_to_jump_from_interpreted (from interpreted calls)
  // and gen_c2i_adapter (from compiled calls):
  const Register saved_last_sp = LP64_ONLY(r13) NOT_LP64(rsi);

  // Argument registers for _raise_exception.
  // 32-bit: Pass first two oop/int args in registers ECX and EDX.
  const Register rarg0_code     = LP64_ONLY(j_rarg0) NOT_LP64(rcx);
  const Register rarg1_actual   = LP64_ONLY(j_rarg1) NOT_LP64(rdx);
  const Register rarg2_required = LP64_ONLY(j_rarg2) NOT_LP64(rdi);
  assert_different_registers(rarg0_code, rarg1_actual, rarg2_required, saved_last_sp);

  guarantee(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes() != 0, "must have offsets");

  // some handy addresses
  Address rbx_method_fie(     rbx,      methodOopDesc::from_interpreted_offset() );
  Address rbx_method_fce(     rbx,      methodOopDesc::from_compiled_offset() );

  Address rcx_mh_vmtarget(    rcx_recv, java_lang_invoke_MethodHandle::vmtarget_offset_in_bytes() );
  Address rcx_dmh_vmindex(    rcx_recv, java_lang_invoke_DirectMethodHandle::vmindex_offset_in_bytes() );

  Address rcx_bmh_vmargslot(  rcx_recv, java_lang_invoke_BoundMethodHandle::vmargslot_offset_in_bytes() );
  Address rcx_bmh_argument(   rcx_recv, java_lang_invoke_BoundMethodHandle::argument_offset_in_bytes() );

  Address rcx_amh_vmargslot(  rcx_recv, java_lang_invoke_AdapterMethodHandle::vmargslot_offset_in_bytes() );
  Address rcx_amh_argument(   rcx_recv, java_lang_invoke_AdapterMethodHandle::argument_offset_in_bytes() );
  Address rcx_amh_conversion( rcx_recv, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes() );
  Address vmarg;                // __ argument_address(vmargslot)

  const int java_mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();

  if (have_entry(ek)) {
    __ nop();                   // empty stubs make SG sick
    return;
  }

  address interp_entry = __ pc();

  trace_method_handle(_masm, entry_name(ek));

  BLOCK_COMMENT(entry_name(ek));

  switch ((int) ek) {
  case _raise_exception:
    {
      // Not a real MH entry, but rather shared code for raising an
      // exception.  Since we use the compiled entry, arguments are
      // expected in compiler argument registers.
      assert(raise_exception_method(), "must be set");
      assert(raise_exception_method()->from_compiled_entry(), "method must be linked");

      const Register rdi_pc = rax;
      __ pop(rdi_pc);  // caller PC
      __ mov(rsp, saved_last_sp);  // cut the stack back to where the caller started

      Register rbx_method = rbx_temp;
      Label L_no_method;
      // FIXME: fill in _raise_exception_method with a suitable java.lang.invoke method
      __ movptr(rbx_method, ExternalAddress((address) &_raise_exception_method));
      __ testptr(rbx_method, rbx_method);
      __ jccb(Assembler::zero, L_no_method);

      const int jobject_oop_offset = 0;
      __ movptr(rbx_method, Address(rbx_method, jobject_oop_offset));  // dereference the jobject
      __ testptr(rbx_method, rbx_method);
      __ jccb(Assembler::zero, L_no_method);
      __ verify_oop(rbx_method);

      NOT_LP64(__ push(rarg2_required));
      __ push(rdi_pc);         // restore caller PC
      __ jmp(rbx_method_fce);  // jump to compiled entry

      // Do something that is at least causes a valid throw from the interpreter.
      __ bind(L_no_method);
      __ push(rarg2_required);
      __ push(rarg1_actual);
      __ jump(ExternalAddress(Interpreter::throw_WrongMethodType_entry()));
    }
    break;

  case _invokestatic_mh:
  case _invokespecial_mh:
    {
      Register rbx_method = rbx_temp;
      __ load_heap_oop(rbx_method, rcx_mh_vmtarget); // target is a methodOop
      __ verify_oop(rbx_method);
      // same as TemplateTable::invokestatic or invokespecial,
      // minus the CP setup and profiling:
      if (ek == _invokespecial_mh) {
        // Must load & check the first argument before entering the target method.
        __ load_method_handle_vmslots(rax_argslot, rcx_recv, rdx_temp);
        __ movptr(rcx_recv, __ argument_address(rax_argslot, -1));
        __ null_check(rcx_recv);
        __ verify_oop(rcx_recv);
      }
      __ jmp(rbx_method_fie);
    }
    break;

  case _invokevirtual_mh:
    {
      // same as TemplateTable::invokevirtual,
      // minus the CP setup and profiling:

      // pick out the vtable index and receiver offset from the MH,
      // and then we can discard it:
      __ load_method_handle_vmslots(rax_argslot, rcx_recv, rdx_temp);
      Register rbx_index = rbx_temp;
      __ movl(rbx_index, rcx_dmh_vmindex);
      // Note:  The verifier allows us to ignore rcx_mh_vmtarget.
      __ movptr(rcx_recv, __ argument_address(rax_argslot, -1));
      __ null_check(rcx_recv, oopDesc::klass_offset_in_bytes());

      // get receiver klass
      Register rax_klass = rax_argslot;
      __ load_klass(rax_klass, rcx_recv);
      __ verify_oop(rax_klass);

      // get target methodOop & entry point
      const int base = instanceKlass::vtable_start_offset() * wordSize;
      assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
      Address vtable_entry_addr(rax_klass,
                                rbx_index, Address::times_ptr,
                                base + vtableEntry::method_offset_in_bytes());
      Register rbx_method = rbx_temp;
      __ movptr(rbx_method, vtable_entry_addr);

      __ verify_oop(rbx_method);
      __ jmp(rbx_method_fie);
    }
    break;

  case _invokeinterface_mh:
    {
      // same as TemplateTable::invokeinterface,
      // minus the CP setup and profiling:

      // pick out the interface and itable index from the MH.
      __ load_method_handle_vmslots(rax_argslot, rcx_recv, rdx_temp);
      Register rdx_intf  = rdx_temp;
      Register rbx_index = rbx_temp;
      __ load_heap_oop(rdx_intf, rcx_mh_vmtarget);
      __ movl(rbx_index, rcx_dmh_vmindex);
      __ movptr(rcx_recv, __ argument_address(rax_argslot, -1));
      __ null_check(rcx_recv, oopDesc::klass_offset_in_bytes());

      // get receiver klass
      Register rax_klass = rax_argslot;
      __ load_klass(rax_klass, rcx_recv);
      __ verify_oop(rax_klass);

      Register rdi_temp   = rdi;
      Register rbx_method = rbx_index;

      // get interface klass
      Label no_such_interface;
      __ verify_oop(rdx_intf);
      __ lookup_interface_method(rax_klass, rdx_intf,
                                 // note: next two args must be the same:
                                 rbx_index, rbx_method,
                                 rdi_temp,
                                 no_such_interface);

      __ verify_oop(rbx_method);
      __ jmp(rbx_method_fie);
      __ hlt();

      __ bind(no_such_interface);
      // Throw an exception.
      // For historical reasons, it will be IncompatibleClassChangeError.
      __ mov(rbx_temp, rcx_recv);  // rarg2_required might be RCX
      assert_different_registers(rarg2_required, rbx_temp);
      __ movptr(rarg2_required, Address(rdx_intf, java_mirror_offset));  // required interface
      __ mov(   rarg1_actual,   rbx_temp);                               // bad receiver
      __ movl(  rarg0_code,     (int) Bytecodes::_invokeinterface);      // who is complaining?
      __ jump(ExternalAddress(from_interpreted_entry(_raise_exception)));
    }
    break;

  case _bound_ref_mh:
  case _bound_int_mh:
  case _bound_long_mh:
  case _bound_ref_direct_mh:
  case _bound_int_direct_mh:
  case _bound_long_direct_mh:
    {
      bool direct_to_method = (ek >= _bound_ref_direct_mh);
      BasicType arg_type  = T_ILLEGAL;
      int       arg_mask  = _INSERT_NO_MASK;
      int       arg_slots = -1;
      get_ek_bound_mh_info(ek, arg_type, arg_mask, arg_slots);

      // make room for the new argument:
      __ movl(rax_argslot, rcx_bmh_vmargslot);
      __ lea(rax_argslot, __ argument_address(rax_argslot));

      insert_arg_slots(_masm, arg_slots * stack_move_unit(), arg_mask, rax_argslot, rbx_temp, rdx_temp);

      // store bound argument into the new stack slot:
      __ load_heap_oop(rbx_temp, rcx_bmh_argument);
      if (arg_type == T_OBJECT) {
        __ movptr(Address(rax_argslot, 0), rbx_temp);
      } else {
        Address prim_value_addr(rbx_temp, java_lang_boxing_object::value_offset_in_bytes(arg_type));
        const int arg_size = type2aelembytes(arg_type);
        __ load_sized_value(rdx_temp, prim_value_addr, arg_size, is_signed_subword_type(arg_type), rbx_temp);
        __ store_sized_value(Address(rax_argslot, 0), rdx_temp, arg_size, rbx_temp);
      }

      if (direct_to_method) {
        Register rbx_method = rbx_temp;
        __ load_heap_oop(rbx_method, rcx_mh_vmtarget);
        __ verify_oop(rbx_method);
        __ jmp(rbx_method_fie);
      } else {
        __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
        __ verify_oop(rcx_recv);
        __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
      }
    }
    break;

  case _adapter_retype_only:
  case _adapter_retype_raw:
    // immediately jump to the next MH layer:
    __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
    __ verify_oop(rcx_recv);
    __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
    // This is OK when all parameter types widen.
    // It is also OK when a return type narrows.
    break;

  case _adapter_check_cast:
    {
      // temps:
      Register rbx_klass = rbx_temp; // interesting AMH data

      // check a reference argument before jumping to the next layer of MH:
      __ movl(rax_argslot, rcx_amh_vmargslot);
      vmarg = __ argument_address(rax_argslot);

      // What class are we casting to?
      __ load_heap_oop(rbx_klass, rcx_amh_argument); // this is a Class object!
      __ load_heap_oop(rbx_klass, Address(rbx_klass, java_lang_Class::klass_offset_in_bytes()));

      Label done;
      __ movptr(rdx_temp, vmarg);
      __ testptr(rdx_temp, rdx_temp);
      __ jcc(Assembler::zero, done);         // no cast if null
      __ load_klass(rdx_temp, rdx_temp);

      // live at this point:
      // - rbx_klass:  klass required by the target method
      // - rdx_temp:   argument klass to test
      // - rcx_recv:   adapter method handle
      __ check_klass_subtype(rdx_temp, rbx_klass, rax_argslot, done);

      // If we get here, the type check failed!
      // Call the wrong_method_type stub, passing the failing argument type in rax.
      Register rax_mtype = rax_argslot;
      __ movl(rax_argslot, rcx_amh_vmargslot);  // reload argslot field
      __ movptr(rdx_temp, vmarg);

      assert_different_registers(rarg2_required, rdx_temp);
      __ load_heap_oop(rarg2_required, rcx_amh_argument);             // required class
      __ mov(          rarg1_actual,   rdx_temp);                     // bad object
      __ movl(         rarg0_code,     (int) Bytecodes::_checkcast);  // who is complaining?
      __ jump(ExternalAddress(from_interpreted_entry(_raise_exception)));

      __ bind(done);
      // get the new MH:
      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
    }
    break;

  case _adapter_prim_to_prim:
  case _adapter_ref_to_prim:
    // handled completely by optimized cases
    __ stop("init_AdapterMethodHandle should not issue this");
    break;

  case _adapter_opt_i2i:        // optimized subcase of adapt_prim_to_prim
//case _adapter_opt_f2i:        // optimized subcase of adapt_prim_to_prim
  case _adapter_opt_l2i:        // optimized subcase of adapt_prim_to_prim
  case _adapter_opt_unboxi:     // optimized subcase of adapt_ref_to_prim
    {
      // perform an in-place conversion to int or an int subword
      __ movl(rax_argslot, rcx_amh_vmargslot);
      vmarg = __ argument_address(rax_argslot);

      switch (ek) {
      case _adapter_opt_i2i:
        __ movl(rdx_temp, vmarg);
        break;
      case _adapter_opt_l2i:
        {
          // just delete the extra slot; on a little-endian machine we keep the first
          __ lea(rax_argslot, __ argument_address(rax_argslot, 1));
          remove_arg_slots(_masm, -stack_move_unit(),
                           rax_argslot, rbx_temp, rdx_temp);
          vmarg = Address(rax_argslot, -Interpreter::stackElementSize);
          __ movl(rdx_temp, vmarg);
        }
        break;
      case _adapter_opt_unboxi:
        {
          // Load the value up from the heap.
          __ movptr(rdx_temp, vmarg);
          int value_offset = java_lang_boxing_object::value_offset_in_bytes(T_INT);
#ifdef ASSERT
          for (int bt = T_BOOLEAN; bt < T_INT; bt++) {
            if (is_subword_type(BasicType(bt)))
              assert(value_offset == java_lang_boxing_object::value_offset_in_bytes(BasicType(bt)), "");
          }
#endif
          __ null_check(rdx_temp, value_offset);
          __ movl(rdx_temp, Address(rdx_temp, value_offset));
          // We load this as a word.  Because we are little-endian,
          // the low bits will be correct, but the high bits may need cleaning.
          // The vminfo will guide us to clean those bits.
        }
        break;
      default:
        ShouldNotReachHere();
      }

      // Do the requested conversion and store the value.
      Register rbx_vminfo = rbx_temp;
      __ movl(rbx_vminfo, rcx_amh_conversion);
      assert(CONV_VMINFO_SHIFT == 0, "preshifted");

      // get the new MH:
      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
      // (now we are done with the old MH)

      // original 32-bit vmdata word must be of this form:
      //    | MBZ:6 | signBitCount:8 | srcDstTypes:8 | conversionOp:8 |
      __ xchgptr(rcx, rbx_vminfo);                // free rcx for shifts
      __ shll(rdx_temp /*, rcx*/);
      Label zero_extend, done;
      __ testl(rcx, CONV_VMINFO_SIGN_FLAG);
      __ jccb(Assembler::zero, zero_extend);

      // this path is taken for int->byte, int->short
      __ sarl(rdx_temp /*, rcx*/);
      __ jmpb(done);

      __ bind(zero_extend);
      // this is taken for int->char
      __ shrl(rdx_temp /*, rcx*/);

      __ bind(done);
      __ movl(vmarg, rdx_temp);  // Store the value.
      __ xchgptr(rcx, rbx_vminfo);                // restore rcx_recv

      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
    }
    break;

  case _adapter_opt_i2l:        // optimized subcase of adapt_prim_to_prim
  case _adapter_opt_unboxl:     // optimized subcase of adapt_ref_to_prim
    {
      // perform an in-place int-to-long or ref-to-long conversion
      __ movl(rax_argslot, rcx_amh_vmargslot);

      // on a little-endian machine we keep the first slot and add another after
      __ lea(rax_argslot, __ argument_address(rax_argslot, 1));
      insert_arg_slots(_masm, stack_move_unit(), _INSERT_INT_MASK,
                       rax_argslot, rbx_temp, rdx_temp);
      Address vmarg1(rax_argslot, -Interpreter::stackElementSize);
      Address vmarg2 = vmarg1.plus_disp(Interpreter::stackElementSize);

      switch (ek) {
      case _adapter_opt_i2l:
        {
#ifdef _LP64
          __ movslq(rdx_temp, vmarg1);  // Load sign-extended
          __ movq(vmarg1, rdx_temp);    // Store into first slot
#else
          __ movl(rdx_temp, vmarg1);
          __ sarl(rdx_temp, BitsPerInt - 1);  // __ extend_sign()
          __ movl(vmarg2, rdx_temp); // store second word
#endif
        }
        break;
      case _adapter_opt_unboxl:
        {
          // Load the value up from the heap.
          __ movptr(rdx_temp, vmarg1);
          int value_offset = java_lang_boxing_object::value_offset_in_bytes(T_LONG);
          assert(value_offset == java_lang_boxing_object::value_offset_in_bytes(T_DOUBLE), "");
          __ null_check(rdx_temp, value_offset);
#ifdef _LP64
          __ movq(rbx_temp, Address(rdx_temp, value_offset));
          __ movq(vmarg1, rbx_temp);
#else
          __ movl(rbx_temp, Address(rdx_temp, value_offset + 0*BytesPerInt));
          __ movl(rdx_temp, Address(rdx_temp, value_offset + 1*BytesPerInt));
          __ movl(vmarg1, rbx_temp);
          __ movl(vmarg2, rdx_temp);
#endif
        }
        break;
      default:
        ShouldNotReachHere();
      }

      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
    }
    break;

  case _adapter_opt_f2d:        // optimized subcase of adapt_prim_to_prim
  case _adapter_opt_d2f:        // optimized subcase of adapt_prim_to_prim
    {
      // perform an in-place floating primitive conversion
      __ movl(rax_argslot, rcx_amh_vmargslot);
      __ lea(rax_argslot, __ argument_address(rax_argslot, 1));
      if (ek == _adapter_opt_f2d) {
        insert_arg_slots(_masm, stack_move_unit(), _INSERT_INT_MASK,
                         rax_argslot, rbx_temp, rdx_temp);
      }
      Address vmarg(rax_argslot, -Interpreter::stackElementSize);

#ifdef _LP64
      if (ek == _adapter_opt_f2d) {
        __ movflt(xmm0, vmarg);
        __ cvtss2sd(xmm0, xmm0);
        __ movdbl(vmarg, xmm0);
      } else {
        __ movdbl(xmm0, vmarg);
        __ cvtsd2ss(xmm0, xmm0);
        __ movflt(vmarg, xmm0);
      }
#else //_LP64
      if (ek == _adapter_opt_f2d) {
        __ fld_s(vmarg);        // load float to ST0
        __ fstp_s(vmarg);       // store single
      } else {
        __ fld_d(vmarg);        // load double to ST0
        __ fstp_s(vmarg);       // store single
      }
#endif //_LP64

      if (ek == _adapter_opt_d2f) {
        remove_arg_slots(_masm, -stack_move_unit(),
                         rax_argslot, rbx_temp, rdx_temp);
      }

      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
    }
    break;

  case _adapter_prim_to_ref:
    __ unimplemented(entry_name(ek)); // %%% FIXME: NYI
    break;

  case _adapter_swap_args:
  case _adapter_rot_args:
    // handled completely by optimized cases
    __ stop("init_AdapterMethodHandle should not issue this");
    break;

  case _adapter_opt_swap_1:
  case _adapter_opt_swap_2:
  case _adapter_opt_rot_1_up:
  case _adapter_opt_rot_1_down:
  case _adapter_opt_rot_2_up:
  case _adapter_opt_rot_2_down:
    {
      int swap_bytes = 0, rotate = 0;
      get_ek_adapter_opt_swap_rot_info(ek, swap_bytes, rotate);

      // 'argslot' is the position of the first argument to swap
      __ movl(rax_argslot, rcx_amh_vmargslot);
      __ lea(rax_argslot, __ argument_address(rax_argslot));

      // 'vminfo' is the second
      Register rbx_destslot = rbx_temp;
      __ movl(rbx_destslot, rcx_amh_conversion);
      assert(CONV_VMINFO_SHIFT == 0, "preshifted");
      __ andl(rbx_destslot, CONV_VMINFO_MASK);
      __ lea(rbx_destslot, __ argument_address(rbx_destslot));
      DEBUG_ONLY(verify_argslot(_masm, rbx_destslot, "swap point must fall within current frame"));

      if (!rotate) {
        for (int i = 0; i < swap_bytes; i += wordSize) {
          __ movptr(rdx_temp, Address(rax_argslot , i));
          __ push(rdx_temp);
          __ movptr(rdx_temp, Address(rbx_destslot, i));
          __ movptr(Address(rax_argslot, i), rdx_temp);
          __ pop(rdx_temp);
          __ movptr(Address(rbx_destslot, i), rdx_temp);
        }
      } else {
        // push the first chunk, which is going to get overwritten
        for (int i = swap_bytes; (i -= wordSize) >= 0; ) {
          __ movptr(rdx_temp, Address(rax_argslot, i));
          __ push(rdx_temp);
        }

        if (rotate > 0) {
          // rotate upward
          __ subptr(rax_argslot, swap_bytes);
#ifdef ASSERT
          {
            // Verify that argslot > destslot, by at least swap_bytes.
            Label L_ok;
            __ cmpptr(rax_argslot, rbx_destslot);
            __ jccb(Assembler::aboveEqual, L_ok);
            __ stop("source must be above destination (upward rotation)");
            __ bind(L_ok);
          }
#endif
          // work argslot down to destslot, copying contiguous data upwards
          // pseudo-code:
          //   rax = src_addr - swap_bytes
          //   rbx = dest_addr
          //   while (rax >= rbx) *(rax + swap_bytes) = *(rax + 0), rax--;
          Label loop;
          __ bind(loop);
          __ movptr(rdx_temp, Address(rax_argslot, 0));
          __ movptr(Address(rax_argslot, swap_bytes), rdx_temp);
          __ addptr(rax_argslot, -wordSize);
          __ cmpptr(rax_argslot, rbx_destslot);
          __ jccb(Assembler::aboveEqual, loop);
        } else {
          __ addptr(rax_argslot, swap_bytes);
#ifdef ASSERT
          {
            // Verify that argslot < destslot, by at least swap_bytes.
            Label L_ok;
            __ cmpptr(rax_argslot, rbx_destslot);
            __ jccb(Assembler::belowEqual, L_ok);
            __ stop("source must be below destination (downward rotation)");
            __ bind(L_ok);
          }
#endif
          // work argslot up to destslot, copying contiguous data downwards
          // pseudo-code:
          //   rax = src_addr + swap_bytes
          //   rbx = dest_addr
          //   while (rax <= rbx) *(rax - swap_bytes) = *(rax + 0), rax++;
          Label loop;
          __ bind(loop);
          __ movptr(rdx_temp, Address(rax_argslot, 0));
          __ movptr(Address(rax_argslot, -swap_bytes), rdx_temp);
          __ addptr(rax_argslot, wordSize);
          __ cmpptr(rax_argslot, rbx_destslot);
          __ jccb(Assembler::belowEqual, loop);
        }

        // pop the original first chunk into the destination slot, now free
        for (int i = 0; i < swap_bytes; i += wordSize) {
          __ pop(rdx_temp);
          __ movptr(Address(rbx_destslot, i), rdx_temp);
        }
      }

      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
    }
    break;

  case _adapter_dup_args:
    {
      // 'argslot' is the position of the first argument to duplicate
      __ movl(rax_argslot, rcx_amh_vmargslot);
      __ lea(rax_argslot, __ argument_address(rax_argslot));

      // 'stack_move' is negative number of words to duplicate
      Register rdx_stack_move = rdx_temp;
      __ movl2ptr(rdx_stack_move, rcx_amh_conversion);
      __ sarptr(rdx_stack_move, CONV_STACK_MOVE_SHIFT);

      int argslot0_num = 0;
      Address argslot0 = __ argument_address(RegisterOrConstant(argslot0_num));
      assert(argslot0.base() == rsp, "");
      int pre_arg_size = argslot0.disp();
      assert(pre_arg_size % wordSize == 0, "");
      assert(pre_arg_size > 0, "must include PC");

      // remember the old rsp+1 (argslot[0])
      Register rbx_oldarg = rbx_temp;
      __ lea(rbx_oldarg, argslot0);

      // move rsp down to make room for dups
      __ lea(rsp, Address(rsp, rdx_stack_move, Address::times_ptr));

      // compute the new rsp+1 (argslot[0])
      Register rdx_newarg = rdx_temp;
      __ lea(rdx_newarg, argslot0);

      __ push(rdi);             // need a temp
      // (preceding push must be done after arg addresses are taken!)

      // pull down the pre_arg_size data (PC)
      for (int i = -pre_arg_size; i < 0; i += wordSize) {
        __ movptr(rdi, Address(rbx_oldarg, i));
        __ movptr(Address(rdx_newarg, i), rdi);
      }

      // copy from rax_argslot[0...] down to new_rsp[1...]
      // pseudo-code:
      //   rbx = old_rsp+1
      //   rdx = new_rsp+1
      //   rax = argslot
      //   while (rdx < rbx) *rdx++ = *rax++
      Label loop;
      __ bind(loop);
      __ movptr(rdi, Address(rax_argslot, 0));
      __ movptr(Address(rdx_newarg, 0), rdi);
      __ addptr(rax_argslot, wordSize);
      __ addptr(rdx_newarg, wordSize);
      __ cmpptr(rdx_newarg, rbx_oldarg);
      __ jccb(Assembler::less, loop);

      __ pop(rdi);              // restore temp

      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
    }
    break;

  case _adapter_drop_args:
    {
      // 'argslot' is the position of the first argument to nuke
      __ movl(rax_argslot, rcx_amh_vmargslot);
      __ lea(rax_argslot, __ argument_address(rax_argslot));

      __ push(rdi);             // need a temp
      // (must do previous push after argslot address is taken)

      // 'stack_move' is number of words to drop
      Register rdi_stack_move = rdi;
      __ movl2ptr(rdi_stack_move, rcx_amh_conversion);
      __ sarptr(rdi_stack_move, CONV_STACK_MOVE_SHIFT);
      remove_arg_slots(_masm, rdi_stack_move,
                       rax_argslot, rbx_temp, rdx_temp);

      __ pop(rdi);              // restore temp

      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
    }
    break;

  case _adapter_collect_args:
    __ unimplemented(entry_name(ek)); // %%% FIXME: NYI
    break;

  case _adapter_spread_args:
    // handled completely by optimized cases
    __ stop("init_AdapterMethodHandle should not issue this");
    break;

  case _adapter_opt_spread_0:
  case _adapter_opt_spread_1:
  case _adapter_opt_spread_more:
    {
      // spread an array out into a group of arguments
      int length_constant = get_ek_adapter_opt_spread_info(ek);

      // find the address of the array argument
      __ movl(rax_argslot, rcx_amh_vmargslot);
      __ lea(rax_argslot, __ argument_address(rax_argslot));

      // grab some temps
      { __ push(rsi); __ push(rdi); }
      // (preceding pushes must be done after argslot address is taken!)
#define UNPUSH_RSI_RDI \
      { __ pop(rdi); __ pop(rsi); }

      // arx_argslot points both to the array and to the first output arg
      vmarg = Address(rax_argslot, 0);

      // Get the array value.
      Register  rsi_array       = rsi;
      Register  rdx_array_klass = rdx_temp;
      BasicType elem_type       = T_OBJECT;
      int       length_offset   = arrayOopDesc::length_offset_in_bytes();
      int       elem0_offset    = arrayOopDesc::base_offset_in_bytes(elem_type);
      __ movptr(rsi_array, vmarg);
      Label skip_array_check;
      if (length_constant == 0) {
        __ testptr(rsi_array, rsi_array);
        __ jcc(Assembler::zero, skip_array_check);
      }
      __ null_check(rsi_array, oopDesc::klass_offset_in_bytes());
      __ load_klass(rdx_array_klass, rsi_array);

      // Check the array type.
      Register rbx_klass = rbx_temp;
      __ load_heap_oop(rbx_klass, rcx_amh_argument); // this is a Class object!
      __ load_heap_oop(rbx_klass, Address(rbx_klass, java_lang_Class::klass_offset_in_bytes()));

      Label ok_array_klass, bad_array_klass, bad_array_length;
      __ check_klass_subtype(rdx_array_klass, rbx_klass, rdi, ok_array_klass);
      // If we get here, the type check failed!
      __ jmp(bad_array_klass);
      __ bind(ok_array_klass);

      // Check length.
      if (length_constant >= 0) {
        __ cmpl(Address(rsi_array, length_offset), length_constant);
      } else {
        Register rbx_vminfo = rbx_temp;
        __ movl(rbx_vminfo, rcx_amh_conversion);
        assert(CONV_VMINFO_SHIFT == 0, "preshifted");
        __ andl(rbx_vminfo, CONV_VMINFO_MASK);
        __ cmpl(rbx_vminfo, Address(rsi_array, length_offset));
      }
      __ jcc(Assembler::notEqual, bad_array_length);

      Register rdx_argslot_limit = rdx_temp;

      // Array length checks out.  Now insert any required stack slots.
      if (length_constant == -1) {
        // Form a pointer to the end of the affected region.
        __ lea(rdx_argslot_limit, Address(rax_argslot, Interpreter::stackElementSize));
        // 'stack_move' is negative number of words to insert
        Register rdi_stack_move = rdi;
        __ movl2ptr(rdi_stack_move, rcx_amh_conversion);
        __ sarptr(rdi_stack_move, CONV_STACK_MOVE_SHIFT);
        Register rsi_temp = rsi_array;  // spill this
        insert_arg_slots(_masm, rdi_stack_move, -1,
                         rax_argslot, rbx_temp, rsi_temp);
        // reload the array (since rsi was killed)
        __ movptr(rsi_array, vmarg);
      } else if (length_constant > 1) {
        int arg_mask = 0;
        int new_slots = (length_constant - 1);
        for (int i = 0; i < new_slots; i++) {
          arg_mask <<= 1;
          arg_mask |= _INSERT_REF_MASK;
        }
        insert_arg_slots(_masm, new_slots * stack_move_unit(), arg_mask,
                         rax_argslot, rbx_temp, rdx_temp);
      } else if (length_constant == 1) {
        // no stack resizing required
      } else if (length_constant == 0) {
        remove_arg_slots(_masm, -stack_move_unit(),
                         rax_argslot, rbx_temp, rdx_temp);
      }

      // Copy from the array to the new slots.
      // Note: Stack change code preserves integrity of rax_argslot pointer.
      // So even after slot insertions, rax_argslot still points to first argument.
      if (length_constant == -1) {
        // [rax_argslot, rdx_argslot_limit) is the area we are inserting into.
        Register rsi_source = rsi_array;
        __ lea(rsi_source, Address(rsi_array, elem0_offset));
        Label loop;
        __ bind(loop);
        __ movptr(rbx_temp, Address(rsi_source, 0));
        __ movptr(Address(rax_argslot, 0), rbx_temp);
        __ addptr(rsi_source, type2aelembytes(elem_type));
        __ addptr(rax_argslot, Interpreter::stackElementSize);
        __ cmpptr(rax_argslot, rdx_argslot_limit);
        __ jccb(Assembler::less, loop);
      } else if (length_constant == 0) {
        __ bind(skip_array_check);
        // nothing to copy
      } else {
        int elem_offset = elem0_offset;
        int slot_offset = 0;
        for (int index = 0; index < length_constant; index++) {
          __ movptr(rbx_temp, Address(rsi_array, elem_offset));
          __ movptr(Address(rax_argslot, slot_offset), rbx_temp);
          elem_offset += type2aelembytes(elem_type);
           slot_offset += Interpreter::stackElementSize;
        }
      }

      // Arguments are spread.  Move to next method handle.
      UNPUSH_RSI_RDI;
      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);

      __ bind(bad_array_klass);
      UNPUSH_RSI_RDI;
      assert(!vmarg.uses(rarg2_required), "must be different registers");
      __ movptr(rarg2_required, Address(rdx_array_klass, java_mirror_offset));  // required type
      __ movptr(rarg1_actual,   vmarg);                                         // bad array
      __ movl(  rarg0_code,     (int) Bytecodes::_aaload);                      // who is complaining?
      __ jump(ExternalAddress(from_interpreted_entry(_raise_exception)));

      __ bind(bad_array_length);
      UNPUSH_RSI_RDI;
      assert(!vmarg.uses(rarg2_required), "must be different registers");
      __ mov   (rarg2_required, rcx_recv);                       // AMH requiring a certain length
      __ movptr(rarg1_actual,   vmarg);                          // bad array
      __ movl(  rarg0_code,     (int) Bytecodes::_arraylength);  // who is complaining?
      __ jump(ExternalAddress(from_interpreted_entry(_raise_exception)));

#undef UNPUSH_RSI_RDI
    }
    break;

  case _adapter_flyby:
  case _adapter_ricochet:
    __ unimplemented(entry_name(ek)); // %%% FIXME: NYI
    break;

  default:  ShouldNotReachHere();
  }
  __ hlt();

  address me_cookie = MethodHandleEntry::start_compiled_entry(_masm, interp_entry);
  __ unimplemented(entry_name(ek)); // %%% FIXME: NYI

  init_entry(ek, MethodHandleEntry::finish_compiled_entry(_masm, me_cookie));
}
Exemple #14
0
static void usb_pta_init(void)
{
	unsigned int usb_dma_ctrl = 0;
	/* Set PTA mode to USB */
	andl(AR9170_PTA_REG_DMA_MODE_CTRL,
		~AR9170_PTA_DMA_MODE_CTRL_DISABLE_USB);

	/* Do a software reset to PTA component */
	orl(AR9170_PTA_REG_DMA_MODE_CTRL, AR9170_PTA_DMA_MODE_CTRL_RESET);
	andl(AR9170_PTA_REG_DMA_MODE_CTRL, ~AR9170_PTA_DMA_MODE_CTRL_RESET);

	if (usb_detect_highspeed()) {
		fw.usb.os_cfg_desc = &usb_config_fullspeed;
		fw.usb.cfg_desc = &usb_config_highspeed;

		/* 512 Byte DMA transfers */
		usb_dma_ctrl |= AR9170_USB_DMA_CTL_HIGH_SPEED;
	} else {
		fw.usb.cfg_desc = &usb_config_fullspeed;
		fw.usb.os_cfg_desc = &usb_config_highspeed;
	}

#ifdef CONFIG_CARL9170FW_USB_UP_STREAM
# if (CONFIG_CARL9170FW_RX_FRAME_LEN == 4096)
	usb_dma_ctrl |= AR9170_USB_DMA_CTL_UP_STREAM_4K;
# elif (CONFIG_CARL9170FW_RX_FRAME_LEN == 8192)
	usb_dma_ctrl |= AR9170_USB_DMA_CTL_UP_STREAM_8K;
# elif (CONFIG_CARL9170FW_RX_FRAME_LEN == 16384)
	usb_dma_ctrl |= AR9170_USB_DMA_CTL_UP_STREAM_16K;
# elif (CONFIG_CARL9170FW_RX_FRAME_LEN == 32768)
	usb_dma_ctrl |= AR9170_USB_DMA_CTL_UP_STREAM_32K;
# else
#	error "Invalid AR9170_RX_FRAME_LEN setting"
# endif

#else /* CONFIG_CARL9170FW_USB_UP_STREAM */
	usb_dma_ctrl |= AR9170_USB_DMA_CTL_UP_PACKET_MODE;
#endif /* CONFIG_CARL9170FW_USB_UP_STREAM */

#ifdef CONFIG_CARL9170FW_USB_DOWN_STREAM
	/* Enable down stream mode */
	usb_dma_ctrl |= AR9170_USB_DMA_CTL_DOWN_STREAM;
#endif /* CONFIG_CARL9170FW_USB_DOWN_STREAM */

#ifdef CONFIG_CARL9170FW_USB_UP_STREAM
	/* Set the up stream mode maximum aggregate number */
	set(AR9170_USB_REG_MAX_AGG_UPLOAD, 4);

	/*
	 * Set the up stream mode timeout value.
	 * NB: The vendor driver (otus) set 0x80?
	 */
	set(AR9170_USB_REG_UPLOAD_TIME_CTL, 0x80);
#endif /* CONFIG_CARL9170FW_USB_UP_STREAM */

	/* Enable up stream and down stream */
	usb_dma_ctrl |= AR9170_USB_DMA_CTL_ENABLE_TO_DEVICE |
	    AR9170_USB_DMA_CTL_ENABLE_FROM_DEVICE;

	set(AR9170_USB_REG_DMA_CTL, usb_dma_ctrl);
}
Exemple #15
0
    address generate_getPsrInfo() {
        // Flags to test CPU type.
        const uint32_t HS_EFL_AC           = 0x40000;
        const uint32_t HS_EFL_ID           = 0x200000;
        // Values for when we don't have a CPUID instruction.
        const int      CPU_FAMILY_SHIFT = 8;
        const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
        const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);

        Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
        Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done;

        StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
#   define __ _masm->

        address start = __ pc();

        //
        // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info);
        //
        // LP64: rcx and rdx are first and second argument registers on windows

        __ push(rbp);
#ifdef _LP64
        __ mov(rbp, c_rarg0); // cpuid_info address
#else
        __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
        __ push(rbx);
        __ push(rsi);
        __ pushf();          // preserve rbx, and flags
        __ pop(rax);
        __ push(rax);
        __ mov(rcx, rax);
        //
        // if we are unable to change the AC flag, we have a 386
        //
        __ xorl(rax, HS_EFL_AC);
        __ push(rax);
        __ popf();
        __ pushf();
        __ pop(rax);
        __ cmpptr(rax, rcx);
        __ jccb(Assembler::notEqual, detect_486);

        __ movl(rax, CPU_FAMILY_386);
        __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
        __ jmp(done);

        //
        // If we are unable to change the ID flag, we have a 486 which does
        // not support the "cpuid" instruction.
        //
        __ bind(detect_486);
        __ mov(rax, rcx);
        __ xorl(rax, HS_EFL_ID);
        __ push(rax);
        __ popf();
        __ pushf();
        __ pop(rax);
        __ cmpptr(rcx, rax);
        __ jccb(Assembler::notEqual, detect_586);

        __ bind(cpu486);
        __ movl(rax, CPU_FAMILY_486);
        __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
        __ jmp(done);

        //
        // At this point, we have a chip which supports the "cpuid" instruction
        //
        __ bind(detect_586);
        __ xorl(rax, rax);
        __ cpuid();
        __ orl(rax, rax);
        __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
        // value of at least 1, we give up and
        // assume a 486
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
        __ movl(Address(rsi, 0), rax);
        __ movl(Address(rsi, 4), rbx);
        __ movl(Address(rsi, 8), rcx);
        __ movl(Address(rsi,12), rdx);

        __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
        __ jccb(Assembler::belowEqual, std_cpuid4);

        //
        // cpuid(0xB) Processor Topology
        //
        __ movl(rax, 0xb);
        __ xorl(rcx, rcx);   // Threads level
        __ cpuid();

        __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
        __ movl(Address(rsi, 0), rax);
        __ movl(Address(rsi, 4), rbx);
        __ movl(Address(rsi, 8), rcx);
        __ movl(Address(rsi,12), rdx);

        __ movl(rax, 0xb);
        __ movl(rcx, 1);     // Cores level
        __ cpuid();
        __ push(rax);
        __ andl(rax, 0x1f);  // Determine if valid topology level
        __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
        __ andl(rax, 0xffff);
        __ pop(rax);
        __ jccb(Assembler::equal, std_cpuid4);

        __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
        __ movl(Address(rsi, 0), rax);
        __ movl(Address(rsi, 4), rbx);
        __ movl(Address(rsi, 8), rcx);
        __ movl(Address(rsi,12), rdx);

        __ movl(rax, 0xb);
        __ movl(rcx, 2);     // Packages level
        __ cpuid();
        __ push(rax);
        __ andl(rax, 0x1f);  // Determine if valid topology level
        __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
        __ andl(rax, 0xffff);
        __ pop(rax);
        __ jccb(Assembler::equal, std_cpuid4);

        __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
        __ movl(Address(rsi, 0), rax);
        __ movl(Address(rsi, 4), rbx);
        __ movl(Address(rsi, 8), rcx);
        __ movl(Address(rsi,12), rdx);

        //
        // cpuid(0x4) Deterministic cache params
        //
        __ bind(std_cpuid4);
        __ movl(rax, 4);
        __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
        __ jccb(Assembler::greater, std_cpuid1);

        __ xorl(rcx, rcx);   // L1 cache
        __ cpuid();
        __ push(rax);
        __ andl(rax, 0x1f);  // Determine if valid cache parameters used
        __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
        __ pop(rax);
        __ jccb(Assembler::equal, std_cpuid1);

        __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
        __ movl(Address(rsi, 0), rax);
        __ movl(Address(rsi, 4), rbx);
        __ movl(Address(rsi, 8), rcx);
        __ movl(Address(rsi,12), rdx);

        //
        // Standard cpuid(0x1)
        //
        __ bind(std_cpuid1);
        __ movl(rax, 1);
        __ cpuid();
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(Address(rsi, 0), rax);
        __ movl(Address(rsi, 4), rbx);
        __ movl(Address(rsi, 8), rcx);
        __ movl(Address(rsi,12), rdx);

        //
        // Check if OS has enabled XGETBV instruction to access XCR0
        // (OSXSAVE feature flag) and CPU supports AVX
        //
        __ andl(rcx, 0x18000000);
        __ cmpl(rcx, 0x18000000);
        __ jccb(Assembler::notEqual, sef_cpuid);

        //
        // XCR0, XFEATURE_ENABLED_MASK register
        //
        __ xorl(rcx, rcx);   // zero for XCR0 register
        __ xgetbv();
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
        __ movl(Address(rsi, 0), rax);
        __ movl(Address(rsi, 4), rdx);

        //
        // cpuid(0x7) Structured Extended Features
        //
        __ bind(sef_cpuid);
        __ movl(rax, 7);
        __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
        __ jccb(Assembler::greater, ext_cpuid);

        __ xorl(rcx, rcx);
        __ cpuid();
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
        __ movl(Address(rsi, 0), rax);
        __ movl(Address(rsi, 4), rbx);

        //
        // Extended cpuid(0x80000000)
        //
        __ bind(ext_cpuid);
        __ movl(rax, 0x80000000);
        __ cpuid();
        __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
        __ jcc(Assembler::belowEqual, done);
        __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
        __ jccb(Assembler::belowEqual, ext_cpuid1);
        __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
        __ jccb(Assembler::belowEqual, ext_cpuid5);
        __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
        __ jccb(Assembler::belowEqual, ext_cpuid7);
        //
        // Extended cpuid(0x80000008)
        //
        __ movl(rax, 0x80000008);
        __ cpuid();
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
        __ movl(Address(rsi, 0), rax);
        __ movl(Address(rsi, 4), rbx);
        __ movl(Address(rsi, 8), rcx);
        __ movl(Address(rsi,12), rdx);

        //
        // Extended cpuid(0x80000007)
        //
        __ bind(ext_cpuid7);
        __ movl(rax, 0x80000007);
        __ cpuid();
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
        __ movl(Address(rsi, 0), rax);
        __ movl(Address(rsi, 4), rbx);
        __ movl(Address(rsi, 8), rcx);
        __ movl(Address(rsi,12), rdx);

        //
        // Extended cpuid(0x80000005)
        //
        __ bind(ext_cpuid5);
        __ movl(rax, 0x80000005);
        __ cpuid();
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
        __ movl(Address(rsi, 0), rax);
        __ movl(Address(rsi, 4), rbx);
        __ movl(Address(rsi, 8), rcx);
        __ movl(Address(rsi,12), rdx);

        //
        // Extended cpuid(0x80000001)
        //
        __ bind(ext_cpuid1);
        __ movl(rax, 0x80000001);
        __ cpuid();
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
        __ movl(Address(rsi, 0), rax);
        __ movl(Address(rsi, 4), rbx);
        __ movl(Address(rsi, 8), rcx);
        __ movl(Address(rsi,12), rdx);

        //
        // return
        //
        __ bind(done);
        __ popf();
        __ pop(rsi);
        __ pop(rbx);
        __ pop(rbp);
        __ ret(0);

#   undef __

        return start;
    };