Exemple #1
0
int main ()
{
  testc (0, 0);
  testc (1, 1);
  testc (SCHAR_MAX, 1);
  testc (SCHAR_MAX+1, 0);
  testc (UCHAR_MAX, 0);

  tests (0, 0);
  tests (1, 1);
  tests (SHRT_MAX, 1);
  tests (SHRT_MAX+1, 0);
  tests (USHRT_MAX, 0);

  testi (0, 0);
  testi (1, 1);
  testi (INT_MAX, 1);
  testi (INT_MAX+1U, 0);
  testi (UINT_MAX, 0);

  testl (0, 0);
  testl (1, 1);
  testl (LONG_MAX, 1);
  testl (LONG_MAX+1UL, 0);
  testl (ULONG_MAX, 0);

  return 0;
}
Exemple #2
0
int
main(int argc, char *argv[])
{

	printf("1..3\n");

	test_invalid(0.0, 0.0);
	test_invalid(1.0, 0.0);
	test_invalid(INFINITY, 0.0);
	test_invalid(INFINITY, 1.0);
	test_invalid(-INFINITY, 1.0);
	test_invalid(NAN, 1.0);
	test_invalid(1.0, NAN);

	test(4, 4, 0, 1);
	test(0, 3.0, 0, 0);
	testd(0x1p-1074, 1, 0x1p-1074, 0);
	testf(0x1p-149, 1, 0x1p-149, 0);
	test(3.0, 4, -1, 1);
	test(3.0, -4, -1, -1);
	testd(275 * 1193040, 275, 0, 1193040);
	test(4.5 * 7.5, 4.5, -2.25, 8);	/* we should get the even one */
	testf(0x1.9044f6p-1, 0x1.ce662ep-1, -0x1.f109cp-4, 1);
#if LDBL_MANT_DIG > 53
	testl(-0x1.23456789abcdefp-2000L, 0x1.fedcba987654321p-2000L,
	      0x1.b72ea61d950c862p-2001L, -1);
#endif

	printf("ok 1 - rem\n");

	/*
	 * The actual quotient here is 864062210.50000003..., but
	 * double-precision division gets -8.64062210.5, which rounds
	 * the wrong way.  This test ensures that remquo() is smart
	 * enough to get the low-order bit right.
	 */
	testd(-0x1.98260f22fc6dep-302, 0x1.fb3167c430a13p-332,
	    0x1.fb3165b82de72p-333, -864062211);
	/* Even harder cases with greater exponent separation */
	test(0x1.fp100, 0x1.ep-40, -0x1.cp-41, 143165577);
	testd(-0x1.abcdefp120, 0x1.87654321p-120,
	    -0x1.69c78ec4p-121, -63816414);

	printf("ok 2 - rem\n");

	test(0x1.66666cp+120, 0x1p+71, 0.0, 1476395008);
	testd(-0x1.0000000000003p+0, 0x1.0000000000003p+0, -0.0, -1);
	testl(-0x1.0000000000003p+0, 0x1.0000000000003p+0, -0.0, -1);
	testd(-0x1.0000000000001p-749, 0x1.4p-1072, 0x1p-1074, -1288490189);
	testl(-0x1.0000000000001p-749, 0x1.4p-1072, 0x1p-1074, -1288490189);

	printf("ok 3 - rem\n");

	return (0);
}
Exemple #3
0
int main()
{
  test(0.0);
  testf(0.0);
  testl(0.0);
  return 0;
}
Exemple #4
0
int main ()
{
    test (1.0);
    testf (1.0f);
    testl (1.0l);
    return 0;
}
Exemple #5
0
        mnist_dataset(const std::string& path, const bool out_of_n_coding = true){
            std::cout << "Reading MNIST dataset..."<<std::flush;
            std::ifstream ftraind((path + "/train-images.idx3-ubyte").c_str(),std::ios::in | std::ios::binary); // image data
            std::ifstream ftrainl((path + "/train-labels.idx1-ubyte").c_str(),std::ios::in | std::ios::binary); // label data
            std::ifstream ftestd ((path + "/t10k-images.idx3-ubyte").c_str(),std::ios::in | std::ios::binary); // image data
            std::ifstream ftestl ((path + "/t10k-labels.idx1-ubyte").c_str(),std::ios::in | std::ios::binary); // label data
            assert(ftraind.is_open());
            assert(ftrainl.is_open());
            assert(ftestd.is_open());
            assert(ftestl.is_open());

            char buf[16];
            ftraind.read(buf,16);
            ftrainl.read(buf, 8);
            ftestd.read(buf,16);
            ftestl.read(buf, 8);
            cuv::tensor<unsigned char,cuv::host_memory_space> traind(cuv::extents[60000][784]);
            cuv::tensor<unsigned char,cuv::host_memory_space> trainl(cuv::extents[60000]);
            cuv::tensor<unsigned char,cuv::host_memory_space> testd(cuv::extents[10000][784]);
            cuv::tensor<unsigned char,cuv::host_memory_space> testl(cuv::extents[10000]);
            ftraind.read((char*)traind.ptr(), traind.size()); assert(ftraind.good());
            ftrainl.read((char*)trainl.ptr(), trainl.size()); assert(ftrainl.good());
            ftestd.read((char*)testd.ptr(), testd.size());    assert(ftestd.good());
            ftestl.read((char*)testl.ptr(), testl.size());    assert(ftestl.good());

            train_data.resize(traind.shape());
            test_data.resize(testd.shape());
            convert(train_data , traind); // convert data type
            convert(test_data  , testd); // convert data type

            if (out_of_n_coding){
                train_labels.resize(cuv::extents[60000][10]);
                test_labels.resize(cuv::extents[10000][10]);
                train_labels = 0.f;
                test_labels = 0.f;
                for (unsigned int i = 0; i < trainl.size(); ++i){
                    train_labels(i, trainl[i]) = 1.f;
                }
                for (unsigned int i = 0; i < testl.size(); ++i){
                    test_labels(i, testl[i]) = 1.f;
                }
            } else {
                train_labels.resize(cuv::extents[60000]);
                test_labels.resize(cuv::extents[10000]);
                for (unsigned int i = 0; i < trainl.size(); ++i){
                    train_labels(i) = trainl[i];
                }
                for (unsigned int i = 0; i < testl.size(); ++i){
                    test_labels(i) = testl[i];
                }
            }

            //train_data = train_data[cuv::indices[cuv::index_range(0,5000)][cuv::index_range()]];
            //train_labels = train_labels[cuv::indices[cuv::index_range(0,5000)][cuv::index_range()]];

            binary = true;
            channels = 1;
            image_size = 28;
            std::cout << "done."<<std::endl;
        }
Exemple #6
0
// Helper to insert argument slots into the stack.
// arg_slots must be a multiple of stack_move_unit() and <= 0
void MethodHandles::insert_arg_slots(MacroAssembler* _masm,
                                     RegisterOrConstant arg_slots,
                                     int arg_mask,
                                     Register rax_argslot,
                                     Register rbx_temp, Register rdx_temp, Register temp3_reg) {
  assert(temp3_reg == noreg, "temp3 not required");
  assert_different_registers(rax_argslot, rbx_temp, rdx_temp,
                             (!arg_slots.is_register() ? rsp : arg_slots.as_register()));

#ifdef ASSERT
  verify_argslot(_masm, rax_argslot, "insertion point must fall within current frame");
  if (arg_slots.is_register()) {
    Label L_ok, L_bad;
    __ cmpptr(arg_slots.as_register(), (int32_t) NULL_WORD);
    __ jccb(Assembler::greater, L_bad);
    __ testl(arg_slots.as_register(), -stack_move_unit() - 1);
    __ jccb(Assembler::zero, L_ok);
    __ bind(L_bad);
    __ stop("assert arg_slots <= 0 and clear low bits");
    __ bind(L_ok);
  } else {
    assert(arg_slots.as_constant() <= 0, "");
    assert(arg_slots.as_constant() % -stack_move_unit() == 0, "");
  }
#endif //ASSERT

#ifdef _LP64
  if (arg_slots.is_register()) {
    // clean high bits of stack motion register (was loaded as an int)
    __ movslq(arg_slots.as_register(), arg_slots.as_register());
  }
#endif

  // Make space on the stack for the inserted argument(s).
  // Then pull down everything shallower than rax_argslot.
  // The stacked return address gets pulled down with everything else.
  // That is, copy [rsp, argslot) downward by -size words.  In pseudo-code:
  //   rsp -= size;
  //   for (rdx = rsp + size; rdx < argslot; rdx++)
  //     rdx[-size] = rdx[0]
  //   argslot -= size;
  BLOCK_COMMENT("insert_arg_slots {");
  __ mov(rdx_temp, rsp);                        // source pointer for copy
  __ lea(rsp, Address(rsp, arg_slots, Address::times_ptr));
  {
    Label loop;
    __ BIND(loop);
    // pull one word down each time through the loop
    __ movptr(rbx_temp, Address(rdx_temp, 0));
    __ movptr(Address(rdx_temp, arg_slots, Address::times_ptr), rbx_temp);
    __ addptr(rdx_temp, wordSize);
    __ cmpptr(rdx_temp, rax_argslot);
    __ jccb(Assembler::less, loop);
  }

  // Now move the argslot down, to point to the opened-up space.
  __ lea(rax_argslot, Address(rax_argslot, arg_slots, Address::times_ptr));
  BLOCK_COMMENT("} insert_arg_slots");
}
Exemple #7
0
int
main ()
{
  int res[6];
  testf (f, res);
  test (d, res);
  testl (ld, res);
  return 0;
}
void CompilerStubs::generate_compiler_new_object() {
  comment_section("Compiler new object (any size)");
  comment("Register edx holds the instance size, register ebx holds the prototypical near of the instance class");
  Label slow_case;
  entry("compiler_new_object");

  comment("Get _inline_allocation_top");
  movl(eax, Address(Constant("_inline_allocation_top")));

  comment("Compute new top");
  leal(ecx, Address(eax, edx, times_1));

  if (GenerateDebugAssembly) {
    comment("Check ExcessiveGC");
    testl(Address(Constant("ExcessiveGC")), Constant(0));
    jcc(not_zero, Constant(slow_case));
  }

  comment("Compare against _inline_allocation_end");
  cmpl(ecx, Address(Constant("_inline_allocation_end")));
  jcc(above, Constant(slow_case));

  comment("Allocation succeeded, set _inline_allocation_top");
  movl(Address(Constant("_inline_allocation_top")), ecx);

  comment("Set prototypical near in object; no need for write barrier");
  movl(Address(eax), ebx);

  comment("Compute remaining size");
  decrement(edx, oopSize);

  comment("One-word object?");
  Label init_done;
  jcc(zero, Constant(init_done));

  comment("Zero object fields");
  xorl(ecx, ecx);
  Label init_loop;
  bind(init_loop);
  movl(Address(eax, edx, times_1), ecx);
  decrement(edx, oopSize);
  jcc(not_zero, Constant(init_loop));
  bind(init_done);

  comment("The newly allocated object is in register eax");
  ret();

  comment("Slow case - call the VM runtime system");
  bind(slow_case);
  leal(eax, Address(Constant("newobject")));
  goto_shared_call_vm(T_OBJECT);

  entry_end(); // compiler_new_object
}
  address generate_forward_exception() {
    StubCodeMark mark(this, "StubRoutines", "forward exception");
    address start = __ pc();

    // Upon entry, the sp points to the return address returning into Java
    // (interpreted or compiled) code; i.e., the return address becomes the
    // throwing pc.
    //
    // Arguments pushed before the runtime call are still on the stack but
    // the exception handler will reset the stack pointer -> ignore them.
    // A potential result in registers can be ignored as well.

#ifdef ASSERT
    // make sure this code is only executed if there is a pending exception
    { Label L;
      __ get_thread(ecx);
      __ cmpl(Address(ecx, Thread::pending_exception_offset()), (int)NULL);
      __ jcc(Assembler::notEqual, L);
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }
#endif

    // compute exception handler into ebx
    __ movl(eax, Address(esp));
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), eax);
    __ movl(ebx, eax);

    // setup eax & edx, remove return address & clear pending exception
    __ get_thread(ecx);
    __ popl(edx);
    __ movl(eax, Address(ecx, Thread::pending_exception_offset()));
    __ movl(Address(ecx, Thread::pending_exception_offset()), (int)NULL);

#ifdef ASSERT
    // make sure exception is set
    { Label L;
      __ testl(eax, eax);
      __ jcc(Assembler::notEqual, L);
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif

    // continue at exception handler (return address removed)
    // eax: exception
    // ebx: exception handler
    // edx: throwing pc
    __ verify_oop(eax);
    __ jmp(ebx);

    return start;
  }
Exemple #10
0
int main()
{
  if (testf (__FLT_MAX__) < 1)
    abort ();

  if (test (__DBL_MAX__) < 1)
    abort ();

  if (testl (__LDBL_MAX__) < 1)
    abort ();

  return 0;
}
Exemple #11
0
void MnistDataSet<M>::read() {

	std::string path = "/home/local/datasets/MNIST";
	ifstream ftraind((path + "/train-images.idx3-ubyte").c_str());
	ifstream ftrainl((path + "/train-labels.idx1-ubyte").c_str());
	ifstream ftestd ((path + "/t10k-images.idx3-ubyte").c_str());
	ifstream ftestl ((path + "/t10k-labels.idx1-ubyte").c_str());

	char buf[16];
	ftraind.read(buf,16); ftrainl.read(buf, 8);
	ftestd.read(buf,16); ftestl.read(buf, 8);
	tensor<unsigned char, host_memory_space> traind(extents[n_trainData][n_dim]);
	tensor<unsigned char, host_memory_space> trainl(extents[n_trainData]);
	tensor<unsigned char, host_memory_space> testd(extents[n_testData][n_dim]);
	tensor<unsigned char, host_memory_space> testl(extents[n_testData]);
	ftraind.read((char*)traind.ptr(), traind.size());
	assert(ftraind.good());
	ftrainl.read((char*)trainl.ptr(), trainl.size());
	assert(ftrainl.good());
	ftestd.read((char*)testd.ptr(), testd.size());
	assert(ftestd.good());
	ftestl.read((char*)testl.ptr(), testl.size());
	assert(ftestl.good());

	tensor<unsigned char, M> train_d(extents[n_trainData][n_dim]);
	tensor<unsigned char, M> train_l(extents[n_trainData]);
	tensor<unsigned char, M> test_d(extents[n_testData][n_dim]);
	tensor<unsigned char, M> test_l(extents[n_testData]);

	train_d = traind;
	train_l = trainl;
	test_d = testd;
	test_l = testl;

	// conversion to float:
	convert(this->X_train, train_d);
	convert(this->Y_train, train_l);
	convert(this->X_test, test_d);
	convert(this->Y_test, test_l);

}
Exemple #12
0
void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) {
  StubCodeMark mark(this, "ICache", "flush_icache_stub");

  address start = __ pc();
#ifdef AMD64

  const Register addr  = c_rarg0;
  const Register lines = c_rarg1;
  const Register magic = c_rarg2;

  Label flush_line, done;

  __ testl(lines, lines);
  __ jcc(Assembler::zero, done);

  // Force ordering wrt cflush.
  // Other fence and sync instructions won't do the job.
  __ mfence();

  __ bind(flush_line);
  __ clflush(Address(addr, 0));
  __ addptr(addr, ICache::line_size);
  __ decrementl(lines);
  __ jcc(Assembler::notZero, flush_line);

  __ mfence();

  __ bind(done);

#else
  const Address magic(rsp, 3*wordSize);
  __ lock(); __ addl(Address(rsp, 0), 0);
#endif // AMD64
  __ movptr(rax, magic); // Handshake with caller to make sure it happened!
  __ ret(0);

  // Must be set here so StubCodeMark destructor can call the flush stub.
  *flush_icache_stub = (ICache::flush_icache_stub_t)start;
}
void CompilerStubs::generate_compiler_idiv_irem() {
  comment_section("Compiler integer divide and remainder");
  comment("Register eax holds the dividend, register ebx holds the divisor");

  entry("compiler_idiv_irem");
  const int min_int = 0x80000000;
  Label normal_case, special_case;

  Label throw_exception;

  testl(ebx,ebx);
  jcc(equal, Constant(throw_exception));

  // Check for special case
  cmpl(eax, Constant(min_int));
  jcc(not_equal, Constant(normal_case));
  xorl(edx, edx); // Prepare edx for possible special case (where remainder = 0)
  cmpl(ebx, Constant(-1));
  jcc(equal, Constant(special_case));

  // Handle normal case
  bind(normal_case);
  cdql();
  idivl(ebx);

  // Normal and special case exit
  bind(special_case);
  ret();

  bind(throw_exception);
  comment("Throw a DivisionByZeroException");
  leal(eax, Address(Constant("division_by_zero_exception")));
  goto_shared_call_vm(T_VOID);

  entry_end(); // compiler_idiv_irem
}
void InterpreterStubs::generate_interpreter_fill_in_tags() {
  comment_section("Interpreter fill in tags");
  entry("interpreter_fill_in_tags");
  comment("eax: return address of method");
  comment("ebx: method");
  comment("ecx: size of parameters.  Guaranteed to be >= 1");
  comment("edx: call info from call site");
  comment("Must preserve eax, ebx, ecx");

  // stack layout:
  //   sp return address of caller
  //      --> argument n
  //      -->    ...
  //      --> argument 0

  Label extended_call_info;

  comment("Compact call info or normal call info?");
  testl(edx, edx); 
  jcc(positive, Constant(extended_call_info));

  Label loop_entry, loop_condition;
  comment("We have a compact call info");
  movl(edi, ecx);

bind(loop_entry);
  decl(edi);
  comment("Store int tag");
  movl(Address(esp, edi, times_8, Constant(BytesPerWord)), Constant(int_tag));
  comment("Test the bit in the call info");
  GUARANTEE(CallInfo::format1_tag_start == 0, "Tag must start at bit position 0 for this code to work");
  btl(edx, edi);
  jcc(carry_clear, Constant(loop_condition));
  comment("Store obj tag");
  movl(Address(esp, edi, times_8, Constant(BytesPerWord)), Constant(obj_tag));
  bind(loop_condition);
  testl(edi, edi);
  jcc(not_zero, Constant(loop_entry));
  ret();

bind(extended_call_info);
  comment("Normal call info");
  // The following code is slightly complicated.  "Bit offset" below
  // pretends like the callinfo's are in a bit array, as follows:
  //     Callinfo describing bci and offset
  //     Size [16 bits] and stack info 0-3
  //     Stack info 4-11
  // We ignore the fact that each of these words is preceded by a byte
  // that makes it look like an instruction.
  pushl(ecx); 
  pushl(ebx);
  Label loopx_entry, loopx_done;
 
  comment("Bit offset of first argument in CallInfo array");
  movzxw(edx, Address(eax, Constant(5 + 1)));  // total number of locals/expr
  subl(edx, ecx);               // number of locals/expr belonging to callee
  shll(edx, Constant(2));       // number of bits per nybble
  addl(edx, Constant(32 + 16)); // 48 bits is the 32 bit callinfo and 16bit size info

  comment("Decrement argument count; move to more convenient register");
  leal(esi, Address(ecx, Constant(-1)));

  comment("Location of tag of esi-th local");
  leal(ebx, Address(esp, Constant(3 * BytesPerWord)));

bind(loopx_entry);
  comment("eax holds the return address");
  comment("ebx holds address of the esi-th tag");
  comment("esi is the local whose tag we are setting");
  comment("edx contains the bit offset of Local 0 in the CallInfo array");
  comment("Get bit offset of esi-th local");
  leal(ecx, Address(edx, esi, times_4));

  comment("From bit offset, get word offset, then multiply by 5");
  movl(edi, ecx);
  shrl(edi, Constant(5));
  leal(edi, Address(edi, edi, times_4));

  comment("Get the appropriate CallInfo word; extract the nybble");
  movl(edi, Address(eax, edi, times_1, Constant(1)));
  shrl(edi);
  andl(edi, Constant(0xF));

  comment("Tag is (1 << value) >> 1.  This is 0 when value == 0");
  movl(ecx, edi);
  movl(edi, Constant(1));
  shll(edi);
  shrl(edi, Constant(1));

  comment("Store the tag");
  movl(Address(ebx), edi);

  comment("Are we done?");
  decl(esi); 
  addl(ebx, Constant(8));
  testl(esi, esi);
  jcc(greater_equal, Constant(loopx_entry));
bind(loopx_done);
  popl(ebx); 
  popl(ecx);
  ret();

  entry_end(); // interpreter_fill_in_tags
}
Exemple #15
0
int
main(int argc, char *argv[])
{
	static const int ex_under = FE_UNDERFLOW | FE_INEXACT;	/* shorthand */
	static const int ex_over = FE_OVERFLOW | FE_INEXACT;
	long double ldbl_small, ldbl_eps, ldbl_max;

	printf("1..5\n");

#ifdef	__i386__
	fpsetprec(FP_PE);
#endif
	/*
	 * We can't use a compile-time constant here because gcc on
	 * FreeBSD/i386 assumes long doubles are truncated to the
	 * double format.
	 */
	ldbl_small = ldexpl(1.0, LDBL_MIN_EXP - LDBL_MANT_DIG);
	ldbl_eps = LDBL_EPSILON;
	ldbl_max = ldexpl(1.0 - ldbl_eps / 2, LDBL_MAX_EXP);

	/*
	 * Special cases involving zeroes.
	 */
#define	ztest(prec)							      \
	test##prec(copysign##prec(1.0, nextafter##prec(0.0, -0.0)), -1.0, 0); \
	test##prec(copysign##prec(1.0, nextafter##prec(-0.0, 0.0)), 1.0, 0);  \
	test##prec(copysign##prec(1.0, nexttoward##prec(0.0, -0.0)), -1.0, 0);\
	test##prec(copysign##prec(1.0, nexttoward##prec(-0.0, 0.0)), 1.0, 0)

	ztest();
	ztest(f);
	ztest(l);
#undef	ztest

#define	stest(next, eps, prec)					\
	test##prec(next(-0.0, 42.0), eps, ex_under);		\
	test##prec(next(0.0, -42.0), -eps, ex_under);		\
	test##prec(next(0.0, INFINITY), eps, ex_under);		\
	test##prec(next(-0.0, -INFINITY), -eps, ex_under)

	stest(nextafter, 0x1p-1074, );
	stest(nextafterf, 0x1p-149f, f);
	stest(nextafterl, ldbl_small, l);
	stest(nexttoward, 0x1p-1074, );
	stest(nexttowardf, 0x1p-149f, f);
	stest(nexttowardl, ldbl_small, l);
#undef	stest

	printf("ok 1 - next\n");

	/*
	 * `x == y' and NaN tests
	 */
	testall(42.0, 42.0, 42.0, 0);
	testall(-42.0, -42.0, -42.0, 0);
	testall(INFINITY, INFINITY, INFINITY, 0);
	testall(-INFINITY, -INFINITY, -INFINITY, 0);
	testall(NAN, 42.0, NAN, 0);
	testall(42.0, NAN, NAN, 0);
	testall(NAN, NAN, NAN, 0);

	printf("ok 2 - next\n");

	/*
	 * Tests where x is an ordinary normalized number
	 */
	testboth(1.0, 2.0, 1.0 + DBL_EPSILON, 0, );
	testboth(1.0, -INFINITY, 1.0 - DBL_EPSILON/2, 0, );
	testboth(1.0, 2.0, 1.0 + FLT_EPSILON, 0, f);
	testboth(1.0, -INFINITY, 1.0 - FLT_EPSILON/2, 0, f);
	testboth(1.0, 2.0, 1.0 + ldbl_eps, 0, l);
	testboth(1.0, -INFINITY, 1.0 - ldbl_eps/2, 0, l);

	testboth(-1.0, 2.0, -1.0 + DBL_EPSILON/2, 0, );
	testboth(-1.0, -INFINITY, -1.0 - DBL_EPSILON, 0, );
	testboth(-1.0, 2.0, -1.0 + FLT_EPSILON/2, 0, f);
	testboth(-1.0, -INFINITY, -1.0 - FLT_EPSILON, 0, f);
	testboth(-1.0, 2.0, -1.0 + ldbl_eps/2, 0, l);
	testboth(-1.0, -INFINITY, -1.0 - ldbl_eps, 0, l);

	/* Cases where nextafter(...) != nexttoward(...) */
	test(nexttoward(1.0, 1.0 + ldbl_eps), 1.0 + DBL_EPSILON, 0);
	testf(nexttowardf(1.0, 1.0 + ldbl_eps), 1.0 + FLT_EPSILON, 0);
	testl(nexttowardl(1.0, 1.0 + ldbl_eps), 1.0 + ldbl_eps, 0);

	printf("ok 3 - next\n");

	/*
	 * Tests at word boundaries, normalization boundaries, etc.
	 */
	testboth(0x1.87654ffffffffp+0, INFINITY, 0x1.87655p+0, 0, );
	testboth(0x1.87655p+0, -INFINITY, 0x1.87654ffffffffp+0, 0, );
	testboth(0x1.fffffffffffffp+0, INFINITY, 0x1p1, 0, );
	testboth(0x1p1, -INFINITY, 0x1.fffffffffffffp+0, 0, );
	testboth(0x0.fffffffffffffp-1022, INFINITY, 0x1p-1022, 0, );
	testboth(0x1p-1022, -INFINITY, 0x0.fffffffffffffp-1022, ex_under, );

	testboth(0x1.fffffep0f, INFINITY, 0x1p1, 0, f);
	testboth(0x1p1, -INFINITY, 0x1.fffffep0f, 0, f);
	testboth(0x0.fffffep-126f, INFINITY, 0x1p-126f, 0, f);
	testboth(0x1p-126f, -INFINITY, 0x0.fffffep-126f, ex_under, f);

#if LDBL_MANT_DIG == 53
	testboth(0x1.87654ffffffffp+0L, INFINITY, 0x1.87655p+0L, 0, l);
	testboth(0x1.87655p+0L, -INFINITY, 0x1.87654ffffffffp+0L, 0, l);
	testboth(0x1.fffffffffffffp+0L, INFINITY, 0x1p1L, 0, l);
	testboth(0x1p1L, -INFINITY, 0x1.fffffffffffffp+0L, 0, l);
	testboth(0x0.fffffffffffffp-1022L, INFINITY, 0x1p-1022L, 0, l);
	testboth(0x1p-1022L, -INFINITY, 0x0.fffffffffffffp-1022L, ex_under, l);
#elif LDBL_MANT_DIG == 64 && !defined(__i386)
	testboth(0x1.87654321fffffffep+0L, INFINITY, 0x1.87654322p+0L, 0, l);
	testboth(0x1.87654322p+0L, -INFINITY, 0x1.87654321fffffffep+0L, 0, l);
	testboth(0x1.fffffffffffffffep0L, INFINITY, 0x1p1L, 0, l);
	testboth(0x1p1L, -INFINITY, 0x1.fffffffffffffffep0L, 0, l);
	testboth(0x0.fffffffffffffffep-16382L, INFINITY, 0x1p-16382L, 0, l);
	testboth(0x1p-16382L, -INFINITY,
	    0x0.fffffffffffffffep-16382L, ex_under, l);
#elif LDBL_MANT_DIG == 113
	testboth(0x1.876543210987ffffffffffffffffp+0L, INFINITY,
	    0x1.876543210988p+0, 0, l);
	testboth(0x1.876543210988p+0L, -INFINITY,
	    0x1.876543210987ffffffffffffffffp+0L, 0, l);
	testboth(0x1.ffffffffffffffffffffffffffffp0L, INFINITY, 0x1p1L, 0, l);
	testboth(0x1p1L, -INFINITY, 0x1.ffffffffffffffffffffffffffffp0L, 0, l);
	testboth(0x0.ffffffffffffffffffffffffffffp-16382L, INFINITY,
	    0x1p-16382L, 0, l);
	testboth(0x1p-16382L, -INFINITY,
	    0x0.ffffffffffffffffffffffffffffp-16382L, ex_under, l);
#endif

	printf("ok 4 - next\n");

	/*
	 * Overflow tests
	 */
	test(idd(nextafter(DBL_MAX, INFINITY)), INFINITY, ex_over);
	test(idd(nextafter(INFINITY, 0.0)), DBL_MAX, 0);
	test(idd(nexttoward(DBL_MAX, DBL_MAX * 2.0L)), INFINITY, ex_over);
#if LDBL_MANT_DIG > 53
	test(idd(nexttoward(INFINITY, DBL_MAX * 2.0L)), DBL_MAX, 0);
#endif

	testf(idf(nextafterf(FLT_MAX, INFINITY)), INFINITY, ex_over);
	testf(idf(nextafterf(INFINITY, 0.0)), FLT_MAX, 0);
	testf(idf(nexttowardf(FLT_MAX, FLT_MAX * 2.0)), INFINITY, ex_over);
	testf(idf(nexttowardf(INFINITY, FLT_MAX * 2.0)), FLT_MAX, 0);

	testboth(ldbl_max, INFINITY, INFINITY, ex_over, l);
	testboth(INFINITY, 0.0, ldbl_max, 0, l);

	printf("ok 5 - next\n");

	return (0);
}
void CompilerStubs::generate_compiler_new_obj_array() {
  comment_section("Compiler stub: new object array");
  comment("- ebx holds the prototypical near of the array class");

  entry("compiler_new_obj_array");
  comment("Get array length");
  // add BytesPerWord for return address
  movl(edx, Address(esp, Constant(BytesPerWord + JavaFrame::arg_offset_from_sp(0))));

  Label slow_case;
  comment("Check if the array length is too large or negative");
  cmpl(edx, Constant(maximum_safe_array_length));
  jcc(above, Constant(slow_case));

  comment("Get _inline_allocation_top");
  movl(eax, Address(Constant("_inline_allocation_top")));

  if (GenerateDebugAssembly) {
    comment("Check ExcessiveGC");
    testl(Address(Constant("ExcessiveGC")), Constant(0));
    jcc(not_zero, Constant(slow_case));
  }

  comment("Compute new top");
  leal(ecx, Address(eax, edx, times_4, Constant(Array::base_offset())));

  comment("Check for overflow");
  cmpl(ecx, eax);
  jcc(below, Constant(slow_case));

  comment("Compare against _inline_allocation_end");
  cmpl(ecx, Address(Constant("_inline_allocation_end")));
  jcc(above, Constant(slow_case));

  comment("Allocation succeeded, set _inline_allocation_top");
  movl(Address(Constant("_inline_allocation_top")), ecx);

  comment("Set prototypical near in object; no need for write barrier");
  movl(Address(eax), ebx);

  comment("Set the length");
  movl(Address(eax, Constant(Array::length_offset())), edx);

  comment("Compute remaining size");
  testl(edx, edx);

  comment("Empty array?");
  Label init_done;
  jcc(equal, Constant(init_done));

  comment("Zero array elements");
  xorl(ecx, ecx);
  Label init_loop;
  bind(init_loop);
  movl(Address(eax, edx, times_4, Constant(Array::base_offset() - oopSize)), ecx);
  decrement(edx, 1);
  jcc(not_zero, Constant(init_loop));
  bind(init_done);

  comment("The newly allocated array is in register eax");
  ret();

  comment("Slow case - call the VM runtime system");
  bind(slow_case);
  leal(eax, Address(Constant("anewarray")));
  goto_shared_call_vm(T_ARRAY);

  entry_end(); // compiler_new_obj_array
}
  address generate_call_stub(address& return_address) {
    StubCodeMark mark(this, "StubRoutines", "call_stub");
    address start = __ pc();

    // stub code parameters / addresses
    assert(frame::entry_frame_call_wrapper_offset == 2, "adjust this code");
    bool  sse_save = false;
    const Address esp_after_call(ebp, -4 * wordSize); // same as in generate_catch_exception()!
    const Address mxcsr_save    (ebp, -4 * wordSize);
    const Address result        (ebp,  3 * wordSize);
    const Address result_type   (ebp,  4 * wordSize);
    const Address method        (ebp,  5 * wordSize);
    const Address entry_point   (ebp,  6 * wordSize);
    const Address parameters    (ebp,  7 * wordSize);
    const Address parameter_size(ebp,  8 * wordSize);
    const Address thread        (ebp,  9 * wordSize); // same as in generate_catch_exception()!
#ifdef COMPILER2
    sse_save =  VM_Version::supports_sse();
#endif

    // stub code
    __ enter();    

    // save edi, esi, & ebx, according to C calling conventions
    __ pushl(edi);
    __ pushl(esi);
    __ pushl(ebx);
    __ subl(esp, wordSize);  // space for %mxcsr save
    // save and initialize %mxcsr
    if (sse_save) {
      __ stmxcsr(mxcsr_save);
      __ ldmxcsr(Address((int) StubRoutines::addr_mxcsr_std(), relocInfo::none));
    }

#ifdef ASSERT
    // make sure we have no pending exceptions
    { Label L;
      __ movl(ecx, thread);
      __ cmpl(Address(ecx, Thread::pending_exception_offset()), (int)NULL);
      __ jcc(Assembler::equal, L);
      __ stop("StubRoutines::call_stub: entered with pending exception");
      __ bind(L);
    }
#endif

    // pass parameters if any
    Label parameters_done;
    __ movl(ecx, parameter_size);  // parameter counter
    __ testl(ecx, ecx);
    __ jcc(Assembler::zero, parameters_done);

    // parameter passing loop

    Label loop;
    __ movl(edx, parameters);	       // parameter pointer
    __ movl(esi, ecx);                 // parameter counter is in esi now
    __ movl(ecx,  Address(edx));       // get first parameter in case it is a receiver

    __ bind(loop);
    __ movl(eax, Address(edx));	       // get parameter
    __ addl(edx, wordSize);            // advance to next parameter
    __ decl(esi);                      // decrement counter
    __ pushl(eax);                     // pass parameter
    __ jcc(Assembler::notZero, loop);

    // call Java function
    __ bind(parameters_done);
    __ movl(ebx, method);              // get methodOop
    __ movl(esi, entry_point);         // get entry_point
    __ call(esi, relocInfo::none);
    return_address = __ pc();

    // store result depending on type
    // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
    __ movl(edi, result);
    Label is_long, is_float, is_double, exit;
    __ movl(esi, result_type);
    __ cmpl(esi, T_LONG);
    __ jcc(Assembler::equal, is_long);
    __ cmpl(esi, T_FLOAT);
    __ jcc(Assembler::equal, is_float);
    __ cmpl(esi, T_DOUBLE);
    __ jcc(Assembler::equal, is_double);

    // handle T_INT case
    __ movl(Address(edi), eax);
    __ bind(exit);

    // pop parameters
    __ movl(ecx, parameter_size);
    __ leal(esp, Address(esp, ecx, Address::times_4));

    // check if parameters have been popped correctly
#ifdef ASSERT
      Label esp_wrong;
      __ leal(edi, esp_after_call);
      __ cmpl(esp, edi);
      __ jcc(Assembler::notEqual, esp_wrong);
#endif

    // restore %mxcsr
    if (sse_save) {
      __ ldmxcsr(mxcsr_save);
    }
    // restore edi & esi
    __ addl(esp, wordSize);  // remove %mxcsr save area
    __ popl(ebx);
    __ popl(esi);
    __ popl(edi);    

    // return
    __ popl(ebp);
    __ ret(0);

    // handle return types different from T_INT
    __ bind(is_long);
    __ movl(Address(edi, 0 * wordSize), eax);
    __ movl(Address(edi, 1 * wordSize), edx);
    __ jmp(exit);

    __ bind(is_float);
    __ fstp_s(Address(edi));
    __ jmp(exit);

    __ bind(is_double);
    __ fstp_d(Address(edi));
    __ jmp(exit);

#ifdef ASSERT
      // stack pointer misadjusted
      __ bind(esp_wrong);
      __ stop("esp wrong after Java call");
#endif

    return start;
  }
  address generate_verify_oop() {
    StubCodeMark mark(this, "StubRoutines", "verify_oop");
    address start = __ pc();
    
    // Incoming arguments on stack after saving eax:
    //
    // [tos    ]: saved edx
    // [tos + 1]: saved EFLAGS
    // [tos + 2]: return address
    // [tos + 3]: char* error message
    // [tos + 4]: oop   object to verify
    // [tos + 5]: saved eax - saved by caller and bashed
    
    Label exit, error;
    __ pushfd();
    __ incl(Address((int)StubRoutines::verify_oop_count_addr(), relocInfo::none));
    __ pushl(edx);                               // save edx
    // make sure object is 'reasonable'
    __ movl(eax, Address(esp, 4 * wordSize));    // get object
    __ testl(eax, eax);
    __ jcc(Assembler::zero, exit);               // if obj is NULL it is ok
    
    // Check if the oop is in the right area of memory
    const int oop_mask = Universe::verify_oop_mask();
    const int oop_bits = Universe::verify_oop_bits();
    __ movl(edx, eax);
    __ andl(edx, oop_mask);
    __ cmpl(edx, oop_bits);
    __ jcc(Assembler::notZero, error);

    // make sure klass is 'reasonable'
    __ movl(eax, Address(eax, oopDesc::klass_offset_in_bytes())); // get klass
    __ testl(eax, eax);
    __ jcc(Assembler::zero, error);              // if klass is NULL it is broken

    // Check if the klass is in the right area of memory
    const int klass_mask = Universe::verify_klass_mask();
    const int klass_bits = Universe::verify_klass_bits();
    __ movl(edx, eax);
    __ andl(edx, klass_mask);
    __ cmpl(edx, klass_bits);
    __ jcc(Assembler::notZero, error);

    // make sure klass' klass is 'reasonable'
    __ movl(eax, Address(eax, oopDesc::klass_offset_in_bytes())); // get klass' klass
    __ testl(eax, eax);
    __ jcc(Assembler::zero, error);              // if klass' klass is NULL it is broken

    __ movl(edx, eax);
    __ andl(edx, klass_mask);
    __ cmpl(edx, klass_bits);
    __ jcc(Assembler::notZero, error);           // if klass not in right area
                                                 // of memory it is broken too.

    // return if everything seems ok
    __ bind(exit);
    __ movl(eax, Address(esp, 5 * wordSize));    // get saved eax back
    __ popl(edx);                                // restore edx
    __ popfd();                                  // restore EFLAGS
    __ ret(3 * wordSize);                        // pop arguments

    // handle errors
    __ bind(error);
    __ movl(eax, Address(esp, 5 * wordSize));    // get saved eax back
    __ popl(edx);                                // get saved edx back
    __ popfd();                                  // get saved EFLAGS off stack -- will be ignored
    __ pushad();                                 // push registers (eip = return address & msg are already pushed)
    __ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
    __ popad();
    __ ret(3 * wordSize);                        // pop arguments
    return start;
  }
void NativeGenerator::generate_native_system_entries() {
  comment_section("Native entry points for system functions");

  rom_linkable_entry("native_jvm_unchecked_byte_arraycopy_entry");
  jmp(Constant("native_system_arraycopy_entry"));
  rom_linkable_entry_end();

  rom_linkable_entry("native_jvm_unchecked_char_arraycopy_entry");
  jmp(Constant("native_system_arraycopy_entry"));
  rom_linkable_entry_end();

  rom_linkable_entry("native_jvm_unchecked_int_arraycopy_entry");
  jmp(Constant("native_system_arraycopy_entry"));
  rom_linkable_entry_end();

  rom_linkable_entry("native_jvm_unchecked_long_arraycopy_entry");
  jmp(Constant("native_system_arraycopy_entry"));
  rom_linkable_entry_end();

  rom_linkable_entry("native_jvm_unchecked_obj_arraycopy_entry");
  jmp(Constant("native_system_arraycopy_entry"));
  rom_linkable_entry_end();

  rom_linkable_entry("native_system_arraycopy_entry");

  wtk_profile_quick_call(/* param_size*/ 5);

  Label bailout, cont, try_2_byte, try_4_byte, try_8_byte, do_4_byte;

  //  public static native void arraycopy(Object src, int src_position,
  //                                      Object dst, int dst_position,
  //                                      int length);
  comment("preserve method");
  pushl(ebx);

  // 8 is for the preserved method and the return address
  int  length_offset  =  JavaFrame::arg_offset_from_sp(0) + 8,
       dst_pos_offset =  JavaFrame::arg_offset_from_sp(1) + 8,
       dst_offset     =  JavaFrame::arg_offset_from_sp(2) + 8,
       src_pos_offset =  JavaFrame::arg_offset_from_sp(3) + 8,
       src_offset     =  JavaFrame::arg_offset_from_sp(4) + 8;

  comment("load arguments to registers");
  movl(ecx, Address(esp, Constant(length_offset)));
  movl(edi, Address(esp, Constant(dst_pos_offset)));
  movl(edx, Address(esp, Constant(dst_offset)));
  movl(esi, Address(esp, Constant(src_pos_offset)));
  movl(eax, Address(esp, Constant(src_offset)));

  // eax = src
  // ebx = tmp register
  // edx = dst
  // ecx = length
  // esi = src_pos
  // edi = dst_pos

  comment("if (src == NULL) goto bailout;");
  testl( eax, eax );
  jcc(zero, Constant(bailout));

  comment("if (dst == NULL) goto bailout;");
  testl( edx, edx );
  jcc(zero, Constant(bailout));

  comment("if (length < 0 || src_pos < 0 || dst_pos < 0) goto bailout;");
  movl(ebx, ecx);
  orl(ebx, esi);
  orl(ebx, edi);
  jcc(negative, Constant(bailout));

  comment("if ((unsigned int) dst.length < (unsigned int) dst_pos + (unsigned int) length) goto bailout;");
  movl(ebx, ecx);
  addl(ebx, edi);
  cmpl(Address(edx, Constant(Array::length_offset())), ebx);
  jcc(below, Constant(bailout));

  comment("if ((unsigned int) src.length < (unsigned int) src_pos + (unsigned int) length) goto bailout;");
  movl(ebx, ecx);
  addl(ebx, esi);
  cmpl(Address(eax, Constant(Array::length_offset())), ebx);
  jcc(below, Constant(bailout));

  comment("Same near test");
  comment("if (src.near != dst.near) goto bailout;");
  movl(ebx, Address(eax, Constant(Oop::klass_offset())));
  cmpl(ebx, Address(edx, Constant(Oop::klass_offset())));
  jcc(not_equal, Constant(bailout));

  comment("load the instance_size");
  movl(ebx, Address(ebx, Constant(JavaNear::klass_offset())));
  movsxw(ebx, Address(ebx, Constant(FarClass::instance_size_offset())));

  comment("if (instance_size != size_type_array_1()) goto try_2_byte");
  cmpl(ebx, Constant(InstanceSize::size_type_array_1));
  jcc(not_equal, Constant(try_2_byte));
  leal(esi, Address(eax, esi, times_1, Constant(Array::base_offset())));
  leal(edi, Address(edx, edi, times_1, Constant(Array::base_offset())));
  jmp(Constant(cont));

  bind(try_2_byte);
  comment("if (instance_size != size_type_array_2()) goto try_4_byte");
  cmpl(ebx, Constant(InstanceSize::size_type_array_2));
  jcc(not_equal, Constant(try_4_byte));
  leal(esi, Address(eax, esi, times_2, Constant(Array::base_offset())));
  leal(edi, Address(edx, edi, times_2, Constant(Array::base_offset())));
  shll(ecx, Constant(1));
  jmp(Constant(cont));

  bind(try_4_byte);
  comment("if (instance_size == size_type_array_4()) goto do_4_byte");
  cmpl(ebx, Constant(InstanceSize::size_type_array_4));
  jcc(equal, Constant(do_4_byte) );

  comment("if (instance_size != size_obj_array()) goto bailout");
  cmpl(ebx, Constant(InstanceSize::size_obj_array));
  jcc(not_equal, Constant(bailout));

  comment("if (dst < old_generation_end) goto bailout");
  cmpl( edx, Address( Constant( "_old_generation_end" ) ) );
  jcc( below, Constant(bailout));

  bind(do_4_byte);
  leal(esi, Address(eax, esi, times_4, Constant(Array::base_offset())));
  leal(edi, Address(edx, edi, times_4, Constant(Array::base_offset())));
  shll(ecx, Constant(2));

  bind(cont);
  comment("memmove(edi, esi, ecx);");
  pushl(ecx);
  pushl(esi);
  pushl(edi);
  call(Constant("memmove"));
  addl(esp, Constant(16));

  ret(Constant(5 * BytesPerStackElement));

  comment("Bail out to the general arraycopy implementation");
  bind(bailout);
  comment("pop method");
  popl(ebx);

  if (AddExternCUnderscore) {
    emit_instruction("jmp _interpreter_method_entry");
  } else {
    emit_instruction("jmp  interpreter_method_entry");
  }

  rom_linkable_entry_end(); // native_system_arraycopy_entry
}
void NativeGenerator::generate_native_string_entries() {

  comment_section("Native entry points for string functions");
  {

    //--------------------java.lang.String.indexof0---------------------------
    rom_linkable_entry("native_string_indexof0_entry");

    wtk_profile_quick_call(/* param_size*/ 2);

    comment("Pop the return address");
    popl(edi);
    comment("Push zero for fromIndex");
    pushl(Constant(0));
    comment("Push back the return address");
    pushl(edi);

    jmp(Constant("native_string_indexof_entry"));
    rom_linkable_entry_end(); // native_string_indexof0_entry

    //--------------------java.lang.String.indexof---------------------------

    rom_linkable_entry("native_string_indexof_entry");
    Label cont, loop, test, failure, success;

    wtk_profile_quick_call(/* param_size*/ 3);

    comment("Pop the return address");
    popl(edi);

    comment("Pop the argument: fromIndex");
    pop_int(eax, eax);

    comment("Pop the argument: ch");
    pop_int(ebx, ebx);

    comment("Pop the receiver");
    pop_obj(ecx, ecx);

    cmpl(ebx, Constant(0xFFFF));
    jcc(greater, Constant(failure));

    cmpl(eax, Constant(0));
    jcc(greater_equal, Constant(cont));
    movl(eax, Constant(0));

    bind(cont);
    movl(esi, Address(ecx, Constant(String::count_offset())));

    comment("if (fromIndex >= count) { return -1; }");
    cmpl(eax, esi);
    jcc(greater_equal, Constant(failure));

    movl(edx, Address(ecx, Constant(String::offset_offset())));
    addl(eax, edx); // i = offset + fromIndex
    addl(edx, esi); // int max = offset + count;
    movl(esi, Address(ecx, Constant(String::value_offset())));    // v = value.
    jmp(Constant(test));

    bind(loop);
    cmpw(Address(esi, eax, times_2, Constant(Array::base_offset())),  ebx);
    jcc(equal, Constant(success));
    incl(eax);

    bind(test);
    cmpl(eax, edx);
    jcc(less, Constant(loop));

    comment("Return -1 by pushing the value and jumping to the return address");
    bind(failure);
    push_int(-1);
    jmp(edi);

    comment("Return i - offset by pushing the value and jumping to the return address");
    bind(success);
    movl(esi, Address(ecx, Constant(String::offset_offset())));   // i = offset + fromIndex
    subl(eax, esi);
    push_int(eax);
    jmp(edi);

    rom_linkable_entry_end(); // native_string_indexof_entry
  }

  //----------------------java.lang.String.charAt---------------------------

  {
    rom_linkable_entry("native_string_charAt_entry");
    if (AddExternCUnderscore) {
      emit_instruction("jmp _interpreter_method_entry");
    } else {
      emit_instruction("jmp  interpreter_method_entry");
    }
    rom_linkable_entry_end();
  }

  //----------------------java.lang.String(java.lang.StringBuffer)-------------

  {
    rom_linkable_entry("native_string_init_entry");
    if (AddExternCUnderscore) {
      emit_instruction("jmp _interpreter_method_entry");
    } else {
      emit_instruction("jmp  interpreter_method_entry");
    }
    rom_linkable_entry_end();
  }

  //----------------------java.lang.String.equals(java.lang.Object)------------

  {
    rom_linkable_entry("native_string_equals_entry");
    if (AddExternCUnderscore) {
      emit_instruction("jmp _interpreter_method_entry");
    } else {
      emit_instruction("jmp  interpreter_method_entry");
    }
    rom_linkable_entry_end();
  }

  //----------------------java.lang.String.indexOf(java.lang.String)-----------

  {
    rom_linkable_entry("native_string_indexof0_string_entry");
    if (AddExternCUnderscore) {
      emit_instruction("jmp _interpreter_method_entry");
    } else {
      emit_instruction("jmp  interpreter_method_entry");
    }
    rom_linkable_entry_end();
  }

  //----------------------java.lang.String.indexOf(java.lang.String)-----------

  {
    rom_linkable_entry("native_string_indexof_string_entry");
    if (AddExternCUnderscore) {
      emit_instruction("jmp _interpreter_method_entry");
    } else {
      emit_instruction("jmp  interpreter_method_entry");
    }
    rom_linkable_entry_end();
  }

  //----------------------java.lang.String.compareTo---------------------------

  { // java.lang.String.compareTo
    // Method int compareTo(java.lang.String)

    rom_linkable_entry("native_string_compareTo_entry");

    wtk_profile_quick_call(/* param_size*/ 2);

    comment("preserve method");
    pushl(ebx);

    // 8 is return address plus pushed method
    int  str1_offset =  JavaFrame::arg_offset_from_sp(0) + 8,
         str0_offset =  JavaFrame::arg_offset_from_sp(1) + 8;

    comment("load arguments to registers");
    movl(ecx, Address(esp, Constant(str1_offset)));
    movl(eax, Address(esp, Constant(str0_offset)));

    // eax: str0: this String
    // ebx: str1: String to compare against

    Label bailout;

    comment("Null check");
    testl(ecx, ecx);
    jcc(zero, Constant(bailout));

    comment("get str0.value[]");
    movl(esi, Address(eax, Constant(String::value_offset())));
    comment("get str0.offset");
    movl(ebx, Address(eax, Constant(String::offset_offset())));
    comment("compute start of character data");
    leal(esi, Address(esi, ebx, times_2, Constant(Array::base_offset())));
    comment("get str0.count");
    movl(eax, Address(eax, Constant(String::count_offset())));

    comment("get str1.value[]");
    movl(edi, Address(ecx, Constant(String::value_offset())));
    comment("get str1.offset");
    movl(ebx, Address(ecx, Constant(String::offset_offset())));
    comment("compute start of character data");
    leal(edi, Address(edi, ebx, times_2, Constant(Array::base_offset())));
    comment("get str1.count");
    movl(ebx, Address(ecx, Constant(String::count_offset())));

    // esi = str0 start of character data
    // edi = str1 start of character data
    // eax = str0 length
    // ebx = str1 length

    Label str1_longest;
    subl(eax, ebx);
    jcc(greater_equal, Constant(str1_longest));
    // str1 is longer than str0
    addl(ebx, eax);
    bind(str1_longest);

    // esi = str0 start of character data
    // edi = str1 start of character data
    // eax = str0.count - str1.count
    // ebx = min(str0.count, str1.count)

    // save str0.count - str1.count, we might need it later
    pushl(eax);

    xorl(ecx, ecx);

    Label loop, check_lengths, done;
    bind(loop);
    cmpl(ecx, ebx);
    jcc(above_equal, Constant(check_lengths));
    movzxw(eax, Address(esi, ecx, times_2));
    movzxw(edx, Address(edi, ecx, times_2));
    subl(eax, edx);
    jcc(not_equal, Constant(done));
    incl(ecx);
    jmp(Constant(loop));

    bind(check_lengths);
    movl(eax, Address(esp));

    bind(done);
    popl(ebx); // remove saved length difference

    // Push result on stack and return to caller
    popl(ebx);     // remove method
    popl(edi);     // pop return address
    addl(esp, Constant(2 * BytesPerStackElement)); // remove arguments
    push_int(eax); // push result
    jmp(edi);      // return

    comment("Bail out to the general compareTo implementation");
    bind(bailout);
    comment("pop method");
    popl(ebx);
    if (AddExternCUnderscore) {
      emit_instruction("jmp _interpreter_method_entry");
    } else {
      emit_instruction("jmp  interpreter_method_entry");
    }

    rom_linkable_entry_end(); // native_string_compareTo_entry
  }

  //----------------------java.lang.String.endsWith----------------

  {
    // java.lang.String.endsWith
    // Method boolean endsWith(java.lang.String)

    rom_linkable_entry("native_string_endsWith_entry");

    wtk_profile_quick_call(/* param_size*/ 2);

    Label bailout;

    // 4 is return address
    int suffix_offset =  JavaFrame::arg_offset_from_sp(0) + 4,
        this_offset =  JavaFrame::arg_offset_from_sp(1) + 4;

    comment("load arguments to registers");
    movl(eax, Address(esp, Constant(suffix_offset)));
    cmpl(eax, Constant(0));
    jcc(equal, Constant(bailout));

    movl(ecx, Address(esp, Constant(this_offset)));

    comment("Pop the return address");
    popl(edi);

    movl(edx, Address(ecx, Constant(String::count_offset())));
    subl(edx, Address(eax, Constant(String::count_offset())));

    comment("Push (this.count - suffix.count) for toffset");
    pushl(edx);

    comment("Push back the return address");
    pushl(edi);

    jmp(Constant("native_string_startsWith_entry"));

    comment("Bail out to the general startsWith implementation");
    bind(bailout);
    if (AddExternCUnderscore) {
      emit_instruction("jmp _interpreter_method_entry");
    } else {
      emit_instruction("jmp  interpreter_method_entry");
    }

    rom_linkable_entry_end(); // native_string_endsWith_entry
  }

  //----------------------java.lang.String.startsWith----------------
  {
    // java.lang.String.startsWith
    // Method boolean startsWith(java.lang.String)
    rom_linkable_entry("native_string_startsWith0_entry");

    wtk_profile_quick_call(/* param_size*/ 2);
    Label bailout;

    // 4 is return address
    int prefix_offset = JavaFrame::arg_offset_from_sp(0) + 4;

    comment("Check if prefix is null");
    cmpl(Address(esp, Constant(prefix_offset)), Constant(0));
    jcc(equal, Constant(bailout));

    comment("Pop the return address");
    popl(edi);
    comment("Push zero for toffset");
    pushl(Constant(0));
    comment("Push back the return address");
    pushl(edi);

    jmp(Constant("native_string_startsWith_entry"));

    comment("Bail out to the general startsWith implementation");
    bind(bailout);
    if (AddExternCUnderscore) {
      emit_instruction("jmp _interpreter_method_entry");
    } else {
      emit_instruction("jmp  interpreter_method_entry");
    }

    rom_linkable_entry_end(); // native_string_startsWith0_entry
  }

  {
    // ----------- java.lang.String.startsWith ------------------------------
    // Method boolean startsWith(java.lang.String,int)

    rom_linkable_entry("native_string_startsWith_entry");

    wtk_profile_quick_call(/* param_size*/ 3);

    Label bailout, return_false;

    // 4 is return address
    int  prefix_offset =  JavaFrame::arg_offset_from_sp(1) + 4;

    comment("Check if prefix is null");
    cmpl(Address(esp, Constant(prefix_offset)), Constant(0));
    jcc(equal, Constant(bailout));

    comment("Pop the return address");
    popl(edi);

    comment("Pop the argument: toffset");
    pop_int(edx, edx);

    comment("Pop the argument: prefix");
    pop_obj(eax, eax);

    comment("Pop the receiver");
    pop_obj(ecx, ecx);

    comment("Preserve the return address");
    pushl(edi);

    // ecx: this String
    // eax: prefix

    cmpl(edx, Constant(0));
    jcc(less, Constant(return_false));

    comment("if (toffset > this.count - prefix.count) return false;");
    movl(ebx, Address(ecx, Constant(String::count_offset())));
    subl(ebx, Address(eax, Constant(String::count_offset())));
    cmpl(edx, ebx);
    jcc(greater, Constant(return_false));

    comment("get this.value[]");
    movl(esi, Address(ecx, Constant(String::value_offset())));
    comment("get this.offset");
    movl(ebx, Address(ecx, Constant(String::offset_offset())));
    comment("add toffset");
    addl(ebx, edx);
    comment("compute start of character data");
    leal(esi, Address(esi, ebx, times_2, Constant(Array::base_offset())));

    comment("get prefix.value[]");
    movl(edi, Address(eax, Constant(String::value_offset())));
    comment("get prefix.offset");
    movl(ebx, Address(eax, Constant(String::offset_offset())));
    comment("compute start of character data");
    leal(edi, Address(edi, ebx, times_2, Constant(Array::base_offset())));

    comment("get prefix.count");
    movl(ecx, Address(eax, Constant(String::count_offset())));
    comment("get the number of bytes to compare");
    shll(ecx, Constant(1));

    comment("memcmp(edi, esi, ecx);");
    pushl(ecx);
    pushl(esi);
    pushl(edi);

    if (GenerateInlineAsm) {
      // VC++ treats memcmp() as an intrinsic function and would cause
      // reference to memcmp in Interpreter_i386.c to fail to compile.
      call(Constant("memcmp_from_interpreter"));
    } else {
      call(Constant("memcmp"));
    }
    addl(esp, Constant(12));
    cmpl(eax, Constant(0));
    jcc(not_equal, Constant(return_false));

    // Push 1 on stack and return to caller
    popl(edi);     // pop return address
    push_int(1);   // push result
    jmp(edi);      // return

    bind(return_false);
    // Push 0 on stack and return to caller
    popl(edi);     // pop return address
    push_int(0);   // push result
    jmp(edi);      // return

    comment("Bail out to the general startsWith implementation");
    bind(bailout);
    if (AddExternCUnderscore) {
      emit_instruction("jmp _interpreter_method_entry");
    } else {
      emit_instruction("jmp  interpreter_method_entry");
    }

    rom_linkable_entry_end(); // native_string_startsWith_entry
  }
}
address TemplateInterpreterGenerator::generate_slow_signature_handler() {
  address entry = __ pc();

  // rbx: method
  // r14: pointer to locals
  // c_rarg3: first stack arg - wordSize
  __ mov(c_rarg3, rsp);
  // adjust rsp
  __ subptr(rsp, 14 * wordSize);
  __ call_VM(noreg,
             CAST_FROM_FN_PTR(address,
                              InterpreterRuntime::slow_signature_handler),
             rbx, r14, c_rarg3);

  // rax: result handler

  // Stack layout:
  // rsp: 5 integer args (if static first is unused)
  //      1 float/double identifiers
  //      8 double args
  //        return address
  //        stack args
  //        garbage
  //        expression stack bottom
  //        bcp (NULL)
  //        ...

  // Do FP first so we can use c_rarg3 as temp
  __ movl(c_rarg3, Address(rsp, 5 * wordSize)); // float/double identifiers

  for (int i = 0; i < Argument::n_float_register_parameters_c; i++) {
    const XMMRegister r = as_XMMRegister(i);

    Label d, done;

    __ testl(c_rarg3, 1 << i);
    __ jcc(Assembler::notZero, d);
    __ movflt(r, Address(rsp, (6 + i) * wordSize));
    __ jmp(done);
    __ bind(d);
    __ movdbl(r, Address(rsp, (6 + i) * wordSize));
    __ bind(done);
  }

  // Now handle integrals.  Only do c_rarg1 if not static.
  __ movl(c_rarg3, Address(rbx, Method::access_flags_offset()));
  __ testl(c_rarg3, JVM_ACC_STATIC);
  __ cmovptr(Assembler::zero, c_rarg1, Address(rsp, 0));

  __ movptr(c_rarg2, Address(rsp, wordSize));
  __ movptr(c_rarg3, Address(rsp, 2 * wordSize));
  __ movptr(c_rarg4, Address(rsp, 3 * wordSize));
  __ movptr(c_rarg5, Address(rsp, 4 * wordSize));

  // restore rsp
  __ addptr(rsp, 14 * wordSize);

  __ ret(0);

  return entry;
}
Exemple #22
0
// Helper to remove argument slots from the stack.
// arg_slots must be a multiple of stack_move_unit() and >= 0
void MethodHandles::remove_arg_slots(MacroAssembler* _masm,
                                    RegisterOrConstant arg_slots,
                                    Register rax_argslot,
                                     Register rbx_temp, Register rdx_temp, Register temp3_reg) {
  assert(temp3_reg == noreg, "temp3 not required");
  assert_different_registers(rax_argslot, rbx_temp, rdx_temp,
                             (!arg_slots.is_register() ? rsp : arg_slots.as_register()));

#ifdef ASSERT
  // Verify that [argslot..argslot+size) lies within (rsp, rbp).
  __ lea(rbx_temp, Address(rax_argslot, arg_slots, Address::times_ptr));
  verify_argslot(_masm, rbx_temp, "deleted argument(s) must fall within current frame");
  if (arg_slots.is_register()) {
    Label L_ok, L_bad;
    __ cmpptr(arg_slots.as_register(), (int32_t) NULL_WORD);
    __ jccb(Assembler::less, L_bad);
    __ testl(arg_slots.as_register(), -stack_move_unit() - 1);
    __ jccb(Assembler::zero, L_ok);
    __ bind(L_bad);
    __ stop("assert arg_slots >= 0 and clear low bits");
    __ bind(L_ok);
  } else {
    assert(arg_slots.as_constant() >= 0, "");
    assert(arg_slots.as_constant() % -stack_move_unit() == 0, "");
  }
#endif //ASSERT

#ifdef _LP64
  if (false) {                  // not needed, since register is positive
    // clean high bits of stack motion register (was loaded as an int)
    if (arg_slots.is_register())
      __ movslq(arg_slots.as_register(), arg_slots.as_register());
  }
#endif

  BLOCK_COMMENT("remove_arg_slots {");
  // Pull up everything shallower than rax_argslot.
  // Then remove the excess space on the stack.
  // The stacked return address gets pulled up with everything else.
  // That is, copy [rsp, argslot) upward by size words.  In pseudo-code:
  //   for (rdx = argslot-1; rdx >= rsp; --rdx)
  //     rdx[size] = rdx[0]
  //   argslot += size;
  //   rsp += size;
  __ lea(rdx_temp, Address(rax_argslot, -wordSize)); // source pointer for copy
  {
    Label loop;
    __ BIND(loop);
    // pull one word up each time through the loop
    __ movptr(rbx_temp, Address(rdx_temp, 0));
    __ movptr(Address(rdx_temp, arg_slots, Address::times_ptr), rbx_temp);
    __ addptr(rdx_temp, -wordSize);
    __ cmpptr(rdx_temp, rsp);
    __ jccb(Assembler::greaterEqual, loop);
  }

  // Now move the argslot up, to point to the just-copied block.
  __ lea(rsp, Address(rsp, arg_slots, Address::times_ptr));
  // And adjust the argslot address to point at the deletion point.
  __ lea(rax_argslot, Address(rax_argslot, arg_slots, Address::times_ptr));
  BLOCK_COMMENT("} remove_arg_slots");
}
Exemple #23
0
//------------------------------------------------------------------------------
// MethodHandles::generate_method_handle_stub
//
// Generate an "entry" field for a method handle.
// This determines how the method handle will respond to calls.
void MethodHandles::generate_method_handle_stub(MacroAssembler* _masm, MethodHandles::EntryKind ek) {
  // Here is the register state during an interpreted call,
  // as set up by generate_method_handle_interpreter_entry():
  // - rbx: garbage temp (was MethodHandle.invoke methodOop, unused)
  // - rcx: receiver method handle
  // - rax: method handle type (only used by the check_mtype entry point)
  // - rsi/r13: sender SP (must preserve; see prepare_to_jump_from_interpreted)
  // - rdx: garbage temp, can blow away

  const Register rcx_recv    = rcx;
  const Register rax_argslot = rax;
  const Register rbx_temp    = rbx;
  const Register rdx_temp    = rdx;

  // This guy is set up by prepare_to_jump_from_interpreted (from interpreted calls)
  // and gen_c2i_adapter (from compiled calls):
  const Register saved_last_sp = LP64_ONLY(r13) NOT_LP64(rsi);

  // Argument registers for _raise_exception.
  // 32-bit: Pass first two oop/int args in registers ECX and EDX.
  const Register rarg0_code     = LP64_ONLY(j_rarg0) NOT_LP64(rcx);
  const Register rarg1_actual   = LP64_ONLY(j_rarg1) NOT_LP64(rdx);
  const Register rarg2_required = LP64_ONLY(j_rarg2) NOT_LP64(rdi);
  assert_different_registers(rarg0_code, rarg1_actual, rarg2_required, saved_last_sp);

  guarantee(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes() != 0, "must have offsets");

  // some handy addresses
  Address rbx_method_fie(     rbx,      methodOopDesc::from_interpreted_offset() );
  Address rbx_method_fce(     rbx,      methodOopDesc::from_compiled_offset() );

  Address rcx_mh_vmtarget(    rcx_recv, java_lang_invoke_MethodHandle::vmtarget_offset_in_bytes() );
  Address rcx_dmh_vmindex(    rcx_recv, java_lang_invoke_DirectMethodHandle::vmindex_offset_in_bytes() );

  Address rcx_bmh_vmargslot(  rcx_recv, java_lang_invoke_BoundMethodHandle::vmargslot_offset_in_bytes() );
  Address rcx_bmh_argument(   rcx_recv, java_lang_invoke_BoundMethodHandle::argument_offset_in_bytes() );

  Address rcx_amh_vmargslot(  rcx_recv, java_lang_invoke_AdapterMethodHandle::vmargslot_offset_in_bytes() );
  Address rcx_amh_argument(   rcx_recv, java_lang_invoke_AdapterMethodHandle::argument_offset_in_bytes() );
  Address rcx_amh_conversion( rcx_recv, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes() );
  Address vmarg;                // __ argument_address(vmargslot)

  const int java_mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();

  if (have_entry(ek)) {
    __ nop();                   // empty stubs make SG sick
    return;
  }

  address interp_entry = __ pc();

  trace_method_handle(_masm, entry_name(ek));

  BLOCK_COMMENT(entry_name(ek));

  switch ((int) ek) {
  case _raise_exception:
    {
      // Not a real MH entry, but rather shared code for raising an
      // exception.  Since we use the compiled entry, arguments are
      // expected in compiler argument registers.
      assert(raise_exception_method(), "must be set");
      assert(raise_exception_method()->from_compiled_entry(), "method must be linked");

      const Register rdi_pc = rax;
      __ pop(rdi_pc);  // caller PC
      __ mov(rsp, saved_last_sp);  // cut the stack back to where the caller started

      Register rbx_method = rbx_temp;
      Label L_no_method;
      // FIXME: fill in _raise_exception_method with a suitable java.lang.invoke method
      __ movptr(rbx_method, ExternalAddress((address) &_raise_exception_method));
      __ testptr(rbx_method, rbx_method);
      __ jccb(Assembler::zero, L_no_method);

      const int jobject_oop_offset = 0;
      __ movptr(rbx_method, Address(rbx_method, jobject_oop_offset));  // dereference the jobject
      __ testptr(rbx_method, rbx_method);
      __ jccb(Assembler::zero, L_no_method);
      __ verify_oop(rbx_method);

      NOT_LP64(__ push(rarg2_required));
      __ push(rdi_pc);         // restore caller PC
      __ jmp(rbx_method_fce);  // jump to compiled entry

      // Do something that is at least causes a valid throw from the interpreter.
      __ bind(L_no_method);
      __ push(rarg2_required);
      __ push(rarg1_actual);
      __ jump(ExternalAddress(Interpreter::throw_WrongMethodType_entry()));
    }
    break;

  case _invokestatic_mh:
  case _invokespecial_mh:
    {
      Register rbx_method = rbx_temp;
      __ load_heap_oop(rbx_method, rcx_mh_vmtarget); // target is a methodOop
      __ verify_oop(rbx_method);
      // same as TemplateTable::invokestatic or invokespecial,
      // minus the CP setup and profiling:
      if (ek == _invokespecial_mh) {
        // Must load & check the first argument before entering the target method.
        __ load_method_handle_vmslots(rax_argslot, rcx_recv, rdx_temp);
        __ movptr(rcx_recv, __ argument_address(rax_argslot, -1));
        __ null_check(rcx_recv);
        __ verify_oop(rcx_recv);
      }
      __ jmp(rbx_method_fie);
    }
    break;

  case _invokevirtual_mh:
    {
      // same as TemplateTable::invokevirtual,
      // minus the CP setup and profiling:

      // pick out the vtable index and receiver offset from the MH,
      // and then we can discard it:
      __ load_method_handle_vmslots(rax_argslot, rcx_recv, rdx_temp);
      Register rbx_index = rbx_temp;
      __ movl(rbx_index, rcx_dmh_vmindex);
      // Note:  The verifier allows us to ignore rcx_mh_vmtarget.
      __ movptr(rcx_recv, __ argument_address(rax_argslot, -1));
      __ null_check(rcx_recv, oopDesc::klass_offset_in_bytes());

      // get receiver klass
      Register rax_klass = rax_argslot;
      __ load_klass(rax_klass, rcx_recv);
      __ verify_oop(rax_klass);

      // get target methodOop & entry point
      const int base = instanceKlass::vtable_start_offset() * wordSize;
      assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
      Address vtable_entry_addr(rax_klass,
                                rbx_index, Address::times_ptr,
                                base + vtableEntry::method_offset_in_bytes());
      Register rbx_method = rbx_temp;
      __ movptr(rbx_method, vtable_entry_addr);

      __ verify_oop(rbx_method);
      __ jmp(rbx_method_fie);
    }
    break;

  case _invokeinterface_mh:
    {
      // same as TemplateTable::invokeinterface,
      // minus the CP setup and profiling:

      // pick out the interface and itable index from the MH.
      __ load_method_handle_vmslots(rax_argslot, rcx_recv, rdx_temp);
      Register rdx_intf  = rdx_temp;
      Register rbx_index = rbx_temp;
      __ load_heap_oop(rdx_intf, rcx_mh_vmtarget);
      __ movl(rbx_index, rcx_dmh_vmindex);
      __ movptr(rcx_recv, __ argument_address(rax_argslot, -1));
      __ null_check(rcx_recv, oopDesc::klass_offset_in_bytes());

      // get receiver klass
      Register rax_klass = rax_argslot;
      __ load_klass(rax_klass, rcx_recv);
      __ verify_oop(rax_klass);

      Register rdi_temp   = rdi;
      Register rbx_method = rbx_index;

      // get interface klass
      Label no_such_interface;
      __ verify_oop(rdx_intf);
      __ lookup_interface_method(rax_klass, rdx_intf,
                                 // note: next two args must be the same:
                                 rbx_index, rbx_method,
                                 rdi_temp,
                                 no_such_interface);

      __ verify_oop(rbx_method);
      __ jmp(rbx_method_fie);
      __ hlt();

      __ bind(no_such_interface);
      // Throw an exception.
      // For historical reasons, it will be IncompatibleClassChangeError.
      __ mov(rbx_temp, rcx_recv);  // rarg2_required might be RCX
      assert_different_registers(rarg2_required, rbx_temp);
      __ movptr(rarg2_required, Address(rdx_intf, java_mirror_offset));  // required interface
      __ mov(   rarg1_actual,   rbx_temp);                               // bad receiver
      __ movl(  rarg0_code,     (int) Bytecodes::_invokeinterface);      // who is complaining?
      __ jump(ExternalAddress(from_interpreted_entry(_raise_exception)));
    }
    break;

  case _bound_ref_mh:
  case _bound_int_mh:
  case _bound_long_mh:
  case _bound_ref_direct_mh:
  case _bound_int_direct_mh:
  case _bound_long_direct_mh:
    {
      bool direct_to_method = (ek >= _bound_ref_direct_mh);
      BasicType arg_type  = T_ILLEGAL;
      int       arg_mask  = _INSERT_NO_MASK;
      int       arg_slots = -1;
      get_ek_bound_mh_info(ek, arg_type, arg_mask, arg_slots);

      // make room for the new argument:
      __ movl(rax_argslot, rcx_bmh_vmargslot);
      __ lea(rax_argslot, __ argument_address(rax_argslot));

      insert_arg_slots(_masm, arg_slots * stack_move_unit(), arg_mask, rax_argslot, rbx_temp, rdx_temp);

      // store bound argument into the new stack slot:
      __ load_heap_oop(rbx_temp, rcx_bmh_argument);
      if (arg_type == T_OBJECT) {
        __ movptr(Address(rax_argslot, 0), rbx_temp);
      } else {
        Address prim_value_addr(rbx_temp, java_lang_boxing_object::value_offset_in_bytes(arg_type));
        const int arg_size = type2aelembytes(arg_type);
        __ load_sized_value(rdx_temp, prim_value_addr, arg_size, is_signed_subword_type(arg_type), rbx_temp);
        __ store_sized_value(Address(rax_argslot, 0), rdx_temp, arg_size, rbx_temp);
      }

      if (direct_to_method) {
        Register rbx_method = rbx_temp;
        __ load_heap_oop(rbx_method, rcx_mh_vmtarget);
        __ verify_oop(rbx_method);
        __ jmp(rbx_method_fie);
      } else {
        __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
        __ verify_oop(rcx_recv);
        __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
      }
    }
    break;

  case _adapter_retype_only:
  case _adapter_retype_raw:
    // immediately jump to the next MH layer:
    __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
    __ verify_oop(rcx_recv);
    __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
    // This is OK when all parameter types widen.
    // It is also OK when a return type narrows.
    break;

  case _adapter_check_cast:
    {
      // temps:
      Register rbx_klass = rbx_temp; // interesting AMH data

      // check a reference argument before jumping to the next layer of MH:
      __ movl(rax_argslot, rcx_amh_vmargslot);
      vmarg = __ argument_address(rax_argslot);

      // What class are we casting to?
      __ load_heap_oop(rbx_klass, rcx_amh_argument); // this is a Class object!
      __ load_heap_oop(rbx_klass, Address(rbx_klass, java_lang_Class::klass_offset_in_bytes()));

      Label done;
      __ movptr(rdx_temp, vmarg);
      __ testptr(rdx_temp, rdx_temp);
      __ jcc(Assembler::zero, done);         // no cast if null
      __ load_klass(rdx_temp, rdx_temp);

      // live at this point:
      // - rbx_klass:  klass required by the target method
      // - rdx_temp:   argument klass to test
      // - rcx_recv:   adapter method handle
      __ check_klass_subtype(rdx_temp, rbx_klass, rax_argslot, done);

      // If we get here, the type check failed!
      // Call the wrong_method_type stub, passing the failing argument type in rax.
      Register rax_mtype = rax_argslot;
      __ movl(rax_argslot, rcx_amh_vmargslot);  // reload argslot field
      __ movptr(rdx_temp, vmarg);

      assert_different_registers(rarg2_required, rdx_temp);
      __ load_heap_oop(rarg2_required, rcx_amh_argument);             // required class
      __ mov(          rarg1_actual,   rdx_temp);                     // bad object
      __ movl(         rarg0_code,     (int) Bytecodes::_checkcast);  // who is complaining?
      __ jump(ExternalAddress(from_interpreted_entry(_raise_exception)));

      __ bind(done);
      // get the new MH:
      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
    }
    break;

  case _adapter_prim_to_prim:
  case _adapter_ref_to_prim:
    // handled completely by optimized cases
    __ stop("init_AdapterMethodHandle should not issue this");
    break;

  case _adapter_opt_i2i:        // optimized subcase of adapt_prim_to_prim
//case _adapter_opt_f2i:        // optimized subcase of adapt_prim_to_prim
  case _adapter_opt_l2i:        // optimized subcase of adapt_prim_to_prim
  case _adapter_opt_unboxi:     // optimized subcase of adapt_ref_to_prim
    {
      // perform an in-place conversion to int or an int subword
      __ movl(rax_argslot, rcx_amh_vmargslot);
      vmarg = __ argument_address(rax_argslot);

      switch (ek) {
      case _adapter_opt_i2i:
        __ movl(rdx_temp, vmarg);
        break;
      case _adapter_opt_l2i:
        {
          // just delete the extra slot; on a little-endian machine we keep the first
          __ lea(rax_argslot, __ argument_address(rax_argslot, 1));
          remove_arg_slots(_masm, -stack_move_unit(),
                           rax_argslot, rbx_temp, rdx_temp);
          vmarg = Address(rax_argslot, -Interpreter::stackElementSize);
          __ movl(rdx_temp, vmarg);
        }
        break;
      case _adapter_opt_unboxi:
        {
          // Load the value up from the heap.
          __ movptr(rdx_temp, vmarg);
          int value_offset = java_lang_boxing_object::value_offset_in_bytes(T_INT);
#ifdef ASSERT
          for (int bt = T_BOOLEAN; bt < T_INT; bt++) {
            if (is_subword_type(BasicType(bt)))
              assert(value_offset == java_lang_boxing_object::value_offset_in_bytes(BasicType(bt)), "");
          }
#endif
          __ null_check(rdx_temp, value_offset);
          __ movl(rdx_temp, Address(rdx_temp, value_offset));
          // We load this as a word.  Because we are little-endian,
          // the low bits will be correct, but the high bits may need cleaning.
          // The vminfo will guide us to clean those bits.
        }
        break;
      default:
        ShouldNotReachHere();
      }

      // Do the requested conversion and store the value.
      Register rbx_vminfo = rbx_temp;
      __ movl(rbx_vminfo, rcx_amh_conversion);
      assert(CONV_VMINFO_SHIFT == 0, "preshifted");

      // get the new MH:
      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
      // (now we are done with the old MH)

      // original 32-bit vmdata word must be of this form:
      //    | MBZ:6 | signBitCount:8 | srcDstTypes:8 | conversionOp:8 |
      __ xchgptr(rcx, rbx_vminfo);                // free rcx for shifts
      __ shll(rdx_temp /*, rcx*/);
      Label zero_extend, done;
      __ testl(rcx, CONV_VMINFO_SIGN_FLAG);
      __ jccb(Assembler::zero, zero_extend);

      // this path is taken for int->byte, int->short
      __ sarl(rdx_temp /*, rcx*/);
      __ jmpb(done);

      __ bind(zero_extend);
      // this is taken for int->char
      __ shrl(rdx_temp /*, rcx*/);

      __ bind(done);
      __ movl(vmarg, rdx_temp);  // Store the value.
      __ xchgptr(rcx, rbx_vminfo);                // restore rcx_recv

      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
    }
    break;

  case _adapter_opt_i2l:        // optimized subcase of adapt_prim_to_prim
  case _adapter_opt_unboxl:     // optimized subcase of adapt_ref_to_prim
    {
      // perform an in-place int-to-long or ref-to-long conversion
      __ movl(rax_argslot, rcx_amh_vmargslot);

      // on a little-endian machine we keep the first slot and add another after
      __ lea(rax_argslot, __ argument_address(rax_argslot, 1));
      insert_arg_slots(_masm, stack_move_unit(), _INSERT_INT_MASK,
                       rax_argslot, rbx_temp, rdx_temp);
      Address vmarg1(rax_argslot, -Interpreter::stackElementSize);
      Address vmarg2 = vmarg1.plus_disp(Interpreter::stackElementSize);

      switch (ek) {
      case _adapter_opt_i2l:
        {
#ifdef _LP64
          __ movslq(rdx_temp, vmarg1);  // Load sign-extended
          __ movq(vmarg1, rdx_temp);    // Store into first slot
#else
          __ movl(rdx_temp, vmarg1);
          __ sarl(rdx_temp, BitsPerInt - 1);  // __ extend_sign()
          __ movl(vmarg2, rdx_temp); // store second word
#endif
        }
        break;
      case _adapter_opt_unboxl:
        {
          // Load the value up from the heap.
          __ movptr(rdx_temp, vmarg1);
          int value_offset = java_lang_boxing_object::value_offset_in_bytes(T_LONG);
          assert(value_offset == java_lang_boxing_object::value_offset_in_bytes(T_DOUBLE), "");
          __ null_check(rdx_temp, value_offset);
#ifdef _LP64
          __ movq(rbx_temp, Address(rdx_temp, value_offset));
          __ movq(vmarg1, rbx_temp);
#else
          __ movl(rbx_temp, Address(rdx_temp, value_offset + 0*BytesPerInt));
          __ movl(rdx_temp, Address(rdx_temp, value_offset + 1*BytesPerInt));
          __ movl(vmarg1, rbx_temp);
          __ movl(vmarg2, rdx_temp);
#endif
        }
        break;
      default:
        ShouldNotReachHere();
      }

      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
    }
    break;

  case _adapter_opt_f2d:        // optimized subcase of adapt_prim_to_prim
  case _adapter_opt_d2f:        // optimized subcase of adapt_prim_to_prim
    {
      // perform an in-place floating primitive conversion
      __ movl(rax_argslot, rcx_amh_vmargslot);
      __ lea(rax_argslot, __ argument_address(rax_argslot, 1));
      if (ek == _adapter_opt_f2d) {
        insert_arg_slots(_masm, stack_move_unit(), _INSERT_INT_MASK,
                         rax_argslot, rbx_temp, rdx_temp);
      }
      Address vmarg(rax_argslot, -Interpreter::stackElementSize);

#ifdef _LP64
      if (ek == _adapter_opt_f2d) {
        __ movflt(xmm0, vmarg);
        __ cvtss2sd(xmm0, xmm0);
        __ movdbl(vmarg, xmm0);
      } else {
        __ movdbl(xmm0, vmarg);
        __ cvtsd2ss(xmm0, xmm0);
        __ movflt(vmarg, xmm0);
      }
#else //_LP64
      if (ek == _adapter_opt_f2d) {
        __ fld_s(vmarg);        // load float to ST0
        __ fstp_s(vmarg);       // store single
      } else {
        __ fld_d(vmarg);        // load double to ST0
        __ fstp_s(vmarg);       // store single
      }
#endif //_LP64

      if (ek == _adapter_opt_d2f) {
        remove_arg_slots(_masm, -stack_move_unit(),
                         rax_argslot, rbx_temp, rdx_temp);
      }

      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
    }
    break;

  case _adapter_prim_to_ref:
    __ unimplemented(entry_name(ek)); // %%% FIXME: NYI
    break;

  case _adapter_swap_args:
  case _adapter_rot_args:
    // handled completely by optimized cases
    __ stop("init_AdapterMethodHandle should not issue this");
    break;

  case _adapter_opt_swap_1:
  case _adapter_opt_swap_2:
  case _adapter_opt_rot_1_up:
  case _adapter_opt_rot_1_down:
  case _adapter_opt_rot_2_up:
  case _adapter_opt_rot_2_down:
    {
      int swap_bytes = 0, rotate = 0;
      get_ek_adapter_opt_swap_rot_info(ek, swap_bytes, rotate);

      // 'argslot' is the position of the first argument to swap
      __ movl(rax_argslot, rcx_amh_vmargslot);
      __ lea(rax_argslot, __ argument_address(rax_argslot));

      // 'vminfo' is the second
      Register rbx_destslot = rbx_temp;
      __ movl(rbx_destslot, rcx_amh_conversion);
      assert(CONV_VMINFO_SHIFT == 0, "preshifted");
      __ andl(rbx_destslot, CONV_VMINFO_MASK);
      __ lea(rbx_destslot, __ argument_address(rbx_destslot));
      DEBUG_ONLY(verify_argslot(_masm, rbx_destslot, "swap point must fall within current frame"));

      if (!rotate) {
        for (int i = 0; i < swap_bytes; i += wordSize) {
          __ movptr(rdx_temp, Address(rax_argslot , i));
          __ push(rdx_temp);
          __ movptr(rdx_temp, Address(rbx_destslot, i));
          __ movptr(Address(rax_argslot, i), rdx_temp);
          __ pop(rdx_temp);
          __ movptr(Address(rbx_destslot, i), rdx_temp);
        }
      } else {
        // push the first chunk, which is going to get overwritten
        for (int i = swap_bytes; (i -= wordSize) >= 0; ) {
          __ movptr(rdx_temp, Address(rax_argslot, i));
          __ push(rdx_temp);
        }

        if (rotate > 0) {
          // rotate upward
          __ subptr(rax_argslot, swap_bytes);
#ifdef ASSERT
          {
            // Verify that argslot > destslot, by at least swap_bytes.
            Label L_ok;
            __ cmpptr(rax_argslot, rbx_destslot);
            __ jccb(Assembler::aboveEqual, L_ok);
            __ stop("source must be above destination (upward rotation)");
            __ bind(L_ok);
          }
#endif
          // work argslot down to destslot, copying contiguous data upwards
          // pseudo-code:
          //   rax = src_addr - swap_bytes
          //   rbx = dest_addr
          //   while (rax >= rbx) *(rax + swap_bytes) = *(rax + 0), rax--;
          Label loop;
          __ bind(loop);
          __ movptr(rdx_temp, Address(rax_argslot, 0));
          __ movptr(Address(rax_argslot, swap_bytes), rdx_temp);
          __ addptr(rax_argslot, -wordSize);
          __ cmpptr(rax_argslot, rbx_destslot);
          __ jccb(Assembler::aboveEqual, loop);
        } else {
          __ addptr(rax_argslot, swap_bytes);
#ifdef ASSERT
          {
            // Verify that argslot < destslot, by at least swap_bytes.
            Label L_ok;
            __ cmpptr(rax_argslot, rbx_destslot);
            __ jccb(Assembler::belowEqual, L_ok);
            __ stop("source must be below destination (downward rotation)");
            __ bind(L_ok);
          }
#endif
          // work argslot up to destslot, copying contiguous data downwards
          // pseudo-code:
          //   rax = src_addr + swap_bytes
          //   rbx = dest_addr
          //   while (rax <= rbx) *(rax - swap_bytes) = *(rax + 0), rax++;
          Label loop;
          __ bind(loop);
          __ movptr(rdx_temp, Address(rax_argslot, 0));
          __ movptr(Address(rax_argslot, -swap_bytes), rdx_temp);
          __ addptr(rax_argslot, wordSize);
          __ cmpptr(rax_argslot, rbx_destslot);
          __ jccb(Assembler::belowEqual, loop);
        }

        // pop the original first chunk into the destination slot, now free
        for (int i = 0; i < swap_bytes; i += wordSize) {
          __ pop(rdx_temp);
          __ movptr(Address(rbx_destslot, i), rdx_temp);
        }
      }

      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
    }
    break;

  case _adapter_dup_args:
    {
      // 'argslot' is the position of the first argument to duplicate
      __ movl(rax_argslot, rcx_amh_vmargslot);
      __ lea(rax_argslot, __ argument_address(rax_argslot));

      // 'stack_move' is negative number of words to duplicate
      Register rdx_stack_move = rdx_temp;
      __ movl2ptr(rdx_stack_move, rcx_amh_conversion);
      __ sarptr(rdx_stack_move, CONV_STACK_MOVE_SHIFT);

      int argslot0_num = 0;
      Address argslot0 = __ argument_address(RegisterOrConstant(argslot0_num));
      assert(argslot0.base() == rsp, "");
      int pre_arg_size = argslot0.disp();
      assert(pre_arg_size % wordSize == 0, "");
      assert(pre_arg_size > 0, "must include PC");

      // remember the old rsp+1 (argslot[0])
      Register rbx_oldarg = rbx_temp;
      __ lea(rbx_oldarg, argslot0);

      // move rsp down to make room for dups
      __ lea(rsp, Address(rsp, rdx_stack_move, Address::times_ptr));

      // compute the new rsp+1 (argslot[0])
      Register rdx_newarg = rdx_temp;
      __ lea(rdx_newarg, argslot0);

      __ push(rdi);             // need a temp
      // (preceding push must be done after arg addresses are taken!)

      // pull down the pre_arg_size data (PC)
      for (int i = -pre_arg_size; i < 0; i += wordSize) {
        __ movptr(rdi, Address(rbx_oldarg, i));
        __ movptr(Address(rdx_newarg, i), rdi);
      }

      // copy from rax_argslot[0...] down to new_rsp[1...]
      // pseudo-code:
      //   rbx = old_rsp+1
      //   rdx = new_rsp+1
      //   rax = argslot
      //   while (rdx < rbx) *rdx++ = *rax++
      Label loop;
      __ bind(loop);
      __ movptr(rdi, Address(rax_argslot, 0));
      __ movptr(Address(rdx_newarg, 0), rdi);
      __ addptr(rax_argslot, wordSize);
      __ addptr(rdx_newarg, wordSize);
      __ cmpptr(rdx_newarg, rbx_oldarg);
      __ jccb(Assembler::less, loop);

      __ pop(rdi);              // restore temp

      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
    }
    break;

  case _adapter_drop_args:
    {
      // 'argslot' is the position of the first argument to nuke
      __ movl(rax_argslot, rcx_amh_vmargslot);
      __ lea(rax_argslot, __ argument_address(rax_argslot));

      __ push(rdi);             // need a temp
      // (must do previous push after argslot address is taken)

      // 'stack_move' is number of words to drop
      Register rdi_stack_move = rdi;
      __ movl2ptr(rdi_stack_move, rcx_amh_conversion);
      __ sarptr(rdi_stack_move, CONV_STACK_MOVE_SHIFT);
      remove_arg_slots(_masm, rdi_stack_move,
                       rax_argslot, rbx_temp, rdx_temp);

      __ pop(rdi);              // restore temp

      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
    }
    break;

  case _adapter_collect_args:
    __ unimplemented(entry_name(ek)); // %%% FIXME: NYI
    break;

  case _adapter_spread_args:
    // handled completely by optimized cases
    __ stop("init_AdapterMethodHandle should not issue this");
    break;

  case _adapter_opt_spread_0:
  case _adapter_opt_spread_1:
  case _adapter_opt_spread_more:
    {
      // spread an array out into a group of arguments
      int length_constant = get_ek_adapter_opt_spread_info(ek);

      // find the address of the array argument
      __ movl(rax_argslot, rcx_amh_vmargslot);
      __ lea(rax_argslot, __ argument_address(rax_argslot));

      // grab some temps
      { __ push(rsi); __ push(rdi); }
      // (preceding pushes must be done after argslot address is taken!)
#define UNPUSH_RSI_RDI \
      { __ pop(rdi); __ pop(rsi); }

      // arx_argslot points both to the array and to the first output arg
      vmarg = Address(rax_argslot, 0);

      // Get the array value.
      Register  rsi_array       = rsi;
      Register  rdx_array_klass = rdx_temp;
      BasicType elem_type       = T_OBJECT;
      int       length_offset   = arrayOopDesc::length_offset_in_bytes();
      int       elem0_offset    = arrayOopDesc::base_offset_in_bytes(elem_type);
      __ movptr(rsi_array, vmarg);
      Label skip_array_check;
      if (length_constant == 0) {
        __ testptr(rsi_array, rsi_array);
        __ jcc(Assembler::zero, skip_array_check);
      }
      __ null_check(rsi_array, oopDesc::klass_offset_in_bytes());
      __ load_klass(rdx_array_klass, rsi_array);

      // Check the array type.
      Register rbx_klass = rbx_temp;
      __ load_heap_oop(rbx_klass, rcx_amh_argument); // this is a Class object!
      __ load_heap_oop(rbx_klass, Address(rbx_klass, java_lang_Class::klass_offset_in_bytes()));

      Label ok_array_klass, bad_array_klass, bad_array_length;
      __ check_klass_subtype(rdx_array_klass, rbx_klass, rdi, ok_array_klass);
      // If we get here, the type check failed!
      __ jmp(bad_array_klass);
      __ bind(ok_array_klass);

      // Check length.
      if (length_constant >= 0) {
        __ cmpl(Address(rsi_array, length_offset), length_constant);
      } else {
        Register rbx_vminfo = rbx_temp;
        __ movl(rbx_vminfo, rcx_amh_conversion);
        assert(CONV_VMINFO_SHIFT == 0, "preshifted");
        __ andl(rbx_vminfo, CONV_VMINFO_MASK);
        __ cmpl(rbx_vminfo, Address(rsi_array, length_offset));
      }
      __ jcc(Assembler::notEqual, bad_array_length);

      Register rdx_argslot_limit = rdx_temp;

      // Array length checks out.  Now insert any required stack slots.
      if (length_constant == -1) {
        // Form a pointer to the end of the affected region.
        __ lea(rdx_argslot_limit, Address(rax_argslot, Interpreter::stackElementSize));
        // 'stack_move' is negative number of words to insert
        Register rdi_stack_move = rdi;
        __ movl2ptr(rdi_stack_move, rcx_amh_conversion);
        __ sarptr(rdi_stack_move, CONV_STACK_MOVE_SHIFT);
        Register rsi_temp = rsi_array;  // spill this
        insert_arg_slots(_masm, rdi_stack_move, -1,
                         rax_argslot, rbx_temp, rsi_temp);
        // reload the array (since rsi was killed)
        __ movptr(rsi_array, vmarg);
      } else if (length_constant > 1) {
        int arg_mask = 0;
        int new_slots = (length_constant - 1);
        for (int i = 0; i < new_slots; i++) {
          arg_mask <<= 1;
          arg_mask |= _INSERT_REF_MASK;
        }
        insert_arg_slots(_masm, new_slots * stack_move_unit(), arg_mask,
                         rax_argslot, rbx_temp, rdx_temp);
      } else if (length_constant == 1) {
        // no stack resizing required
      } else if (length_constant == 0) {
        remove_arg_slots(_masm, -stack_move_unit(),
                         rax_argslot, rbx_temp, rdx_temp);
      }

      // Copy from the array to the new slots.
      // Note: Stack change code preserves integrity of rax_argslot pointer.
      // So even after slot insertions, rax_argslot still points to first argument.
      if (length_constant == -1) {
        // [rax_argslot, rdx_argslot_limit) is the area we are inserting into.
        Register rsi_source = rsi_array;
        __ lea(rsi_source, Address(rsi_array, elem0_offset));
        Label loop;
        __ bind(loop);
        __ movptr(rbx_temp, Address(rsi_source, 0));
        __ movptr(Address(rax_argslot, 0), rbx_temp);
        __ addptr(rsi_source, type2aelembytes(elem_type));
        __ addptr(rax_argslot, Interpreter::stackElementSize);
        __ cmpptr(rax_argslot, rdx_argslot_limit);
        __ jccb(Assembler::less, loop);
      } else if (length_constant == 0) {
        __ bind(skip_array_check);
        // nothing to copy
      } else {
        int elem_offset = elem0_offset;
        int slot_offset = 0;
        for (int index = 0; index < length_constant; index++) {
          __ movptr(rbx_temp, Address(rsi_array, elem_offset));
          __ movptr(Address(rax_argslot, slot_offset), rbx_temp);
          elem_offset += type2aelembytes(elem_type);
           slot_offset += Interpreter::stackElementSize;
        }
      }

      // Arguments are spread.  Move to next method handle.
      UNPUSH_RSI_RDI;
      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);

      __ bind(bad_array_klass);
      UNPUSH_RSI_RDI;
      assert(!vmarg.uses(rarg2_required), "must be different registers");
      __ movptr(rarg2_required, Address(rdx_array_klass, java_mirror_offset));  // required type
      __ movptr(rarg1_actual,   vmarg);                                         // bad array
      __ movl(  rarg0_code,     (int) Bytecodes::_aaload);                      // who is complaining?
      __ jump(ExternalAddress(from_interpreted_entry(_raise_exception)));

      __ bind(bad_array_length);
      UNPUSH_RSI_RDI;
      assert(!vmarg.uses(rarg2_required), "must be different registers");
      __ mov   (rarg2_required, rcx_recv);                       // AMH requiring a certain length
      __ movptr(rarg1_actual,   vmarg);                          // bad array
      __ movl(  rarg0_code,     (int) Bytecodes::_arraylength);  // who is complaining?
      __ jump(ExternalAddress(from_interpreted_entry(_raise_exception)));

#undef UNPUSH_RSI_RDI
    }
    break;

  case _adapter_flyby:
  case _adapter_ricochet:
    __ unimplemented(entry_name(ek)); // %%% FIXME: NYI
    break;

  default:  ShouldNotReachHere();
  }
  __ hlt();

  address me_cookie = MethodHandleEntry::start_compiled_entry(_masm, interp_entry);
  __ unimplemented(entry_name(ek)); // %%% FIXME: NYI

  init_entry(ek, MethodHandleEntry::finish_compiled_entry(_masm, me_cookie));
}
address TemplateInterpreterGenerator::generate_slow_signature_handler() {
  address entry = __ pc();

  // rbx: method
  // r14: pointer to locals
  // c_rarg3: first stack arg - wordSize
  __ mov(c_rarg3, rsp);
  // adjust rsp
  __ subptr(rsp, 4 * wordSize);
  __ call_VM(noreg,
             CAST_FROM_FN_PTR(address,
                              InterpreterRuntime::slow_signature_handler),
             rbx, r14, c_rarg3);

  // rax: result handler

  // Stack layout:
  // rsp: 3 integer or float args (if static first is unused)
  //      1 float/double identifiers
  //        return address
  //        stack args
  //        garbage
  //        expression stack bottom
  //        bcp (NULL)
  //        ...

  // Do FP first so we can use c_rarg3 as temp
  __ movl(c_rarg3, Address(rsp, 3 * wordSize)); // float/double identifiers

  for ( int i= 0; i < Argument::n_int_register_parameters_c-1; i++ ) {
    XMMRegister floatreg = as_XMMRegister(i+1);
    Label isfloatordouble, isdouble, next;

    __ testl(c_rarg3, 1 << (i*2));      // Float or Double?
    __ jcc(Assembler::notZero, isfloatordouble);

    // Do Int register here
    switch ( i ) {
      case 0:
        __ movl(rscratch1, Address(rbx, Method::access_flags_offset()));
        __ testl(rscratch1, JVM_ACC_STATIC);
        __ cmovptr(Assembler::zero, c_rarg1, Address(rsp, 0));
        break;
      case 1:
        __ movptr(c_rarg2, Address(rsp, wordSize));
        break;
      case 2:
        __ movptr(c_rarg3, Address(rsp, 2 * wordSize));
        break;
      default:
        break;
    }

    __ jmp (next);

    __ bind(isfloatordouble);
    __ testl(c_rarg3, 1 << ((i*2)+1));     // Double?
    __ jcc(Assembler::notZero, isdouble);

// Do Float Here
    __ movflt(floatreg, Address(rsp, i * wordSize));
    __ jmp(next);

// Do Double here
    __ bind(isdouble);
    __ movdbl(floatreg, Address(rsp, i * wordSize));

    __ bind(next);
  }


  // restore rsp
  __ addptr(rsp, 4 * wordSize);

  __ ret(0);

  return entry;
}
// used by compiler only; may use only caller saved registers eax, ebx, ecx.
// edx holds first int arg, esi, edi, ebp are callee-save & must be preserved.
// Leave reciever in ecx; required behavior when +OptoArgsInRegisters
// is modifed to put first oop in ecx.
//
// NOTE: If this code is used by the C1, the receiver_location is always 0.
VtableStub* VtableStubs::create_vtable_stub(int vtable_index, int receiver_location) {
  const int i486_code_length = VtableStub::pd_code_size_limit(true);
  VtableStub* s = new(i486_code_length) VtableStub(true, vtable_index, receiver_location);
  ResourceMark rm;
  MacroAssembler* masm = new MacroAssembler(new CodeBuffer(s->entry_point(), i486_code_length));

#ifndef PRODUCT
#ifdef COMPILER2
  if (CountCompiledCalls) __ incl(Address((int)OptoRuntime::nof_megamorphic_calls_addr(), relocInfo::none));
#endif
#endif

  // get receiver (need to skip return address on top of stack)
#ifdef COMPILER1
  assert(receiver_location == 0, "receiver is always in ecx - no location info needed");
#else
  if( receiver_location < SharedInfo::stack0 ) {
    assert(receiver_location == ECX_num, "receiver expected in ecx");
  } else {
    __ movl(ecx, Address(esp, SharedInfo::reg2stack(OptoReg::Name(receiver_location)) * wordSize+wordSize/*skip return address*/));
  }
#endif
  // get receiver klass
  address npe_addr = __ pc();
  __ movl(eax, Address(ecx, oopDesc::klass_offset_in_bytes()));
  // compute entry offset (in words)
  int entry_offset = instanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size();
#ifndef PRODUCT
  if (DebugVtables) { 
    Label L;
    // check offset vs vtable length
    __ cmpl(Address(eax, instanceKlass::vtable_length_offset()*wordSize), vtable_index*vtableEntry::size());
    __ jcc(Assembler::greater, L);
    __ movl(ebx, vtable_index);
    __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), ecx, ebx);
    __ bind(L);
  }
#endif // PRODUCT
  // load methodOop and target address
#ifdef COMPILER1
  __ movl(ebx, Address(eax, entry_offset*wordSize + vtableEntry::method_offset_in_bytes()));
  address ame_addr = __ pc();
  __ movl(edx, Address(ebx, methodOopDesc::from_compiled_code_entry_point_offset()));
  if (DebugVtables) {
    Label L;
    __ testl(edx, edx);
    __ jcc(Assembler::notZero, L);
    __ stop("Vtable entry is NULL");
    __ bind(L);
  }
  // eax: receiver klass
  // ebx: methodOop
  // ecx: receiver
  // edx: entry point
  __ jmp(edx);
#else
  __ movl(eax, Address(eax, entry_offset*wordSize + vtableEntry::method_offset_in_bytes()));
  address ame_addr = __ pc();
  __ movl(ebx, Address(eax, methodOopDesc::from_compiled_code_entry_point_offset()));  

  if (DebugVtables) {
    Label L;
    __ testl(ebx, ebx);
    __ jcc(Assembler::notZero, L);
    __ stop("Vtable entry is NULL");
    __ bind(L);
  } 


  // jump to target (either compiled code or c2iadapter)
  // eax: methodOop (in case we call c2iadapter)
  __ jmp(ebx);
#endif // COMPILER1

  masm->flush();
  s->set_exception_points(npe_addr, ame_addr);
  return s;
}