// Emit a slow-path call to thr_getspecific() to load the current thread's
// TLS value into 'thread' (32-bit variant).
// C signature: int thr_getspecific(thread_key_t key, void **value);
// Consider using pthread_getspecific instead.
static void slow_call_thr_specific(MacroAssembler* _masm, Register thread) {
  __ push(0);                                      // stack slot that receives the TLS value
  if (thread != rax) __ push(rax);                 // preserve rax if caller wants result elsewhere
  __ push(rcx);                                    // caller-saved, clobbered by the C call
  __ push(rdx);                                    // caller-saved, clobbered by the C call
  if (thread != rax) {
    __ lea(thread, Address(rsp, 3 * sizeof(int))); // return slot is 3 pushes deep (rdx, rcx, rax)
  } else {
    __ lea(thread, Address(rsp, 2 * sizeof(int))); // only 2 pushes deep when thread == rax
  }
  __ push(thread);                                 // arg 2: void** value
  __ push(ThreadLocalStorage::thread_index());     // arg 1: the TLS key
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, thr_getspecific)));
  __ increment(rsp, 2 * wordSize);                 // discard the two C arguments
  __ pop(rdx);                                     // restore caller-saved registers
  __ pop(rcx);
  if (thread != rax) __ pop(rax);
  __ pop(thread);                                  // load the value thr_getspecific stored
}
// Helper to insert argument slots into the stack.
// arg_slots must be a multiple of stack_move_unit() and <= 0
// (negative: the stack grows down, so inserting slots moves rsp down).
// arg_mask is accepted for interface compatibility; it is not read here.
void MethodHandles::insert_arg_slots(MacroAssembler* _masm,
                                     RegisterOrConstant arg_slots,
                                     int arg_mask,
                                     Register rax_argslot,
                                     Register rbx_temp, Register rdx_temp, Register temp3_reg) {
  assert(temp3_reg == noreg, "temp3 not required");
  assert_different_registers(rax_argslot, rbx_temp, rdx_temp,
                             (!arg_slots.is_register() ? rsp : arg_slots.as_register()));

#ifdef ASSERT
  verify_argslot(_masm, rax_argslot, "insertion point must fall within current frame");
  if (arg_slots.is_register()) {
    // Runtime check: arg_slots <= 0 and aligned to stack_move_unit().
    Label L_ok, L_bad;
    __ cmpptr(arg_slots.as_register(), (int32_t) NULL_WORD);
    __ jccb(Assembler::greater, L_bad);
    __ testl(arg_slots.as_register(), -stack_move_unit() - 1); // any low bits set => misaligned
    __ jccb(Assembler::zero, L_ok);
    __ bind(L_bad);
    __ stop("assert arg_slots <= 0 and clear low bits");
    __ bind(L_ok);
  } else {
    assert(arg_slots.as_constant() <= 0, "");
    assert(arg_slots.as_constant() % -stack_move_unit() == 0, "");
  }
#endif //ASSERT

#ifdef _LP64
  if (arg_slots.is_register()) {
    // clean high bits of stack motion register (was loaded as an int)
    __ movslq(arg_slots.as_register(), arg_slots.as_register());
  }
#endif

  // Make space on the stack for the inserted argument(s).
  // Then pull down everything shallower than rax_argslot.
  // The stacked return address gets pulled down with everything else.
  // That is, copy [rsp, argslot) downward by -size words.
  // In pseudo-code:
  //   rsp -= size;
  //   for (rdx = rsp + size; rdx < argslot; rdx++)
  //     rdx[-size] = rdx[0]
  //   argslot -= size;
  BLOCK_COMMENT("insert_arg_slots {");
  __ mov(rdx_temp, rsp);                                    // source pointer for copy
  __ lea(rsp, Address(rsp, arg_slots, Address::times_ptr)); // rsp += arg_slots (arg_slots <= 0)
  {
    Label loop;
    __ BIND(loop);
    // pull one word down each time through the loop
    __ movptr(rbx_temp, Address(rdx_temp, 0));
    __ movptr(Address(rdx_temp, arg_slots, Address::times_ptr), rbx_temp);
    __ addptr(rdx_temp, wordSize);
    __ cmpptr(rdx_temp, rax_argslot);
    __ jccb(Assembler::less, loop);
  }

  // Now move the argslot down, to point to the opened-up space.
  __ lea(rax_argslot, Address(rax_argslot, arg_slots, Address::times_ptr));
  BLOCK_COMMENT("} insert_arg_slots");
}
/*
 * Driver for the AVL-tree demo: reads integer keys with lea() until the
 * sentinel 9999 is entered, inserts each into the tree via ins_avl(),
 * then prints the tree in order.
 *
 * Fix: `void main()` is not a standard C signature; use `int main(void)`
 * and return 0 on success.
 */
int main(void)
{
    void inorden(struct nodo *raiz);     /* local prototypes, definitions elsewhere */
    int ins_avl(struct nodo **, int);
    int lea();
    struct nodo *raiz = NULL;            /* empty tree */
    int n;

    printf("lea n \n");
    n = lea();
    while (n != 9999) {                  /* 9999 is the end-of-input sentinel */
        ins_avl(&raiz, n);
        printf("lea n \n");
        n = lea();
    }
    inorden(raiz);
    return 0;
}
// Emits code that lays out a managed-to-native (m2n) frame on the stack and
// links it into the current thread's m2n frame list.
// inputs should be preserved outside if required since we do a call
// num_std_need_to_save registers will be preserved
// bytes_to_m2n_top is the distance from rsp to the top of the m2n area; each
// field below is stored at a successively lower offset.
char * m2n_gen_push_m2n(char * buf, Method_Handle method, frame_type current_frame_type,
                        bool handles, unsigned num_callee_saves,
                        unsigned num_std_need_to_save, I_32 bytes_to_m2n_top) {
    // skip callee-saves registers already saved by the caller
    bytes_to_m2n_top -= num_callee_saves * LcgEM64TContext::GR_SIZE;

    // TODO: check if it makes sense to save all callee-saves registers here
    // store rest of callee-saves registers
    for (unsigned i = num_callee_saves; i < LcgEM64TContext::MAX_GR_LOCALS; i++) {
        bytes_to_m2n_top -= LcgEM64TContext::GR_SIZE;
        buf = mov(buf, M_Base_Opnd(rsp_reg, bytes_to_m2n_top),
                  LcgEM64TContext::get_reg_from_map(LcgEM64TContext::GR_LOCALS_OFFSET + i),
                  size_64);
    }

    // init pop_regs field to null
    bytes_to_m2n_top -= LcgEM64TContext::GR_SIZE;
    buf = mov(buf, M_Base_Opnd(rsp_reg, bytes_to_m2n_top), Imm_Opnd(size_32, 0), size_64);

    // store current_frame_type
    bytes_to_m2n_top -= LcgEM64TContext::GR_SIZE;
    assert(fit32(current_frame_type));
    buf = mov(buf, M_Base_Opnd(rsp_reg, bytes_to_m2n_top),
              Imm_Opnd(size_32, current_frame_type), size_64);

    // store the method associated with the current m2n frame; a 64-bit
    // immediate must go through rax because mov-to-memory takes 32-bit imms
    bytes_to_m2n_top -= LcgEM64TContext::GR_SIZE;
    if (fit32((int64)method)) {
        buf = mov(buf, M_Base_Opnd(rsp_reg, bytes_to_m2n_top),
                  Imm_Opnd(size_32, (int64)method), size_64);
    } else {
        buf = mov(buf, rax_opnd, Imm_Opnd(size_64, (int64)method), size_64);
        // NOTE(review): unlike the sibling stores, this mov has no explicit
        // size operand — presumably the encoder defaults correctly; confirm.
        buf = mov(buf, M_Base_Opnd(rsp_reg, bytes_to_m2n_top), rax_opnd);
    }

    // store local object handles (initialized to null)
    bytes_to_m2n_top -= LcgEM64TContext::GR_SIZE;
    buf = mov(buf, M_Base_Opnd(rsp_reg, bytes_to_m2n_top), Imm_Opnd(size_64, (int64)0), size_64);

    // move pointer to the current VM_Thread structure to rax
    buf = m2n_gen_ts_to_register(buf, &rax_opnd, num_callee_saves,
                                 LcgEM64TContext::MAX_GR_LOCALS, num_std_need_to_save, 0);

    // shift rax to the last_m2n_frame field (offsetof computed by hand)
    I_32 last_m2n_frame_offset = (I_32)(int64)&((VM_thread*)0)->last_m2n_frame;
    buf = alu(buf, add_opc, rax_opnd, Imm_Opnd(size_32, last_m2n_frame_offset), size_64);

    // store pointer to pointer to last m2n frame
    bytes_to_m2n_top -= LcgEM64TContext::GR_SIZE;
    buf = mov(buf, M_Base_Opnd(rsp_reg, bytes_to_m2n_top), rax_opnd, size_64);

    // save pointer to the previous m2n frame (the current list head)
    bytes_to_m2n_top -= LcgEM64TContext::GR_SIZE;
    buf = mov(buf, r9_opnd, M_Base_Opnd(rax_reg, 0));
    buf = mov(buf, M_Base_Opnd(rsp_reg, bytes_to_m2n_top), r9_opnd, size_64);

    // update last m2n frame of the current thread to point at this new frame
    buf = lea(buf, r9_opnd, M_Base_Opnd(rsp_reg, bytes_to_m2n_top));
    buf = mov(buf, M_Base_Opnd(rax_reg, 0), r9_opnd, size_64);

    return buf;
}
// JIT-assembles a two-argument integer add function into the statically
// allocated buffer 'buf' and prints where the generated code lives
// (getCode() should equal buf).
Code() : Xbyak::CodeGenerator(sizeof(buf), buf) {
  puts("generate");
  printf("ptr=%p, %p\n", getCode(), buf);
  // make the static buffer executable before running the emitted code
  Xbyak::CodeArray::protect(buf, sizeof(buf), true);
#ifdef XBYAK32
  // cdecl: arguments live on the stack at [esp+4] and [esp+8]
  mov(eax, ptr [esp + 4]);
  add(eax, ptr [esp + 8]);
#elif defined(XBYAK64_WIN)
  lea(rax, ptr [rcx + rdx]);   // Win64 ABI: args in rcx/rdx; lea computes the sum
#else
  lea(rax, ptr [rdi + rsi]);   // System V ABI: args in rdi/rsi
#endif
  ret();
}
/*
 * Reads a directed activity graph from the console and builds both the
 * graph and its reverse (g_invertido) as adjacency lists via ins_lista().
 * Vertex sentinel is 99; adjacency sentinel is 99.
 * Outputs: *nv = number of vertices, *nact = number of activities.
 * Returns the number of vertices.
 *
 * Fix: the original assigned grafo[v].v = v BEFORE checking the sentinel,
 * so typing 99 wrote to grafo[99] (out of the initialized 1..n range).
 * Both assignment sites are now guarded.
 */
int lea_grafo(V grafo[], V g_invertido[], int *nv, int *nact)
{
    void ins_lista(V g[], int v, int ad, int act, int tiempo);
    int lea();
    int v, ad, i, n, act, tiempo;

    PR("De numero de vertices..."); SALTO;
    *nv = n = lea();
    PR("De numero de actividades..."); SALTO;
    *nact = lea();

    /* clear both adjacency tables for vertices 1..n */
    for (i = 1; i <= n; i++) {
        grafo[i].v = g_invertido[i].v = 0;
        grafo[i].cab = g_invertido[i].cab = NULL;
    }

    PR("Lea el primer vertice. 99 para terminar..."); SALTO;
    v = lea();
    if (v != 99)                 /* BUG FIX: don't write grafo[99] on sentinel */
        grafo[v].v = v;
    while (v != 99) {
        PR("Lea adjunto, actividad, tiempo al vertice");
        printf("%d ", v);
        PR(". 99 para terminar"); SALTO;
        ad = lea();
        act = lea();
        tiempo = lea();
        while (ad != 99) {
            ins_lista(grafo, v, ad, act, tiempo);
            ins_lista(g_invertido, ad, v, act, tiempo);   /* reversed edge */
            PR("Lea adjunto, actividad, tiempo al vertice ");
            printf("%d ", v);
            PR(". 99 para terminar"); SALTO;
            ad = lea();
            act = lea();
            tiempo = lea();
        }
        PR("Lea otro vertice. 99 para terminar..."); SALTO;
        v = lea();
        if (v != 99)             /* BUG FIX: same sentinel guard as above */
            grafo[v].v = v;
    }
    return (n);
}
// Emit the native-method signature handler for the given signature
// fingerprint: argument-marshalling code produced by iterate(), followed by
// loading the result-handler address for the method's return type into rax.
void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) {
  // generate code to handle arguments
  iterate(fingerprint);
  // leave the result handler's address in rax for the caller
  __ lea(rax, ExternalAddress(Interpreter::result_handler(method()->result_type())));
  __ ret(0);
  __ flush();   // make the emitted code visible/executable
}
// Slow case for an intrinsified arraycopy: fall back to a static call to the
// runtime arraycopy method, marshalling (src, src_pos, dst, dst_pos, length)
// according to the Java calling convention.
void ArrayCopyStub::emit_code(LIR_Assembler* ce) {
  //---------------slow case: call to native-----------------
  __ bind(_entry);
  // Figure out where the args should go
  // This should really convert the IntrinsicID to the Method* and signature
  // but I don't know how to do that.
  //
  VMRegPair args[5];
  BasicType signature[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT};
  SharedRuntime::java_calling_convention(signature, args, 5, true);

  // push parameters
  // (src, src_pos, dest, destPos, length)
  Register r[5];
  r[0] = src()->as_register();
  r[1] = src_pos()->as_register();
  r[2] = dst()->as_register();
  r[3] = dst_pos()->as_register();
  r[4] = length()->as_register();

  // arguments the convention assigns to stack slots are stored out;
  // register arguments are asserted to already be in the right register
  for (int i = 0; i < 5 ; i++ ) {
    VMReg r_1 = args[i].first();
    if (r_1->is_stack()) {
      int st_off = r_1->reg2stack() * wordSize;
      __ str (r[i], Address(sp, st_off));
    } else {
      assert(r[i] == args[i].first()->as_Register(), "Wrong register for arg ");
    }
  }

  ce->align_call(lir_static_call);
  ce->emit_static_call_stub();
  if (ce->compilation()->bailed_out()) {
    return; // CodeCache is full
  }
  Address resolve(SharedRuntime::get_resolve_static_call_stub(),
                  relocInfo::static_call_type);
  address call = __ trampoline_call(resolve);
  if (call == NULL) {
    ce->bailout("trampoline stub overflow");
    return;
  }
  ce->add_call_info_here(info());

#ifndef PRODUCT
  // count how often the slow path is taken (debug builds only)
  __ lea(rscratch2, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt));
  __ incrementw(Address(rscratch2));
#endif

  __ b(_continuation);
}
/*
 * Reads a weighted directed graph into an adjacency matrix.
 * All cells start at 32767 ("no edge").  For each vertex 1..n the user
 * enters adjacent vertices (sentinel 99) and the cost of each arc.
 * Returns the number of vertices read.
 */
int lea_grafo(int grafo[][MAXIMO])
{
    int lea();
    int n, fila, col, vert, ady, peso;

    PR("De numero de vertices..."); SALTO;
    n = lea();

    /* initialize every cell to "infinity" */
    for (fila = 1; fila <= n; fila++) {
        for (col = 1; col <= n; col++) {
            grafo[fila][col] = 32767;
        }
    }

    PR("Vertice ... ");
    vert = 1;
    printf("%d", vert);
    SALTO;

    while (vert <= n) {
        PR("Lea el primer adjunto al vertice ");
        printf("%d ", vert);
        PR(". 99 para terminar"); SALTO;
        ady = lea();
        while (ady != 99) {
            PR("Lea costo del arco"); SALTO;
            peso = lea();
            grafo[vert][ady] = peso;
            PR("Lea otro adjunto al vertice ");
            printf("%d ", vert);
            PR(". 99 para terminar"); SALTO;
            ady = lea();
        }
        PR("Vertice ...");
        vert++;
        printf("%d ", vert);
        SALTO;
    }
    return n;
}
/*
 * B-tree demo driver: inserts keys until the sentinel 9999, lists the
 * tree, then deletes keys until the sentinel 9999 and lists again.
 * The status flag s reports duplicate inserts / missing deletes.
 *
 * Fix: `void main()` is not a standard C signature; use `int main(void)`
 * and return 0 on success.
 */
int main(void)
{
    pagina *raiz = NULL;                 /* empty tree */
    int x, min, s;
    int lea();
    void ins_b(pagina **raiz, int x, int *s);
    void listar1_b(pagina *p, int l);
    void retira_b(pagina **raiz, int x, int *s);

    printf("Comienzo..\n");

    printf("De llave\n");
    min = lea();
    while (min != 9999) {                /* insertion phase */
        ins_b(&raiz, min, &s);
        if (s == 1)
            printf("La llave ya existe\n");
        printf("De llave\n");
        min = lea();
    }
    printf("\n");
    listar1_b(raiz, 0);
    getch();

    printf("\nRetiros\n");
    printf("De llave a retirar\n");
    x = lea();
    while (x != 9999) {                  /* deletion phase */
        retira_b(&raiz, x, &s);
        if (s == 0)
            printf("La llave no existe\n");
        printf("De llave a retirar\n");
        x = lea();
    }
    printf("\n");
    listar1_b(raiz, 0);
    getch();
    return 0;
}
/**
 * Method entry for static native methods:
 *   int java.util.zip.CRC32.update(int crc, int b)
 *
 * Emits a frameless fast path that updates the CRC via the stub table and
 * returns directly to the sender; falls back to the vanilla native entry
 * when a safepoint is pending.  Returns NULL when the intrinsic is off.
 *
 * Fix: removed the unused local `ExternalAddress state(...)` — the safepoint
 * state was re-materialized inline in the cmp32 below, so the variable was
 * dead.
 */
address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
  if (UseCRC32Intrinsics) {
    address entry = __ pc();

    // rbx,: Method*
    // r13: senderSP must preserved for slow path, set SP to it on fast path
    // c_rarg0: scratch (rdi on non-Win64, rcx on Win64)
    // c_rarg1: scratch (rsi on non-Win64, rdx on Win64)

    Label slow_path;
    // If we need a safepoint check, generate full interpreter entry.
    __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
             SafepointSynchronize::_not_synchronized);
    __ jcc(Assembler::notEqual, slow_path);

    // We don't generate local frame and don't align stack because
    // we call stub code and there is no safepoint on this path.

    // Load parameters
    const Register crc = rax;  // crc
    const Register val = c_rarg0;  // source java byte value
    const Register tbl = c_rarg1;  // scratch

    // Arguments are reversed on java expression stack
    __ movl(val, Address(rsp,   wordSize)); // byte value
    __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC

    __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr()));
    __ notl(crc); // ~crc
    __ update_byte_crc32(crc, val, tbl);
    __ notl(crc); // ~crc
    // result in rax

    // _areturn
    __ pop(rdi);                // get return address
    __ mov(rsp, r13);           // set sp to sender sp
    __ jmp(rdi);

    // generate a vanilla native entry as the slow path
    __ bind(slow_path);
    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
    return entry;
  }
  return NULL;
}
// Emit a tracing call around a method-handle adapter: saves all registers,
// pushes 5 arguments for trace_method_handle_stub, calls it, and restores.
// No-op unless -XX:+TraceMethodHandles.
void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) {
  if (!TraceMethodHandles)  return;
  BLOCK_COMMENT("trace_method_handle {");
  __ push(rax);
  __ lea(rax, Address(rsp, wordSize*6)); // entry_sp (above saved rax + pusha image)
  __ pusha();
  // arguments for trace_method_handle_stub (5 total):
  __ push(rbp);               // interpreter frame pointer
  __ push(rsi);               // saved_sp
  __ push(rax);               // entry_sp
  __ push(rcx);               // mh
  __ push(rcx);               // placeholder slot, overwritten just below
  __ movptr(Address(rsp, 0), (intptr_t) adaptername); // becomes arg 0
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub), 5);
  __ popa();
  __ pop(rax);
  BLOCK_COMMENT("} trace_method_handle");
}
// Emit a slow-path call to thr_getspecific() to load the current thread's
// TLS value into 'thread' (64-bit variant; all caller-saved registers are
// preserved around the C call and rsp is re-aligned to 16 bytes).
// C signature: int thr_getspecific(thread_key_t key, void **value);
// Consider using pthread_getspecific instead.
static void slow_call_thr_specific(MacroAssembler* _masm, Register thread) {
  if (thread != rax) {
    __ push(rax);                      // preserve rax if the result goes elsewhere
  }
  __ push(0);                          // stack slot that receives the TLS value
  __ push(rdi);
  __ push(rsi);
  __ lea(rsi, Address(rsp, 16));       // arg 2: address of the return slot (2 pushes deep)
  __ push(rdx);
  __ push(rcx);
  __ push(r8);
  __ push(r9);
  __ push(r10);
  // XXX
  // align the stack to 16 bytes for the C ABI, remembering the old rsp
  __ mov(r10, rsp);
  __ andptr(rsp, -16);
  __ push(r10);                        // old (unaligned) rsp, restored below
  __ push(r11);

  __ movl(rdi, ThreadLocalStorage::thread_index());  // arg 1: the TLS key
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, thr_getspecific)));

  __ pop(r11);
  __ pop(rsp);                         // restore the pre-alignment rsp
  __ pop(r10);
  __ pop(r9);
  __ pop(r8);
  __ pop(rcx);
  __ pop(rdx);
  __ pop(rsi);
  __ pop(rdi);
  __ pop(thread);                      // load return value written by thr_getspecific
  if (thread != rax) {
    __ pop(rax);
  }
}
/*
 * Insertion-sort demo: fills p[1..n] with random values, sorts with
 * S_INSERCION, then verifies and prints the ascending result.
 *
 * Fixes:
 *  - `calloc(n, 2)` assumed 2-byte ints AND, with the 1-based indexing
 *    below, allocated one element too few (p[n] was out of bounds).
 *    Now allocates n+1 ints so indices 1..n are valid.
 *  - implicit-int `main()` -> `int main(void)` with `return 0`.
 *  - allocation failure is now detected instead of dereferencing NULL.
 */
int main(void)
{
    int lea();
    int ant, i, n, *p;

    printf("Cuantos elementos desea Clasificar?\n");
    n = lea();
    printf("\nDe el valor de los elementos\n");
    p = (int *) calloc(n + 1, sizeof(int));   /* slots 0..n; code uses 1..n */
    if (p == NULL) {                          /* robustness: calloc can fail */
        printf("error");
        getch();
        exit(1);
    }
    for (i = 1; i <= n; i++) {
        p[i] = random(1000);
        printf("%d\n", p[i]);
    }
    S_INSERCION(p, n);
    /* verify the array is non-decreasing while printing it */
    ant = p[1];
    for (i = 1; i <= n; i++) {
        if (ant > p[i]) {
            printf("error");
            getch();
            exit(1);
        } else
            ant = p[i];
        printf("%d ", p[i]);
    }
    getch();
    return 0;
}
/*
 * Quicksort demo: fills p[0..n-1] with random values, sorts with
 * QUICK_SORT, then verifies and prints the ascending result.
 *
 * Fixes:
 *  - `calloc(n, 2)` assumed 2-byte ints; use sizeof(int) so the buffer
 *    is correctly sized on any platform.
 *  - implicit-int `main()` -> `int main(void)` with `return 0`.
 *  - allocation failure is now detected instead of dereferencing NULL.
 */
int main(void)
{
    int lea();
    int ant, i, n, *p;

    printf("Cuantos elementos desea Clasificar?\n");
    n = lea();
    printf("\nDe el valor de los elementos\n");
    p = (int *) calloc(n, sizeof(int));       /* n elements, 0-based */
    if (p == NULL) {                          /* robustness: calloc can fail */
        printf("error");
        getch();
        exit(1);
    }
    for (i = 0; i < n; i++) {
        p[i] = random(1000);
        printf("%d\n", p[i]);
    }
    QUICK_SORT(p, 0, n - 1);
    /* verify the array is non-decreasing while printing it */
    ant = p[0];
    for (i = 0; i < n; i++) {
        if (ant > p[i]) {
            printf("error");
            getch();
            exit(1);
        } else
            ant = p[i];
        printf("%d ", p[i]);
    }
    getch();
    return 0;
}
/*
 * Generated function signature: double jit(double x);
 * @note 32bit: x : [esp+4], return fp0
 *       64bit: x [rcx](win), xmm0(gcc), return xmm0
 *
 * Constructor sets up the code generator's state: the register holding the
 * incoming-variable table (32-bit) or the constant-table pointer register
 * per ABI, then emits the common prologue.
 */
Jit()
    : negConst_(0x8000000000000000ULL)   // sign-bit mask for double negation
    , constTblPos_(0)
    , regIdx_(-1)
#ifdef XBYAK32
    , varTbl_(eax)
    , tbl_(edx)
#elif defined(XBYAK64_WIN)
    , tbl_(rcx)
#else
    , tbl_(rdi)
#endif
{
#ifdef XBYAK32
    lea(varTbl_, ptr[esp+4]);            // point varTbl_ at the stacked argument(s)
#else
#ifdef XBYAK64_WIN
    // save xm6, xm7 (callee-saved under Win64); stored at rsp+8
    // NOTE(review): presumably this targets the caller's home/shadow area —
    // confirm against the call site's stack layout.
    movaps(ptr [rsp + 8], xm6);
    movaps(ptr [rsp + 8 + 16], xm7);
#endif
    movaps(xm7, xm0);                    // stash the incoming argument x
#endif
    mov(tbl_, (size_t)constTbl_);        // base address of the constant table
}
// Generate the speculative fast-path accessor for JNI GetFloatField /
// GetDoubleField (32-bit).  The field is loaded without locking; the
// safepoint counter is sampled before and after, and on any mismatch (or an
// in-progress safepoint, odd counter) control falls back to the slow JNI
// accessor.
address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) {
  const char *name;
  switch (type) {
    case T_FLOAT:  name = "jni_fast_GetFloatField";  break;
    case T_DOUBLE: name = "jni_fast_GetDoubleField"; break;
    default:       ShouldNotReachHere();
  }
  ResourceMark rm;
  BufferBlob* b = BufferBlob::create(name, BUFFER_SIZE*wordSize);
  address fast_entry = b->instructions_begin();
  CodeBuffer cbuf(fast_entry, b->instructions_size());
  MacroAssembler* masm = new MacroAssembler(&cbuf);

  Label slow_with_pop, slow;

  // stack layout: offset from rsp (in words):
  //  return pc        0
  //  jni env          1
  //  obj              2
  //  jfieldID         3

  ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr());

  __ mov32 (rcx, counter);
  __ testb (rcx, 1);                     // odd counter => safepoint in progress
  __ jcc (Assembler::notZero, slow);
  if (os::is_MP()) {
    __ mov(rax, rcx);
    __ andptr(rax, 1);                   // rax, must end up 0
    __ movptr(rdx, Address(rsp, rax, Address::times_1, 2*wordSize));
                                         // obj, notice rax, is 0.
                                         // rdx is made data dependent on rcx
                                         // so the load cannot be reordered
                                         // before the counter read.
  } else {
    __ movptr(rdx, Address(rsp, 2*wordSize)); // obj
  }
  __ movptr(rax, Address(rsp, 3*wordSize));  // jfieldID
  __ movptr(rdx, Address(rdx, 0));           // *obj (resolve the handle)
  __ shrptr(rax, 2);                         // offset encoded in the jfieldID

  assert(count < LIST_CAPACITY, "LIST_CAPACITY too small");
  speculative_load_pclist[count] = __ pc();  // record pc for fault recovery
  switch (type) {
#ifndef _LP64
    case T_FLOAT:  __ fld_s (Address(rdx, rax, Address::times_1)); break;
    case T_DOUBLE: __ fld_d (Address(rdx, rax, Address::times_1)); break;
#else
    // NOTE(review): 'robj'/'roffset' are not declared in this function (the
    // 32-bit path above uses rdx/rax) — confirm this LP64 branch actually
    // compiles in its real build context.
    case T_FLOAT:  __ movflt (xmm0, Address(robj, roffset, Address::times_1)); break;
    case T_DOUBLE: __ movdbl (xmm0, Address(robj, roffset, Address::times_1)); break;
#endif // _LP64
    default:       ShouldNotReachHere();
  }

  Address ca1;
  if (os::is_MP()) {
    // Re-read the counter through an address made data-dependent on the
    // loaded value (x ^ v ^ v == x), again to prevent load reordering.
    __ fst_s (Address(rsp, -4));
    __ lea(rdx, counter);
    __ movl (rax, Address(rsp, -4));
    // garbage hi-order bits on 64bit are harmless here
    __ xorptr(rdx, rax);
    __ xorptr(rdx, rax);
    __ cmp32(rcx, Address(rdx, 0));
                                         // rax, ^ counter_addr ^ rax, = address
                                         // ca1 is data dependent on the field access
  } else {
    __ cmp32(rcx, counter);
  }
  __ jcc (Assembler::notEqual, slow_with_pop);

#ifndef _WINDOWS
  __ ret (0);
#else
  // __stdcall calling convention: callee pops the 3 word-sized arguments
  __ ret (3*wordSize);
#endif

  __ bind (slow_with_pop);
  // invalid load. pop FPU stack.
  __ fstp_d (0);

  slowcase_entry_pclist[count++] = __ pc();

  __ bind (slow);
  address slow_case_addr;
  switch (type) {
    case T_FLOAT:  slow_case_addr = jni_GetFloatField_addr();  break;
    case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break;
    default:       ShouldNotReachHere();
  }
  // tail call into the ordinary JNI accessor
  __ jump (ExternalAddress(slow_case_addr));

  __ flush ();

#ifndef _WINDOWS
  return fast_entry;
#else
  switch (type) {
    case T_FLOAT:  jni_fast_GetFloatField_fp  = (GetFloatField_t)fast_entry; break;
    case T_DOUBLE: jni_fast_GetDoubleField_fp = (GetDoubleField_t)fast_entry;
  }
  return os::win32::fast_jni_accessor_wrapper(type);
#endif
}
// Generate the speculative fast-path accessor for JNI GetLongField (32-bit:
// the 64-bit value is read as two 32-bit halves into rax:rdx).  Same
// safepoint-counter sampling protocol as the float/double accessor.
address JNI_FastGetField::generate_fast_get_long_field() {
  const char *name = "jni_fast_GetLongField";
  ResourceMark rm;
  BufferBlob* b = BufferBlob::create(name, BUFFER_SIZE*wordSize);
  address fast_entry = b->instructions_begin();
  CodeBuffer cbuf(fast_entry, b->instructions_size());
  MacroAssembler* masm = new MacroAssembler(&cbuf);

  Label slow;

  // stack layout: offset from rsp (in words):
  //  old rsi          0
  //  return pc        1
  //  jni env          2
  //  obj              3
  //  jfieldID         4

  ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr());

  __ push (rsi);                           // need an extra scratch register
  __ mov32 (rcx, counter);
  __ testb (rcx, 1);                       // odd counter => safepoint in progress
  __ jcc (Assembler::notZero, slow);
  if (os::is_MP()) {
    __ mov(rax, rcx);
    __ andptr(rax, 1);                     // rax, must end up 0
    __ movptr(rdx, Address(rsp, rax, Address::times_1, 3*wordSize));
                                           // obj, notice rax, is 0.
                                           // rdx is data dependent on rcx.
  } else {
    __ movptr(rdx, Address(rsp, 3*wordSize)); // obj
  }
  __ movptr(rsi, Address(rsp, 4*wordSize)); // jfieldID
  __ movptr(rdx, Address(rdx, 0));          // *obj (resolve the handle)
  __ shrptr(rsi, 2);                        // offset encoded in the jfieldID

  assert(count < LIST_CAPACITY-1, "LIST_CAPACITY too small");
  speculative_load_pclist[count++] = __ pc();   // low half (or whole on LP64)
  __ movptr(rax, Address(rdx, rsi, Address::times_1));
#ifndef _LP64
  speculative_load_pclist[count] = __ pc();     // high half needs its own entry
  __ movl(rdx, Address(rdx, rsi, Address::times_1, 4));
#endif // _LP64

  if (os::is_MP()) {
    // Re-read the counter through an address data-dependent on BOTH loaded
    // halves (x ^ a ^ b ^ a ^ b == x) to prevent load reordering.
    __ lea(rsi, counter);
    __ xorptr(rsi, rdx);
    __ xorptr(rsi, rax);
    __ xorptr(rsi, rdx);
    __ xorptr(rsi, rax);
    __ cmp32(rcx, Address(rsi, 0));
                                           // ca1 is the same as ca because
                                           // rax, ^ rdx ^ counter_addr ^ rax, ^ rdx = address
                                           // ca1 is data dependent on both rax, and rdx.
  } else {
    __ cmp32(rcx, counter);
  }
  __ jcc (Assembler::notEqual, slow);

  __ pop (rsi);

#ifndef _WINDOWS
  __ ret (0);
#else
  // __stdcall calling convention: callee pops the 3 word-sized arguments
  __ ret (3*wordSize);
#endif

  // both speculative-load entries share the same slow-case pc
  slowcase_entry_pclist[count-1] = __ pc();
  slowcase_entry_pclist[count++] = __ pc();
  __ bind (slow);
  __ pop  (rsi);
  address slow_case_addr = jni_GetLongField_addr();;
  // tail call into the ordinary JNI accessor
  __ jump (ExternalAddress(slow_case_addr));

  __ flush ();

#ifndef _WINDOWS
  return fast_entry;
#else
  jni_fast_GetLongField_fp = (GetLongField_t)fast_entry;
  return os::win32::fast_jni_accessor_wrapper(T_LONG);
#endif
}
/*
 * encode - translate one abstract instruction into its binary encoding.
 *
 * Dispatches on instr.opcode to the mnemonic-specific encoder, forwarding
 * the operand type and operands untouched.  Unrecognized opcodes encode
 * as a nop so the emitter never produces garbage bytes.
 */
struct code encode(struct instruction instr)
{
    switch (instr.opcode) {
    /* two-operand ALU */
    case INSTR_ADD:    return add(instr.optype, instr.source, instr.dest);
    case INSTR_SUB:    return sub(instr.optype, instr.source, instr.dest);
    case INSTR_AND:    return and(instr.optype, instr.source, instr.dest);
    case INSTR_OR:     return or(instr.optype, instr.source, instr.dest);
    case INSTR_XOR:    return xor(instr.optype, instr.source, instr.dest);
    case INSTR_SHL:    return shl(instr.optype, instr.source, instr.dest);
    case INSTR_SHR:    return shr(instr.optype, instr.source, instr.dest);
    case INSTR_SAR:    return sar(instr.optype, instr.source, instr.dest);
    case INSTR_CMP:    return cmp(instr.optype, instr.source, instr.dest);
    case INSTR_TEST:   return test(instr.optype, instr.source, instr.dest);

    /* single-operand ALU */
    case INSTR_NOT:    return not(instr.optype, instr.source);
    case INSTR_MUL:    return mul(instr.optype, instr.source);
    case INSTR_DIV:    return encode_div(instr.optype, instr.source);

    /* data movement */
    case INSTR_MOV:    return mov(instr.optype, instr.source, instr.dest);
    case INSTR_MOVSX:  return movsx(instr.optype, instr.source, instr.dest);
    case INSTR_MOVZX:  return movzx(instr.optype, instr.source, instr.dest);
    case INSTR_MOVAPS: return movaps(instr.optype, instr.source, instr.dest);
    case INSTR_LEA:    return lea(instr.optype, instr.source, instr.dest);
    case INSTR_PUSH:   return push(instr.optype, instr.source);
    case INSTR_REP_MOVSQ:
        assert(instr.optype == OPT_NONE);
        return rep_movsq();

    /* control transfer */
    case INSTR_CALL:   return call(instr.optype, instr.source);
    case INSTR_JMP:    return jmp(instr.optype, instr.source);
    case INSTR_JA:     return jcc(instr.optype, TEST_A, instr.source);
    case INSTR_JG:     return jcc(instr.optype, TEST_G, instr.source);
    case INSTR_JZ:     return jcc(instr.optype, TEST_Z, instr.source);
    case INSTR_JAE:    return jcc(instr.optype, TEST_AE, instr.source);
    case INSTR_JGE:    return jcc(instr.optype, TEST_GE, instr.source);
    case INSTR_LEAVE:  return leave();
    case INSTR_RET:    return ret();

    /* condition materialization */
    case INSTR_SETZ:   return setcc(instr.optype, TEST_Z, instr.source);
    case INSTR_SETA:   return setcc(instr.optype, TEST_A, instr.source);
    case INSTR_SETG:   return setcc(instr.optype, TEST_G, instr.source);
    case INSTR_SETAE:  return setcc(instr.optype, TEST_AE, instr.source);
    case INSTR_SETGE:  return setcc(instr.optype, TEST_GE, instr.source);

    default:
        return nop();
    }
}
// Test fixture exercising Xbyak local labels and putL() jump tables.
// The generated function takes an index a (0..2) and returns expectTbl[a],
// dispatching through a jump table built three different ways:
//   mode 0: computed-goto into 8-byte-aligned instruction slots
//   mode 1: pointer table via putL() where labels are already defined
//   mode 2: pointer table via putL() with forward (not-yet-defined) labels
explicit Code(int mode, size_t size, void *p)
    : Xbyak::CodeGenerator(size, p)
{
    inLocalLabel();   // scope the ".xxx" labels to this generator
#ifdef XBYAK64
    const Xbyak::Reg64& a = rax;
    const Xbyak::Reg64& c = rcx;
#ifdef XBYAK64_WIN
    mov(rax, rcx);                 // Win64: index arrives in rcx
#else
    mov(rax, rdi);                 // SysV: index arrives in rdi
#endif
#else
    const Xbyak::Reg32& a = eax;
    const Xbyak::Reg32& c = ecx;
    mov(a, ptr [esp + 4]);         // 32-bit: index on the stack
#endif
    switch (mode) {
    case 0:
        // jump into the table itself: each entry is an aligned code stub
        mov(c, ".jmp_table");
        lea(c, ptr [c + a * 8]);
        jmp(c);
        align(8);
        L(".jmp_table");
        mov(a, expectTbl[0]);
        ret();
        align(8);
        mov(a, expectTbl[1]);
        ret();
        align(8);
        mov(a, expectTbl[2]);
        ret();
        break;
    case 1:
        /* the label for putL is defined when called */
        mov(c, ".jmp_table");
        jmp(ptr [c + a * (int)sizeof(size_t)]);  // indirect through pointer table
        L(".label1");
        mov(a, expectTbl[0]);
        jmp(".end");
        L(".label2");
        mov(a, expectTbl[1]);
        jmp(".end");
        L(".label3");
        mov(a, expectTbl[2]);
        jmp(".end");
        L(".end");
        ret();
        ud2();                      // trap: should be unreachable
        align(8);
        L(".jmp_table");
        putL(".label1");            // emit the label addresses as data
        putL(".label2");
        putL(".label3");
        break;
    case 2:
        /* the label for putL is not defined when called */
        jmp(".in");
        ud2();                      // trap: should be unreachable
        align(8);
        L(".jmp_table");
        putL(".label1");            // forward references, fixed up later
        putL(".label2");
        putL(".label3");
        L(".in");
        mov(c, ".jmp_table");
        jmp(ptr [c + a * (int)sizeof(size_t)]);
        L(".label1");
        mov(a, expectTbl[0]);
        jmp(".end");
        L(".label2");
        mov(a, expectTbl[1]);
        jmp(".end");
        L(".label3");
        mov(a, expectTbl[2]);
        jmp(".end");
        L(".end");
        ret();
        break;
    }
    outLocalLabel();
}
// Emit a fast exp() implementation (32-bit; argument read from the caller's
// stack slot at [rsp+128] after the 120-byte local area is reserved, result
// delivered on the x87 stack via fld_d).  Table-driven: constants come from
// _static_const_table (indexed through 'tmp'); the main path combines a
// 64-entry 2^(j/64) table with a short polynomial, and out-of-range or
// special inputs (overflow, underflow, NaN/Inf) are routed to fix-up paths.
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2,
                              XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5,
                              XMMRegister xmm6, XMMRegister xmm7, Register eax,
                              Register ecx, Register edx, Register tmp) {
  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
  Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;

  assert_different_registers(tmp, eax, ecx, edx);
  jmp(start);
  address static_const_table = (address)_static_const_table;

  bind(start);
  subl(rsp, 120);                  // reserve scratch area
  movl(Address(rsp, 64), tmp);     // save tmp; restored at the end
  lea(tmp, ExternalAddress(static_const_table));
  movdqu(xmm0, Address(rsp, 128)); // load the argument x
  unpcklpd(xmm0, xmm0);
  movdqu(xmm1, Address(tmp, 64));  // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
  movdqu(xmm6, Address(tmp, 48));  // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
  movdqu(xmm2, Address(tmp, 80));  // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
  movdqu(xmm3, Address(tmp, 96));  // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
  // range check on the exponent bits of x; out-of-range goes to the
  // special-case handler at L_2TAG_PACKET_0_0_2
  pextrw(eax, xmm0, 3);
  andl(eax, 32767);
  movl(edx, 16527);
  subl(edx, eax);
  subl(eax, 15504);
  orl(edx, eax);
  cmpl(edx, INT_MIN);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
  // main path: n = round(x / ln2 * 64), reduce, then polynomial
  mulpd(xmm1, xmm0);
  addpd(xmm1, xmm6);
  movapd(xmm7, xmm1);
  subpd(xmm1, xmm6);
  mulpd(xmm2, xmm1);
  movdqu(xmm4, Address(tmp, 128)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
  mulpd(xmm3, xmm1);
  movdqu(xmm5, Address(tmp, 144)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
  subpd(xmm0, xmm2);
  movdl(eax, xmm7);
  movl(ecx, eax);
  andl(ecx, 63);
  shll(ecx, 4);                    // table index: (n mod 64) * 16 bytes
  sarl(eax, 6);                    // scale exponent: n / 64
  movl(edx, eax);
  movdqu(xmm6, Address(tmp, 16));  // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
  pand(xmm7, xmm6);
  movdqu(xmm6, Address(tmp, 32));  // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
  paddq(xmm7, xmm6);
  psllq(xmm7, 46);
  subpd(xmm0, xmm3);
  movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160)); // 2^(j/64) table entry
  mulpd(xmm4, xmm0);
  movapd(xmm6, xmm0);
  movapd(xmm1, xmm0);
  mulpd(xmm6, xmm6);
  mulpd(xmm0, xmm6);
  addpd(xmm5, xmm4);
  mulsd(xmm0, xmm6);
  mulpd(xmm6, Address(tmp, 112)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
  addsd(xmm1, xmm2);
  unpckhpd(xmm2, xmm2);
  mulpd(xmm0, xmm5);
  addsd(xmm1, xmm0);
  por(xmm2, xmm7);                 // fold the 2^(n/64) scale into the table value
  unpckhpd(xmm0, xmm0);
  addsd(xmm0, xmm1);
  addsd(xmm0, xmm6);
  // result-range check: near over/underflow needs the x87 rescaling path
  addl(edx, 894);
  cmpl(edx, 1916);
  jcc (Assembler::above, L_2TAG_PACKET_1_0_2);
  mulsd(xmm0, xmm2);
  addsd(xmm0, xmm2);
  jmp(L_2TAG_PACKET_2_0_2);

  bind(L_2TAG_PACKET_1_0_2);
  // borderline exponent: finish the scaling in x87 extended precision with
  // round-to-zero so gradual under/overflow is handled correctly
  fnstcw(Address(rsp, 24));
  movzwl(edx, Address(rsp, 24));
  orl(edx, 768);
  movw(Address(rsp, 28), edx);
  fldcw(Address(rsp, 28));
  movl(edx, eax);
  sarl(eax, 1);
  subl(edx, eax);
  movdqu(xmm6, Address(tmp, 0));   // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL
  pandn(xmm6, xmm2);
  addl(eax, 1023);
  movdl(xmm3, eax);
  psllq(xmm3, 52);                 // build 2^(n/2) as a double
  por(xmm6, xmm3);
  addl(edx, 1023);
  movdl(xmm4, edx);
  psllq(xmm4, 52);                 // build 2^(n - n/2) as a double
  movsd(Address(rsp, 8), xmm0);
  fld_d(Address(rsp, 8));
  movsd(Address(rsp, 16), xmm6);
  fld_d(Address(rsp, 16));
  fmula(1);
  faddp(1);
  movsd(Address(rsp, 8), xmm4);
  fld_d(Address(rsp, 8));
  fmulp(1);
  fstp_d(Address(rsp, 8));
  movsd(xmm0,Address(rsp, 8));
  fldcw(Address(rsp, 24));         // restore the FPU control word
  pextrw(ecx, xmm0, 3);
  andl(ecx, 32752);
  cmpl(ecx, 32752);
  jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2); // overflowed to Inf
  cmpl(ecx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);        // underflowed to subnormal/zero
  jmp(L_2TAG_PACKET_2_0_2);
  // NOTE(review): the instructions from here to the next bind() follow an
  // unconditional jmp and no label is bound at them, so they appear
  // unreachable — possibly a missing bind (L_2TAG_PACKET_12_0_2/13_0_2 are
  // declared but never bound).  Confirm against the upstream source.
  cmpl(ecx, INT_MIN);
  jcc(Assembler::less, L_2TAG_PACKET_3_0_2);
  cmpl(ecx, -1064950997);
  jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
  jcc(Assembler::greater, L_2TAG_PACKET_4_0_2);
  movl(edx, Address(rsp, 128));
  cmpl(edx ,-17155601);
  jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
  jmp(L_2TAG_PACKET_4_0_2);

  bind(L_2TAG_PACKET_3_0_2);
  movl(edx, 14);                   // error code: overflow
  jmp(L_2TAG_PACKET_5_0_2);

  bind(L_2TAG_PACKET_4_0_2);
  movl(edx, 15);                   // error code: underflow

  bind(L_2TAG_PACKET_5_0_2);
  movsd(Address(rsp, 0), xmm0);
  movsd(xmm0, Address(rsp, 128));  // reload original argument
  fld_d(Address(rsp, 0));
  jmp(L_2TAG_PACKET_6_0_2);

  bind(L_2TAG_PACKET_7_0_2);
  // |x| >= threshold: classify overflow/underflow/NaN/Inf
  cmpl(eax, 2146435072);
  jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2);
  movl(eax, Address(rsp, 132));
  cmpl(eax, INT_MIN);
  jcc(Assembler::greaterEqual, L_2TAG_PACKET_9_0_2);
  movsd(xmm0, Address(tmp, 1208)); // 0xffffffffUL, 0x7fefffffUL
  mulsd(xmm0, xmm0);               // force overflow to +Inf with flags
  movl(edx, 14);
  jmp(L_2TAG_PACKET_5_0_2);

  bind(L_2TAG_PACKET_9_0_2);
  movsd(xmm0, Address(tmp, 1216));
  mulsd(xmm0, xmm0);               // force underflow to 0 with flags
  movl(edx, 15);
  jmp(L_2TAG_PACKET_5_0_2);

  bind(L_2TAG_PACKET_8_0_2);
  movl(edx, Address(rsp, 128));
  cmpl(eax, 2146435072);
  jcc(Assembler::above, L_2TAG_PACKET_10_0_2);  // NaN input
  cmpl(edx, 0);
  jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2);
  movl(eax, Address(rsp, 132));
  cmpl(eax, 2146435072);
  jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
  movsd(xmm0, Address(tmp, 1192)); // 0x00000000UL, 0x7ff00000UL: exp(+Inf) = +Inf
  jmp(L_2TAG_PACKET_2_0_2);

  bind(L_2TAG_PACKET_11_0_2);
  movsd(xmm0, Address(tmp, 1200)); // 0x00000000UL, 0x00000000UL: exp(-Inf) = 0
  jmp(L_2TAG_PACKET_2_0_2);

  bind(L_2TAG_PACKET_10_0_2);
  movsd(xmm0, Address(rsp, 128));
  addsd(xmm0, xmm0);               // quiet the NaN
  jmp(L_2TAG_PACKET_2_0_2);

  bind(L_2TAG_PACKET_0_0_2);
  movl(eax, Address(rsp, 132));
  andl(eax, 2147483647);
  cmpl(eax, 1083179008);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
  // tiny |x|: exp(x) ~= 1 + x
  movsd(xmm0, Address(rsp, 128));
  addsd(xmm0, Address(tmp, 1184)); // 0x00000000UL, 0x3ff00000UL
  jmp(L_2TAG_PACKET_2_0_2);

  bind(L_2TAG_PACKET_2_0_2);
  movsd(Address(rsp, 48), xmm0);
  fld_d(Address(rsp, 48));         // deliver the result on the x87 stack

  bind(L_2TAG_PACKET_6_0_2);
  movl(tmp, Address(rsp, 64));     // restore tmp
}
// Emit the per-scanline setup for the software rasterizer: aligns the span
// to a 4-pixel boundary, builds the edge-coverage test mask, computes the
// frame/z buffer address tables, and seeds the interpolated attributes
// (fog, z, texture coordinates, vertex color) from the starting vertex plus
// the per-skip deltas in m_local.d.  Register conventions on entry:
// rdx = left, rcx = pixels, r8 = top, r9 = vertex, r10 = m_test,
// r11 = &m_local.
void GSDrawScanlineCodeGenerator::Init()
{
    // int skip = left & 3;

    mov(rbx, rdx);
    and(rdx, 3);                       // rdx = skip (misalignment within a 4-pixel block)

    // left -= skip;

    sub(rbx, rdx);

    // int steps = pixels + skip - 4;

    lea(rcx, ptr[rcx + rdx - 4]);

    // GSVector4i test = m_test[skip] | m_test[7 + (steps & (steps >> 31))];

    shl(rdx, 4);                       // skip * sizeof(GSVector4i)

    vmovdqa(xmm15, ptr[rdx + r10]);

    mov(rax, rcx);
    sar(rax, 63);                      // all-ones when steps < 0, else 0
    and(rax, rcx);                     // steps & (steps >> 31)
    shl(rax, 4);

    vpor(xmm15, ptr[rax + r10 + 7 * 16]);

    // GSVector2i* fza_base = &m_local.gd->fzbr[top];

    mov(rax, (size_t)m_local.gd->fzbr);
    lea(rsi, ptr[rax + r8 * 8]);

    // GSVector2i* fza_offset = &m_local.gd->fzbc[left >> 2];

    mov(rax, (size_t)m_local.gd->fzbc);
    lea(rdi, ptr[rax + rbx * 2]);

    // NOTE(review): condition relies on && binding tighter than || — looks
    // intentional (matches the paired blocks below) but worth confirming.
    if(!m_sel.sprite && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
    {
        // edx = &m_local.d[skip]

        lea(rdx, ptr[rdx * 8 + r11 + offsetof(GSScanlineLocalData, d)]);
    }

    if(!m_sel.sprite)
    {
        if(m_sel.fwrite && m_sel.fge || m_sel.zb)
        {
            vmovaps(xmm0, ptr[r9 + offsetof(GSVertexSW, p)]); // v.p

            if(m_sel.fwrite && m_sel.fge)
            {
                // f = GSVector4i(vp).zzzzh().zzzz().add16(m_local.d[skip].f);

                vcvttps2dq(xmm9, xmm0);
                vpshufhw(xmm9, xmm9, _MM_SHUFFLE(2, 2, 2, 2));
                vpshufd(xmm9, xmm9, _MM_SHUFFLE(2, 2, 2, 2));
                vpaddw(xmm9, ptr[rdx + 16 * 6]);
            }

            if(m_sel.zb)
            {
                // z = vp.zzzz() + m_local.d[skip].z;

                vshufps(xmm8, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
                vaddps(xmm8, ptr[rdx]);
            }
        }
    }
    else
    {
        if(m_sel.ztest)
        {
            vmovdqa(xmm8, ptr[r11 + offsetof(GSScanlineLocalData, p.z)]);
        }
    }

    if(m_sel.fb)
    {
        if(m_sel.edge || m_sel.tfx != TFX_NONE)
        {
            vmovaps(xmm0, ptr[r9 + offsetof(GSVertexSW, t)]); // v.t
        }

        if(m_sel.edge)
        {
            // extract the antialiasing coverage from t.w and stash it

            vpshufhw(xmm1, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
            vpshufd(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
            vpsrlw(xmm1, 9);

            vmovdqa(ptr[r11 + offsetof(GSScanlineLocalData, temp.cov)], xmm1);
        }

        if(m_sel.tfx != TFX_NONE)
        {
            if(m_sel.fst)
            {
                // GSVector4i vti(vt);

                vcvttps2dq(xmm0, xmm0);

                // s = vti.xxxx() + m_local.d[skip].s;
                // t = vti.yyyy(); if(!sprite) t += m_local.d[skip].t;

                vpshufd(xmm10, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
                vpshufd(xmm11, xmm0, _MM_SHUFFLE(1, 1, 1, 1));

                vpaddd(xmm10, ptr[rdx + offsetof(GSScanlineLocalData::skip, s)]);

                if(!m_sel.sprite || m_sel.mmin)
                {
                    vpaddd(xmm11, ptr[rdx + offsetof(GSScanlineLocalData::skip, t)]);
                }
                else
                {
                    if(m_sel.ltf)
                    {
                        // bilinear fraction for the constant-t sprite case

                        vpshuflw(xmm6, xmm11, _MM_SHUFFLE(2, 2, 0, 0));
                        vpshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0));
                        vpsrlw(xmm6, 1);
                    }
                }
            }
            else
            {
                // s = vt.xxxx() + m_local.d[skip].s;
                // t = vt.yyyy() + m_local.d[skip].t;
                // q = vt.zzzz() + m_local.d[skip].q;

                vshufps(xmm10, xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
                vshufps(xmm11, xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
                vshufps(xmm12, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));

                vaddps(xmm10, ptr[rdx + offsetof(GSScanlineLocalData::skip, s)]);
                vaddps(xmm11, ptr[rdx + offsetof(GSScanlineLocalData::skip, t)]);
                vaddps(xmm12, ptr[rdx + offsetof(GSScanlineLocalData::skip, q)]);
            }
        }

        if(!(m_sel.tfx == TFX_DECAL && m_sel.tcc))
        {
            if(m_sel.iip)
            {
                // GSVector4i vc = GSVector4i(v.c);

                vcvttps2dq(xmm0, ptr[r9 + offsetof(GSVertexSW, c)]); // v.c

                // vc = vc.upl16(vc.zwxy());

                vpshufd(xmm1, xmm0, _MM_SHUFFLE(1, 0, 3, 2));
                vpunpcklwd(xmm0, xmm1);

                // rb = vc.xxxx().add16(m_local.d[skip].rb);
                // ga = vc.zzzz().add16(m_local.d[skip].ga);

                vpshufd(xmm13, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
                vpshufd(xmm14, xmm0, _MM_SHUFFLE(2, 2, 2, 2));

                vpaddw(xmm13, ptr[rdx + offsetof(GSScanlineLocalData::skip, rb)]);
                vpaddw(xmm14, ptr[rdx + offsetof(GSScanlineLocalData::skip, ga)]);
            }
            else
            {
                // flat shading: color is precomputed in m_local.c

                vmovdqa(xmm13, ptr[r11 + offsetof(GSScanlineLocalData, c.rb)]);
                vmovdqa(xmm14, ptr[r11 + offsetof(GSScanlineLocalData, c.ga)]);
            }
        }
    }
}
// Emit an intrinsic implementation of log(double) (libm-style table + polynomial).
// Input/output is xmm0; xmm1-xmm7, eax, ecx, edx, tmp1, tmp2 are clobbered as
// scratch. The algorithm: split the argument into exponent and reduced mantissa
// via RCPPS and a lookup table (_L_tbl), then evaluate a polynomial (_coeff)
// and add exponent * log(2) (_log2). Special operands (zero, negative, NaN/Inf,
// denormal) are detected from the exponent field and handled on side paths.
// NOTE(review): the exact special-value conventions (flag values 2/3 stored at
// [rsp+16]) follow the surrounding libm-intrinsic framework — confirm there.
void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp1, Register tmp2) {
  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
  Label L_2TAG_PACKET_8_0_2;
  Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;

  assert_different_registers(tmp1, tmp2, eax, ecx, edx);
  jmp(start);
  // Constant tables generated elsewhere in this translation unit.
  address L_tbl = (address)_L_tbl;
  address log2 = (address)_log2;
  address coeff = (address)_coeff;

  bind(start);
  subq(rsp, 24);                      // scratch frame: arg at 0, result at 8, flag at 16
  movsd(Address(rsp, 0), xmm0);       // preserve the original argument
  mov64(rax, 0x3ff0000000000000);     // 1.0 bit pattern (exponent substitution mask)
  movdq(xmm2, rax);
  mov64(rdx, 0x77f0000000000000);
  movdq(xmm3, rdx);
  movl(ecx, 32768);
  movdl(xmm4, rcx);
  mov64(tmp1, 0xffffe00000000000);    // mantissa truncation mask
  movdq(xmm5, tmp1);
  movdqu(xmm1, xmm0);
  pextrw(eax, xmm0, 3);               // eax = sign+exponent bits of the argument
  por(xmm0, xmm2);
  movl(ecx, 16352);
  psrlq(xmm0, 27);
  lea(tmp2, ExternalAddress(L_tbl));
  psrld(xmm0, 2);
  rcpps(xmm0, xmm0);                  // approximate reciprocal of the mantissa
  psllq(xmm1, 12);
  pshufd(xmm6, xmm5, 228);
  psrlq(xmm1, 12);
  subl(eax, 16);
  cmpl(eax, 32736);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); // special operand (0/neg/NaN/Inf/denormal)

  // Main path: table lookup + polynomial evaluation.
  bind(L_2TAG_PACKET_1_0_2);
  paddd(xmm0, xmm4);
  por(xmm1, xmm3);
  movdl(edx, xmm0);
  psllq(xmm0, 29);
  pand(xmm5, xmm1);
  pand(xmm0, xmm6);
  subsd(xmm1, xmm5);
  mulpd(xmm5, xmm0);
  andl(eax, 32752);
  subl(eax, ecx);
  cvtsi2sdl(xmm7, eax);               // xmm7 = unbiased exponent as double
  mulsd(xmm1, xmm0);
  movq(xmm6, ExternalAddress(log2));  // 0xfefa3800UL, 0x3fa62e42UL
  movdqu(xmm3, ExternalAddress(coeff)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
  subsd(xmm5, xmm2);
  andl(edx, 16711680);
  shrl(edx, 12);                      // table index from the mantissa high bits
  movdqu(xmm0, Address(tmp2, edx));
  movdqu(xmm4, ExternalAddress(16 + coeff)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
  addsd(xmm1, xmm5);
  movdqu(xmm2, ExternalAddress(32 + coeff)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
  mulsd(xmm6, xmm7);
  movddup(xmm5, xmm1);
  mulsd(xmm7, ExternalAddress(8 + log2)); // 0x93c76730UL, 0x3ceef357UL
  mulsd(xmm3, xmm1);
  addsd(xmm0, xmm6);
  mulpd(xmm4, xmm5);
  mulpd(xmm5, xmm5);
  movddup(xmm6, xmm0);
  addsd(xmm0, xmm1);
  addpd(xmm4, xmm2);
  mulpd(xmm3, xmm5);
  subsd(xmm6, xmm0);                  // compensated-summation correction term
  mulsd(xmm4, xmm1);
  pshufd(xmm2, xmm0, 238);
  addsd(xmm1, xmm6);
  mulsd(xmm5, xmm5);
  addsd(xmm7, xmm2);
  addpd(xmm4, xmm3);
  addsd(xmm1, xmm7);
  mulpd(xmm4, xmm5);
  addsd(xmm1, xmm4);
  pshufd(xmm5, xmm4, 238);
  addsd(xmm1, xmm5);
  addsd(xmm0, xmm1);
  jmp(B1_5);

  // Special-operand dispatch: reload the raw argument and classify it.
  bind(L_2TAG_PACKET_0_0_2);
  movq(xmm0, Address(rsp, 0));
  movq(xmm1, Address(rsp, 0));
  addl(eax, 16);
  cmpl(eax, 32768);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2);
  cmpl(eax, 16);
  jcc(Assembler::below, L_2TAG_PACKET_3_0_2);

  // NaN (and similar): return x + x to propagate/quiet it.
  bind(L_2TAG_PACKET_4_0_2);
  addsd(xmm0, xmm0);
  jmp(B1_5);

  bind(L_2TAG_PACKET_5_0_2);
  jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
  cmpl(edx, 0);
  jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
  jmp(L_2TAG_PACKET_6_0_2);

  // Tiny input: test for exact zero, otherwise scale up (denormal) and retry.
  bind(L_2TAG_PACKET_3_0_2);
  xorpd(xmm1, xmm1);
  addsd(xmm1, xmm0);
  movdl(edx, xmm1);
  psrlq(xmm1, 32);
  movdl(ecx, xmm1);
  orl(edx, ecx);
  cmpl(edx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); // x == 0 -> -Inf path
  xorpd(xmm1, xmm1);
  movl(eax, 18416);
  pinsrw(xmm1, eax, 3);
  mulsd(xmm0, xmm1);                  // normalize the denormal
  movdqu(xmm1, xmm0);
  pextrw(eax, xmm0, 3);
  por(xmm0, xmm2);
  psrlq(xmm0, 27);
  movl(ecx, 18416);
  psrld(xmm0, 2);
  rcpps(xmm0, xmm0);
  psllq(xmm1, 12);
  pshufd(xmm6, xmm5, 228);
  psrlq(xmm1, 12);
  jmp(L_2TAG_PACKET_1_0_2);           // re-enter the main path with adjusted bias

  // Large/negative exponent field: distinguish +Inf, -x, -0.
  bind(L_2TAG_PACKET_2_0_2);
  movdl(edx, xmm1);
  psrlq(xmm1, 32);
  movdl(ecx, xmm1);
  addl(ecx, ecx);
  cmpl(ecx, -2097152);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2);
  orl(edx, ecx);
  cmpl(edx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);

  // Negative argument: produce NaN and record flag 3.
  bind(L_2TAG_PACKET_6_0_2);
  xorpd(xmm1, xmm1);
  xorpd(xmm0, xmm0);
  movl(eax, 32752);
  pinsrw(xmm1, eax, 3);
  mulsd(xmm0, xmm1);
  movl(Address(rsp, 16), 3);
  jmp(L_2TAG_PACKET_8_0_2);

  // Zero argument: produce -Inf via -1/0 and record flag 2.
  bind(L_2TAG_PACKET_7_0_2);
  xorpd(xmm1, xmm1);
  xorpd(xmm0, xmm0);
  movl(eax, 49136);
  pinsrw(xmm0, eax, 3);
  divsd(xmm0, xmm1);
  movl(Address(rsp, 16), 2);

  bind(L_2TAG_PACKET_8_0_2);
  movq(Address(rsp, 8), xmm0);

  bind(B1_3);
  movq(xmm0, Address(rsp, 8));

  bind(B1_5);
  addq(rsp, 24);                      // tear down the scratch frame
}
// Generate an AArch64 vtable dispatch stub for the given vtable index:
// load the receiver's klass, index into its vtable, and tail-jump to the
// selected Method*'s compiled entry point. The PCs at which a null receiver
// or an abstract method would fault are recorded via set_exception_points()
// so the signal handler can attribute faults to this stub.
VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
  const int aarch64_code_length = VtableStub::pd_code_size_limit(true);
  VtableStub* s = new(aarch64_code_length) VtableStub(true, vtable_index);
  ResourceMark rm;
  CodeBuffer cb(s->entry_point(), aarch64_code_length);
  MacroAssembler* masm = new MacroAssembler(&cb);

#ifndef PRODUCT
  if (CountCompiledCalls) {
    // Bump the global megamorphic-call counter (diagnostics only).
    __ lea(r19, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
    __ incrementw(Address(r19));
  }
#endif

  // get receiver (need to skip return address on top of stack)
  assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0");

  // get receiver klass
  address npe_addr = __ pc();  // a null receiver faults on this load
  __ load_klass(r19, j_rarg0);

#ifndef PRODUCT
  if (DebugVtables) {
    Label L;
    // check offset vs vtable length
    __ ldrw(rscratch1, Address(r19, Klass::vtable_length_offset()));
    __ cmpw(rscratch1, vtable_index * vtableEntry::size());
    __ br(Assembler::GT, L);
    __ enter();
    __ mov(r2, vtable_index);
    __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, r2);
    __ leave();
    __ bind(L);
  }
#endif // PRODUCT

  __ lookup_virtual_method(r19, vtable_index, rmethod);

  if (DebugVtables) {
    // Verify the resolved Method* and its compiled entry are non-null.
    Label L;
    __ cbz(rmethod, L);
    __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset()));
    __ cbnz(rscratch1, L);
    __ stop("Vtable entry is NULL");
    __ bind(L);
  }

  // r0: receiver klass
  // rmethod: Method*
  // r2: receiver
  address ame_addr = __ pc();  // an abstract method faults on this load
  __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset()));
  __ br(rscratch1);

  __ flush();

  if (PrintMiscellaneous && (WizardMode || Verbose)) {
    tty->print_cr("vtable #%d at " PTR_FORMAT "[%d] left over: %d",
                  vtable_index, p2i(s->entry_point()),
                  (int)(s->code_end() - s->entry_point()),
                  (int)(s->code_end() - __ pc()));
  }
  guarantee(__ pc() <= s->code_end(), "overflowed buffer");

  s->set_exception_points(npe_addr, ame_addr);
  return s;
}
// Generate an AArch64 itable dispatch stub for the given itable index:
// load the receiver's klass, scan its itable for the target interface,
// and tail-jump to the resolved Method*'s compiled entry point. Throws
// IncompatibleClassChangeError if the receiver does not implement the
// interface. NPE/AME fault PCs are recorded via set_exception_points().
VtableStub* VtableStubs::create_itable_stub(int itable_index) {
  // Note well: pd_code_size_limit is the absolute minimum we can get
  // away with.  If you add code here, bump the code stub size
  // returned by pd_code_size_limit!
  const int code_length = VtableStub::pd_code_size_limit(false);
  VtableStub* s = new(code_length) VtableStub(false, itable_index);
  ResourceMark rm;
  CodeBuffer cb(s->entry_point(), code_length);
  MacroAssembler* masm = new MacroAssembler(&cb);

#ifndef PRODUCT
  if (CountCompiledCalls) {
    // Bump the global megamorphic-call counter (diagnostics only).
    __ lea(r10, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
    __ incrementw(Address(r10));
  }
#endif

  // Entry arguments:
  //  rscratch2: Interface
  //  j_rarg0: Receiver

  // Free registers (non-args) are r0 (interface), rmethod

  // get receiver (need to skip return address on top of stack)

  assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0");

  // get receiver klass (also an implicit null-check)
  address npe_addr = __ pc();

  // Most registers are in use; we'll use r0, rmethod, r10, r11
  __ load_klass(r10, j_rarg0);

  Label throw_icce;

  // Get Method* and entrypoint for compiler
  __ lookup_interface_method(// inputs: rec. class, interface, itable index
                             r10, rscratch2, itable_index,
                             // outputs: method, scan temp. reg
                             rmethod, r11,
                             throw_icce);

  // method (rmethod): Method*
  // j_rarg0: receiver

#ifdef ASSERT
  if (DebugVtables) {
    // Verify the resolved Method* and its compiled entry are non-null.
    Label L2;
    __ cbz(rmethod, L2);
    __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset()));
    __ cbnz(rscratch1, L2);
    __ stop("compiler entrypoint is null");
    __ bind(L2);
  }
#endif // ASSERT

  // rmethod: Method*
  // j_rarg0: receiver
  address ame_addr = __ pc();  // an abstract method faults on this load
  __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset()));
  __ br(rscratch1);

  __ bind(throw_icce);
  __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry()));

  __ flush();

  if (PrintMiscellaneous && (WizardMode || Verbose)) {
    tty->print_cr("itable #%d at " PTR_FORMAT "[%d] left over: %d",
                  itable_index, p2i(s->entry_point()),
                  (int)(s->code_end() - s->entry_point()),
                  (int)(s->code_end() - __ pc()));
  }
  guarantee(__ pc() <= s->code_end(), "overflowed buffer");

  s->set_exception_points(npe_addr, ame_addr);
  return s;
}
// Generate the x86-32 fast-path stub for jni_Get<Type>Field with an
// int-sized result (boolean, byte, char, short, int).
//
// The stub speculatively loads the field while a safepoint may be in
// progress: it reads the safepoint counter before and after the load and
// falls back to the slow (fully-checked) JNI accessor if the counter is odd
// or changed.  On MP the loads are chained through artificial data
// dependencies (rax & 1 == 0; rdx ^ rax ^ rax) to order them without
// fences.  The speculative-load PC is recorded so the fault handler can
// recover if the object moved mid-read.
//
// Fixes vs. previous revision: removed the unused local `Address ca1;`, and
// completed the two trailing switches with `break`/`default:` so
// `slow_case_addr` can never be used uninitialized and the switches match
// the style of the first one.
//
// Returns the stub's entry point (on Windows, a __stdcall wrapper around it).
address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
  const char *name;
  switch (type) {
    case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break;
    case T_BYTE:    name = "jni_fast_GetByteField";    break;
    case T_CHAR:    name = "jni_fast_GetCharField";    break;
    case T_SHORT:   name = "jni_fast_GetShortField";   break;
    case T_INT:     name = "jni_fast_GetIntField";     break;
    default:        ShouldNotReachHere();
  }
  ResourceMark rm;
  BufferBlob* b = BufferBlob::create(name, BUFFER_SIZE*wordSize);
  address fast_entry = b->instructions_begin();
  CodeBuffer cbuf(fast_entry, b->instructions_size());
  MacroAssembler* masm = new MacroAssembler(&cbuf);

  Label slow;

  // stack layout:    offset from rsp (in words):
  //  return pc        0
  //  jni env          1
  //  obj              2
  //  jfieldID         3
  ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr());
  __ mov32 (rcx, counter);
  __ testb (rcx, 1);                       // odd counter => safepoint in progress
  __ jcc (Assembler::notZero, slow);
  if (os::is_MP()) {
    __ mov(rax, rcx);
    __ andptr(rax, 1);                     // rax, must end up 0
    __ movptr(rdx, Address(rsp, rax, Address::times_1, 2*wordSize));
                                           // obj, notice rax, is 0.
                                           // rdx is data dependent on rcx.
  } else {
    __ movptr (rdx, Address(rsp, 2*wordSize));  // obj
  }
  __ movptr(rax, Address(rsp, 3*wordSize));  // jfieldID
  __ movptr(rdx, Address(rdx, 0));           // *obj
  __ shrptr (rax, 2);                        // offset (jfieldID encodes offset << 2)

  assert(count < LIST_CAPACITY, "LIST_CAPACITY too small");
  speculative_load_pclist[count] = __ pc();  // fault handler resumes slow path here
  switch (type) {
    case T_BOOLEAN: __ movzbl (rax, Address(rdx, rax, Address::times_1)); break;
    case T_BYTE:    __ movsbl (rax, Address(rdx, rax, Address::times_1)); break;
    case T_CHAR:    __ movzwl (rax, Address(rdx, rax, Address::times_1)); break;
    case T_SHORT:   __ movswl (rax, Address(rdx, rax, Address::times_1)); break;
    case T_INT:     __ movl   (rax, Address(rdx, rax, Address::times_1)); break;
    default:        ShouldNotReachHere();
  }

  if (os::is_MP()) {
    __ lea(rdx, counter);
    __ xorptr(rdx, rax);
    __ xorptr(rdx, rax);
    __ cmp32(rcx, Address(rdx, 0));
    // The re-read address equals counter_addr because
    // rax, ^ counter_addr ^ rax, = address,
    // and it is data dependent on rax, (orders the re-read after the load).
  } else {
    __ cmp32(rcx, counter);
  }
  __ jcc (Assembler::notEqual, slow);      // counter changed: retry via slow path

#ifndef _WINDOWS
  __ ret (0);
#else
  // __stdcall calling convention
  __ ret (3*wordSize);
#endif

  slowcase_entry_pclist[count++] = __ pc();
  __ bind (slow);
  address slow_case_addr;
  switch (type) {
    case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break;
    case T_BYTE:    slow_case_addr = jni_GetByteField_addr();    break;
    case T_CHAR:    slow_case_addr = jni_GetCharField_addr();    break;
    case T_SHORT:   slow_case_addr = jni_GetShortField_addr();   break;
    case T_INT:     slow_case_addr = jni_GetIntField_addr();     break;
    default:        ShouldNotReachHere();
  }
  // tail call
  __ jump (ExternalAddress(slow_case_addr));

  __ flush ();

#ifndef _WINDOWS
  return fast_entry;
#else
  switch (type) {
    case T_BOOLEAN: jni_fast_GetBooleanField_fp = (GetBooleanField_t)fast_entry; break;
    case T_BYTE:    jni_fast_GetByteField_fp    = (GetByteField_t)fast_entry;    break;
    case T_CHAR:    jni_fast_GetCharField_fp    = (GetCharField_t)fast_entry;    break;
    case T_SHORT:   jni_fast_GetShortField_fp   = (GetShortField_t)fast_entry;   break;
    case T_INT:     jni_fast_GetIntField_fp     = (GetIntField_t)fast_entry;     break;
    default:        ShouldNotReachHere();
  }
  return os::win32::fast_jni_accessor_wrapper(type);
#endif
}
// Emit (at 'stub') the common trampoline that invokes a managed function:
//   invoke(void** args, int nargs, void* func)
// It builds an ebp frame, saves callee-saved registers, aligns esp to
// MANAGED_STACK_ALIGNMENT, pushes the 'args' array onto the stack in reverse
// order (so args[0] ends up on top), and calls 'func'.  Returns the first
// byte past the emitted code.  Note: no 'ret' is emitted here — presumably
// the caller appends the epilogue; confirm at the call site.
static char* gen_invoke_common_managed_func(char* stub) {
    // Defines stack alignment on managed function enter.
    const I_32 STACK_ALIGNMENT = MANAGED_STACK_ALIGNMENT;
    const I_32 STACK_ALIGNMENT_MASK = ~(STACK_ALIGNMENT - 1);
    const char * LOOP_BEGIN = "loop_begin";
    const char * LOOP_END = "loop_end";

    // [ebp + 8] - args
    // [ebp + 12] - size
    // [ebp + 16] - func
    const I_32 STACK_ARGS_OFFSET = 8;
    const I_32 STACK_NARGS_OFFSET = 12;
    const I_32 STACK_FUNC_OFFSET = 16;
    const I_32 STACK_CALLEE_SAVED_OFFSET = -12;

    tl::MemoryPool pool;
    LilCguLabelAddresses labels(&pool, stub);

    // Initialize ebp-based stack frame.
    stub = push(stub, ebp_opnd);
    stub = mov(stub, ebp_opnd, esp_opnd);

    // Preserve callee-saved registers.
    stub = push(stub, ebx_opnd);
    stub = push(stub, esi_opnd);
    stub = push(stub, edi_opnd);

    // Load an array of arguments ('args') and its size from the stack.
    stub = mov(stub, eax_opnd, M_Base_Opnd(ebp_reg, STACK_ARGS_OFFSET));
    stub = mov(stub, ecx_opnd, M_Base_Opnd(ebp_reg, STACK_NARGS_OFFSET));

    // Align memory stack: reserve nargs*4 + 4 bytes, round esp down to the
    // alignment boundary, then compute the aligned argument area in ebx.
    stub = lea(stub, ebx_opnd, M_Index_Opnd(n_reg, ecx_reg, 4, 4));
    stub = mov(stub, esi_opnd, ebx_opnd);
    stub = neg(stub, esi_opnd);
    stub = alu(stub, add_opc, esi_opnd, esp_opnd);
    stub = alu(stub, and_opc, esi_opnd, Imm_Opnd(size_32, STACK_ALIGNMENT_MASK));
    stub = alu(stub, add_opc, ebx_opnd, esi_opnd);
    stub = mov(stub, esp_opnd, ebx_opnd);

    // Load a pointer to the last argument of 'args' array.
    stub = lea(stub, eax_opnd, M_Index_Opnd(eax_reg, ecx_reg, -4, 4));
    stub = alu(stub, sub_opc, eax_opnd, esp_opnd);  // eax = offset of last arg relative to esp
    stub = alu(stub, or_opc, ecx_opnd, ecx_opnd);   // nargs == 0? skip the copy loop
    stub = branch8(stub, Condition_Z, Imm_Opnd(size_8, 0));
    labels.add_patch_to_label(LOOP_END, stub - 1, LPT_Rel8);  // patch rel8 target later

// LOOP_BEGIN:
    // Push inputs on the stack.
    labels.define_label(LOOP_BEGIN, stub, false);
    // [esp + eax] tracks the current source slot as pushes move esp.
    stub = push(stub, M_Index_Opnd(esp_reg, eax_reg, 0, 1));
    stub = loop(stub, Imm_Opnd(size_8, 0));  // dec ecx; jnz LOOP_BEGIN
    labels.add_patch_to_label(LOOP_BEGIN, stub - 1, LPT_Rel8);
// LOOP_END:
    labels.define_label(LOOP_END, stub, false);

    // Call target function.
    stub = mov(stub, eax_opnd, M_Base_Opnd(ebp_reg, STACK_FUNC_OFFSET));
    stub = call(stub, eax_opnd);

    // Restore callee-saved registers from the stack.
    stub = lea(stub, esp_opnd, M_Base_Opnd(ebp_reg, STACK_CALLEE_SAVED_OFFSET));
    stub = pop(stub, edi_opnd);
    stub = pop(stub, esi_opnd);
    stub = pop(stub, ebx_opnd);

    // Leave current frame.
    stub = pop(stub, ebp_opnd);

    return stub;
}
// Generate the AArch64 fast-path stub for jni_Get<Type>Field (all primitive
// types).  Like the x86 variant, the field is loaded speculatively between
// two reads of the safepoint counter; artificial eor-chains create data
// dependencies that order the loads without barriers.  Falls back to the
// slow JNI accessor (called via a full frame + blr) if a safepoint is
// active or the counter changed.  rcounter_addr/rcounter/robj/roffset/result
// are register aliases declared elsewhere in this file.
address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
  const char *name;
  switch (type) {
    case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break;
    case T_BYTE:    name = "jni_fast_GetByteField";    break;
    case T_CHAR:    name = "jni_fast_GetCharField";    break;
    case T_SHORT:   name = "jni_fast_GetShortField";   break;
    case T_INT:     name = "jni_fast_GetIntField";     break;
    case T_LONG:    name = "jni_fast_GetLongField";    break;
    case T_FLOAT:   name = "jni_fast_GetFloatField";   break;
    case T_DOUBLE:  name = "jni_fast_GetDoubleField";  break;
    default:        ShouldNotReachHere();
  }
  ResourceMark rm;
  BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE);
  CodeBuffer cbuf(blob);
  MacroAssembler* masm = new MacroAssembler(&cbuf);
  address fast_entry = __ pc();

  Label slow;

  unsigned long offset;
  __ adrp(rcounter_addr, SafepointSynchronize::safepoint_counter_addr(), offset);
  Address safepoint_counter_addr(rcounter_addr, offset);
  __ ldrw(rcounter, safepoint_counter_addr);
  __ andw (rscratch1, rcounter, 1);
  __ cbnzw(rscratch1, slow);           // odd counter => safepoint in progress
  __ eor(robj, c_rarg1, rcounter);
  __ eor(robj, robj, rcounter);        // obj, since
                                       // robj ^ rcounter ^ rcounter == robj
                                       // robj is address dependent on rcounter.
  __ ldr(robj, Address(robj, 0));      // *obj
  __ lsr(roffset, c_rarg2, 2);         // offset (jfieldID encodes offset << 2)

  assert(count < LIST_CAPACITY, "LIST_CAPACITY too small");
  speculative_load_pclist[count] = __ pc();   // Used by the segfault handler
  switch (type) {
    case T_BOOLEAN: __ ldrb    (result, Address(robj, roffset)); break;
    case T_BYTE:    __ ldrsb   (result, Address(robj, roffset)); break;
    case T_CHAR:    __ ldrh    (result, Address(robj, roffset)); break;
    case T_SHORT:   __ ldrsh   (result, Address(robj, roffset)); break;
    case T_FLOAT:   __ ldrw    (result, Address(robj, roffset)); break;
    case T_INT:     __ ldrsw   (result, Address(robj, roffset)); break;
    case T_DOUBLE:
    case T_LONG:    __ ldr     (result, Address(robj, roffset)); break;
    default:        ShouldNotReachHere();
  }

  // counter_addr is address dependent on result (orders the re-read after
  // the field load without a barrier).
  __ eor(rcounter_addr, rcounter_addr, result);
  __ eor(rcounter_addr, rcounter_addr, result);
  __ ldrw(rscratch1, safepoint_counter_addr);
  __ cmpw(rcounter, rscratch1);
  __ br (Assembler::NE, slow);         // counter changed: retry via slow path

  switch (type) {
    case T_FLOAT:   __ fmovs(v0, result); break;
    case T_DOUBLE:  __ fmovd(v0, result); break;
    default:        __ mov(r0, result);   break;
  }
  __ ret(lr);

  slowcase_entry_pclist[count++] = __ pc();
  __ bind(slow);
  address slow_case_addr;
  switch (type) {
    case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break;
    case T_BYTE:    slow_case_addr = jni_GetByteField_addr();    break;
    case T_CHAR:    slow_case_addr = jni_GetCharField_addr();    break;
    case T_SHORT:   slow_case_addr = jni_GetShortField_addr();   break;
    case T_INT:     slow_case_addr = jni_GetIntField_addr();     break;
    case T_LONG:    slow_case_addr = jni_GetLongField_addr();    break;
    case T_FLOAT:   slow_case_addr = jni_GetFloatField_addr();   break;
    case T_DOUBLE:  slow_case_addr = jni_GetDoubleField_addr();  break;
    default:        ShouldNotReachHere();
  }

  {
    // Call the slow accessor through a proper frame (it may block).
    __ enter();
    __ lea(rscratch1, ExternalAddress(slow_case_addr));
    __ blr(rscratch1);
    __ maybe_isb();
    __ leave();
    __ ret(lr);
  }
  __ flush ();

  return fast_entry;
}
// Emit the per-scanline setup code (SSE2, x86-32 GPU rasterizer variant).
// On entry (register contract implied by the code below): edx = left,
// ecx = pixels; 'top' and the vertex pointer are read from the caller's
// stack frame at esp+_top / esp+_v.  Seeds the interpolants (s, t, r, g, b,
// dither) in m_local.temp and leaves edi = framebuffer pointer,
// ecx = steps, esi = texture pointer.
void GPUDrawScanlineCodeGenerator::Init()
{
	mov(eax, dword[esp + _top]);

	// uint16* fb = (uint16*)m_global.vm + (top << (10 + sel.scalex)) + left;

	mov(edi, eax);
	shl(edi, 10 + m_sel.scalex);
	add(edi, edx);
	lea(edi, ptr[edi * 2 + (size_t)m_local.gd->vm]);

	// int steps = pixels - 8;

	sub(ecx, 8);

	if(m_sel.dtd)
	{
		// dither = GSVector4i::load<false>(&m_dither[top & 3][left & 3]);
		// eax = (top & 3) * 32, edx = (left & 3) * 2 — row/column byte offsets.

		and(eax, 3);
		shl(eax, 5);
		and(edx, 3);
		shl(edx, 1);
		movdqu(xmm0, ptr[eax + edx + (size_t)m_dither]);
		movdqa(ptr[&m_local.temp.dither], xmm0);
	}

	mov(edx, dword[esp + _v]);

	if(m_sel.tme)
	{
		mov(esi, dword[&m_local.gd->tex]);

		// GSVector4i vt = GSVector4i(v.t).xxzzl();

		cvttps2dq(xmm4, ptr[edx + offsetof(GSVertexSW, t)]);
		pshuflw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 0, 0));

		// s = vt.xxxx().add16(m_local.d.s);
		// t = vt.yyyy().add16(m_local.d.t);

		pshufd(xmm2, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
		pshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1));

		paddw(xmm2, ptr[&m_local.d.s]);

		if(!m_sel.sprite)
		{
			paddw(xmm3, ptr[&m_local.d.t]);
		}
		else
		{
			// Sprite + bilinear: precompute the constant v fraction.
			if(m_sel.ltf)
			{
				movdqa(xmm0, xmm3);
				psllw(xmm0, 8);
				psrlw(xmm0, 1);
				movdqa(ptr[&m_local.temp.vf], xmm0);
			}
		}

		movdqa(ptr[&m_local.temp.s], xmm2);
		movdqa(ptr[&m_local.temp.t], xmm3);
	}

	if(m_sel.tfx != 3) // != decal
	{
		// GSVector4i vc = GSVector4i(v.c).xxzzlh();

		cvttps2dq(xmm6, ptr[edx + offsetof(GSVertexSW, c)]);
		pshuflw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0));
		pshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0));

		// r = vc.xxxx();
		// g = vc.yyyy();
		// b = vc.zzzz();

		pshufd(xmm4, xmm6, _MM_SHUFFLE(0, 0, 0, 0));
		pshufd(xmm5, xmm6, _MM_SHUFFLE(1, 1, 1, 1));
		pshufd(xmm6, xmm6, _MM_SHUFFLE(2, 2, 2, 2));

		if(m_sel.iip)
		{
			// Gouraud: add the per-pixel-step deltas.
			// r = r.add16(m_local.d.r);
			// g = g.add16(m_local.d.g);
			// b = b.add16(m_local.d.b);

			paddw(xmm4, ptr[&m_local.d.r]);
			paddw(xmm5, ptr[&m_local.d.g]);
			paddw(xmm6, ptr[&m_local.d.b]);
		}

		movdqa(ptr[&m_local.temp.r], xmm4);
		movdqa(ptr[&m_local.temp.g], xmm5);
		movdqa(ptr[&m_local.temp.b], xmm6);
	}
}
// Generate a dummy vtable (one per vtbl_list entry) of generated stubs for
// use in the shared archive.  Each stub loads an (i, j) table/method
// identifier into rax and jumps to a common tail that swaps in the real
// vtable pointer from vtbl_list and tail-calls the real virtual method.
// The dummy vtable pointers are written into the metadata area (*md_top);
// the stub code is emitted into the code area (*mc_top); both cursors are
// advanced past what was written.
void CompactingPermGenGen::generate_vtable_methods(void** vtbl_list,
                                                   void** vtable,
                                                   char** md_top,
                                                   char* md_end,
                                                   char** mc_top,
                                                   char* mc_end) {
  // Reserve space in the metadata area for the dummy vtable itself.
  intptr_t vtable_bytes = (num_virtuals * vtbl_list_size) * sizeof(void*);
  *(intptr_t *)(*md_top) = vtable_bytes;
  *md_top += sizeof(intptr_t);
  void** dummy_vtable = (void**)*md_top;
  *vtable = dummy_vtable;
  *md_top += vtable_bytes;

  // Get ready to generate dummy methods.

  CodeBuffer cb((unsigned char*)*mc_top, mc_end - *mc_top);
  MacroAssembler* masm = new MacroAssembler(&cb);

  Label common_code;
  for (int i = 0; i < vtbl_list_size; ++i) {
    for (int j = 0; j < num_virtuals; ++j) {
      dummy_vtable[num_virtuals * i + j] = (void*)masm->pc();

      // Load eax with a value indicating vtable/offset pair.
      // -- bits[ 7..0]  (8 bits) which virtual method in table?
      // -- bits[12..8]  (5 bits) which virtual method table?
      // -- must fit in 13-bit instruction immediate field.
      __ movl(rax, (i << 8) + j);
      __ jmp(common_code);
    }
  }

  __ bind(common_code);

  // Expecting to be called with "thiscall" convections -- the arguments
  // are on the stack and the "this" pointer is in c_rarg0. In addition, rax
  // was set (above) to the offset of the method in the table.

  __ push(c_rarg1);                       // save & free register
  __ push(c_rarg0);                       // save "this"
  __ mov(c_rarg0, rax);
  __ shrptr(c_rarg0, 8);                  // isolate vtable identifier.
  __ shlptr(c_rarg0, LogBytesPerWord);
  __ lea(c_rarg1, ExternalAddress((address)vtbl_list)); // ptr to correct vtable list.
  __ addptr(c_rarg1, c_rarg0);            // ptr to list entry.
  __ movptr(c_rarg1, Address(c_rarg1, 0)); // get correct vtable address.
  __ pop(c_rarg0);                        // restore "this"
  __ movptr(Address(c_rarg0, 0), c_rarg1); // update vtable pointer.

  __ andptr(rax, 0x00ff);                 // isolate vtable method index
  __ shlptr(rax, LogBytesPerWord);
  __ addptr(rax, c_rarg1);                // address of real method pointer.
  __ pop(c_rarg1);                        // restore register.
  __ movptr(rax, Address(rax, 0));        // get real method pointer.
  __ jmp(rax);                            // jump to the real method.

  __ flush();

  *mc_top = (char*)__ pc();
}