int main(){ printf("srl(0xfffffff0,8):%x\n",srl(0xfffffff0,8)); printf("srl(0x0ffffff0,8):%x\n",srl(0x0ffffff0,8)); printf("sra(0xfffffff0,8):%x\n",sra(0xfffffff0,8)); printf("sra(0x0ffffff0,8):%x\n",sra(0x0ffffff0,8)); return 0; }
int main(int argc, char *argv[]) { int i, v, s; for (i = 1; i < argc; i++) { v = atoi(argv[i]); s = 0; printf("0x%x >> %d: Logical 0x%x, Arithmetic 0x%x\n", v, s, srl(v,s), sra(v,s)); s = 2; printf("0x%x >> %d: Logical 0x%x, Arithmetic 0x%x\n", v, s, srl(v,s), sra(v,s)); s = 31; printf("0x%x >> %d: Logical 0x%x, Arithmetic 0x%x\n", v, s, srl(v,s), sra(v,s)); } return 0; }
void MacroAssembler::read_ccr_trap(Register ccr_save) { // Execute a trap to get the PSR, mask and shift // to get the condition codes. get_psr_trap(); nop(); set(PSR_ICC, ccr_save); and3(O0, ccr_save, ccr_save); srl(ccr_save, PSR_ICC_SHIFT, ccr_save); }
__m128 fastlog_ps(__m128 x) { typedef SSEVector4f V4f; typedef SSEVector4i V4i; // Constants const V4f min_normal(constants::min_norm_pos.ps); const V4f inv_mantissa_mask(constants::inv_mant_mask.ps); const V4f const_1(constants::ps_1.ps); const V4i const_127(constants::pi32_0x7f.pi); const V4f log_p0(constants::am_log_p0.ps); const V4f log_p1(constants::am_log_p1.ps); const V4f log_p2(constants::am_log_p2.ps); const V4f log_q0(constants::am_log_q0.ps); const V4f log_q1(constants::am_log_q1.ps); const V4f log_q2(constants::am_log_q2.ps); const V4f log_c0(constants::am_log_c0.ps); // Kill invalid values (ignoring NaN and Inf) const V4f x0 = max(x, min_normal); // Kill the exponent and combine with the exponent of 1.0f to get the // actual embedded mantissa as a valid floating point value: // a value in the range [1.0, 2.0) const V4f mantissa = (x0 & inv_mantissa_mask) | const_1; const V4f v_min1 = mantissa - const_1; const V4f v_plus1 = mantissa + const_1; // Extract the original exponent and undo the bias const V4i biasedExponent = srl(castAsInt(x0), 23); const V4f origExponent = toFloat(biasedExponent - const_127); V4f vFrac = v_min1 * rcp(v_plus1); // Is it worth it to use rcp_nr? vFrac += vFrac; const V4f vFracSqr = vFrac * vFrac; // Evaluate the polynomial const V4f polyP = ((((log_p0 * vFracSqr) + log_p1) * vFracSqr) + log_p2) * vFracSqr; const V4f polyQ = (((log_q0 * vFracSqr) + log_q1) * vFracSqr) + log_q2; const V4f poly = polyP * rcp(polyQ); // Use rcp_nr? const V4f logApprox = poly * vFrac; // Scale by log(2) to get the natural logarithm of the exponent part const V4f logExpPart = origExponent * log_c0; // Combine the different parts const V4f result = logApprox + vFrac + logExpPart; return result; }
// Inserting a node void bstree::insert(int x,nodeptr &p) { if (p == NULL) { p = new node; p->element = x; p->left=NULL; p->right = NULL; p->height=0; if (p==NULL) cout<<"\nOut of Space"; } else { if (x<p->element) { insert(x,p->left); if ((bsheight(p->left) - bsheight(p->right))==2) { if (x < p->left->element) p=srl(p); else p = drl(p); } } else if (x>p->element) { insert(x,p->right); if ((bsheight(p->right) - bsheight(p->left))==2) { if (x > p->right->element) p=srr(p); else p = drr(p); } } else cout<<"\nElement Exists"; } int m,n,d; m=bsheight(p->left); n=bsheight(p->right); d=max(m,n); p->height = d + 1; }
// Inserting a node void IntervallTree_bed::insert(long start, long stop, Leaf *&p) { if (p == NULL) { p = new Leaf(start, stop); if (p == NULL) { std::cout << "Out of Space\n" << std::endl; } } else { long score = p->overlap(start, stop); if (score > 0) { insert(start, stop, p->left); if ((bsheight(p->left) - bsheight(p->right)) == 2) { score = p->left->overlap(start, stop); if (score > 0) { p = srl(p); } else { p = drl(p); } } } else if (score < 0) { insert(start, stop, p->right); if ((bsheight(p->right) - bsheight(p->left)) == 2) { score = p->right->overlap(start, stop); if (score < 0) { p = srr(p); } else { p = drr(p); } } } else { //overlaps! std::cerr << "Two regions overlap and are thus ignored:" << std::endl; } } int m, n, d; m = bsheight(p->left); n = bsheight(p->right); d = max(m, n); p->set_height(d + 1); }
void show_srl(unsigned x, int k) { printf("srl(0x%08X, %d) = 0x%08X\n", x, k, srl(x, k)); }
nodeptr bstree::drr(nodeptr &p1) { p1->right = srl(p1->right); return srr(p1); }
nodeptr bstree:: drl(nodeptr &p1) { p1->left=srr(p1->left); return srl(p1); }
// LP64 passes floating point arguments in F1, F3, F5, etc. instead of // O0, O1, O2 etc.. // Doubles are passed in D0, D2, D4 // We store the signature of the first 16 arguments in the first argument // slot because it will be overwritten prior to calling the native // function, with the pointer to the JNIEnv. // If LP64 there can be up to 16 floating point arguments in registers // or 6 integer registers. address AbstractInterpreterGenerator::generate_slow_signature_handler() { enum { non_float = 0, float_sig = 1, double_sig = 2, sig_mask = 3 }; address entry = __ pc(); Argument argv(0, true); // We are in the jni transition frame. Save the last_java_frame corresponding to the // outer interpreter frame // __ set_last_Java_frame(FP, noreg); // make sure the interpreter frame we've pushed has a valid return pc __ mov(O7, I7); __ mov(Lmethod, G3_scratch); __ mov(Llocals, G4_scratch); __ save_frame(0); __ mov(G2_thread, L7_thread_cache); __ add(argv.address_in_frame(), O3); __ mov(G2_thread, O0); __ mov(G3_scratch, O1); __ call(CAST_FROM_FN_PTR(address, InterpreterRuntime::slow_signature_handler), relocInfo::runtime_call_type); __ delayed()->mov(G4_scratch, O2); __ mov(L7_thread_cache, G2_thread); __ reset_last_Java_frame(); // load the register arguments (the C code packed them as varargs) Address Sig = argv.address_in_frame(); // Argument 0 holds the signature __ ld_ptr( Sig, G3_scratch ); // Get register argument signature word into G3_scratch __ mov( G3_scratch, G4_scratch); __ srl( G4_scratch, 2, G4_scratch); // Skip Arg 0 Label done; for (Argument ldarg = argv.successor(); ldarg.is_float_register(); ldarg = ldarg.successor()) { Label NonFloatArg; Label LoadFloatArg; Label LoadDoubleArg; Label NextArg; Address a = ldarg.address_in_frame(); __ andcc(G4_scratch, sig_mask, G3_scratch); __ br(Assembler::zero, false, Assembler::pt, NonFloatArg); __ delayed()->nop(); __ cmp(G3_scratch, float_sig ); __ br(Assembler::equal, false, Assembler::pt, LoadFloatArg); __ delayed()->nop(); __ cmp(G3_scratch, double_sig ); __ br(Assembler::equal, false, Assembler::pt, LoadDoubleArg); __ delayed()->nop(); __ bind(NonFloatArg); // There are only 6 integer register arguments! if ( ldarg.is_register() ) __ ld_ptr(ldarg.address_in_frame(), ldarg.as_register()); else { // Optimization, see if there are any more args and get out prior to checking // all 16 float registers. My guess is that this is rare. // If is_register is false, then we are done the first six integer args. __ br_null_short(G4_scratch, Assembler::pt, done); } __ ba(NextArg); __ delayed()->srl( G4_scratch, 2, G4_scratch ); __ bind(LoadFloatArg); __ ldf( FloatRegisterImpl::S, a, ldarg.as_float_register(), 4); __ ba(NextArg); __ delayed()->srl( G4_scratch, 2, G4_scratch ); __ bind(LoadDoubleArg); __ ldf( FloatRegisterImpl::D, a, ldarg.as_double_register() ); __ ba(NextArg); __ delayed()->srl( G4_scratch, 2, G4_scratch ); __ bind(NextArg); } __ bind(done); __ ret(); __ delayed()-> restore(O0, 0, Lscratch); // caller's Lscratch gets the result handler return entry; }
OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { OopMapSet* oop_maps = NULL; // for better readability const bool must_gc_arguments = true; const bool dont_gc_arguments = false; // stub code & info for the different stubs switch (id) { case forward_exception_id: { oop_maps = generate_handle_exception(id, sasm); } break; case new_instance_id: case fast_new_instance_id: case fast_new_instance_init_check_id: { Register G5_klass = G5; // Incoming Register O0_obj = O0; // Outgoing if (id == new_instance_id) { __ set_info("new_instance", dont_gc_arguments); } else if (id == fast_new_instance_id) { __ set_info("fast new_instance", dont_gc_arguments); } else { assert(id == fast_new_instance_init_check_id, "bad StubID"); __ set_info("fast new_instance init check", dont_gc_arguments); } if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) && UseTLAB && FastTLABRefill) { Label slow_path; Register G1_obj_size = G1; Register G3_t1 = G3; Register G4_t2 = G4; assert_different_registers(G5_klass, G1_obj_size, G3_t1, G4_t2); // Push a frame since we may do dtrace notification for the // allocation which requires calling out and we don't want // to stomp the real return address. __ save_frame(0); if (id == fast_new_instance_init_check_id) { // make sure the klass is initialized __ ldub(G5_klass, in_bytes(InstanceKlass::init_state_offset()), G3_t1); __ cmp_and_br_short(G3_t1, InstanceKlass::fully_initialized, Assembler::notEqual, Assembler::pn, slow_path); } #ifdef ASSERT // assert object can be fast path allocated { Label ok, not_ok; __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size); // make sure it's an instance (LH > 0) __ cmp_and_br_short(G1_obj_size, 0, Assembler::lessEqual, Assembler::pn, not_ok); __ btst(Klass::_lh_instance_slow_path_bit, G1_obj_size); __ br(Assembler::zero, false, Assembler::pn, ok); __ delayed()->nop(); __ bind(not_ok); __ stop("assert(can be fast path allocated)"); __ should_not_reach_here(); __ bind(ok); } #endif // ASSERT // if we got here then the TLAB allocation failed, so try // refilling the TLAB or allocating directly from eden. Label retry_tlab, try_eden; __ tlab_refill(retry_tlab, try_eden, slow_path); // preserves G5_klass __ bind(retry_tlab); // get the instance size __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size); __ tlab_allocate(O0_obj, G1_obj_size, 0, G3_t1, slow_path); __ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2); __ verify_oop(O0_obj); __ mov(O0, I0); __ ret(); __ delayed()->restore(); __ bind(try_eden); // get the instance size __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size); __ eden_allocate(O0_obj, G1_obj_size, 0, G3_t1, G4_t2, slow_path); __ incr_allocated_bytes(G1_obj_size, G3_t1, G4_t2); __ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2); __ verify_oop(O0_obj); __ mov(O0, I0); __ ret(); __ delayed()->restore(); __ bind(slow_path); // pop this frame so generate_stub_call can push it's own __ restore(); } oop_maps = generate_stub_call(sasm, I0, CAST_FROM_FN_PTR(address, new_instance), G5_klass); // I0->O0: new instance } break; case counter_overflow_id: // G4 contains bci, G5 contains method oop_maps = generate_stub_call(sasm, noreg, CAST_FROM_FN_PTR(address, counter_overflow), G4, G5); break; case new_type_array_id: case new_object_array_id: { Register G5_klass = G5; // Incoming Register G4_length = G4; // Incoming Register O0_obj = O0; // Outgoing Address klass_lh(G5_klass, Klass::layout_helper_offset()); assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); assert(Klass::_lh_header_size_mask == 0xFF, "bytewise"); // Use this offset to pick out an individual byte of the layout_helper: const int klass_lh_header_size_offset = ((BytesPerInt - 1) // 3 - 2 selects byte {0,1,0,0} - Klass::_lh_header_size_shift / BitsPerByte); if (id == new_type_array_id) { __ set_info("new_type_array", dont_gc_arguments); } else { __ set_info("new_object_array", dont_gc_arguments); } #ifdef ASSERT // assert object type is really an array of the proper kind { Label ok; Register G3_t1 = G3; __ ld(klass_lh, G3_t1); __ sra(G3_t1, Klass::_lh_array_tag_shift, G3_t1); int tag = ((id == new_type_array_id) ? Klass::_lh_array_tag_type_value : Klass::_lh_array_tag_obj_value); __ cmp_and_brx_short(G3_t1, tag, Assembler::equal, Assembler::pt, ok); __ stop("assert(is an array klass)"); __ should_not_reach_here(); __ bind(ok); } #endif // ASSERT if (UseTLAB && FastTLABRefill) { Label slow_path; Register G1_arr_size = G1; Register G3_t1 = G3; Register O1_t2 = O1; assert_different_registers(G5_klass, G4_length, G1_arr_size, G3_t1, O1_t2); // check that array length is small enough for fast path __ set(C1_MacroAssembler::max_array_allocation_length, G3_t1); __ cmp_and_br_short(G4_length, G3_t1, Assembler::greaterUnsigned, Assembler::pn, slow_path); // if we got here then the TLAB allocation failed, so try // refilling the TLAB or allocating directly from eden. Label retry_tlab, try_eden; __ tlab_refill(retry_tlab, try_eden, slow_path); // preserves G4_length and G5_klass __ bind(retry_tlab); // get the allocation size: (length << (layout_helper & 0x1F)) + header_size __ ld(klass_lh, G3_t1); __ sll(G4_length, G3_t1, G1_arr_size); __ srl(G3_t1, Klass::_lh_header_size_shift, G3_t1); __ and3(G3_t1, Klass::_lh_header_size_mask, G3_t1); __ add(G1_arr_size, G3_t1, G1_arr_size); __ add(G1_arr_size, MinObjAlignmentInBytesMask, G1_arr_size); // align up __ and3(G1_arr_size, ~MinObjAlignmentInBytesMask, G1_arr_size); __ tlab_allocate(O0_obj, G1_arr_size, 0, G3_t1, slow_path); // preserves G1_arr_size __ initialize_header(O0_obj, G5_klass, G4_length, G3_t1, O1_t2); __ ldub(klass_lh, G3_t1, klass_lh_header_size_offset); __ sub(G1_arr_size, G3_t1, O1_t2); // body length __ add(O0_obj, G3_t1, G3_t1); // body start __ initialize_body(G3_t1, O1_t2); __ verify_oop(O0_obj); __ retl(); __ delayed()->nop(); __ bind(try_eden); // get the allocation size: (length << (layout_helper & 0x1F)) + header_size __ ld(klass_lh, G3_t1); __ sll(G4_length, G3_t1, G1_arr_size); __ srl(G3_t1, Klass::_lh_header_size_shift, G3_t1); __ and3(G3_t1, Klass::_lh_header_size_mask, G3_t1); __ add(G1_arr_size, G3_t1, G1_arr_size); __ add(G1_arr_size, MinObjAlignmentInBytesMask, G1_arr_size); __ and3(G1_arr_size, ~MinObjAlignmentInBytesMask, G1_arr_size); __ eden_allocate(O0_obj, G1_arr_size, 0, G3_t1, O1_t2, slow_path); // preserves G1_arr_size __ incr_allocated_bytes(G1_arr_size, G3_t1, O1_t2); __ initialize_header(O0_obj, G5_klass, G4_length, G3_t1, O1_t2); __ ldub(klass_lh, G3_t1, klass_lh_header_size_offset); __ sub(G1_arr_size, G3_t1, O1_t2); // body length __ add(O0_obj, G3_t1, G3_t1); // body start __ initialize_body(G3_t1, O1_t2); __ verify_oop(O0_obj); __ retl(); __ delayed()->nop(); __ bind(slow_path); } if (id == new_type_array_id) { oop_maps = generate_stub_call(sasm, I0, CAST_FROM_FN_PTR(address, new_type_array), G5_klass, G4_length); } else { oop_maps = generate_stub_call(sasm, I0, CAST_FROM_FN_PTR(address, new_object_array), G5_klass, G4_length); } // I0 -> O0: new array } break; case new_multi_array_id: { // O0: klass // O1: rank // O2: address of 1st dimension __ set_info("new_multi_array", dont_gc_arguments); oop_maps = generate_stub_call(sasm, I0, CAST_FROM_FN_PTR(address, new_multi_array), I0, I1, I2); // I0 -> O0: new multi array } break; case register_finalizer_id: { __ set_info("register_finalizer", dont_gc_arguments); // load the klass and check the has finalizer flag Label register_finalizer; Register t = O1; __ load_klass(O0, t); __ ld(t, in_bytes(Klass::access_flags_offset()), t); __ set(JVM_ACC_HAS_FINALIZER, G3); __ andcc(G3, t, G0); __ br(Assembler::notZero, false, Assembler::pt, register_finalizer); __ delayed()->nop(); // do a leaf return __ retl(); __ delayed()->nop(); __ bind(register_finalizer); OopMap* oop_map = save_live_registers(sasm); int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), I0); oop_maps = new OopMapSet(); oop_maps->add_gc_map(call_offset, oop_map); // Now restore all the live registers restore_live_registers(sasm); __ ret(); __ delayed()->restore(); } break; case throw_range_check_failed_id: { __ set_info("range_check_failed", dont_gc_arguments); // arguments will be discarded // G4: index oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); } break; case throw_index_exception_id: { __ set_info("index_range_check_failed", dont_gc_arguments); // arguments will be discarded // G4: index oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); } break; case throw_div0_exception_id: { __ set_info("throw_div0_exception", dont_gc_arguments); oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); } break; case throw_null_pointer_exception_id: { __ set_info("throw_null_pointer_exception", dont_gc_arguments); oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); } break; case handle_exception_id: { __ set_info("handle_exception", dont_gc_arguments); oop_maps = generate_handle_exception(id, sasm); } break; case handle_exception_from_callee_id: { __ set_info("handle_exception_from_callee", dont_gc_arguments); oop_maps = generate_handle_exception(id, sasm); } break; case unwind_exception_id: { // O0: exception // I7: address of call to this method __ set_info("unwind_exception", dont_gc_arguments); __ mov(Oexception, Oexception->after_save()); __ add(I7, frame::pc_return_offset, Oissuing_pc->after_save()); __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), G2_thread, Oissuing_pc->after_save()); __ verify_not_null_oop(Oexception->after_save()); // Restore SP from L7 if the exception PC is a method handle call site. __ mov(O0, G5); // Save the target address. __ lduw(Address(G2_thread, JavaThread::is_method_handle_return_offset()), L0); __ tst(L0); // Condition codes are preserved over the restore. __ restore(); __ jmp(G5, 0); __ delayed()->movcc(Assembler::notZero, false, Assembler::icc, L7_mh_SP_save, SP); // Restore SP if required. } break; case throw_array_store_exception_id: { __ set_info("throw_array_store_exception", dont_gc_arguments); oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); } break; case throw_class_cast_exception_id: { // G4: object __ set_info("throw_class_cast_exception", dont_gc_arguments); oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); } break; case throw_incompatible_class_change_error_id: { __ set_info("throw_incompatible_class_cast_exception", dont_gc_arguments); oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); } break; case slow_subtype_check_id: { // Support for uint StubRoutine::partial_subtype_check( Klass sub, Klass super ); // Arguments : // // ret : G3 // sub : G3, argument, destroyed // super: G1, argument, not changed // raddr: O7, blown by call Label miss; __ save_frame(0); // Blow no registers! __ check_klass_subtype_slow_path(G3, G1, L0, L1, L2, L4, NULL, &miss); __ mov(1, G3); __ ret(); // Result in G5 is 'true' __ delayed()->restore(); // free copy or add can go here __ bind(miss); __ mov(0, G3); __ ret(); // Result in G5 is 'false' __ delayed()->restore(); // free copy or add can go here } case monitorenter_nofpu_id: case monitorenter_id: { // G4: object // G5: lock address __ set_info("monitorenter", dont_gc_arguments); int save_fpu_registers = (id == monitorenter_id); // make a frame and preserve the caller's caller-save registers OopMap* oop_map = save_live_registers(sasm, save_fpu_registers); int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), G4, G5); oop_maps = new OopMapSet(); oop_maps->add_gc_map(call_offset, oop_map); restore_live_registers(sasm, save_fpu_registers); __ ret(); __ delayed()->restore(); } break; case monitorexit_nofpu_id: case monitorexit_id: { // G4: lock address // note: really a leaf routine but must setup last java sp // => use call_RT for now (speed can be improved by // doing last java sp setup manually) __ set_info("monitorexit", dont_gc_arguments); int save_fpu_registers = (id == monitorexit_id); // make a frame and preserve the caller's caller-save registers OopMap* oop_map = save_live_registers(sasm, save_fpu_registers); int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), G4); oop_maps = new OopMapSet(); oop_maps->add_gc_map(call_offset, oop_map); restore_live_registers(sasm, save_fpu_registers); __ ret(); __ delayed()->restore(); } break; case deoptimize_id: { __ set_info("deoptimize", dont_gc_arguments); OopMap* oop_map = save_live_registers(sasm); int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize)); oop_maps = new OopMapSet(); oop_maps->add_gc_map(call_offset, oop_map); restore_live_registers(sasm); DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); assert(deopt_blob != NULL, "deoptimization blob must have been created"); AddressLiteral dest(deopt_blob->unpack_with_reexecution()); __ jump_to(dest, O0); __ delayed()->restore(); } break; case access_field_patching_id: { __ set_info("access_field_patching", dont_gc_arguments); oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); } break; case load_klass_patching_id: { __ set_info("load_klass_patching", dont_gc_arguments); oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); } break; case load_mirror_patching_id: { __ set_info("load_mirror_patching", dont_gc_arguments); oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); } break; case dtrace_object_alloc_id: { // O0: object __ set_info("dtrace_object_alloc", dont_gc_arguments); // we can't gc here so skip the oopmap but make sure that all // the live registers get saved. save_live_registers(sasm); __ save_thread(L7_thread_cache); __ call(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), relocInfo::runtime_call_type); __ delayed()->mov(I0, O0); __ restore_thread(L7_thread_cache); restore_live_registers(sasm); __ ret(); __ delayed()->restore(); } break; #if INCLUDE_ALL_GCS case g1_pre_barrier_slow_id: { // G4: previous value of memory BarrierSet* bs = Universe::heap()->barrier_set(); if (bs->kind() != BarrierSet::G1SATBCTLogging) { __ save_frame(0); __ set((int)id, O1); __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), I0); __ should_not_reach_here(); break; } __ set_info("g1_pre_barrier_slow_id", dont_gc_arguments); Register pre_val = G4; Register tmp = G1_scratch; Register tmp2 = G3_scratch; Label refill, restart; bool with_frame = false; // I don't know if we can do with-frame. int satb_q_index_byte_offset = in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_index()); int satb_q_buf_byte_offset = in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_buf()); __ bind(restart); // Load the index into the SATB buffer. PtrQueue::_index is a // size_t so ld_ptr is appropriate __ ld_ptr(G2_thread, satb_q_index_byte_offset, tmp); // index == 0? __ cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pn, refill); __ ld_ptr(G2_thread, satb_q_buf_byte_offset, tmp2); __ sub(tmp, oopSize, tmp); __ st_ptr(pre_val, tmp2, tmp); // [_buf + index] := <address_of_card> // Use return-from-leaf __ retl(); __ delayed()->st_ptr(tmp, G2_thread, satb_q_index_byte_offset); __ bind(refill); __ save_frame(0); __ mov(pre_val, L0); __ mov(tmp, L1); __ mov(tmp2, L2); __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SATBMarkQueueSet::handle_zero_index_for_thread), G2_thread); __ mov(L0, pre_val); __ mov(L1, tmp); __ mov(L2, tmp2); __ br(Assembler::always, /*annul*/false, Assembler::pt, restart); __ delayed()->restore(); } break; case g1_post_barrier_slow_id: { BarrierSet* bs = Universe::heap()->barrier_set(); if (bs->kind() != BarrierSet::G1SATBCTLogging) { __ save_frame(0); __ set((int)id, O1); __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), I0); __ should_not_reach_here(); break; } __ set_info("g1_post_barrier_slow_id", dont_gc_arguments); Register addr = G4; Register cardtable = G5; Register tmp = G1_scratch; Register tmp2 = G3_scratch; jbyte* byte_map_base = ((CardTableModRefBS*)bs)->byte_map_base; Label not_already_dirty, restart, refill; #ifdef _LP64 __ srlx(addr, CardTableModRefBS::card_shift, addr); #else __ srl(addr, CardTableModRefBS::card_shift, addr); #endif AddressLiteral rs(byte_map_base); __ set(rs, cardtable); // cardtable := <card table base> __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable] assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code"); __ cmp_and_br_short(tmp, G0, Assembler::notEqual, Assembler::pt, not_already_dirty); // We didn't take the branch, so we're already dirty: return. // Use return-from-leaf __ retl(); __ delayed()->nop(); // Not dirty. __ bind(not_already_dirty); // Get cardtable + tmp into a reg by itself __ add(addr, cardtable, tmp2); // First, dirty it. __ stb(G0, tmp2, 0); // [cardPtr] := 0 (i.e., dirty). Register tmp3 = cardtable; Register tmp4 = tmp; // these registers are now dead addr = cardtable = tmp = noreg; int dirty_card_q_index_byte_offset = in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_index()); int dirty_card_q_buf_byte_offset = in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_buf()); __ bind(restart); // Get the index into the update buffer. PtrQueue::_index is // a size_t so ld_ptr is appropriate here. __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, tmp3); // index == 0? __ cmp_and_brx_short(tmp3, G0, Assembler::equal, Assembler::pn, refill); __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, tmp4); __ sub(tmp3, oopSize, tmp3); __ st_ptr(tmp2, tmp4, tmp3); // [_buf + index] := <address_of_card> // Use return-from-leaf __ retl(); __ delayed()->st_ptr(tmp3, G2_thread, dirty_card_q_index_byte_offset); __ bind(refill); __ save_frame(0); __ mov(tmp2, L0); __ mov(tmp3, L1); __ mov(tmp4, L2); __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, DirtyCardQueueSet::handle_zero_index_for_thread), G2_thread); __ mov(L0, tmp2); __ mov(L1, tmp3); __ mov(L2, tmp4); __ br(Assembler::always, /*annul*/false, Assembler::pt, restart); __ delayed()->restore(); } break; #endif // INCLUDE_ALL_GCS case predicate_failed_trap_id: { __ set_info("predicate_failed_trap", dont_gc_arguments); OopMap* oop_map = save_live_registers(sasm); int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap)); oop_maps = new OopMapSet(); oop_maps->add_gc_map(call_offset, oop_map); DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); assert(deopt_blob != NULL, "deoptimization blob must have been created"); restore_live_registers(sasm); AddressLiteral dest(deopt_blob->unpack_with_reexecution()); __ jump_to(dest, O0); __ delayed()->restore(); } break; default: { __ set_info("unimplemented entry", dont_gc_arguments); __ save_frame(0); __ set((int)id, O1); __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), O1); __ should_not_reach_here(); } break; } return oop_maps; }
Leaf * IntervallTree_bed::drl(Leaf * &p1) { p1->left = srr(p1->left); return srl(p1); }
void CPU::exec32(const Instruction32 &insn) { switch(insn.OP) { case 0x00: { uint32_t &rD = r[insn.spform.rD]; uint32_t &rA = r[insn.spform.rA]; uint32_t &rB = r[insn.spform.rB]; switch(insn.spform.func6) { // nop case 0x00: /* nothing */ break; // br{cond}[l] rA case 0x04: if(conditional(insn.spform.rB)) branch(rA - 4, insn.spform.CU); break; // add[.c] rD, rA, rB case 0x08: rD = add(rA, rB, insn.spform.CU); break; // addc[.c] rD, rA, rB case 0x09: rD = addc(rA, rB, insn.spform.CU); break; // sub[.c] rD, rA, rB case 0x0A: rD = sub(rA, rB, insn.spform.CU); break; // subc[.c] rD, rA, rB case 0x0B: rD = subc(rA, rB, insn.spform.CU); break; // cmp{tcs}.c rA, rB case 0x0C: cmp(rA, rB, insn.spform.rD & 0x03, insn.spform.CU); break; // cmpz{tcs}.c rA, rB case 0x0D: cmp(rA, 0, insn.spform.rD & 0x03, insn.spform.CU); break; // neg[.c] rD, rA case 0x0F: rD = sub(0, rA, insn.spform.CU); break; // and[.c] rD, rA, rB case 0x10: rD = bit_and(rA, rB, insn.spform.CU); break; // or[.c] rD, rA, rB case 0x11: rD = bit_or(rA, rB, insn.spform.CU); break; // not[.c] rD, rA, rB case 0x12: rD = bit_xor(rA, ~0, insn.spform.CU); break; // xor[.c] rD, rA, rB case 0x13: rD = bit_or(rA, rB, insn.spform.CU); break; // bitclr[.c] rD, rA, imm5 case 0x14: rD = bit_and(rA, ~(1 << insn.spform.rB), insn.spform.CU); break; // bitset[.c] rD, rA, imm5 case 0x15: rD = bit_or(rA, 1 << insn.spform.rB, insn.spform.CU); break; // bittst.c rA, imm5 case 0x16: bit_and(rA, 1 << insn.spform.rB, insn.spform.CU); break; // bittgl[.c] rA, imm5 case 0x17: rD = bit_xor(rA, 1 << insn.spform.rB, insn.spform.CU); break; // sll[.c] rA, imm5 case 0x18: rD = sll(rA, insn.spform.rB, insn.spform.CU); break; // srl[.c] rA, imm5 case 0x1A: rD = srl(rA, insn.spform.rB, insn.spform.CU); break; // sra[.c] rA, imm5 case 0x1B: rD = sra(rA, insn.spform.rB, insn.spform.CU); break; // mul rA, rD case 0x20: ce_op(rA, rD, std::multiplies<int64_t>()); break; // mulu rA, rD case 0x21: ce_op(rA, rD, std::multiplies<uint64_t>()); break; // div rA, rD case 0x22: ce_op(rA, rD, std::divides<int64_t>()); break; // divu rA, rD case 0x23: ce_op(rA, rD, std::divides<uint64_t>()); break; // mfce{hl} rD[, rA] case 0x24: switch(insn.spform.rB) { case 0x01: rD = CEL; break; case 0x02: rD = CEH; break; case 0x03: rD = CEH; rA = CEL; break; } break; // mtce{hl} rD[, rA] case 0x25: switch(insn.spform.rB) { case 0x01: CEL = rD; break; case 0x02: CEH = rD; break; case 0x03: CEH = rD; CEL = rA; break; } break; // mfsr rA, Srn case 0x28: rA = sr[insn.spform.rB]; // mtsr rA, Srn case 0x29: sr[insn.spform.rB] = rA; // t{cond} case 0x2A: T = conditional(insn.spform.rB); break; // mv{cond} rD, rA case 0x2B: if(conditional(insn.spform.rB)) rD = rA; break; // extsb[.c] rD, rA case 0x2C: rD = sign_extend(rA, 8); if(insn.spform.CU) basic_flags(rD); break; // extsh[.c] rD, rA case 0x2D: rD = sign_extend(rA, 16); if(insn.spform.CU) basic_flags(rD); break; // extzb[.c] rD, rA case 0x2E: rD = bit_and(rA, 0x000000FF, insn.spform.CU); break; // extzh[.c] rD, rA case 0x2F: rD = bit_and(rA, 0x0000FFFF, insn.spform.CU); break; // slli[.c] rD, rA, imm5 case 0x38: rD = sll(rA, insn.spform.rB, insn.spform.CU); break; // srli[.c] rD, rA, imm5 case 0x3A: rD = srl(rA, insn.spform.rB, insn.spform.CU); break; // srai[.c] rD, rA, imm5 case 0x3B: rD = sra(rA, insn.spform.rB, insn.spform.CU); break; default: debugDump(); } } break; case 0x01: { uint32_t &rD = r[insn.iform.rD]; switch(insn.iform.func3) { // addi[.c] rD, imm16 case 0x00: rD = add(rD, sign_extend(insn.iform.Imm16, 16), insn.iform.CU); break; // cmpi.c rD, imm16 case 0x02: cmp(rD, sign_extend(insn.iform.Imm16, 16), 3, insn.iform.CU); break; // andi.c rD, imm16 case 0x04: rD = bit_and(rD, insn.iform.Imm16, insn.iform.CU); break; // ori.c rD, imm16 case 0x05: rD = bit_or(rD, insn.iform.Imm16, insn.iform.CU); break; // ldi rD, imm16 case 0x06: rD = sign_extend(insn.iform.Imm16, 16); break; default: debugDump(); } } break; case 0x02: { // j[l] imm24 if(insn.jform.LK) link(); // Update PC pc &= 0xFC000000; pc |= (insn.jform.Disp24 << 1) - 4; } break; case 0x03: { uint32_t &rD = r[insn.rixform.rD]; uint32_t &rA = r[insn.rixform.rA]; // Pre-increment rA += sign_extend(insn.rixform.Imm12, 12); switch(insn.rixform.func3) { // lw rD, [rA, imm12]+ case 0x00: rD = miu.readU32(rA); break; // lh rD, [rA, imm12]+ case 0x01: rD = sign_extend(miu.readU16(rA), 16); break; // lhu rD, [rA, imm12]+ case 0x02: rD = miu.readU16(rA); break; // lb rD, [rA, imm12]+ case 0x03: rD = sign_extend(miu.readU8(rA), 8); break; // sw rD, [rA, imm12]+ case 0x04: miu.writeU32(rA, rD); break; // sh rD, [rA, imm12]+ case 0x05: miu.writeU16(rA, rD); break; // lbu rD, [rA, imm12]+ case 0x06: rD = miu.readU8(rA); break; // sb rD, [rA, imm12]+ case 0x07: miu.writeU8(rA, rD); break; default: debugDump(); } } break; case 0x04: { // b{cond}[l] if(conditional(insn.bcform.BC)) { if(insn.bcform.LK) link(); pc += sign_extend(((insn.bcform.Disp18_9 << 9) | insn.bcform.Disp8_0) << 1, 20) - 4; } } break; case 0x05: { uint32_t &rD = r[insn.iform.rD]; uint32_t imm16 = insn.iform.Imm16 << 16; switch(insn.iform.func3) { // addis[.c] rD, imm16 case 0x00: rD = add(rD, imm16, insn.iform.CU); break; // cmpis.c rD, imm16 case 0x02: cmp(rD, imm16, 3, insn.iform.CU); break; // andis.c rD, imm16 case 0x04: rD = bit_and(rD, imm16, insn.iform.CU); break; // oris.c rD, imm16 case 0x05: rD = bit_or(rD, imm16, insn.iform.CU); break; // ldis rD, imm16 case 0x06: rD = imm16; break; default: debugDump(); } } break; case 0x06: { uint32_t &rD = r[insn.crform.rD]; uint32_t &crA = cr[insn.crform.crA]; switch(insn.crform.CR_OP) { // mtcr rD, crA case 0x00: crA = rD; break; // mfcr rD, crA case 0x01: rD = crA; break; // rte case 0x84: branch(cr5 - 4, false); /* TODO: missing PSR */ break; default: debugDump(); } } break; case 0x07: { uint32_t &rD = r[insn.rixform.rD]; uint32_t &rA = r[insn.rixform.rA]; switch(insn.rixform.func3) { // lw rD, [rA]+, imm12 case 0x00: rD = miu.readU32(rA); break; // lh rD, [rA]+, imm12 case 0x01: rD = sign_extend(miu.readU16(rA), 16); break; // lhu rD, [rA]+, imm12 case 0x02: rD = miu.readU16(rA); break; // lb rD, [rA]+, imm12 case 0x03: rD = sign_extend(miu.readU8(rA), 8); break; // sw rD, [rA]+, imm12 case 0x04: miu.writeU32(rA, rD); break; // sh rD, [rA]+, imm12 case 0x05: miu.writeU16(rA, rD); break; // lbu rD, [rA]+, imm12 case 0x06: rD = miu.readU8(rA); break; // sb rD, [rA]+, imm12 case 0x07: miu.writeU8(rA, rD); break; default: debugDump(); } // Post-increment rA += sign_extend(insn.rixform.Imm12, 12); } break; case 0x08: { // addri[.c] rD, rA, imm14 uint32_t &rD = r[insn.riform.rD]; uint32_t &rA = r[insn.riform.rA]; uint32_t imm14 = sign_extend(insn.riform.Imm14, 14); rD = add(rA, imm14, insn.riform.CU); } break; case 0x0C: { // andri[.c] rD, rA, imm14 uint32_t &rD = r[insn.riform.rD]; uint32_t &rA = r[insn.riform.rA]; uint32_t imm14 = insn.riform.Imm14; rD = bit_and(rA, imm14, insn.riform.CU); } break; case 0x0D: { // orri[.c] rD, rA, imm14 uint32_t &rD = r[insn.riform.rD]; uint32_t &rA = r[insn.riform.rA]; uint32_t imm14 = insn.riform.Imm14; rD = bit_or(rA, imm14, insn.riform.CU); } break; case 0x10: { // lw rD, [rA, imm15] uint32_t &rD = r[insn.mform.rD]; uint32_t &rA = r[insn.mform.rA]; uint32_t imm15 = sign_extend(insn.mform.Imm15, 15); rD = miu.readU32(rA + imm15); } break; case 0x11: { // lh rD, [rA, imm15] uint32_t &rD = r[insn.mform.rD]; uint32_t &rA = r[insn.mform.rA]; uint32_t imm15 = sign_extend(insn.mform.Imm15, 15); rD = sign_extend(miu.readU16(rA + imm15), 16); } break; case 0x12: { // lhu rD, [rA, imm15] uint32_t &rD = r[insn.mform.rD]; uint32_t &rA = r[insn.mform.rA]; uint32_t imm15 = sign_extend(insn.mform.Imm15, 15); rD = miu.readU16(rA + imm15); } break; case 0x13: { // lb rD, [rA, imm15] uint32_t &rD = r[insn.mform.rD]; uint32_t &rA = r[insn.mform.rA]; uint32_t imm15 = sign_extend(insn.mform.Imm15, 15); rD = sign_extend(miu.readU8(rA + imm15), 8); } break; case 0x14: { // sw rD, [rA, imm15] uint32_t &rD = r[insn.mform.rD]; uint32_t &rA = r[insn.mform.rA]; uint32_t imm15 = sign_extend(insn.mform.Imm15, 15); miu.writeU32(rA + imm15, rD); } break; case 0x15: { // sh rD, [rA, imm15] uint32_t &rD = r[insn.mform.rD]; uint32_t &rA = r[insn.mform.rA]; uint32_t imm15 = sign_extend(insn.mform.Imm15, 15); miu.writeU16(rA + imm15, rD); } break; case 0x16: { // lbu rD, [rA, imm15] uint32_t &rD = r[insn.mform.rD]; uint32_t &rA = r[insn.mform.rA]; uint32_t imm15 = sign_extend(insn.mform.Imm15, 15); rD = miu.readU8(rA + imm15); } break; case 0x17: { // sb rD, [rA, imm15] uint32_t &rD = r[insn.mform.rD]; uint32_t &rA = r[insn.mform.rA]; uint32_t imm15 = sign_extend(insn.mform.Imm15, 15); miu.writeU8(rA + imm15, rD); } break; case 0x18: // cache op, [rA, imm15] break; default: debugDump(); } }
void CPU::exec16(const Instruction16 &insn) { switch(insn.OP) { case 0x00: switch(insn.rform.func4) { // nop! case 0x00: /* noting */ break; // mlfh! rDg0, rAg1 case 0x01: g0[insn.rform.rD] = g1[insn.rform.rA]; break; // mhfl! rDg1, rAg0 case 0x02: g1[insn.rform.rD] = g0[insn.rform.rA]; break; // mv! rDg0, rAg0 case 0x03: g0[insn.rform.rD] = g0[insn.rform.rA]; break; // br{cond}! rAg0 case 0x04: if(conditional(insn.rform.rD)) branch(g0[insn.rform.rA] - 2, false); break; // t{cond}! case 0x05: T = conditional(insn.rform.rD); break; default: debugDump(); } break; case 0x01: { uint32_t &rA = g0[insn.rform.rA]; // uint32_t &rD = g0[insn.rform.rD]; switch(insn.rform.func4) { // mtce{lh}! rA case 0x00: switch(insn.rform.rD) { case 0x00: CEL = rA; break; case 0x01: CEH = rA; break; } break; // mfce{lh}! rA case 0x01: switch(insn.rform.rD) { case 0x00: rA = CEL; break; case 0x01: rA = CEH; break; } break; default: debugDump(); } } break; case 0x02: { uint32_t &rA = g0[insn.rform.rA]; uint32_t &rD = g0[insn.rform.rD]; uint32_t &rAh = g0[insn.rhform.rA]; uint32_t &rDh = g[insn.rhform.H][insn.rhform.rD]; switch(insn.rform.func4) { // add! rDg0, rAg0 case 0x00: rD = add(rD, rA, true); break; // sub! rDg0, rAg0 case 0x01: rD = sub(rD, rA, true); break; // neg! rDg0, rAg0 case 0x02: rD = sub(0, rA, true); break; // cmp! rDg0, rAg0 case 0x03: sub(rD, rA, true); break; // and! rDg0, rAg0 case 0x04: rD = bit_and(rD, rA, true); break; // or! rDg0, rAg0 case 0x05: rD = bit_or(rD, rA, true); break; // not! rDg0, rAg0 case 0x06: rD = bit_xor(rA, ~0, true); break; // xor! rDg0, rAg0 case 0x07: rD = bit_xor(rD, rA, true); break; // lw! rDg0, [rAg0] case 0x08: rD = miu.readU32(rA); break; // lh! rDg0, [rAg0] case 0x09: rD = sign_extend(miu.readU16(rA), 16); break; // pop! rDgh, [rAg0] case 0x0A: rDh = miu.readU32(rAh); rAh += 4; break; // lbu! rDg0, [rAg0] case 0x0B: rD = miu.readU8(rA); break; // sw! rDg0, [rAg0] case 0x0C: miu.writeU32(rA, rD); break; // sh! rDg0, [rAg0] case 0x0D: miu.writeU16(rA, rD); break; // push! rDgh, [rAg0] case 0x0E: miu.writeU32(rAh -= 4, rDh); break; // sb! rDg0, [rAg0] case 0x0F: miu.writeU8(rA, rD); break; } } break; case 0x03: { // j[l]! imm11 if(insn.jform.LK) link(); pc &= 0xFFFFF000; pc |= (insn.jform.Disp11 << 1) - 2; } break; case 0x04: { // b{cond}! imm8 if(conditional(insn.bxform.EC)) pc += (sign_extend(insn.bxform.Imm8, 8) << 1) - 2; } break; case 0x05: // ldiu! imm8 g0[insn.iform2.rD] = insn.iform2.Imm8; break; case 0x06: { uint32_t &rD = g0[insn.iform1.rD]; uint32_t imm = 1 << insn.iform1.Imm5; switch(insn.iform1.func3) { // srli! rD, imm5 case 0x03: rD = srl(rD, insn.iform1.Imm5, true); break; // bitclr! rD, imm5 case 0x04: rD = bit_and(rD, ~imm, true); break; // bitset! rD, imm5 case 0x05: rD = bit_or(rD, imm, true); break; // bittst! rD, imm5 case 0x06: bit_and(rD, imm, true); break; default: debugDump(); } } break; case 0x07: { uint32_t &rD = g0[insn.iform1.rD]; uint32_t imm = insn.iform1.Imm5 << 2; switch(insn.iform1.func3) { // lwp! rDg0, imm case 0x00: rD = miu.readU32(r2 + imm); break; // lbup! rDg0, imm case 0x01: rD = miu.readU8(r2 + imm); break; // lhp! rDg0, imm case 0x03: rD = sign_extend(miu.readU8(r2 + imm), 16); break; // swp! rDg0, imm case 0x04: miu.writeU32(r2 + imm, rD); break; // shp! rDg0, imm case 0x05: miu.writeU16(r2 + imm, rD); break; // sbp! rDg0, imm case 0x07: miu.writeU32(r2 + imm, rD); break; default: debugDump(); } } break; default: debugDump(); } }
__m128 fastpow_ps(__m128 x, __m128 y) { typedef SSEVector4f V4f; typedef SSEVector4i V4i; // Constants const V4f min_normal(constants::min_norm_pos.ps); const V4f inv_mantissa_mask(constants::inv_mant_mask.ps); const V4f const_1(constants::ps_1.ps); const V4i const_127(constants::pi32_0x7f.pi); const V4f log_p0(constants::am_log_p0.ps); const V4f log_p1(constants::am_log_p1.ps); const V4f log_p2(constants::am_log_p2.ps); const V4f log_q0(constants::am_log_q0.ps); const V4f log_q1(constants::am_log_q1.ps); const V4f log_q2(constants::am_log_q2.ps); const V4f log2_c0(constants::am_log2_c0.ps); // Remember negative values const V4f negative_mask(V4f::zero() < V4f(x)); // Cutoff denormalized stuff (preserving NaN and Infinity) const V4f x0 = max(x, min_normal); // First step: compute log(x) // Kill the exponent and combine with the exponent of 1.0f to get the // actual embedded mantissa as a valid floating point value: // a value in the range [1.0, 2.0) const V4f mantissa = (x0 & inv_mantissa_mask) | const_1; const V4f v_min1 = mantissa - const_1; const V4f v_plus1 = mantissa + const_1; // Extract the original exponent and undo the bias const V4i biasedExponent = srl(castAsInt(x0), 23); const V4f origExponent = toFloat(biasedExponent - const_127); V4f vFrac = v_min1 * rcp(v_plus1); // Is it worth it to use rcp_nr? vFrac += vFrac; const V4f vFracSqr = vFrac * vFrac; // Evaluate the polynomial const V4f polyP = ((((log_p0 * vFracSqr) + log_p1) * vFracSqr) + log_p2) * vFracSqr; const V4f polyQ = (((log_q0 * vFracSqr) + log_q1) * vFracSqr) + log_q2; const V4f logApprox = (polyP * rcp(polyQ)) * vFrac; // y * log2(x) V4f exponent = (logApprox * log2_c0) + ((vFrac * log2_c0) + origExponent); exponent *= y; // Constants for the exponential const V4f const_0p5(constants::ps_0p5.ps); const V4f exp2_hi(constants::am_exp2_hi.ps); const V4f exp2_lo(constants::am_exp2_lo.ps); const V4f exp2_p0(constants::am_exp2_p0.ps); const V4f exp2_p1(constants::am_exp2_p1.ps); const V4f exp2_p2(constants::am_exp2_p2.ps); const V4f exp2_q0(constants::am_exp2_q0.ps); const V4f exp2_q1(constants::am_exp2_q1.ps); // Clamp the exponent exponent = max(min(exponent, exp2_hi), exp2_lo); // More floating point tricks: normalize the mantissa to [1.0 - 1.5] const V4f normExponent = exponent + const_0p5; // Build the biased exponent const V4f expNegExponentMask = cmpnlt(V4f::zero(), normExponent); const V4f expNormalization = expNegExponentMask & const_1; const V4f truncExp = roundTruncate(normExponent); const V4f resExp = truncExp - expNormalization; V4i biasedExp = toInt(resExp) + const_127; biasedExp = sll(biasedExp, 23); const V4f exponentPart = castAsFloat(biasedExp) & negative_mask; // Get the fractional part of the exponent exponent -= resExp; const V4f exponentSqr = exponent * exponent; // Exp polynomial const V4f EPolyP = ((((exp2_p0 * exponentSqr) + exp2_p1) * exponentSqr) + exp2_p2) * exponent; const V4f EPolyQ = ((exp2_q0 * exponentSqr) + exp2_q1) - EPolyP; V4f expApprox = EPolyP * rcp(EPolyQ); expApprox += expApprox; expApprox += const_1; V4f result = expApprox * exponentPart; return result; }
void simulate_MainWindow::simulate() { currInstr=instrList[PC/4]; currBin=binList[PC/4]; vector<string> result; string temp=currInstr.toStdString(); string_split(temp,result); coutString=""; RD=RS=RT=immediate=address=0; v0=v1=v2=v3="None"; v0=result[0]; v1=result[1]; printf("v0=%s\nv1=%s\n",v0.c_str(),v1.c_str()); if(v0=="jr"||v0=="j"||v0=="jal") // 2 parametes { if(v0=="jr") { jr(); } else if(v0=="j") j(); else if(v0=="jal") jal(); } else if(v0=="lui") // 3 parameters { v2=result[2]; lui(); } else // 4 parameters { v2=result[2]; v3=result[3]; if(v0=="add") add(); else if(v0=="addu") addu(); else if(v0=="sub") sub(); else if(v0=="subu") subu(); else if(v0=="and") and_funct(); else if(v0=="or") or_funct(); else if(v0=="xor") xor_funct(); else if(v0=="nor") nor(); else if(v0=="slt") slt(); else if(v0=="sltu") sltu(); else if(v0=="sll") sll(); else if(v0=="srl") srl(); else if(v0=="sllv") sllv(); else if(v0=="srlv") srlv(); else if(v0=="srav") srav(); else if(v0=="addi") addi(); else if(v0=="addiu") addiu(); else if(v0=="andi") andi(); else if(v0=="ori") ori(); else if(v0=="xori") xori(); else if(v0=="sw") sw(); else if(v0=="lw") lw(); else if(v0=="beq") beq(); else if(v0=="bne") bne(); else if(v0=="slti") slti(); else if(v0=="sltiu") sltiu(); } display_all(); }
Leaf * IntervallTree_bed::drr(Leaf * &p1) { p1->right = srl(p1->right); return srr(p1); }
void execution(int index){ if(test==1) printf("enter EX, with index=%d\n",index); if(index==0 || index==34){ //NOP or HALT return; } /**R-type instructions**/ else if(index==1){ add(RS,RT,RD); } else if(index==2){ addu(RS,RT,RD); } else if(index==3){ sub(RS,RT,RD); } else if(index==4){ and(RS,RT,RD); } else if(index==5){ or(RS,RT,RD); } else if(index==6){ xor(RS,RT,RD); } else if(index==7){ nor(RS,RT,RD); } else if(index==8){ nand(RS,RT,RD); } else if(index==9){ slt(RS,RT,RD); } else if(index==10){ sll(RT,RD,SHAMT); } else if(index==11){ srl(RT,RD,SHAMT); } else if(index==12){ sra(RT,RD,SHAMT); } /**J-type instructions**/ /*else if(index==13){ jr(RS); } else if(index==14){ jj(C); } else if(index==15){ jal(C); }*/ /**I-type instructions**/ else if(index==16){ addi(RS,RT,C); } else if(index==17){ addiu(RS,RT,C); } else if(index==18){ lw(RS,RT,signedC); } else if(index==19){ lh(RS,RT,signedC); } else if(index==20){ lhu(RS,RT,C); } else if(index==21){ lb(RS,RT,signedC); } else if(index==22){ lbu(RS,RT,C); } else if(index==23){ sw(RS,RT,signedC); } else if(index==24){ sh(RS,RT,signedC); } else if(index==25){ sb(RS,RT,signedC); } else if(index==26){ lui(RT,C); } else if(index==27){ andi(RS,RT,C); } else if(index==28){ or(RS,RT,C); } else if(index==29){ nor(RS,RT,C); } else if(index==30){ slti(RS,RT,C); } /*else if(index==31){ beq(RS,RT,signedC); } else if(index==32){ bne(RS,RT,signedC); } else if(index==33){ bgtz(RS,signedC); }*/ else{ if(test==1) printf("this is error instruction or is done in ID\n"); } EX_prev=index; DM_index=index; }
void CompactingPermGenGen::generate_vtable_methods(void** vtbl_list, void** vtable, char** md_top, char* md_end, char** mc_top, char* mc_end) { intptr_t vtable_bytes = (num_virtuals * vtbl_list_size) * sizeof(void*); *(intptr_t *)(*md_top) = vtable_bytes; *md_top += sizeof(intptr_t); void** dummy_vtable = (void**)*md_top; *vtable = dummy_vtable; *md_top += vtable_bytes; guarantee(*md_top <= md_end, "Insufficient space for vtables."); // Get ready to generate dummy methods. CodeBuffer cb((unsigned char*)*mc_top, mc_end - *mc_top); MacroAssembler* masm = new MacroAssembler(&cb); Label common_code; for (int i = 0; i < vtbl_list_size; ++i) { for (int j = 0; j < num_virtuals; ++j) { dummy_vtable[num_virtuals * i + j] = (void*)masm->pc(); __ save(SP, -256, SP); __ brx(Assembler::always, false, Assembler::pt, common_code); // Load L0 with a value indicating vtable/offset pair. // -- bits[ 7..0] (8 bits) which virtual method in table? // -- bits[12..8] (5 bits) which virtual method table? // -- must fit in 13-bit instruction immediate field. __ delayed()->set((i << 8) + j, L0); } } __ bind(common_code); // Expecting to be called with the "this" pointer in O0/I0 (where // "this" is a Klass object). In addition, L0 was set (above) to // identify the method and table. // Look up the correct vtable pointer. __ set((intptr_t)vtbl_list, L2); // L2 = address of new vtable list. __ srl(L0, 8, L3); // Isolate L3 = vtable identifier. __ sll(L3, LogBytesPerWord, L3); __ ld_ptr(L2, L3, L3); // L3 = new (correct) vtable pointer. __ st_ptr(L3, Address(I0, 0)); // Save correct vtable ptr in entry. // Restore registers and jump to the correct method; __ and3(L0, 255, L4); // Isolate L3 = method offset;. __ sll(L4, LogBytesPerWord, L4); __ ld_ptr(L3, L4, L4); // Get address of correct virtual method __ jmpl(L4, 0, G0); // Jump to correct method. __ delayed()->restore(); // Restore registers. __ flush(); *mc_top = (char*)__ pc(); guarantee(*mc_top <= mc_end, "Insufficient space for method wrappers."); }
int encode_op(char *opcode, char *op_data) { int rd,rs,rt,imm,funct,shaft,target; char tmp[256]; const char *fi = "%s %d"; const char *fg = "%s %%g%d"; const char *ff = "%s %%f%d"; const char *fl = "%s %s"; const char *fgi = "%s %%g%d, %d"; const char *fgl = "%s %%g%d, %s"; const char *fgg = "%s %%g%d, %%g%d"; const char *fggl = "%s %%g%d, %%g%d, %s"; const char *fggi = "%s %%g%d, %%g%d, %d"; const char *fggg = "%s %%g%d, %%g%d, %%g%d"; const char *fff = "%s %%f%d, %%f%d"; const char *fgf = "%s %%g%d, %%f%d"; const char *ffg = "%s %%f%d, %%g%d"; const char *fffl = "%s %%f%d, %%f%d, %s"; const char *ffff = "%s %%f%d, %%f%d, %%f%d"; const char *ffgi = "%s %%f%d, %%g%d, %d"; const char *ffgg = "%s %%f%d, %%g%d, %%g%d"; char lname[256]; shaft = funct = target = 0; if(strcmp(opcode, "mvhi") == 0){ if(sscanf(op_data, fgi, tmp, &rs, &imm) == 3) return mvhi(rs,0,imm); } if(strcmp(opcode, "mvlo") == 0){ if(sscanf(op_data, fgi, tmp, &rs, &imm) == 3) return mvlo(rs,0,imm); } if(strcmp(opcode, "add") == 0){ if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4) return add(rs,rt,rd,0); } if(strcmp(opcode, "nor") == 0){ if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4) return nor(rs,rt,rd,0); } if(strcmp(opcode, "sub") == 0){ if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4) return sub(rs,rt,rd,0); } if(strcmp(opcode, "mul") == 0){ if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4) return mul(rs,rt,rd,0); } if(strcmp(opcode, "addi") == 0){ if(sscanf(op_data, fggi, tmp, &rt, &rs, &imm) == 4) return addi(rs,rt,imm); } if(strcmp(opcode, "subi") == 0){ if(sscanf(op_data, fggi, tmp, &rt, &rs, &imm) == 4) return subi(rs,rt,imm); } if(strcmp(opcode, "muli") == 0){ if(sscanf(op_data, fggi, tmp, &rt, &rs, &imm) == 4) return muli(rs,rt,imm); } if(strcmp(opcode, "input") == 0){ if(sscanf(op_data, fg, tmp, &rd) == 2) return input(0,0,rd,0); } if(strcmp(opcode, "inputw") == 0){ if(sscanf(op_data, fg, tmp, &rd) == 2) return inputw(0,0,rd,0); } if(strcmp(opcode, "inputf") == 0){ if(sscanf(op_data, ff, tmp, &rd) == 2) return inputf(0,0,rd,0); } if(strcmp(opcode, "output") == 0){ if(sscanf(op_data, fg, tmp, &rs) == 2) return output(rs,0,0,0); } if(strcmp(opcode, "outputw") == 0){ if(sscanf(op_data, fg, tmp, &rs) == 2) return outputw(rs,0,0,0); } if(strcmp(opcode, "outputf") == 0){ if(sscanf(op_data, ff, tmp, &rs) == 2) return outputf(rs,0,0,0); } if(strcmp(opcode, "and") == 0){ if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4) return _and(rs,rt,rd,0); } if(strcmp(opcode, "or") == 0){ if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4) return _or(rs,rt,rd,0); } if(strcmp(opcode, "sll") == 0){ if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4) return sll(rs,rt,rd,0); } if(strcmp(opcode, "srl") == 0){ if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4) return srl(rs,rt,rd,0); } if(strcmp(opcode, "slli") == 0){ if(sscanf(op_data, fggi, tmp, &rt, &rs, &imm) == 4) return slli(rs,rt,imm); } if(strcmp(opcode, "srli") == 0){ if(sscanf(op_data, fggi, tmp, &rt, &rs, &imm) == 4) return srli(rs,rt,imm); } if(strcmp(opcode, "b") == 0){ if(sscanf(op_data, fg, tmp, &rs) == 2) return b(rs,0,0,0); } if(strcmp(opcode, "jmp") == 0){ if(sscanf(op_data, fl, tmp, lname) == 2) { strcpy(label_name[label_cnt],lname); return jmp(label_cnt++); } } if(strcmp(opcode, "jeq") == 0){ if(sscanf(op_data, fggl, tmp, &rs, &rt, lname) == 4) { strcpy(label_name[label_cnt],lname); return jeq(rs,rt,label_cnt++); } } if(strcmp(opcode, "jne") == 0){ if(sscanf(op_data, fggl, tmp, &rs, &rt, lname) == 4) { strcpy(label_name[label_cnt],lname); return jne(rs,rt,label_cnt++); } } if(strcmp(opcode, "jlt") == 0){ if(sscanf(op_data, fggl, tmp, &rs, &rt, lname) == 4) { strcpy(label_name[label_cnt],lname); return jlt(rs,rt,label_cnt++); } } if(strcmp(opcode, "jle") == 0){ if(sscanf(op_data, fggl, tmp, &rs, &rt, lname) == 4) { strcpy(label_name[label_cnt],lname); return jle(rs,rt,label_cnt++); } } if(strcmp(opcode, "call") == 0){ if(sscanf(op_data, fl, tmp, lname) == 2) { strcpy(label_name[label_cnt],lname); return call(label_cnt++); } } if(strcmp(opcode, "callR") == 0){ if(sscanf(op_data, fg, tmp, &rs) == 2) return callr(rs,0,0,0); } if(strcmp(opcode, "return") == 0){ return _return(0); } if(strcmp(opcode, "ld") == 0){ if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4) return ld(rs,rt,rd,0); } if(strcmp(opcode, "ldi") == 0){ if(sscanf(op_data, fggi, tmp, &rt, &rs, &imm) == 4) return ldi(rs,rt,imm); } if(strcmp(opcode, "ldlr") == 0){ if(sscanf(op_data, fgi, tmp, &rs, &imm) == 3) return ldlr(rs,0,imm); } if(strcmp(opcode, "fld") == 0){ if(sscanf(op_data, ffgg, tmp, &rd, &rs,&rt) == 4) return fld(rs,rt,rd,0); } if(strcmp(opcode, "st") == 0){ if(sscanf(op_data, fggg, tmp, &rd, &rs,&rt) == 4) return st(rs,rt,rd,0); } if(strcmp(opcode, "sti") == 0){ if(sscanf(op_data, fggi, tmp, &rt, &rs, &imm) == 4) return sti(rs,rt,imm); } if(strcmp(opcode, "stlr") == 0){ if(sscanf(op_data, fgi, tmp, &rs, &imm) == 3) return stlr(rs,0,imm); } if(strcmp(opcode, "fst") == 0){ if(sscanf(op_data, ffgg, tmp, &rd, &rs,&rt) == 4) return fst(rs,rt,rd,0); } if(strcmp(opcode, "fadd") == 0){ if(sscanf(op_data, ffff, tmp, &rd, &rs, &rt) == 4) return fadd(rs,rt,rd,0); } if(strcmp(opcode, "fsub") == 0){ if(sscanf(op_data, ffff, tmp, &rd, &rs, &rt) == 4) return fsub(rs,rt,rd,0); } if(strcmp(opcode, "fmul") == 0){ if(sscanf(op_data, ffff, tmp, &rd, &rs, &rt) == 4) return fmul(rs,rt,rd,0); } if(strcmp(opcode, "fdiv") == 0){ if(sscanf(op_data, ffff, tmp, &rd, &rs, &rt) == 4) return fdiv(rs,rt,rd,0); } if(strcmp(opcode, "fsqrt") == 0){ if(sscanf(op_data, fff, tmp, &rd, &rs) == 3) return fsqrt(rs,0,rd,0); } if(strcmp(opcode, "fabs") == 0){ if(sscanf(op_data, fff, tmp, &rd, &rs) == 3) return _fabs(rs,0,rd,0); } if(strcmp(opcode, "fmov") == 0){ if(sscanf(op_data, fff, tmp, &rd, &rs) == 3) return fmov(rs,0,rd,0); } if(strcmp(opcode, "fneg") == 0){ if(sscanf(op_data, fff, tmp, &rd, &rs) == 3) return fneg(rs,0,rd,0); } if(strcmp(opcode, "fldi") == 0){ if(sscanf(op_data, ffgi, tmp, &rt, &rs, &imm) == 4) return fldi(rs,rt,imm); } if(strcmp(opcode, "fsti") == 0){ if(sscanf(op_data, ffgi, tmp, &rt, &rs, &imm) == 4) return fsti(rs,rt,imm); } if(strcmp(opcode, "fjeq") == 0){ if(sscanf(op_data, fffl, tmp, &rs, &rt, lname) == 4) { strcpy(label_name[label_cnt],lname); return fjeq(rs,rt,label_cnt++); } } if(strcmp(opcode, "fjlt") == 0){ if(sscanf(op_data, fffl, tmp, &rs, &rt, lname) == 4) { strcpy(label_name[label_cnt],lname); return fjlt(rs,rt,label_cnt++); } } if(strcmp(opcode, "halt") == 0){ return halt(0,0,0,0); } if(strcmp(opcode, "setL") == 0){ if(sscanf(op_data, fgl, tmp, &rd, lname) == 3) { strcpy(label_name[label_cnt],lname); return setl(0,rd,label_cnt++); } } if(strcmp(opcode, "padd") == 0){ if(sscanf(op_data, fgi, tmp, &rt, &imm) == 3) { return padd(0,rt,imm); } } if(strcmp(opcode, "link") == 0){ if(sscanf(op_data, fi, tmp, &imm) == 2) { return link(0,0,imm); } } if(strcmp(opcode, "movlr") == 0){ return movlr(0,0,0,0); } if(strcmp(opcode, "btmplr") == 0){ return btmplr(0,0,0,0); } /* if(strcmp(opcode, "padd") == 0){ if(sscanf(op_data, fgg, tmp, &rd, &rt) == 3) { return padd(0,rt,d,0); } } */ return -1; }