static bool genInlineSqrt(CompilationUnit *cUnit, MIR *mir)
{
    ArmLIR *branch;
    RegLocation rlSrc = dvmCompilerGetSrcWide(cUnit, mir, 0, 1);
    RegLocation rlDest = inlinedTargetWide(cUnit, mir, true);
    rlSrc = loadValueWide(cUnit, rlSrc, kFPReg);
    RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kFPReg, true);
    newLIR2(cUnit, kThumb2Vsqrtd, S2D(rlResult.lowReg, rlResult.highReg),
            S2D(rlSrc.lowReg, rlSrc.highReg));
    // Compare the result to itself: equal means not NaN, so we can skip
    // the libm fallback below.
    newLIR2(cUnit, kThumb2Vcmpd, S2D(rlResult.lowReg, rlResult.highReg),
            S2D(rlResult.lowReg, rlResult.highReg));
    newLIR0(cUnit, kThumb2Fmstat);
    branch = newLIR2(cUnit, kThumbBCond, 0, kArmCondEq);
    // NaN result (e.g. negative input): call libm's sqrt() instead.
    dvmCompilerClobberCallRegs(cUnit);
    LOAD_FUNC_ADDR(cUnit, r2, (int) (double (*)(double)) sqrt);
    newLIR3(cUnit, kThumb2Fmrrd, r0, r1, S2D(rlSrc.lowReg, rlSrc.highReg));
    newLIR1(cUnit, kThumbBlxR, r2);
    newLIR3(cUnit, kThumb2Fmdrr, S2D(rlResult.lowReg, rlResult.highReg),
            r0, r1);
    ArmLIR *label = newLIR0(cUnit, kArmPseudoTargetLabel);
    label->defMask = ENCODE_ALL;
    branch->generic.target = (LIR *)label;
    storeValueWide(cUnit, rlDest, rlResult);
    return false;
}
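/*
 * Illustration (not compiled into the VM): a plain-C sketch of the fallback
 * logic above. vsqrt.f64 yields NaN for out-of-domain inputs, and NaN never
 * compares equal to itself, so the vcmpd/fmstat self-compare routes exactly
 * those cases to libm's sqrt().  inlineSqrtModel is a hypothetical name.
 *
 *   #include <math.h>
 *
 *   static double inlineSqrtModel(double x)
 *   {
 *       double res = __builtin_sqrt(x);    // stands in for vsqrt.f64
 *       if (res != res)                    // true only for a NaN result
 *           res = sqrt(x);                 // punt to the library routine
 *       return res;
 *   }
 */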
static int genTraceProfileEntry(CompilationUnit *cUnit)
{
    intptr_t addr = (intptr_t)dvmJitNextTraceCounter();
    assert(__BYTE_ORDER == __LITTLE_ENDIAN);
    /* Emit the counter address as two little-endian halfwords, then the
     * chain cell offset placeholder: a 6-byte trace header. */
    newLIR1(cUnit, kArm16BitData, addr & 0xffff);
    newLIR1(cUnit, kArm16BitData, (addr >> 16) & 0xffff);
    cUnit->chainCellOffsetLIR =
        (LIR *) newLIR1(cUnit, kArm16BitData, CHAIN_CELL_OFFSET_TAG);
    cUnit->headerSize = 6;
    if ((gDvmJit.profileMode == kTraceProfilingContinuous) ||
        (gDvmJit.profileMode == kTraceProfilingDisabled)) {
        /* Thumb[2] instruction used directly here to ensure correct size */
        newLIR2(cUnit, kThumb2LdrPcReln12, r0, 8);
        newLIR3(cUnit, kThumbLdrRRI5, r1, r0, 0);
        newLIR2(cUnit, kThumbAddRI8, r1, 1);
        newLIR3(cUnit, kThumbStrRRI5, r1, r0, 0);
        return 10;
    } else {
        int opcode = TEMPLATE_PERIODIC_PROFILING;
        newLIR2(cUnit, kThumbBlx1,
                (int) gDvmJit.codeCache + templateEntryOffsets[opcode],
                (int) gDvmJit.codeCache + templateEntryOffsets[opcode]);
        newLIR2(cUnit, kThumbBlx2,
                (int) gDvmJit.codeCache + templateEntryOffsets[opcode],
                (int) gDvmJit.codeCache + templateEntryOffsets[opcode]);
        return 4;
    }
}
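/*
 * For reference, a sketch of what the code above emits (offsets are
 * illustrative, per the little-endian layout asserted above):
 *
 *   +0  counter address, low halfword   \  32-bit pointer stored as two
 *   +2  counter address, high halfword  /  kArm16BitData words
 *   +4  CHAIN_CELL_OFFSET_TAG placeholder (filled in later)
 *   +6  trace body (headerSize == 6 skips the three data words)
 *
 * The continuous-profiling prologue then behaves roughly like
 *
 *   int *counter = addressEmbeddedInHeader;  // ldr r0, [pc, #-8]
 *   (*counter)++;                            // ldr/add/str, 10 bytes total
 *
 * while the periodic mode instead spends 4 bytes on a blx pair targeting
 * the TEMPLATE_PERIODIC_PROFILING handler.
 */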
/*
 * For monitor unlock, we don't have to use ldrex/strex.  Once
 * we've determined that the lock is thin and that we own it with
 * a zero recursion count, it's safe to punch it back to the
 * initial, unlocked thin state with a store word.
 */
static void genMonitorExit(CompilationUnit *cUnit, MIR *mir)
{
    RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0);
    ArmLIR *target;
    ArmLIR *branch;
    ArmLIR *hopTarget;
    ArmLIR *hopBranch;

    assert(LW_SHAPE_THIN == 0);
    loadValueDirectFixed(cUnit, rlSrc, r1);  // Get obj
    dvmCompilerLockAllTemps(cUnit);  // Prepare for explicit register usage
    dvmCompilerFreeTemp(cUnit, r4PC);  // Free up r4 for general use
    genNullCheck(cUnit, rlSrc.sRegLow, r1, mir->offset, NULL);
    loadWordDisp(cUnit, r1, offsetof(Object, lock), r2);  // Get object->lock
    loadWordDisp(cUnit, r6SELF, offsetof(Thread, threadId), r3);  // Get threadId
    // Is lock unheld on lock or held by us (==threadId) on unlock?
    opRegRegImm(cUnit, kOpAnd, r7, r2,
                (LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT));
    opRegImm(cUnit, kOpLsl, r3, LW_LOCK_OWNER_SHIFT);  // Align owner
    newLIR3(cUnit, kThumb2Bfc, r2, LW_HASH_STATE_SHIFT,
            LW_LOCK_OWNER_SHIFT - 1);
    opRegReg(cUnit, kOpSub, r2, r3);
    hopBranch = opCondBranch(cUnit, kArmCondNe);
    dvmCompilerGenMemBarrier(cUnit, kSY);
    storeWordDisp(cUnit, r1, offsetof(Object, lock), r7);
    branch = opNone(cUnit, kOpUncondBr);

    hopTarget = newLIR0(cUnit, kArmPseudoTargetLabel);
    hopTarget->defMask = ENCODE_ALL;
    hopBranch->generic.target = (LIR *)hopTarget;

    // Export PC (part 1)
    loadConstant(cUnit, r3, (int) (cUnit->method->insns + mir->offset));

    LOAD_FUNC_ADDR(cUnit, r7, (int)dvmUnlockObject);
    genRegCopy(cUnit, r0, r6SELF);
    // Export PC (part 2)
    newLIR3(cUnit, kThumb2StrRRI8Predec, r3, r5FP,
            sizeof(StackSaveArea) -
            offsetof(StackSaveArea, xtra.currentPc));
    opReg(cUnit, kOpBlx, r7);
    /* Did we throw? */
    ArmLIR *branchOver = genCmpImmBranch(cUnit, kArmCondNe, r0, 0);
    loadConstant(cUnit, r0,
                 (int) (cUnit->method->insns + mir->offset +
                        dexGetWidthFromOpcode(OP_MONITOR_EXIT)));
    genDispatchToHandler(cUnit, TEMPLATE_THROW_EXCEPTION_COMMON);

    // Resume here
    target = newLIR0(cUnit, kArmPseudoTargetLabel);
    target->defMask = ENCODE_ALL;
    branch->generic.target = (LIR *)target;
    branchOver->generic.target = (LIR *)target;
}
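/*
 * A plain-C model of the unlock fast path above (illustration only;
 * memoryBarrier() stands in for dvmCompilerGenMemBarrier(cUnit, kSY)):
 *
 *   u4 lock = obj->lock;
 *   u4 hashState = lock & (LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT);
 *   if ((lock & ~(LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT)) ==
 *       (self->threadId << LW_LOCK_OWNER_SHIFT)) {
 *       // Thin, owned by us, recursion count zero: a single plain store
 *       // unlocks, preserving only the hash state bits.
 *       memoryBarrier();
 *       obj->lock = hashState;
 *   } else {
 *       dvmUnlockObject(self, obj);   // anything else is the slow path
 *   }
 */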
/*
 * Handle the simple case (thin lock) inline.  If it's complicated, bail
 * out to the heavyweight lock/unlock routines.  We'll use dedicated
 * registers here in order to be in the right position in case we need
 * to bail to dvm[Lock/Unlock]Object(self, object):
 *
 * r0 -> self pointer [arg0 for dvm[Lock/Unlock]Object]
 * r1 -> object [arg1 for dvm[Lock/Unlock]Object]
 * r2 -> initial contents of object->lock, later result of strex
 * r3 -> self->threadId
 * r7 -> temp to hold new lock value [unlock only]
 * r4 -> allowed to be used by utilities as general temp
 *
 * The result of the strex is 0 if we acquire the lock.
 *
 * See comments in Sync.c for the layout of the lock word; a short sketch
 * also follows this function.  Of particular interest to this code is the
 * test for the simple case - which we handle inline.  For monitor enter,
 * the simple case is thin lock, held by no-one.  For monitor exit, the
 * simple case is thin lock, held by the unlocking thread with a recurse
 * count of 0.
 *
 * A minor complication is that there is a field in the lock word
 * unrelated to locking: the hash state.  This field must be ignored, but
 * preserved.
 */
static void genMonitorEnter(CompilationUnit *cUnit, MIR *mir)
{
    RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0);
    bool enter = (mir->dalvikInsn.opCode == OP_MONITOR_ENTER);
    ArmLIR *target;
    ArmLIR *hopTarget;
    ArmLIR *branch;
    ArmLIR *hopBranch;

    assert(LW_SHAPE_THIN == 0);
    loadValueDirectFixed(cUnit, rlSrc, r1);  // Get obj
    dvmCompilerLockAllTemps(cUnit);  // Prepare for explicit register usage
    dvmCompilerFreeTemp(cUnit, r4PC);  // Free up r4 for general use
    loadWordDisp(cUnit, rGLUE, offsetof(InterpState, self), r0);  // Get self
    genNullCheck(cUnit, rlSrc.sRegLow, r1, mir->offset, NULL);
    loadWordDisp(cUnit, r0, offsetof(Thread, threadId), r3);  // Get threadId
    newLIR3(cUnit, kThumb2Ldrex, r2, r1,
            offsetof(Object, lock) >> 2);  // Get object->lock
    opRegImm(cUnit, kOpLsl, r3, LW_LOCK_OWNER_SHIFT);  // Align owner
    // Is lock unheld on lock or held by us (==threadId) on unlock?
    newLIR4(cUnit, kThumb2Bfi, r3, r2, 0, LW_LOCK_OWNER_SHIFT - 1);
    newLIR3(cUnit, kThumb2Bfc, r2, LW_HASH_STATE_SHIFT,
            LW_LOCK_OWNER_SHIFT - 1);
    hopBranch = newLIR2(cUnit, kThumb2Cbnz, r2, 0);
    newLIR4(cUnit, kThumb2Strex, r2, r3, r1, offsetof(Object, lock) >> 2);
    branch = newLIR2(cUnit, kThumb2Cbz, r2, 0);

    hopTarget = newLIR0(cUnit, kArmPseudoTargetLabel);
    hopTarget->defMask = ENCODE_ALL;
    hopBranch->generic.target = (LIR *)hopTarget;

    // Clear the lock
    ArmLIR *inst = newLIR0(cUnit, kThumb2Clrex);
    // ...and make it a scheduling barrier
    inst->defMask = ENCODE_ALL;

    // Export PC (part 1)
    loadConstant(cUnit, r3, (int) (cUnit->method->insns + mir->offset));

    /* Get dPC of next insn */
    loadConstant(cUnit, r4PC, (int)(cUnit->method->insns + mir->offset +
                 dexGetInstrWidthAbs(gDvm.instrWidth, OP_MONITOR_ENTER)));
    // Export PC (part 2)
    newLIR3(cUnit, kThumb2StrRRI8Predec, r3, rFP,
            sizeof(StackSaveArea) -
            offsetof(StackSaveArea, xtra.currentPc));
    /* Call template, and don't return */
    genDispatchToHandler(cUnit, TEMPLATE_MONITOR_ENTER);

    // Resume here
    target = newLIR0(cUnit, kArmPseudoTargetLabel);
    target->defMask = ENCODE_ALL;
    branch->generic.target = (LIR *)target;
}
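/*
 * For reference, the thin lock word layout the code above relies on, per
 * the LW_* constants in Sync.h (reproduced here as an aid; widths follow
 * LW_LOCK_OWNER_SHIFT == 3 and the 16-bit thread id):
 *
 *   bit  0       shape (0 == LW_SHAPE_THIN; 1 means a fat monitor pointer)
 *   bits 1-2     hash state (must be preserved, never interpreted here)
 *   bits 3-18    owner thread id
 *   bits 19-31   recursion count
 *
 * An unheld thin lock is therefore zero outside the hash state bits, which
 * is what the bfc + cbnz pair tests before attempting the strex.
 */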
/* Export the Dalvik PC associated with an instruction to the StackSave area */
static ArmLIR *genExportPC(CompilationUnit *cUnit, MIR *mir)
{
    ArmLIR *res;
    int offset = offsetof(StackSaveArea, xtra.currentPc);
    int rDPC = dvmCompilerAllocTemp(cUnit);
    res = loadConstant(cUnit, rDPC, (int) (cUnit->method->insns + mir->offset));
    newLIR3(cUnit, kThumb2StrRRI8Predec, rDPC, r5FP,
            sizeof(StackSaveArea) - offset);
    dvmCompilerFreeTemp(cUnit, rDPC);
    return res;
}
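/*
 * In C terms this is roughly (sketch; SAVEAREA_FROM_FP is the interpreter's
 * frame-pointer-to-save-area macro from interp/Stack.h):
 *
 *   SAVEAREA_FROM_FP(fp)->xtra.currentPc = cUnit->method->insns + mir->offset;
 *
 * The kThumb2StrRRI8Predec form reaches the save area below r5FP with a
 * single negative-offset store, so no separate address arithmetic is
 * emitted.
 */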
/*
 * Perform a "reg cmp reg" operation and jump to the PCR region if the
 * condition is satisfied.
 */
static MipsLIR *genRegRegCheck(CompilationUnit *cUnit,
                               MipsConditionCode cond,
                               int reg1, int reg2, int dOffset,
                               MipsLIR *pcrLabel)
{
    MipsLIR *res = NULL;
    if (cond == kMipsCondGe) {  /* signed >= case */
        int tReg = dvmCompilerAllocTemp(cUnit);
        res = newLIR3(cUnit, kMipsSlt, tReg, reg1, reg2);
        MipsLIR *branch = opCompareBranch(cUnit, kMipsBeqz, tReg, -1);
        genCheckCommon(cUnit, dOffset, branch, pcrLabel);
    } else if (cond == kMipsCondCs) {  /* unsigned >= case */
        int tReg = dvmCompilerAllocTemp(cUnit);
        res = newLIR3(cUnit, kMipsSltu, tReg, reg1, reg2);
        MipsLIR *branch = opCompareBranch(cUnit, kMipsBeqz, tReg, -1);
        genCheckCommon(cUnit, dOffset, branch, pcrLabel);
    } else {
        ALOGE("Unexpected condition in genRegRegCheck: %d\n", (int) cond);
        dvmAbort();
    }
    return res;
}
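/*
 * MIPS has no condition flags, so ">=" is synthesized from set-on-less-than
 * (illustration; pcr stands for the punt target resolved by genCheckCommon):
 *
 *   int t = (reg1 < reg2);    // slt (sltu for the unsigned kMipsCondCs case)
 *   if (t == 0) goto pcr;     // beqz: taken exactly when reg1 >= reg2
 */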
static bool genArithOpDouble(CompilationUnit *cUnit, MIR *mir,
                             RegLocation rlDest, RegLocation rlSrc1,
                             RegLocation rlSrc2)
{
    int op = kThumbBkpt;
    RegLocation rlResult;

    switch (mir->dalvikInsn.opcode) {
        case OP_ADD_DOUBLE_2ADDR:
        case OP_ADD_DOUBLE:
            op = kThumb2Vaddd;
            break;
        case OP_SUB_DOUBLE_2ADDR:
        case OP_SUB_DOUBLE:
            op = kThumb2Vsubd;
            break;
        case OP_DIV_DOUBLE_2ADDR:
        case OP_DIV_DOUBLE:
            op = kThumb2Vdivd;
            break;
        case OP_MUL_DOUBLE_2ADDR:
        case OP_MUL_DOUBLE:
            op = kThumb2Vmuld;
            break;
        case OP_REM_DOUBLE_2ADDR:
        case OP_REM_DOUBLE:
        case OP_NEG_DOUBLE: {
            return genArithOpDoublePortable(cUnit, mir, rlDest, rlSrc1,
                                            rlSrc2);
        }
        default:
            return true;
    }
    rlSrc1 = loadValueWide(cUnit, rlSrc1, kFPReg);
    assert(rlSrc1.wide);
    rlSrc2 = loadValueWide(cUnit, rlSrc2, kFPReg);
    assert(rlSrc2.wide);
    rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kFPReg, true);
    assert(rlDest.wide);
    assert(rlResult.wide);
    newLIR3(cUnit, (ArmOpcode)op, S2D(rlResult.lowReg, rlResult.highReg),
            S2D(rlSrc1.lowReg, rlSrc1.highReg),
            S2D(rlSrc2.lowReg, rlSrc2.highReg));
    storeValueWide(cUnit, rlDest, rlResult);
    return false;
}
static bool genArithOpFloat(CompilationUnit *cUnit, MIR *mir,
                            RegLocation rlDest, RegLocation rlSrc1,
                            RegLocation rlSrc2)
{
    int op = kThumbBkpt;
    RegLocation rlResult;

    /*
     * Don't attempt to optimize register usage since these opcodes call out
     * to the handlers.
     */
    switch (mir->dalvikInsn.opcode) {
        case OP_ADD_FLOAT_2ADDR:
        case OP_ADD_FLOAT:
            op = kThumb2Vadds;
            break;
        case OP_SUB_FLOAT_2ADDR:
        case OP_SUB_FLOAT:
            op = kThumb2Vsubs;
            break;
        case OP_DIV_FLOAT_2ADDR:
        case OP_DIV_FLOAT:
            op = kThumb2Vdivs;
            break;
        case OP_MUL_FLOAT_2ADDR:
        case OP_MUL_FLOAT:
            op = kThumb2Vmuls;
            break;
        case OP_REM_FLOAT_2ADDR:
        case OP_REM_FLOAT:
        case OP_NEG_FLOAT: {
            return genArithOpFloatPortable(cUnit, mir, rlDest, rlSrc1,
                                           rlSrc2);
        }
        default:
            return true;
    }
    rlSrc1 = loadValue(cUnit, rlSrc1, kFPReg);
    rlSrc2 = loadValue(cUnit, rlSrc2, kFPReg);
    rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kFPReg, true);
    newLIR3(cUnit, (ArmOpcode)op, rlResult.lowReg, rlSrc1.lowReg,
            rlSrc2.lowReg);
    storeValue(cUnit, rlDest, rlResult);
    return false;
}
static bool genArithOpDouble(CompilationUnit *cUnit, MIR *mir,
                             int vDest, int vSrc1, int vSrc2)
{
    int op = THUMB_BKPT;

    /*
     * Don't attempt to optimize register usage since these opcodes call out
     * to the handlers.
     */
    switch (mir->dalvikInsn.opCode) {
        case OP_ADD_DOUBLE_2ADDR:
        case OP_ADD_DOUBLE:
            op = THUMB2_VADDD;
            break;
        case OP_SUB_DOUBLE_2ADDR:
        case OP_SUB_DOUBLE:
            op = THUMB2_VSUBD;
            break;
        case OP_DIV_DOUBLE_2ADDR:
        case OP_DIV_DOUBLE:
            op = THUMB2_VDIVD;
            break;
        case OP_MUL_DOUBLE_2ADDR:
        case OP_MUL_DOUBLE:
            op = THUMB2_VMULD;
            break;
        case OP_REM_DOUBLE_2ADDR:
        case OP_REM_DOUBLE:
        case OP_NEG_DOUBLE: {
            return genArithOpDoublePortable(cUnit, mir, vDest, vSrc1, vSrc2);
        }
        default:
            return true;
    }
    loadDouble(cUnit, vSrc1, dr1);
    loadDouble(cUnit, vSrc2, dr2);
    newLIR3(cUnit, op, dr0, dr1, dr2);
    storeDouble(cUnit, dr0, vDest, 0);
    return false;
}
static bool genArithOpFloat(CompilationUnit *cUnit, MIR *mir,
                            int vDest, int vSrc1, int vSrc2)
{
    int op = THUMB_BKPT;

    /*
     * Don't attempt to optimize register usage since these opcodes call out
     * to the handlers.
     */
    switch (mir->dalvikInsn.opCode) {
        case OP_ADD_FLOAT_2ADDR:
        case OP_ADD_FLOAT:
            op = THUMB2_VADDS;
            break;
        case OP_SUB_FLOAT_2ADDR:
        case OP_SUB_FLOAT:
            op = THUMB2_VSUBS;
            break;
        case OP_DIV_FLOAT_2ADDR:
        case OP_DIV_FLOAT:
            op = THUMB2_VDIVS;
            break;
        case OP_MUL_FLOAT_2ADDR:
        case OP_MUL_FLOAT:
            op = THUMB2_VMULS;
            break;
        case OP_REM_FLOAT_2ADDR:
        case OP_REM_FLOAT:
        case OP_NEG_FLOAT: {
            return genArithOpFloatPortable(cUnit, mir, vDest, vSrc1, vSrc2);
        }
        default:
            return true;
    }
    loadFloat(cUnit, vSrc1, fr2);
    loadFloat(cUnit, vSrc2, fr4);
    newLIR3(cUnit, op, fr0, fr2, fr4);
    storeFloat(cUnit, fr0, vDest, 0);
    return false;
}
/*
 * To avoid possible conflicts, we use a lot of temps here.  Note that
 * our usage of Thumb2 instruction forms avoids the problems with register
 * reuse for multiply instructions prior to arm6.
 */
static void genMulLong(CompilationUnit *cUnit, RegLocation rlDest,
                       RegLocation rlSrc1, RegLocation rlSrc2)
{
    RegLocation rlResult;
    int resLo = dvmCompilerAllocTemp(cUnit);
    int resHi = dvmCompilerAllocTemp(cUnit);
    int tmp1 = dvmCompilerAllocTemp(cUnit);

    rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
    rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
    newLIR3(cUnit, kThumb2MulRRR, tmp1, rlSrc2.lowReg, rlSrc1.highReg);
    newLIR4(cUnit, kThumb2Umull, resLo, resHi, rlSrc2.lowReg, rlSrc1.lowReg);
    newLIR4(cUnit, kThumb2Mla, tmp1, rlSrc1.lowReg, rlSrc2.highReg, tmp1);
    newLIR4(cUnit, kThumb2AddRRR, resHi, tmp1, resHi, 0);
    dvmCompilerFreeTemp(cUnit, tmp1);
    rlResult = dvmCompilerGetReturnWide(cUnit);  // Just as a template, will patch
    rlResult.lowReg = resLo;
    rlResult.highReg = resHi;
    storeValueWide(cUnit, rlDest, rlResult);
}
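/*
 * The sequence above is the schoolbook decomposition of a 64x64 -> 64-bit
 * multiply.  Splitting each operand into 32-bit halves, only three partial
 * products can reach the low 64 bits; hi1*hi2 only affects bits >= 64 and
 * is dropped.  A standalone model (mulLongModel is a hypothetical name):
 *
 *   #include <stdint.h>
 *
 *   static uint64_t mulLongModel(uint32_t lo1, uint32_t hi1,
 *                                uint32_t lo2, uint32_t hi2)
 *   {
 *       uint64_t res = (uint64_t)lo1 * lo2;        // kThumb2Umull
 *       uint32_t cross = lo2 * hi1 + lo1 * hi2;    // kThumb2MulRRR + Mla
 *       return res + ((uint64_t)cross << 32);      // kThumb2AddRRR into resHi
 *   }
 */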