/*
 * Load an immediate using a shortcut if possible; otherwise
 * grab from the per-translation literal pool.  If the target is
 * a high register, build the constant into a low register and copy.
 *
 * No additional register clobbering is performed.  Use this version when
 * 1) rDest is freshly returned from dvmCompilerAllocTemp or
 * 2) The codegen is under fixed register usage
 */
static ArmLIR *loadConstantNoClobber(CompilationUnit *cUnit, int rDest,
                                     int value)
{
    ArmLIR *res;
    int tDest = LOWREG(rDest) ? rDest : dvmCompilerAllocTemp(cUnit);

    /* See if the value can be constructed cheaply */
    if ((value >= 0) && (value <= 255)) {
        res = newLIR2(cUnit, kThumbMovImm, tDest, value);
        if (rDest != tDest) {
            opRegReg(cUnit, kOpMov, rDest, tDest);
            dvmCompilerFreeTemp(cUnit, tDest);
        }
        return res;
    } else if ((value & 0xFFFFFF00) == 0xFFFFFF00) {
        res = newLIR2(cUnit, kThumbMovImm, tDest, ~value);
        newLIR2(cUnit, kThumbMvn, tDest, tDest);
        if (rDest != tDest) {
            opRegReg(cUnit, kOpMov, rDest, tDest);
            dvmCompilerFreeTemp(cUnit, tDest);
        }
        return res;
    }
    /* No shortcut - go ahead and use the literal pool */
    ArmLIR *dataTarget = scanLiteralPool(cUnit->literalList, value, 255);
    if (dataTarget == NULL) {
        dataTarget = addWordData(cUnit, &cUnit->literalList, value);
    }
    ArmLIR *loadPcRel = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true);
    loadPcRel->opcode = kThumbLdrPcRel;
    loadPcRel->generic.target = (LIR *) dataTarget;
    loadPcRel->operands[0] = tDest;
    setupResourceMasks(loadPcRel);
    setMemRefType(loadPcRel, true, kLiteral);
    loadPcRel->aliasInfo = dataTarget->operands[0];
    res = loadPcRel;
    dvmCompilerAppendLIR(cUnit, (LIR *) loadPcRel);

    /*
     * To save space in the constant pool, we use the ADD_RRI8 instruction to
     * add up to 255 to an existing constant value.
     */
    if (dataTarget->operands[0] != value) {
        newLIR2(cUnit, kThumbAddRI8, tDest, value - dataTarget->operands[0]);
    }
    if (rDest != tDest) {
        opRegReg(cUnit, kOpMov, rDest, tDest);
        dvmCompilerFreeTemp(cUnit, tDest);
    }
    return res;
}
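/*
 * Standalone sketch (not part of the JIT): the constant-materialization
 * policy above, expressed as a classifier.  The enum and function names
 * are hypothetical; only the numeric tests mirror the code above.  A pool
 * hit within 255 below the requested value is patched up with kThumbAddRI8.
 */
#include <stdint.h>

typedef enum { USE_MOV_IMM8, USE_MVN_IMM8, USE_LITERAL_POOL } ConstStrategy;

static ConstStrategy classifyConstant(int32_t value)
{
    if (value >= 0 && value <= 255)
        return USE_MOV_IMM8;        /* single Thumb MOV immediate */
    if ((value & 0xFFFFFF00) == 0xFFFFFF00)
        return USE_MVN_IMM8;        /* ~value fits 8 bits: MOV then MVN */
    return USE_LITERAL_POOL;        /* PC-relative load, maybe + ADD fixup */
}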
/*
 * For monitor unlock, we don't have to use ldrex/strex.  Once
 * we've determined that the lock is thin and that we own it with
 * a zero recursion count, it's safe to punch it back to the
 * initial, unlocked thin state with a store word.
 */
static void genMonitorExit(CompilationUnit *cUnit, MIR *mir)
{
    RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0);
    ArmLIR *target;
    ArmLIR *branch;
    ArmLIR *hopTarget;
    ArmLIR *hopBranch;

    assert(LW_SHAPE_THIN == 0);
    loadValueDirectFixed(cUnit, rlSrc, r1);  // Get obj
    dvmCompilerLockAllTemps(cUnit);  // Prepare for explicit register usage
    dvmCompilerFreeTemp(cUnit, r4PC);  // Free up r4 for general use
    genNullCheck(cUnit, rlSrc.sRegLow, r1, mir->offset, NULL);
    loadWordDisp(cUnit, r1, offsetof(Object, lock), r2);  // Get object->lock
    loadWordDisp(cUnit, r6SELF, offsetof(Thread, threadId), r3);  // Get threadId
    // Is lock unheld on lock or held by us (==threadId) on unlock?
    opRegRegImm(cUnit, kOpAnd, r7, r2,
                (LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT));
    opRegImm(cUnit, kOpLsl, r3, LW_LOCK_OWNER_SHIFT);  // Align owner
    newLIR3(cUnit, kThumb2Bfc, r2, LW_HASH_STATE_SHIFT,
            LW_LOCK_OWNER_SHIFT - 1);
    opRegReg(cUnit, kOpSub, r2, r3);
    hopBranch = opCondBranch(cUnit, kArmCondNe);
    dvmCompilerGenMemBarrier(cUnit, kSY);
    storeWordDisp(cUnit, r1, offsetof(Object, lock), r7);
    branch = opNone(cUnit, kOpUncondBr);

    hopTarget = newLIR0(cUnit, kArmPseudoTargetLabel);
    hopTarget->defMask = ENCODE_ALL;
    hopBranch->generic.target = (LIR *)hopTarget;

    // Export PC (part 1)
    loadConstant(cUnit, r3, (int) (cUnit->method->insns + mir->offset));

    LOAD_FUNC_ADDR(cUnit, r7, (int)dvmUnlockObject);
    genRegCopy(cUnit, r0, r6SELF);
    // Export PC (part 2)
    newLIR3(cUnit, kThumb2StrRRI8Predec, r3, r5FP,
            sizeof(StackSaveArea) -
            offsetof(StackSaveArea, xtra.currentPc));
    opReg(cUnit, kOpBlx, r7);
    /* Did we throw? */
    ArmLIR *branchOver = genCmpImmBranch(cUnit, kArmCondNe, r0, 0);
    loadConstant(cUnit, r0,
                 (int) (cUnit->method->insns + mir->offset +
                        dexGetWidthFromOpcode(OP_MONITOR_EXIT)));
    genDispatchToHandler(cUnit, TEMPLATE_THROW_EXCEPTION_COMMON);

    // Resume here
    target = newLIR0(cUnit, kArmPseudoTargetLabel);
    target->defMask = ENCODE_ALL;
    branch->generic.target = (LIR *)target;
    branchOver->generic.target = (LIR *) target;
}
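/*
 * Rough C model of the fast path above (illustrative only; the function
 * name is hypothetical).  The LW_* fallback values mirror the thin-lock
 * layout described in Sync.c: bit 0 = shape, bits 1-2 = hash state, owner
 * thread id starting at bit 3.  The real code also issues a memory barrier
 * before the store.
 */
#include <stdint.h>

#ifndef LW_HASH_STATE_SHIFT
#define LW_HASH_STATE_SHIFT 1
#define LW_HASH_STATE_MASK 0x3
#define LW_LOCK_OWNER_SHIFT 3
#endif

/* Returns 1 if the plain-store unlock is legal, mirroring the
 * AND/BFC/SUB/BNE sequence above. */
static int thinUnlockFastPathModel(uint32_t *lockWord, uint32_t threadId)
{
    uint32_t lock = *lockWord;
    uint32_t hashBits = lock & (LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT);

    /* Thin, owned by us, recursion count zero? */
    if ((lock & ~(LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT)) !=
        (threadId << LW_LOCK_OWNER_SHIFT))
        return 0;                   /* hop to the dvmUnlockObject() call */
    *lockWord = hashBits;           /* back to unlocked thin, hash preserved */
    return 1;
}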
/* Export the Dalvik PC associated with an instruction to the StackSave area */
static ArmLIR *genExportPC(CompilationUnit *cUnit, MIR *mir)
{
    ArmLIR *res;
    int offset = offsetof(StackSaveArea, xtra.currentPc);
    int rDPC = dvmCompilerAllocTemp(cUnit);

    res = loadConstant(cUnit, rDPC, (int) (cUnit->method->insns + mir->offset));
    newLIR3(cUnit, kThumb2StrRRI8Predec, rDPC, r5FP,
            sizeof(StackSaveArea) - offset);
    dvmCompilerFreeTemp(cUnit, rDPC);
    return res;
}
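/*
 * Address-math sketch for the Predec store above.  StackSaveAreaModel is a
 * hypothetical stand-in (the real StackSaveArea lives in interp/Stack.h);
 * the point is only that fp - (sizeof(StackSaveArea) - offset) lands on
 * the currentPc slot of the save area sitting just below the Dalvik frame
 * pointer.
 */
#include <stddef.h>

typedef struct {
    void *prevFrame;
    const void *savedPc;
    union { const unsigned short *currentPc; } xtra;
} StackSaveAreaModel;

static void *currentPcSlotModel(char *fp)
{
    return fp - (sizeof(StackSaveAreaModel) -
                 offsetof(StackSaveAreaModel, xtra.currentPc));
}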
/*
 * Handle the simple case (thin lock) inline.  If it's complicated, bail
 * out to the heavyweight lock/unlock routines.  We'll use dedicated
 * registers here in order to be in the right position in case we need
 * to bail to dvm[Lock/Unlock]Object(self, object).
 *
 * r0 -> self pointer [arg0 for dvm[Lock/Unlock]Object]
 * r1 -> object [arg1 for dvm[Lock/Unlock]Object]
 * r2 -> initial contents of object->lock, later result of strex
 * r3 -> self->threadId
 * r7 -> temp to hold new lock value [unlock only]
 * r4 -> allowed to be used by utilities as general temp
 *
 * The result of the strex is 0 if we acquire the lock.
 *
 * See comments in Sync.c for the layout of the lock word.
 * Of particular interest to this code is the test for the
 * simple case - which we handle inline.  For monitor enter, the
 * simple case is thin lock, held by no-one.  For monitor exit,
 * the simple case is thin lock, held by the unlocking thread with
 * a recurse count of 0.
 *
 * A minor complication is that there is a field in the lock word
 * unrelated to locking: the hash state.  This field must be ignored, but
 * preserved.
 */
static void genMonitorEnter(CompilationUnit *cUnit, MIR *mir)
{
    RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0);
    bool enter = (mir->dalvikInsn.opCode == OP_MONITOR_ENTER);
    ArmLIR *target;
    ArmLIR *hopTarget;
    ArmLIR *branch;
    ArmLIR *hopBranch;

    assert(LW_SHAPE_THIN == 0);
    loadValueDirectFixed(cUnit, rlSrc, r1);  // Get obj
    dvmCompilerLockAllTemps(cUnit);  // Prepare for explicit register usage
    dvmCompilerFreeTemp(cUnit, r4PC);  // Free up r4 for general use
    loadWordDisp(cUnit, rGLUE, offsetof(InterpState, self), r0);  // Get self
    genNullCheck(cUnit, rlSrc.sRegLow, r1, mir->offset, NULL);
    loadWordDisp(cUnit, r0, offsetof(Thread, threadId), r3);  // Get threadId
    newLIR3(cUnit, kThumb2Ldrex, r2, r1,
            offsetof(Object, lock) >> 2);  // Get object->lock
    opRegImm(cUnit, kOpLsl, r3, LW_LOCK_OWNER_SHIFT);  // Align owner
    // Is lock unheld on lock or held by us (==threadId) on unlock?
    newLIR4(cUnit, kThumb2Bfi, r3, r2, 0, LW_LOCK_OWNER_SHIFT - 1);
    newLIR3(cUnit, kThumb2Bfc, r2, LW_HASH_STATE_SHIFT,
            LW_LOCK_OWNER_SHIFT - 1);
    hopBranch = newLIR2(cUnit, kThumb2Cbnz, r2, 0);
    newLIR4(cUnit, kThumb2Strex, r2, r3, r1, offsetof(Object, lock) >> 2);
    branch = newLIR2(cUnit, kThumb2Cbz, r2, 0);

    hopTarget = newLIR0(cUnit, kArmPseudoTargetLabel);
    hopTarget->defMask = ENCODE_ALL;
    hopBranch->generic.target = (LIR *)hopTarget;

    // Clear the lock
    ArmLIR *inst = newLIR0(cUnit, kThumb2Clrex);
    // ...and make it a scheduling barrier
    inst->defMask = ENCODE_ALL;

    // Export PC (part 1)
    loadConstant(cUnit, r3, (int) (cUnit->method->insns + mir->offset));

    /* Get dPC of next insn */
    loadConstant(cUnit, r4PC, (int)(cUnit->method->insns + mir->offset +
                 dexGetInstrWidthAbs(gDvm.instrWidth, OP_MONITOR_ENTER)));
    // Export PC (part 2)
    newLIR3(cUnit, kThumb2StrRRI8Predec, r3, rFP,
            sizeof(StackSaveArea) -
            offsetof(StackSaveArea, xtra.currentPc));

    /* Call template, and don't return */
    genDispatchToHandler(cUnit, TEMPLATE_MONITOR_ENTER);

    // Resume here
    target = newLIR0(cUnit, kArmPseudoTargetLabel);
    target->defMask = ENCODE_ALL;
    branch->generic.target = (LIR *)target;
}
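/*
 * Rough C model of the ldrex/bfi/bfc/cbnz/strex fast path above, with the
 * atomicity of ldrex/strex elided (illustrative only; the function name is
 * hypothetical, and the LW_* constants are the ones used by the code
 * above).
 */
static int thinLockFastPathModel(uint32_t *lockWord, uint32_t threadId)
{
    uint32_t old = *lockWord;                        /* ldrex */
    /* bfi: merge the shape/hash bits of the old word into the new value */
    uint32_t newVal = (threadId << LW_LOCK_OWNER_SHIFT) |
                      (old & ((1u << LW_LOCK_OWNER_SHIFT) - 1));
    /* bfc + cbnz: anything set besides the hash state means contended */
    if (old & ~(LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT))
        return 0;                                    /* take the hop branch */
    *lockWord = newVal;                              /* strex */
    return 1;
}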
static bool genInlinedAbsFloat(CompilationUnit *cUnit, MIR *mir)
{
    int offset = offsetof(InterpState, retval);
    RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0);
    int reg0 = loadValue(cUnit, rlSrc, kCoreReg).lowReg;
    int signMask = dvmCompilerAllocTemp(cUnit);
    loadConstant(cUnit, signMask, 0x7fffffff);
    newLIR2(cUnit, kThumbAndRR, reg0, signMask);
    dvmCompilerFreeTemp(cUnit, signMask);
    storeWordDisp(cUnit, rGLUE, offset, reg0);
    //TUNING: rewrite this to not clobber
    dvmCompilerClobber(cUnit, reg0);
    return true;
}
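/*
 * Why an integer AND works: abs() on an IEEE-754 float is just clearing
 * the sign bit.  Standalone illustration (hypothetical name), equivalent
 * in effect to the masked store above.
 */
#include <stdint.h>
#include <string.h>

static float absFloatBitwise(float f)
{
    uint32_t bits;
    memcpy(&bits, &f, sizeof(bits));
    bits &= 0x7fffffff;             /* clear the sign bit */
    memcpy(&f, &bits, sizeof(f));
    return f;
}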
static bool genInlinedAbsDouble(CompilationUnit *cUnit, MIR *mir)
{
    int offset = offsetof(Thread, interpSave.retval);
    RegLocation rlSrc = dvmCompilerGetSrcWide(cUnit, mir, 0, 1);
    RegLocation regSrc = loadValueWide(cUnit, rlSrc, kCoreReg);
    int reglo = regSrc.lowReg;
    int reghi = regSrc.highReg;
    int signMask = dvmCompilerAllocTemp(cUnit);
    loadConstant(cUnit, signMask, 0x7fffffff);
    storeWordDisp(cUnit, r6SELF, offset, reglo);
    newLIR2(cUnit, kThumbAndRR, reghi, signMask);
    dvmCompilerFreeTemp(cUnit, signMask);
    storeWordDisp(cUnit, r6SELF, offset + 4, reghi);
    //TUNING: rewrite this to not clobber
    dvmCompilerClobber(cUnit, reghi);
    return false;
}
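/*
 * The double variant only needs to touch the high word: the sign bit of
 * an IEEE-754 double lives in bit 63, which is why the code above stores
 * the low word untouched and ANDs just the high word.  Illustrative
 * sketch (hypothetical name).
 */
#include <stdint.h>
#include <string.h>

static double absDoubleBitwise(double d)
{
    uint64_t bits;
    memcpy(&bits, &d, sizeof(bits));
    bits &= 0x7fffffffffffffffULL;  /* clears bit 63; low word unchanged */
    memcpy(&d, &bits, sizeof(d));
    return d;
}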
/*
 * To avoid possible conflicts, we use a lot of temps here.  Note that
 * our usage of Thumb2 instruction forms avoids the problems with register
 * reuse for multiply instructions prior to arm6.
 */
static void genMulLong(CompilationUnit *cUnit, RegLocation rlDest,
                       RegLocation rlSrc1, RegLocation rlSrc2)
{
    RegLocation rlResult;
    int resLo = dvmCompilerAllocTemp(cUnit);
    int resHi = dvmCompilerAllocTemp(cUnit);
    int tmp1 = dvmCompilerAllocTemp(cUnit);

    rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
    rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
    newLIR3(cUnit, kThumb2MulRRR, tmp1, rlSrc2.lowReg, rlSrc1.highReg);
    newLIR4(cUnit, kThumb2Umull, resLo, resHi, rlSrc2.lowReg, rlSrc1.lowReg);
    newLIR4(cUnit, kThumb2Mla, tmp1, rlSrc1.lowReg, rlSrc2.highReg, tmp1);
    newLIR4(cUnit, kThumb2AddRRR, resHi, tmp1, resHi, 0);
    dvmCompilerFreeTemp(cUnit, tmp1);

    rlResult = dvmCompilerGetReturnWide(cUnit);  // Just as a template, will patch
    rlResult.lowReg = resLo;
    rlResult.highReg = resHi;
    storeValueWide(cUnit, rlDest, rlResult);
}
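/*
 * C model of the four-instruction sequence above (hypothetical name):
 * a 64x64->64 multiply built from 32-bit pieces.  The cross terms are
 * computed mod 2^32 (mul/mla) since their carries land above bit 63;
 * aHi*bHi drops out entirely for the same reason; umull supplies the full
 * low product.  The low 64 bits are identical for signed and unsigned
 * operands, so one model covers mul-long.
 */
#include <stdint.h>

static uint64_t mulLongModel(uint64_t a, uint64_t b)
{
    uint32_t aLo = (uint32_t)a, aHi = (uint32_t)(a >> 32);
    uint32_t bLo = (uint32_t)b, bHi = (uint32_t)(b >> 32);

    uint32_t tmp1 = bLo * aHi;                       /* kThumb2MulRRR */
    uint64_t prod = (uint64_t)bLo * aLo;             /* kThumb2Umull  */
    tmp1 += aLo * bHi;                               /* kThumb2Mla    */
    uint32_t resHi = (uint32_t)(prod >> 32) + tmp1;  /* kThumb2AddRRR */

    return ((uint64_t)resHi << 32) | (uint32_t)prod;
}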
/*
 * Generate array store
 */
static void genArrayPut(CompilationUnit *cUnit, MIR *mir, OpSize size,
                        RegLocation rlArray, RegLocation rlIndex,
                        RegLocation rlSrc, int scale)
{
    RegisterClass regClass = dvmCompilerRegClassBySize(size);
    int lenOffset = OFFSETOF_MEMBER(ArrayObject, length);
    int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents);
    int regPtr;

    rlArray = loadValue(cUnit, rlArray, kCoreReg);
    rlIndex = loadValue(cUnit, rlIndex, kCoreReg);

    if (dvmCompilerIsTemp(cUnit, rlArray.lowReg)) {
        dvmCompilerClobber(cUnit, rlArray.lowReg);
        regPtr = rlArray.lowReg;
    } else {
        regPtr = dvmCompilerAllocTemp(cUnit);
        genRegCopy(cUnit, regPtr, rlArray.lowReg);
    }

    /* null object? */
    ArmLIR * pcrLabel = NULL;
    if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
        pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg,
                                mir->offset, NULL);
    }

    if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
        int regLen = dvmCompilerAllocTemp(cUnit);
        //NOTE: max live temps(4) here.
        /* Get len */
        loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
        /* regPtr -> array data */
        opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
        genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset, pcrLabel);
        dvmCompilerFreeTemp(cUnit, regLen);
    } else {
        /* regPtr -> array data */
        opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
    }
    /* at this point, regPtr points to array data, 2 live temps */
    if ((size == kLong) || (size == kDouble)) {
        //TODO: need a specific wide routine that can handle fp regs
        if (scale) {
            int rNewIndex = dvmCompilerAllocTemp(cUnit);
            opRegRegImm(cUnit, kOpLsl, rNewIndex, rlIndex.lowReg, scale);
            opRegReg(cUnit, kOpAdd, regPtr, rNewIndex);
            dvmCompilerFreeTemp(cUnit, rNewIndex);
        } else {
            opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg);
        }
        rlSrc = loadValueWide(cUnit, rlSrc, regClass);

        HEAP_ACCESS_SHADOW(true);
        storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg);
        HEAP_ACCESS_SHADOW(false);

        dvmCompilerFreeTemp(cUnit, regPtr);
    } else {
        rlSrc = loadValue(cUnit, rlSrc, regClass);

        HEAP_ACCESS_SHADOW(true);
        storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
                         scale, size);
        HEAP_ACCESS_SHADOW(false);
    }
}
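/*
 * Element addressing sketch (hypothetical helper): the wide path above
 * forms base + contents offset + (index << scale), doing the shift in a
 * temp because storePair takes only a base register, while narrow
 * elements can use storeBaseIndexed's built-in scaled index.
 */
#include <stddef.h>

static void *arrayElementAddrModel(char *arrayObj, size_t dataOffset,
                                   size_t index, int scale)
{
    return arrayObj + dataOffset + (index << scale);
}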
/*
 * Generate array load
 */
static void genArrayGet(CompilationUnit *cUnit, MIR *mir, OpSize size,
                        RegLocation rlArray, RegLocation rlIndex,
                        RegLocation rlDest, int scale)
{
    RegisterClass regClass = dvmCompilerRegClassBySize(size);
    int lenOffset = OFFSETOF_MEMBER(ArrayObject, length);
    int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents);
    RegLocation rlResult;
    rlArray = loadValue(cUnit, rlArray, kCoreReg);
    rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
    int regPtr;

    /* null object? */
    ArmLIR * pcrLabel = NULL;
    if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
        pcrLabel = genNullCheck(cUnit, rlArray.sRegLow,
                                rlArray.lowReg, mir->offset, NULL);
    }

    regPtr = dvmCompilerAllocTemp(cUnit);

    if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
        int regLen = dvmCompilerAllocTemp(cUnit);
        /* Get len */
        loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
        /* regPtr -> array data */
        opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
        genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset, pcrLabel);
        dvmCompilerFreeTemp(cUnit, regLen);
    } else {
        /* regPtr -> array data */
        opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
    }
    if ((size == kLong) || (size == kDouble)) {
        if (scale) {
            int rNewIndex = dvmCompilerAllocTemp(cUnit);
            opRegRegImm(cUnit, kOpLsl, rNewIndex, rlIndex.lowReg, scale);
            opRegReg(cUnit, kOpAdd, regPtr, rNewIndex);
            dvmCompilerFreeTemp(cUnit, rNewIndex);
        } else {
            opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg);
        }
        rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true);

        HEAP_ACCESS_SHADOW(true);
        loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg);
        HEAP_ACCESS_SHADOW(false);

        dvmCompilerFreeTemp(cUnit, regPtr);
        storeValueWide(cUnit, rlDest, rlResult);
    } else {
        rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true);

        HEAP_ACCESS_SHADOW(true);
        loadBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlResult.lowReg,
                        scale, size);
        HEAP_ACCESS_SHADOW(false);

        dvmCompilerFreeTemp(cUnit, regPtr);
        storeValue(cUnit, rlDest, rlResult);
    }
}
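/*
 * C model of the guarded load path (illustrative; IntArrayModel is a
 * hypothetical stand-in for ArrayObject).  Each early return corresponds
 * to a branch into the exception-throwing slow path.
 */
#include <stdint.h>

typedef struct {
    uint32_t length;
    int32_t  contents[1];           /* payload follows the header */
} IntArrayModel;

static int arrayGetModel(const IntArrayModel *a, uint32_t index,
                         int32_t *out)
{
    if (a == NULL)
        return -1;                  /* genNullCheck */
    if (index >= a->length)
        return -2;                  /* genBoundsCheck */
    *out = a->contents[index];      /* loadBaseIndexed, scale = 2 */
    return 0;
}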