// Find the IndexSet such that modDown to that set of primes makes the
// additive term due to rounding into the dominant noise term
void Ctxt::findBaseSet(IndexSet& s) const
{
  if (getNoiseVar() <= 0.0) { // an empty ciphertext
    s = context.ctxtPrimes;
    return;
  }

  assert(verifyPrimeSet());
  bool halfSize = context.containsSmallPrime();
  double curNoise = log(getNoiseVar())/2;
  double firstNoise = context.logOfPrime(0);
  double noiseThreshold = log(modSwitchAddedNoiseVar())*0.55;
  // FIXME: The above should have been 0.5. Making it a bit larger means
  // that we will mod-switch a little less frequently; whether this is
  // a good thing needs to be tested.

  // remove special primes, if they are included in this->primeSet
  s = getPrimeSet();
  if (!s.disjointFrom(context.specialPrimes)) { // scale down noise
    curNoise -= context.logOfProduct(context.specialPrimes);
    s.remove(context.specialPrimes);
  }

  /* We compare below to noiseThreshold+1 rather than to noiseThreshold
   * to make sure that if you mod-switch down to c.findBaseSet() and
   * then immediately call c.findBaseSet() again, it will not tell you
   * to mod-switch further down. Note that mod-switching adds close to
   * noiseThreshold to the scaled noise, so if the scaled noise was
   * equal to noiseThreshold then after mod-switching you would have
   * roughly twice as much noise. Since we're measuring the log, it means
   * that you may have as much as noiseThreshold+log(2), which we round
   * up to noiseThreshold+1 in the test below.
   */
  if (curNoise <= noiseThreshold+1) return; // no need to mod down

  // if the first prime is half-size, begin by removing it
  if (halfSize && s.contains(0)) {
    curNoise -= firstNoise;
    s.remove(0);
  }

  // while noise is larger than threshold, scale down by the next prime
  while (curNoise > noiseThreshold && !empty(s)) {
    curNoise -= context.logOfPrime(s.last());
    s.remove(s.last());
  }

  // Add 1st prime if s is empty or if this does not increase noise too much
  if (empty(s) || (!s.contains(0) && curNoise+firstNoise <= noiseThreshold)) {
    s.insert(0);
    curNoise += firstNoise;
  }

  if (curNoise > noiseThreshold && log_of_ratio() > -0.5)
    cerr << "Ctxt::findBaseSet warning: already at lowest level\n";
}
void IndexSet::retain(const IndexSet& s)
{
  if (this == &s) return;
  if (s.card() == 0) { clear(); return; }
  if (card() == 0) return;

  for (long i = first(); i <= last(); i = next(i)) {
    if (!s.contains(i)) remove(i);
  }
}
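// A minimal usage sketch (hypothetical driver, not part of HElib): retain()
// is an in-place set intersection, so after the calls below `a` holds only
// the indexes present in both sets. Assumes HElib's IndexSet interface of
// insert()/contains() and the free function card().
#include "IndexSet.h"
#include <cassert>

void retainExample()
{
  IndexSet a, b;
  a.insert(1); a.insert(3); a.insert(5);
  b.insert(3); b.insert(4); b.insert(5);

  a.retain(b);          // a is now {3, 5}
  assert(card(a) == 2);
  assert(a.contains(3) && a.contains(5) && !a.contains(1));
}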
void readContextBinary(istream& str, FHEcontext& context)
{
  assert(readEyeCatcher(str, BINIO_EYE_CONTEXT_BEGIN)==0);

  // Get the standard deviation
  context.stdev = read_raw_xdouble(str);

  long sizeOfS = read_raw_int(str);

  IndexSet s;
  for (long tmp, i=0; i<sizeOfS; i++) {
    tmp = read_raw_int(str);
    s.insert(tmp);
  }

  context.moduli.clear();
  context.specialPrimes.clear();
  context.ctxtPrimes.clear();

  long nPrimes = read_raw_int(str);
  for (long p, i=0; i<nPrimes; i++) {
    p = read_raw_int(str);
    context.moduli.push_back(Cmodulus(context.zMStar,p,0));

    if (s.contains(i))
      context.specialPrimes.insert(i); // special prime
    else
      context.ctxtPrimes.insert(i);    // ciphertext prime
  }

  long nDigits = read_raw_int(str);
  context.digits.resize(nDigits);
  for (long i=0; i<(long)context.digits.size(); i++) {
    sizeOfS = read_raw_int(str);
    for (long tmp, n=0; n<sizeOfS; n++) {
      tmp = read_raw_int(str);
      context.digits[i].insert(tmp);
    }
  }

  // Read in the partition of m into co-prime factors (if bootstrappable)
  Vec<long> mv;
  read_ntl_vec_long(str, mv);

  long t = read_raw_int(str);
  bool consFlag = read_raw_int(str);

  if (mv.length()>0) {
    context.makeBootstrappable(mv, t, consFlag);
  }

  assert(readEyeCatcher(str, BINIO_EYE_CONTEXT_END)==0);
}
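// For reference, the stream layout consumed above, derived directly from the
// reads in readContextBinary (any writer must emit the same order):
//
//   BINIO_EYE_CONTEXT_BEGIN
//   stdev                              raw xdouble
//   |specialPrimes| + each index       raw ints
//   nPrimes + each prime p             raw ints (read order fixes prime indexes)
//   nDigits + each digit's index set   raw ints
//   mv, t, consFlag                    bootstrapping data (mv empty if none)
//   BINIO_EYE_CONTEXT_END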
//! @brief How many levels in the "base-set" for that ciphertext
long Ctxt::findBaseLevel() const
{
  IndexSet s;
  findBaseSet(s);
  if (context.containsSmallPrime()) {
    if (s.contains(context.ctxtPrimes.first()))
      return 2*card(s) - 1; // 1st prime is half size
    else
      return 2*card(s);
  }
  else return card(s); // one prime per level
}
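// A worked instance of the arithmetic above (values illustrative): with a
// half-size prime at index 0, each full prime counts as two levels and the
// half-size prime as one. If findBaseSet() returns s = {0,1,2}, the level is
// 2*3 - 1 = 5; if it returns s = {1,2}, the level is 2*2 = 4. Without a
// small prime, s = {0,1,2} simply gives level 3.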
// Find the IndexSet such that modDown to that set of primes makes the
// additive term due to rounding into the dominant noise term
void Ctxt::findBaseSet(IndexSet& s) const
{
  if (getNoiseVar() <= 0.0) { // an empty ciphertext
    s = context.ctxtPrimes;
    return;
  }

  assert(verifyPrimeSet());
  bool halfSize = context.containsSmallPrime();
  double addedNoise = log(modSwitchAddedNoiseVar())/2;
  double curNoise = log(getNoiseVar())/2;
  double firstNoise = context.logOfPrime(0);

  // remove special primes, if they are included in this->primeSet
  s = getPrimeSet();
  if (!s.disjointFrom(context.specialPrimes)) { // scale down noise
    curNoise -= context.logOfProduct(context.specialPrimes);
    s.remove(context.specialPrimes);
  }

  if (curNoise <= 2*addedNoise) return; // no need to mod down

  // if the first prime is half-size, begin by removing it
  if (halfSize && s.contains(0)) {
    curNoise -= firstNoise;
    s.remove(0);
  }

  // while noise is larger than added term, scale down by the next prime
  while (curNoise > addedNoise && card(s) > 1) {
    curNoise -= context.logOfPrime(s.last());
    s.remove(s.last());
  }

  if (halfSize) {
    // If noise is still too big, drop the last big prime and insert
    // the half-size prime instead
    if (curNoise > addedNoise) {
      curNoise = firstNoise;
      s = IndexSet(0);
    }
    // Otherwise, check if we can add back the half-size prime
    else if (curNoise + firstNoise <= addedNoise) {
      curNoise += firstNoise;
      s.insert(0);
    }
  }

  if (curNoise > addedNoise && log_of_ratio() > -0.5)
    cerr << "Ctxt::findBaseSet warning: already at lowest level\n";
}
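// A hedged usage sketch (hypothetical helper, not HElib code): the intended
// calling pattern is to compute the base set and then mod-switch down to it,
// at which point the additive rounding term dominates the noise.
void dropToBaseSet(Ctxt& c)
{
  IndexSet s;
  c.findBaseSet(s);
  c.modDownToSet(s); // removes the primes in primeSet / s
}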
// Modulus-switching down
void Ctxt::modDownToLevel(long lvl)
{
  long currentLvl;
  IndexSet targetSet;
  IndexSet currentSet = primeSet & context.ctxtPrimes;
  if (context.containsSmallPrime()) {
    currentLvl = 2*card(currentSet);
    if (currentSet.contains(0))
      currentLvl--;  // first prime is half the size

    if (lvl & 1) {   // odd level, includes the half-size prime
      targetSet = IndexSet(0,(lvl-1)/2);
    } else {
      targetSet = IndexSet(1,lvl/2);
    }
  }
  else {
    currentLvl = card(currentSet);
    targetSet = IndexSet(0,lvl-1);   // one prime per level
  }

  // If target is not below the current level, nothing to do
  if (lvl >= currentLvl && currentSet == primeSet) return;

  if (lvl >= currentLvl) { // just remove the special primes
    targetSet = currentSet;
  }

  // sanity-check: interval does not contain special primes
  assert(targetSet.disjointFrom(context.specialPrimes));

  // may need to mod-UP to include the smallest prime
  if (targetSet.contains(0) && !currentSet.contains(0))
    modUpToSet(targetSet); // adds the primes in targetSet / primeSet

  modDownToSet(targetSet); // removes the primes in primeSet / targetSet
}
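// Another hedged sketch (hypothetical helper, assuming Ctxt::reLinearize()
// and Ctxt::findBaseLevel() as above): a common pattern after a multiply is
// to re-linearize and then drop straight to the base level.
void relinAndDrop(Ctxt& c)
{
  c.reLinearize();
  c.modDownToLevel(c.findBaseLevel());
}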
void Procedure::deleteOrphans()
{
    IndexSet<Value> valuesInBlocks;
    for (BasicBlock* block : *this)
        valuesInBlocks.addAll(*block);

    // Since this method is not on any hot path, we do it conservatively: first a pass to
    // identify the values to be removed, and then a second pass to remove them. This avoids any
    // risk of the value iteration being broken by removals.
    Vector<Value*, 16> toRemove;
    for (Value* value : values()) {
        if (!valuesInBlocks.contains(value))
            toRemove.append(value);
    }

    for (Value* value : toRemove)
        deleteValue(value);
}
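// The same two-pass idiom in standard C++ (illustrative only, not B3 code):
// mutating a container while iterating over it is easy to get wrong, so
// collect the victims first and erase them afterwards, exactly as
// deleteOrphans() does.
#include <set>

void eraseEvens(std::set<int>& s)
{
    std::set<int> toErase;
    for (int x : s) {
        if (x % 2 == 0)
            toErase.insert(x);
    }
    for (int x : toErase)
        s.erase(x);
}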
void Procedure::dump(PrintStream& out) const
{
    IndexSet<Value> valuesInBlocks;
    for (BasicBlock* block : *this) {
        out.print(deepDump(*this, block));
        valuesInBlocks.addAll(*block);
    }
    bool didPrint = false;
    for (Value* value : values()) {
        if (valuesInBlocks.contains(value))
            continue;
        if (!didPrint) {
            dataLog("Orphaned values:\n");
            didPrint = true;
        }
        dataLog("    ", deepDump(*this, value), "\n");
    }
    if (m_byproducts->count())
        out.print(*m_byproducts);
}
void Procedure::setBlockOrderImpl(Vector<BasicBlock*>& blocks)
{
    IndexSet<BasicBlock> blocksSet;
    blocksSet.addAll(blocks);

    for (BasicBlock* block : *this) {
        if (!blocksSet.contains(block))
            blocks.append(block);
    }

    // Place blocks into this's block list by first leaking all of the blocks and then readopting
    // them.
    for (auto& entry : m_blocks)
        entry.release();

    m_blocks.resize(blocks.size());
    for (unsigned i = 0; i < blocks.size(); ++i) {
        BasicBlock* block = blocks[i];
        block->m_index = i;
        m_blocks[i] = std::unique_ptr<BasicBlock>(block);
    }
}
void allocateStack(Code& code)
{
    PhaseScope phaseScope(code, "allocateStack");

    // Perform an escape analysis over stack slots. An escaping stack slot is one that is locked or
    // is explicitly escaped in the code.
    IndexSet<StackSlot> escapingStackSlots;
    for (StackSlot* slot : code.stackSlots()) {
        if (slot->isLocked())
            escapingStackSlots.add(slot);
    }
    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            inst.forEachArg(
                [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width) {
                    if (role == Arg::UseAddr && arg.isStack())
                        escapingStackSlots.add(arg.stackSlot());
                });
        }
    }

    // Allocate all of the escaped slots in order. This is kind of a crazy algorithm to allow for
    // the possibility of stack slots being assigned frame offsets before we even get here.
    ASSERT(!code.frameSize());
    Vector<StackSlot*> assignedEscapedStackSlots;
    Vector<StackSlot*> escapedStackSlotsWorklist;
    for (StackSlot* slot : code.stackSlots()) {
        if (escapingStackSlots.contains(slot)) {
            if (slot->offsetFromFP())
                assignedEscapedStackSlots.append(slot);
            else
                escapedStackSlotsWorklist.append(slot);
        } else {
            // It would be super strange to have an unlocked stack slot that has an offset already.
            ASSERT(!slot->offsetFromFP());
        }
    }
    // This is a fairly expensive loop, but it's OK because we'll usually only have a handful of
    // escaped stack slots.
    while (!escapedStackSlotsWorklist.isEmpty()) {
        StackSlot* slot = escapedStackSlotsWorklist.takeLast();
        assign(slot, assignedEscapedStackSlots);
        assignedEscapedStackSlots.append(slot);
    }

    // Now we handle the anonymous slots.
    StackSlotLiveness liveness(code);
    IndexMap<StackSlot, HashSet<StackSlot*>> interference(code.stackSlots().size());
    Vector<StackSlot*> slots;

    for (BasicBlock* block : code) {
        StackSlotLiveness::LocalCalc localCalc(liveness, block);

        auto interfere = [&] (Inst& inst) {
            if (verbose)
                dataLog("Interfering: ", WTF::pointerListDump(localCalc.live()), "\n");

            inst.forEachArg(
                [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width) {
                    if (!Arg::isDef(role))
                        return;
                    if (!arg.isStack())
                        return;
                    StackSlot* slot = arg.stackSlot();
                    if (slot->kind() != StackSlotKind::Anonymous)
                        return;

                    for (StackSlot* otherSlot : localCalc.live()) {
                        interference[slot].add(otherSlot);
                        interference[otherSlot].add(slot);
                    }
                });
        };

        for (unsigned instIndex = block->size(); instIndex--;) {
            if (verbose)
                dataLog("Analyzing: ", block->at(instIndex), "\n");
            Inst& inst = block->at(instIndex);
            interfere(inst);
            localCalc.execute(instIndex);
        }
        Inst nop;
        interfere(nop);
    }

    if (verbose) {
        for (StackSlot* slot : code.stackSlots())
            dataLog("Interference of ", pointerDump(slot), ": ", pointerListDump(interference[slot]), "\n");
    }

    // Now we assign stack locations. At its heart this algorithm is just first-fit. For each
    // StackSlot we just want to find the offsetFromFP that is closest to zero while ensuring
    // no overlap with the other StackSlots it interferes with.
    Vector<StackSlot*> otherSlots = assignedEscapedStackSlots;
    for (StackSlot* slot : code.stackSlots()) {
        if (slot->offsetFromFP()) {
            // Already assigned an offset.
            continue;
        }

        HashSet<StackSlot*>& interferingSlots = interference[slot];
        otherSlots.resize(assignedEscapedStackSlots.size());
        otherSlots.resize(assignedEscapedStackSlots.size() + interferingSlots.size());
        unsigned nextIndex = assignedEscapedStackSlots.size();
        for (StackSlot* otherSlot : interferingSlots)
            otherSlots[nextIndex++] = otherSlot;

        assign(slot, otherSlots);
    }

    // Figure out how much stack we're using for stack slots.
    unsigned frameSizeForStackSlots = 0;
    for (StackSlot* slot : code.stackSlots()) {
        frameSizeForStackSlots = std::max(
            frameSizeForStackSlots,
            static_cast<unsigned>(-slot->offsetFromFP()));
    }

    frameSizeForStackSlots = WTF::roundUpToMultipleOf(stackAlignmentBytes(), frameSizeForStackSlots);

    // Now we need to deduce how much argument area we need.
    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            for (Arg& arg : inst.args) {
                if (arg.isCallArg()) {
                    // For now, we assume that we use 8 bytes of the call arg. But that's not
                    // such an awesome assumption.
                    // FIXME: https://bugs.webkit.org/show_bug.cgi?id=150454
                    ASSERT(arg.offset() >= 0);
                    code.requestCallArgAreaSize(arg.offset() + 8);
                }
            }
        }
    }

    code.setFrameSize(frameSizeForStackSlots + code.callArgAreaSize());

    // Finally, transform the code to use Addr's instead of StackSlot's. This is a lossless
    // transformation since we can search the StackSlots array to figure out which StackSlot any
    // offset-from-FP refers to.
    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            for (Arg& arg : inst.args) {
                switch (arg.kind()) {
                case Arg::Stack:
                    arg = Arg::addr(
                        Tmp(GPRInfo::callFrameRegister),
                        arg.offset() + arg.stackSlot()->offsetFromFP());
                    break;
                case Arg::CallArg:
                    arg = Arg::addr(
                        Tmp(GPRInfo::callFrameRegister),
                        arg.offset() - code.frameSize());
                    break;
                default:
                    break;
                }
            }
        }
    }
}
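// A hedged, self-contained sketch of first-fit offset assignment (the real
// assign() helper lives elsewhere in AirAllocateStack.cpp; names and types
// here are simplified stand-ins, not WebKit's). Offsets grow downward from
// FP, so we try increasingly negative candidate intervals until one overlaps
// none of the interfering, already-placed slots.
#include <vector>

struct SlotSketch {
    int offsetFromFP { 0 };  // 0 means "not yet assigned"
    unsigned byteSize { 8 };
    unsigned alignment { 8 };
};

static bool slotsOverlap(const SlotSketch& a, const SlotSketch& b)
{
    // Treat each slot as occupying [offsetFromFP, offsetFromFP + byteSize).
    return a.offsetFromFP < b.offsetFromFP + static_cast<int>(b.byteSize)
        && b.offsetFromFP < a.offsetFromFP + static_cast<int>(a.byteSize);
}

static void firstFitAssign(SlotSketch& slot, const std::vector<SlotSketch*>& placed)
{
    for (unsigned end = slot.byteSize; ; end += slot.alignment) {
        slot.offsetFromFP = -static_cast<int>(end); // candidate interval
        bool clash = false;
        for (SlotSketch* other : placed) {
            if (slotsOverlap(slot, *other)) {
                clash = true;
                break;
            }
        }
        if (!clash)
            return; // first fit: the closest-to-zero non-overlapping offset
    }
}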
bool eliminateDeadCode(Code& code)
{
    PhaseScope phaseScope(code, "eliminateDeadCode");

    HashSet<Tmp> liveTmps;
    IndexSet<StackSlot> liveStackSlots;
    bool changed;

    auto isArgLive = [&] (const Arg& arg) -> bool {
        switch (arg.kind()) {
        case Arg::Tmp:
            if (arg.isReg())
                return true;
            return liveTmps.contains(arg.tmp());
        case Arg::Stack:
            if (arg.stackSlot()->isLocked())
                return true;
            return liveStackSlots.contains(arg.stackSlot());
        default:
            return true;
        }
    };

    auto addLiveArg = [&] (const Arg& arg) -> bool {
        switch (arg.kind()) {
        case Arg::Tmp:
            if (arg.isReg())
                return false;
            return liveTmps.add(arg.tmp()).isNewEntry;
        case Arg::Stack:
            if (arg.stackSlot()->isLocked())
                return false;
            return liveStackSlots.add(arg.stackSlot());
        default:
            return false;
        }
    };

    auto isInstLive = [&] (Inst& inst) -> bool {
        if (inst.hasNonArgEffects())
            return true;

        // This instruction should be presumed dead if its Args are all dead.
        bool storesToLive = false;
        inst.forEachArg(
            [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width) {
                if (!Arg::isDef(role))
                    return;
                storesToLive |= isArgLive(arg);
            });
        return storesToLive;
    };

    auto handleInst = [&] (Inst& inst) {
        if (!isInstLive(inst))
            return;

        // We get here if the Inst is live. For simplicity we say that a live instruction forces
        // liveness upon everything it mentions.
        for (Arg& arg : inst.args) {
            changed |= addLiveArg(arg);
            arg.forEachTmpFast(
                [&] (Tmp& tmp) {
                    changed |= addLiveArg(tmp);
                });
        }
    };

    auto runForward = [&] () -> bool {
        changed = false;
        for (BasicBlock* block : code) {
            for (Inst& inst : *block)
                handleInst(inst);
        }
        return changed;
    };

    auto runBackward = [&] () -> bool {
        changed = false;
        for (unsigned blockIndex = code.size(); blockIndex--;) {
            BasicBlock* block = code[blockIndex];
            for (unsigned instIndex = block->size(); instIndex--;)
                handleInst(block->at(instIndex));
        }
        return changed;
    };

    for (;;) {
        // Propagating backward is most likely to be profitable.
        if (!runBackward())
            break;
        if (!runBackward())
            break;

        // Occasionally propagating forward greatly reduces the likelihood of pathologies.
        if (!runForward())
            break;
    }

    unsigned removedInstCount = 0;
    for (BasicBlock* block : code) {
        removedInstCount += block->insts().removeAllMatching(
            [&] (Inst& inst) -> bool {
                return !isInstLive(inst);
            });
    }

    return !!removedInstCount;
}
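// A worked instance of the propagation above (illustrative Air-like ops):
//
//     Move %t0, %t1    // live: it defs %t1, which the next Move reads
//     Move %t1, %t2    // live: it defs %t2, which the Ret reads
//     Ret  %t2         // live: has non-arg effects
//
// A single backward pass discovers this whole chain, since processing Ret
// first marks %t2 live, which makes the second Move live, and so on. The
// occasional forward pass in the loop above catches cases (e.g. around
// loops) where liveness discovered late in the iteration order must still
// flow the other way.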
bool operator>(const IndexSet& s1, const IndexSet& s2)
{
  return card(s2) < card(s1) && s1.contains(s2);
}

bool operator>=(const IndexSet& s1, const IndexSet& s2)
{
  return s1.contains(s2);
}

bool operator<(const IndexSet& s1, const IndexSet& s2)
{
  return card(s1) < card(s2) && s2.contains(s1);
}

// functional "contains"
bool operator<=(const IndexSet& s1, const IndexSet& s2)
{
  return s2.contains(s1);
}
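// Usage sketch (hypothetical driver, assuming the IndexSet interface above):
// these operators implement the subset partial order, so two sets can be
// incomparable -- neither <= nor >= holds for {1,2} and {2,3}.
#include <cassert>

void orderExample()
{
  IndexSet a, b;
  a.insert(1); a.insert(2);  // a = {1,2}
  b.insert(2); b.insert(3);  // b = {2,3}

  assert(!(a <= b) && !(b <= a)); // incomparable
  b.insert(1);                    // b = {1,2,3}
  assert(a < b && b > a && a <= b);
}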
void allocateStack(Code& code)
{
    PhaseScope phaseScope(code, "allocateStack");

    // Perform an escape analysis over stack slots. An escaping stack slot is one that is locked or
    // is explicitly escaped in the code.
    IndexSet<StackSlot> escapingStackSlots;
    for (StackSlot* slot : code.stackSlots()) {
        if (slot->isLocked())
            escapingStackSlots.add(slot);
    }
    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            inst.forEachArg(
                [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width) {
                    if (role == Arg::UseAddr && arg.isStack())
                        escapingStackSlots.add(arg.stackSlot());
                });
        }
    }

    // Allocate all of the escaped slots in order. This is kind of a crazy algorithm to allow for
    // the possibility of stack slots being assigned frame offsets before we even get here.
    ASSERT(!code.frameSize());
    Vector<StackSlot*> assignedEscapedStackSlots;
    Vector<StackSlot*> escapedStackSlotsWorklist;
    for (StackSlot* slot : code.stackSlots()) {
        if (escapingStackSlots.contains(slot)) {
            if (slot->offsetFromFP())
                assignedEscapedStackSlots.append(slot);
            else
                escapedStackSlotsWorklist.append(slot);
        } else {
            // It would be super strange to have an unlocked stack slot that has an offset already.
            ASSERT(!slot->offsetFromFP());
        }
    }
    // This is a fairly expensive loop, but it's OK because we'll usually only have a handful of
    // escaped stack slots.
    while (!escapedStackSlotsWorklist.isEmpty()) {
        StackSlot* slot = escapedStackSlotsWorklist.takeLast();
        assign(slot, assignedEscapedStackSlots);
        assignedEscapedStackSlots.append(slot);
    }

    // Now we handle the anonymous slots.
    StackSlotLiveness liveness(code);
    IndexMap<StackSlot, HashSet<StackSlot*>> interference(code.stackSlots().size());
    Vector<StackSlot*> slots;

    for (BasicBlock* block : code) {
        StackSlotLiveness::LocalCalc localCalc(liveness, block);

        auto interfere = [&] (unsigned instIndex) {
            if (verbose)
                dataLog("Interfering: ", WTF::pointerListDump(localCalc.live()), "\n");

            Inst::forEachDef<Arg>(
                block->get(instIndex), block->get(instIndex + 1),
                [&] (Arg& arg, Arg::Role, Arg::Type, Arg::Width) {
                    if (!arg.isStack())
                        return;
                    StackSlot* slot = arg.stackSlot();
                    if (slot->kind() != StackSlotKind::Anonymous)
                        return;

                    for (StackSlot* otherSlot : localCalc.live()) {
                        interference[slot].add(otherSlot);
                        interference[otherSlot].add(slot);
                    }
                });
        };

        for (unsigned instIndex = block->size(); instIndex--;) {
            if (verbose)
                dataLog("Analyzing: ", block->at(instIndex), "\n");

            // Kill dead stores. For simplicity we say that a store is killable if it has only late
            // defs and those late defs are to things that are dead right now. We only do that
            // because that's the only kind of dead stack store we will see here.
            Inst& inst = block->at(instIndex);
            if (!inst.hasNonArgEffects()) {
                bool ok = true;
                inst.forEachArg(
                    [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width) {
                        if (Arg::isEarlyDef(role)) {
                            ok = false;
                            return;
                        }
                        if (!Arg::isLateDef(role))
                            return;
                        if (!arg.isStack()) {
                            ok = false;
                            return;
                        }
                        StackSlot* slot = arg.stackSlot();
                        if (slot->kind() != StackSlotKind::Anonymous) {
                            ok = false;
                            return;
                        }

                        if (localCalc.isLive(slot)) {
                            ok = false;
                            return;
                        }
                    });
                if (ok)
                    inst = Inst();
            }

            interfere(instIndex);
            localCalc.execute(instIndex);
        }
        interfere(-1);

        block->insts().removeAllMatching(
            [&] (const Inst& inst) -> bool {
                return !inst;
            });
    }

    if (verbose) {
        for (StackSlot* slot : code.stackSlots())
            dataLog("Interference of ", pointerDump(slot), ": ", pointerListDump(interference[slot]), "\n");
    }

    // Now we assign stack locations. At its heart this algorithm is just first-fit. For each
    // StackSlot we just want to find the offsetFromFP that is closest to zero while ensuring
    // no overlap with the other StackSlots it interferes with.
    Vector<StackSlot*> otherSlots = assignedEscapedStackSlots;
    for (StackSlot* slot : code.stackSlots()) {
        if (slot->offsetFromFP()) {
            // Already assigned an offset.
            continue;
        }

        HashSet<StackSlot*>& interferingSlots = interference[slot];
        otherSlots.resize(assignedEscapedStackSlots.size());
        otherSlots.resize(assignedEscapedStackSlots.size() + interferingSlots.size());
        unsigned nextIndex = assignedEscapedStackSlots.size();
        for (StackSlot* otherSlot : interferingSlots)
            otherSlots[nextIndex++] = otherSlot;

        assign(slot, otherSlots);
    }

    // Figure out how much stack we're using for stack slots.
    unsigned frameSizeForStackSlots = 0;
    for (StackSlot* slot : code.stackSlots()) {
        frameSizeForStackSlots = std::max(
            frameSizeForStackSlots,
            static_cast<unsigned>(-slot->offsetFromFP()));
    }

    frameSizeForStackSlots = WTF::roundUpToMultipleOf(stackAlignmentBytes(), frameSizeForStackSlots);

    // Now we need to deduce how much argument area we need.
    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            for (Arg& arg : inst.args) {
                if (arg.isCallArg()) {
                    // For now, we assume that we use 8 bytes of the call arg. But that's not
                    // such an awesome assumption.
                    // FIXME: https://bugs.webkit.org/show_bug.cgi?id=150454
                    ASSERT(arg.offset() >= 0);
                    code.requestCallArgAreaSize(arg.offset() + 8);
                }
            }
        }
    }

    code.setFrameSize(frameSizeForStackSlots + code.callArgAreaSize());

    // Finally, transform the code to use Addr's instead of StackSlot's. This is a lossless
    // transformation since we can search the StackSlots array to figure out which StackSlot any
    // offset-from-FP refers to.
    // FIXME: This may produce addresses that aren't valid if we end up with a ginormous stack frame.
    // We would have to scavenge for temporaries if this happened. Fortunately, this case will be
    // extremely rare so we can do crazy things when it arises.
    // https://bugs.webkit.org/show_bug.cgi?id=152530
    InsertionSet insertionSet(code);
    for (BasicBlock* block : code) {
        for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) {
            Inst& inst = block->at(instIndex);
            inst.forEachArg(
                [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width width) {
                    auto stackAddr = [&] (int32_t offset) -> Arg {
                        return Arg::stackAddr(offset, code.frameSize(), width);
                    };

                    switch (arg.kind()) {
                    case Arg::Stack: {
                        StackSlot* slot = arg.stackSlot();
                        if (Arg::isZDef(role)
                            && slot->kind() == StackSlotKind::Anonymous
                            && slot->byteSize() > Arg::bytes(width)) {
                            // Currently we only handle this simple case because it's the only one
                            // that arises: ZDef's are only 32-bit right now. So, when we hit these
                            // assertions it means that we need to implement those other kinds of
                            // zero fills.
                            RELEASE_ASSERT(slot->byteSize() == 8);
                            RELEASE_ASSERT(width == Arg::Width32);

                            RELEASE_ASSERT(isValidForm(StoreZero32, Arg::Stack));
                            insertionSet.insert(
                                instIndex + 1, StoreZero32, inst.origin,
                                stackAddr(arg.offset() + 4 + slot->offsetFromFP()));
                        }
                        arg = stackAddr(arg.offset() + slot->offsetFromFP());
                        break;
                    }
                    case Arg::CallArg:
                        arg = stackAddr(arg.offset() - code.frameSize());
                        break;
                    default:
                        break;
                    }
                });
        }
        insertionSet.execute(block);
    }
}
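// A worked instance of the ZDef fix-up above (offsets illustrative, and the
// FP-relative notation is ours, not Air's printed form): a 32-bit ZDef into
// an 8-byte anonymous slot writes only the low 4 bytes, so the upper 4 bytes
// must be zeroed explicitly. If the slot lives at FP-16, the transformation
// yields roughly:
//
//     Move32 %src, -16(FP)     // the original def, rewritten to an Addr
//     StoreZero32 -12(FP)      // inserted zero-fill of bytes 4..7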