示例#1
0
/// CleanupSelectors - Walk the eh.selector intrinsic calls in \p Sels and, for
/// each one whose last argument is still the "llvm.eh.catch.all.value" global,
/// replace that argument with the global's initializer. Returns true if at
/// least one call was rewritten.
bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) {
  // Without the catch-all global there is nothing to substitute.
  if (!EHCatchAllValue)
    return false;

  // Look up (and cache) the eh.selector declaration on first use; give up if
  // it cannot be obtained.
  if (!SelectorIntrinsic)
    SelectorIntrinsic =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
  if (!SelectorIntrinsic)
    return false;

  typedef SmallPtrSet<IntrinsicInst*, 32>::iterator SelIter;

  bool Modified = false;
  for (SelIter It = Sels.begin(), End = Sels.end(); It != End; ++It) {
    IntrinsicInst *Call = *It;

    // The catch-all value, when present, is the final call argument.
    unsigned LastArg = Call->getNumArgOperands() - 1;
    GlobalVariable *Referenced =
      dyn_cast<GlobalVariable>(Call->getArgOperand(LastArg));

    if (Referenced == EHCatchAllValue) {
      Call->setArgOperand(LastArg, EHCatchAllValue->getInitializer());
      Modified = true;
    }
  }

  return Modified;
}
示例#2
0
void DecomposeInsts::decomposeIntrinsics(BasicBlock* bb)
{
    IRBuilder<> builder(module->getContext());

    for (BasicBlock::iterator instI = bb->begin(), instE = bb->end(); instI != instE; /* empty */) {
        Instruction* inst = instI;

        // Note this increment of instI will skip decompositions of the code
        // inserted to decompose.  E.g., if length -> dot, and dot is also to
        // be decomposed, then the decomposition of dot will be skipped
        // unless instI is reset.
        ++instI;

        IntrinsicInst* intrinsic = dyn_cast<IntrinsicInst>(inst);
        if (! intrinsic)
            continue;

        // Useful preamble for most case
        llvm::Value* arg0 = 0;
        llvm::Value* arg1 = 0;
        llvm::Value* arg2 = 0;
        if (inst->getNumOperands() > 0)
            arg0 = inst->getOperand(0);
        if (inst->getNumOperands() > 1)
            arg1 = inst->getOperand(1);
        if (inst->getNumOperands() > 2)
            arg2 = inst->getOperand(2);
        llvm::Value* newInst = 0;
        Type* instTypes[] = { inst->getType(), inst->getType(), inst->getType(), inst->getType() };
        Type* argTypes[] = { arg0->getType(), arg0->getType(), arg0->getType(), arg0->getType() };
        builder.SetInsertPoint(instI);

        switch (intrinsic->getIntrinsicID()) {
        case Intrinsic::gla_fRadians:
            {
                // always decompose
                // arg0 -> arg0 * pi / 180
                const double pi_over_180 = 0.01745329251994329576923690768489;
                newInst = MultiplyByConstant(builder, arg0, pi_over_180);
                break;
            }
        case Intrinsic::gla_fDegrees:
            {
                // always decompose
                // arg0 -> arg0 * 180 / pi
                const double pi_into_180 = 57.295779513082320876798154814105;
                newInst = MultiplyByConstant(builder, arg0, pi_into_180);
                break;
            }
        case Intrinsic::gla_fMin:
            if (backEnd->decomposeIntrinsic(EDiMin)) {
                //
                // min(a,b) = select (a < b), a, b
                //
                llvm::Value* smeared = Smear(builder, module, arg1, arg0);
                newInst = builder.CreateFCmpOLT(arg0, smeared);
                newInst = builder.CreateSelect(newInst, arg0, smeared);
            }
            break;
        case Intrinsic::gla_fMax:
            if (backEnd->decomposeIntrinsic(EDiMax)) {
                //
                // max(a,b) = select (a > b), a, b
                //
                llvm::Value* smeared = Smear(builder, module, arg1, arg0);
                newInst = builder.CreateFCmpOGT(arg0, smeared);
                newInst = builder.CreateSelect(newInst, arg0, smeared);
            }
            break;
        case Intrinsic::gla_fClamp:
            if (backEnd->decomposeIntrinsic(EDiClamp))
            {
                //
                // Clamp(x, minVal, maxVal) is defined to be min(max(x, minVal), maxVal).
                //
                // The 2nd and 3rd arguments match each other, but not necessarily
                // the 1st argument.  In the decomposition, this difference matches 
                // min/max's difference in their 1st and 2nd arguments.
                //
                argTypes[2] = arg1->getType();  // argTypes[*] start at 0 for the return value, arg* start at 0 for operand 0
                Function* max = Intrinsic::getDeclaration(module, Intrinsic::gla_fMax, makeArrayRef(argTypes, 3));
                Function* min = Intrinsic::getDeclaration(module, Intrinsic::gla_fMin, makeArrayRef(argTypes, 3));
                newInst = builder.CreateCall2(max, arg0, arg1);
                newInst = builder.CreateCall2(min, newInst, arg2);

                // Make next iteration revisit this decomposition, in case min
                // or max are decomposed.
                instI = inst;
                ++instI;
            }
            break;

        case Intrinsic::gla_fAsin:
            if (backEnd->decomposeIntrinsic(EDiAsin)) {
                UnsupportedFunctionality("decomposition of gla_fAsin");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fAcos:
            if (backEnd->decomposeIntrinsic(EDiAcos))
            {
                // TODO: functionality: Do we need to handle domain errors?  (E.g., bad input value)
                //
                // acos(x) ~= sqrt(1-x)*(a + x*(b + x*(c + x*d)))
                // where  a =  1.57079632679
                //        b = -0.213300989
                //        c =  0.077980478
                //        d = -0.0216409
                //
                double a =  1.57079632679;
                double b = -0.213300989;
                double c =  0.077980478;
                double d = -0.0216409;

                // polynomial part, going right to left...
                llvm::Value* poly;
                poly = MultiplyByConstant(builder, arg0, d);
                poly = AddWithConstant(builder, poly, c);
                poly = builder.CreateFMul(arg0, poly);
                poly = AddWithConstant(builder, poly, b);
                poly = builder.CreateFMul(arg0, poly);
                poly = AddWithConstant(builder, poly, a);

                // sqrt part
                Function* sqrt = Intrinsic::getDeclaration(module, Intrinsic::gla_fSqrt, makeArrayRef(argTypes, 2));
                newInst = builder.CreateFNeg(arg0);
                newInst = AddWithConstant(builder, newInst, 1.0);
                newInst = builder.CreateCall(sqrt, newInst);
                newInst = builder.CreateFMul(newInst, poly);
            }
            break;
        case Intrinsic::gla_fAtan:
            if (backEnd->decomposeIntrinsic(EDiAtan)) {
                UnsupportedFunctionality("decomposition of gla_fAtan");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fAtan2:
            if (backEnd->decomposeIntrinsic(EDiAtan2)) {
                UnsupportedFunctionality("decomposition of gla_fAtan2");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fCosh:
            if (backEnd->decomposeIntrinsic(EDiCosh)) {
                UnsupportedFunctionality("decomposition of gla_fCosh");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fSinh:
            if (backEnd->decomposeIntrinsic(EDiSinh)) {
                UnsupportedFunctionality("decomposition of gla_fSinh");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fTanh:
            if (backEnd->decomposeIntrinsic(EDiTanh)) {
                UnsupportedFunctionality("decomposition of gla_fTanh");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fAcosh:
            if (backEnd->decomposeIntrinsic(EDiACosh)) {
                UnsupportedFunctionality("decomposition of gla_fACosh");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fAsinh:
            if (backEnd->decomposeIntrinsic(EDiASinh)) {
                UnsupportedFunctionality("decomposition of gla_fASinh");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fAtanh:
            if (backEnd->decomposeIntrinsic(EDiATanh)) {
                UnsupportedFunctionality("decomposition of gla_fATanh");
                //changed = true;
            }
            break;

        case Intrinsic::gla_fPowi:
            if (backEnd->decomposeIntrinsic(EDiPowi)) {
                UnsupportedFunctionality("decomposition of gla_fPowi");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fExp10:
        case Intrinsic::gla_fExp:
            if ((intrinsic->getIntrinsicID() == Intrinsic::gla_fExp10 && backEnd->decomposeIntrinsic(EDiExp10)) ||
                (intrinsic->getIntrinsicID() == Intrinsic::gla_fExp   && backEnd->decomposeIntrinsic(EDiExp))) {
                //    10^X = 2^(X /(log base 10 of 2))
                // -> 10^X = 2^(X * 3.3219280948873623478703194294894)
                //
                //     e^X = 2^(X /(log base e of 2))
                // ->  e^X = 2^(X * 1.4426950408889634073599246810019)

                //const double inv_log10_e = 2.3025850929940456840179914546844;  // 10 -> e, in case it comes up
                const double inv_log10_2 = 3.3219280948873623478703194294894;  // 10 -> 2
                const double inv_loge_2  = 1.4426950408889634073599246810019;  //  e -> 2

                double multiplier;
                if (intrinsic->getIntrinsicID() == Intrinsic::gla_fExp10)
                    multiplier = inv_log10_2;
                else
                    multiplier = inv_loge_2;

                newInst = MultiplyByConstant(builder, arg0, multiplier);
                Function* exp = Intrinsic::getDeclaration(module, Intrinsic::gla_fExp2, makeArrayRef(argTypes, 2));
                newInst = builder.CreateCall(exp, newInst);
            }
            break;
        case Intrinsic::gla_fLog10:
        case Intrinsic::gla_fLog:
            if ((intrinsic->getIntrinsicID() == Intrinsic::gla_fLog10 && backEnd->decomposeIntrinsic(EDiLog10)) ||
                (intrinsic->getIntrinsicID() == Intrinsic::gla_fLog   && backEnd->decomposeIntrinsic(EDiLog))) {
                //    log base 10 of X = (log base 10 of 2) * (log base 2 of X)
                // -> log base 10 of X = 0.30102999566398119521373889472449 * (log base 2 of X)
                //
                //    log base e  of X = (log base e of 2) * (log base 2 of X)
                // -> log base e  of X = 0.69314718055994530941723212145818 * (log base 2 of X)

                //const double log10_e = 0.43429448190325182765112891891661;  // 10 -> e, in case it comes up
                const double log10_2 = 0.30102999566398119521373889472449;  // 10 -> 2
                const double loge_2  = 0.69314718055994530941723212145818;  //  e -> 2

                double multiplier;
                if (intrinsic->getIntrinsicID() == Intrinsic::gla_fLog10)
                    multiplier = log10_2;
                else
                    multiplier = loge_2;

                Function* log = Intrinsic::getDeclaration(module, Intrinsic::gla_fLog2, makeArrayRef(argTypes, 2));
                newInst = builder.CreateCall(log, arg0);
                newInst = MultiplyByConstant(builder, newInst, multiplier);
            }
            break;

        case Intrinsic::gla_fInverseSqrt:
            if (backEnd->decomposeIntrinsic(EDiInverseSqrt)) {
                Function* sqrt = Intrinsic::getDeclaration(module, Intrinsic::gla_fSqrt, makeArrayRef(argTypes, 2));
                newInst = builder.CreateCall(sqrt, arg0);
                newInst = builder.CreateFDiv(MakeFloatConstant(module->getContext(), 1.0), newInst);
            }
            break;
        case Intrinsic::gla_fFraction:
            if (backEnd->decomposeIntrinsic(EDiFraction)) {
                UnsupportedFunctionality("decomposition of gla_fFraction");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fSign:
            if (backEnd->decomposeIntrinsic(EDiSign)) {
                UnsupportedFunctionality("decomposition of gla_fSign");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fModF:
            if (backEnd->decomposeIntrinsic(EDiModF)) {
                UnsupportedFunctionality("decomposition of gla_fModF");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fMix:
            if (backEnd->decomposeIntrinsic(EDiMix)) {
                //
                // genType mix (x, y, a) = x * (1 - a) + y * a
                //
                llvm::Value* t;
                t = builder.CreateFNeg(arg2);
                t = AddWithConstant(builder, t, 1.0);
                t = builder.CreateFMul(arg0, t);
                newInst = builder.CreateFMul(arg1, arg2);
                newInst = builder.CreateFAdd(t, newInst);
            }
            break;
        case Intrinsic::gla_fStep:
            if (backEnd->decomposeIntrinsic(EDiStep))
            {
                //
                // step(edge, x) is defined to be 0.0 if x < edge, otherwise 1.0.
                //
                llvm::FCmpInst::Predicate predicate = llvm::FCmpInst::FCMP_OLT;
                llvm::Value* condition = builder.CreateFCmp(predicate, arg1, arg0);
                newInst = builder.CreateSelect(condition, VectorizeConstant(GetComponentCount(arg1), MakeFloatConstant(module->getContext(), 0.0)),
                                                          VectorizeConstant(GetComponentCount(arg1), MakeFloatConstant(module->getContext(), 1.0)));
            }
            break;
        case Intrinsic::gla_fSmoothStep:
            if (backEnd->decomposeIntrinsic(EDiSmoothStep)) {
                //
                // smoothstep (edge0, edge1, x) is defined to be
                //
                //   t = clamp((x - edge0) / (edge1 - edge0), 0, 1)
                //   t * t * (3 - 2 * t)
                //
                // where edge* can be scalar even if x is vector.
                //
                llvm::Value* smeared0 = Smear(builder, module, arg0, arg2);
                llvm::Value* smeared1 = Smear(builder, module, arg1, arg2);
                llvm::Value* numerator   = builder.CreateFSub(arg2, smeared0, "numerator");
                llvm::Value* denominator = builder.CreateFSub(smeared1, smeared0, "denominator");
                llvm::Value* quotient    = builder.CreateFDiv(numerator, denominator, "quotient");
                llvm::Value* zero = MakeFloatConstant(module->getContext(), 0.0);
                llvm::Value* one  = MakeFloatConstant(module->getContext(), 1.0);
                Type* newArgTypes[] = { quotient->getType(), quotient->getType(), zero->getType(), one->getType() };
                Function* clamp = Intrinsic::getDeclaration(module, Intrinsic::gla_fClamp, newArgTypes);
                llvm::Value* t = builder.CreateCall3(clamp, quotient, zero, one);
                newInst = MultiplyByConstant(builder, t, 2.0);
                newInst = SubFromConstant(builder, 3.0, newInst);
                newInst = builder.CreateFMul(t, newInst);
                newInst = builder.CreateFMul(t, newInst);
                
                // Make next iteration revisit this decomposition, in case clamp is
                // decomposed.
                instI = inst;
                ++instI;
            }
            break;
        case Intrinsic::gla_fIsNan:
            if (backEnd->decomposeIntrinsic(EDiIsNan)) {
                UnsupportedFunctionality("decomposition of gla_fIsNan");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fFma:
            if (backEnd->decomposeIntrinsic(EDiFma)) {
                UnsupportedFunctionality("decomposition of gla_Fma");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fPackUnorm2x16:
            if (backEnd->decomposeIntrinsic(EDiPackUnorm2x16)) {
                UnsupportedFunctionality("decomposition of gla_fPackUnorm2x16");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fPackUnorm4x8:
            if (backEnd->decomposeIntrinsic(EDiPackUnorm4x8)) {
                UnsupportedFunctionality("decomposition of gla_fPackUnorm4x8");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fPackSnorm4x8:
            if (backEnd->decomposeIntrinsic(EDiPackSnorm4x8)) {
                UnsupportedFunctionality("decomposition of gla_fPackSnorm4x8");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fUnpackUnorm2x16:
            if (backEnd->decomposeIntrinsic(EDiUnpackUnorm2x16)) {
                UnsupportedFunctionality("decomposition of gla_fUnpackUnorm2x16");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fUnpackUnorm4x8:
            if (backEnd->decomposeIntrinsic(EDiUnpackUnorm4x8)) {
                UnsupportedFunctionality("decomposition of gla_fUnpackUnorm4x8");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fUnpackSnorm4x8:
            if (backEnd->decomposeIntrinsic(EDiUnpackSnorm4x8)) {
                UnsupportedFunctionality("decomposition of gla_fUnpackSnorm4x8");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fPackDouble2x32:
            if (backEnd->decomposeIntrinsic(EDiPackDouble2x32)) {
                UnsupportedFunctionality("decomposition of gla_fPackDouble2x32");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fUnpackDouble2x32:
            if (backEnd->decomposeIntrinsic(EDiUnpackDouble2x32)) {
                UnsupportedFunctionality("decomposition of gla_fUnpackDouble2x32");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fLength:
            if (backEnd->decomposeIntrinsic(EDiLength)) {
                if (GetComponentCount(arg0) > 1) {
                    Function* dot = GetDotIntrinsic(module, argTypes);
                    newInst = builder.CreateCall2(dot, arg0, arg0);

                    Function* sqrt = Intrinsic::getDeclaration(module, Intrinsic::gla_fSqrt, makeArrayRef(instTypes, 2));
                    newInst = builder.CreateCall(sqrt, newInst);
                } else {
                    Function* abs = Intrinsic::getDeclaration(module, Intrinsic::gla_fAbs, makeArrayRef(instTypes, 2));
                    newInst = builder.CreateCall(abs, arg0);
                }

                // Make next iteration revisit this decomposition, in case dot is
                // decomposed.
                instI = inst;
                ++instI;
            }
            break;
        case Intrinsic::gla_fDistance:
            if (backEnd->decomposeIntrinsic(EDiDistance)) {
                newInst = builder.CreateFSub(arg0, arg1);
                llvm::Type* types[] = { GetBasicType(newInst), newInst->getType() };
                Function* length = Intrinsic::getDeclaration(module, Intrinsic::gla_fLength, types);
                newInst = builder.CreateCall(length, newInst);

                // Make next iteration revisit this decomposition, in case length is
                // decomposed.
                instI = inst;
                ++instI;
            }
            break;
        case Intrinsic::gla_fDot2:
            if (backEnd->decomposeIntrinsic(EDiDot)) {
                newInst = builder.CreateFMul(arg0, arg1);
                llvm::Value* element0 = builder.CreateExtractElement(newInst, MakeUnsignedConstant(module->getContext(), 0));
                llvm::Value* element1 = builder.CreateExtractElement(newInst, MakeUnsignedConstant(module->getContext(), 1));
                newInst = builder.CreateFAdd(element0, element1);
            }
            break;
        case Intrinsic::gla_fDot3:
            if (backEnd->decomposeIntrinsic(EDiDot)) {
                newInst = builder.CreateFMul(arg0, arg1);
                arg0 = newInst;
                llvm::Value* element0 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 0));
                llvm::Value* element1 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 1));
                newInst = builder.CreateFAdd(element0, element1);
                llvm::Value* element = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 2));
                newInst = builder.CreateFAdd(newInst, element);
            }
            break;
        case Intrinsic::gla_fDot4:
            if (backEnd->decomposeIntrinsic(EDiDot)) {
                newInst = builder.CreateFMul(arg0, arg1);
                arg0 = newInst;
                llvm::Value* element0 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 0));
                llvm::Value* element1 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 1));
                newInst = builder.CreateFAdd(element0, element1);
                for (int el = 2; el < 4; ++el) {
                    llvm::Value* element = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), el));
                    newInst = builder.CreateFAdd(newInst, element);
                }
            }
            break;
        case Intrinsic::gla_fCross:
            if (backEnd->decomposeIntrinsic(EDiCross)) {
                // (a1, a2, a3) X (b1, b2, b3) -> (a2*b3 - a3*b2, a3*b1 - a1*b3, a1*b2 - a2*b1)

                llvm::Value* a1 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 0));
                llvm::Value* a2 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 1));
                llvm::Value* a3 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 2));

                llvm::Value* b1 = builder.CreateExtractElement(arg1, MakeUnsignedConstant(module->getContext(), 0));
                llvm::Value* b2 = builder.CreateExtractElement(arg1, MakeUnsignedConstant(module->getContext(), 1));
                llvm::Value* b3 = builder.CreateExtractElement(arg1, MakeUnsignedConstant(module->getContext(), 2));

                llvm::Value* empty = llvm::UndefValue::get(arg0->getType());

                bool scalarized = false;

                if (scalarized) {
                    // do it all with scalars

                    // a2*b3 - a3*b2
                    llvm::Value* p1 = builder.CreateFMul(a2, b3);
                    llvm::Value* p2 = builder.CreateFMul(a3, b2);
                    llvm::Value* element = builder.CreateFSub(p1, p2);
                    newInst = builder.CreateInsertElement(empty, element, MakeUnsignedConstant(module->getContext(), 0));

                    // a3*b1 - a1*b3
                    p1 = builder.CreateFMul(a3, b1);
                    p2 = builder.CreateFMul(a1, b3);
                    element = builder.CreateFSub(p1, p2);
                    newInst = builder.CreateInsertElement(newInst, element, MakeUnsignedConstant(module->getContext(), 1));

                    // a1*b2 - a2*b1
                    p1 = builder.CreateFMul(a1, b2);
                    p2 = builder.CreateFMul(a2, b1);
                    element = builder.CreateFSub(p1, p2);
                    newInst = builder.CreateInsertElement(newInst, element, MakeUnsignedConstant(module->getContext(), 2));
                } else {
                    // do it all with vectors

                    // (a2, a3, a1)
                    llvm::Value* aPerm;
                    aPerm = builder.CreateInsertElement(empty, a2, MakeUnsignedConstant(module->getContext(), 0));
                    aPerm = builder.CreateInsertElement(aPerm, a3, MakeUnsignedConstant(module->getContext(), 1));
                    aPerm = builder.CreateInsertElement(aPerm, a1, MakeUnsignedConstant(module->getContext(), 2));

                    // (b3, b1, b2)
                    llvm::Value* bPerm;
                    bPerm = builder.CreateInsertElement(empty, b3, MakeUnsignedConstant(module->getContext(), 0));
                    bPerm = builder.CreateInsertElement(bPerm, b1, MakeUnsignedConstant(module->getContext(), 1));
                    bPerm = builder.CreateInsertElement(bPerm, b2, MakeUnsignedConstant(module->getContext(), 2));

                    // first term computation
                    llvm::Value* firstTerm = builder.CreateFMul(aPerm, bPerm);

                    // (a3, a1, a2)
                    aPerm = builder.CreateInsertElement(empty, a3, MakeUnsignedConstant(module->getContext(), 0));
                    aPerm = builder.CreateInsertElement(aPerm, a1, MakeUnsignedConstant(module->getContext(), 1));
                    aPerm = builder.CreateInsertElement(aPerm, a2, MakeUnsignedConstant(module->getContext(), 2));

                    // (b2, b3, b1)
                    bPerm = builder.CreateInsertElement(empty, b2, MakeUnsignedConstant(module->getContext(), 0));
                    bPerm = builder.CreateInsertElement(bPerm, b3, MakeUnsignedConstant(module->getContext(), 1));
                    bPerm = builder.CreateInsertElement(bPerm, b1, MakeUnsignedConstant(module->getContext(), 2));

                    // second term computation
                    newInst = builder.CreateFMul(aPerm, bPerm);

                    // Finish it off
                    newInst = builder.CreateFSub(firstTerm, newInst);
                }
            }
            break;
        case Intrinsic::gla_fNormalize:
            if (backEnd->decomposeIntrinsic(EDiNormalize)) {
                if (GetComponentCount(arg0) > 1) {
                    Function* dot = GetDotIntrinsic(module, argTypes);
                    newInst = builder.CreateCall2(dot, arg0, arg0);

                    llvm::Type* type[] = { newInst->getType(), newInst->getType() };
                    Function* inverseSqrt = Intrinsic::getDeclaration(module, Intrinsic::gla_fInverseSqrt, type);
                    newInst = builder.CreateCall(inverseSqrt, newInst);

                    // smear it
                    llvm::Value* smeared = llvm::UndefValue::get(arg0->getType());
                    for (int c = 0; c < GetComponentCount(arg0); ++c)
                        smeared = builder.CreateInsertElement(smeared, newInst, MakeIntConstant(module->getContext(), c));

                    newInst = builder.CreateFMul(arg0, smeared);
                } else {
                    newInst = MakeFloatConstant(module->getContext(), 1.0);
                }

                // Make next iteration revisit this decomposition, in case dot or inverse-sqrt
                // are decomposed.
                instI = inst;
                ++instI;
            }
            break;
        case Intrinsic::gla_fNormalize3D:
            if (backEnd->decomposeIntrinsic(EDiNormalize3D)) {

                // Note:  This does a 3D normalize on a vec3 or vec4.  The width of arg0 does
                // not determine that width of the dot-product input, the "3" in the "3D" does.

                llvm::Type* types[] = { GetBasicType(argTypes[0]), argTypes[0], argTypes[1] };
                Function* dot = Intrinsic::getDeclaration(module, Intrinsic::gla_fDot3, types);
                newInst = builder.CreateCall2(dot, arg0, arg0);

                llvm::Type* type[] = { newInst->getType(), newInst->getType() };
                Function* inverseSqrt = Intrinsic::getDeclaration(module, Intrinsic::gla_fInverseSqrt, type);
                newInst = builder.CreateCall(inverseSqrt, newInst);

                // smear it
                llvm::Value* smeared = llvm::UndefValue::get(arg0->getType());
                for (int c = 0; c < GetComponentCount(arg0); ++c)
                    smeared = builder.CreateInsertElement(smeared, newInst, MakeIntConstant(module->getContext(), c));

                // If we're 4-wide, copy over the original w component
                if (GetComponentCount(arg0) == 4)
                    smeared = builder.CreateInsertElement(smeared, arg0, MakeIntConstant(module->getContext(), 4));

                newInst = builder.CreateFMul(arg0, smeared);

                // Make next iteration revisit this decomposition, in case dot or inverse-sqrt
                // are decomposed.
                instI = inst;
                ++instI;
            }
            break;
        case Intrinsic::gla_fLit:
            if (backEnd->decomposeIntrinsic(EDiLit)) {
                UnsupportedFunctionality("decomposition of gla_fLit");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fFaceForward:
            if (backEnd->decomposeIntrinsic(EDiFaceForward)) {
                //
                // faceForward(N, I, Nref) is defined to be N if dot(Nref, I) < 0, otherwise return -N.
                //
                UnsupportedFunctionality("decomposition of gla_fFaceForward");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fReflect:
            if (backEnd->decomposeIntrinsic(EDiReflect))
            {
                //
                // reflect(I, N) is defined to be I - 2 * dot(N, I) * N,
                // where N may be assumed to be normalized.
                //
                // Note if the number of components is 1, then N == 1 and
                // this turns into I - 2*I, or -I.
                //
                if (GetComponentCount(arg0) > 1) {
                    Function* dot = GetDotIntrinsic(module, argTypes);
                    newInst = builder.CreateCall2(dot, arg0, arg1);
                    newInst = MultiplyByConstant(builder, newInst, 2.0);

                    // smear this back up to a vector again
                    llvm::Value* smeared = llvm::UndefValue::get(arg0->getType());
                    for (int c = 0; c < GetComponentCount(arg0); ++c)
                        smeared = builder.CreateInsertElement(smeared, newInst, MakeIntConstant(module->getContext(), c));

                    newInst = builder.CreateFMul(smeared, arg1);
                    newInst = builder.CreateFSub(arg0, newInst);
                } else {
                    newInst = builder.CreateFNeg(arg0);
                }

                // Make next iteration revisit this decomposition, in case dot
                // is decomposed
                instI = inst;
                ++instI;
            }
            break;
        case Intrinsic::gla_fRefract:
            if (backEnd->decomposeIntrinsic(EDiRefract)) {
                UnsupportedFunctionality("decomposition of gla_fRefract");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fFilterWidth:
            if (backEnd->decomposeIntrinsic(EDiFilterWidth)) {
                // filterWidth = abs(dFdx(p)) + abs(dFdy(p))
                Function* dFdx = Intrinsic::getDeclaration(module, Intrinsic::gla_fDFdx, makeArrayRef(argTypes, 2));
                Function* dFdy = Intrinsic::getDeclaration(module, Intrinsic::gla_fDFdy, makeArrayRef(argTypes, 2));
                Function*  abs = Intrinsic::getDeclaration(module, Intrinsic::gla_fAbs,  makeArrayRef(instTypes, 2));
                llvm::Value* dx = builder.CreateCall(dFdx, arg0);
                llvm::Value* dy = builder.CreateCall(dFdy, arg0);
                dx = builder.CreateCall(abs, dx);
                dy = builder.CreateCall(abs, dy);
                newInst = builder.CreateFAdd(dx, dy);
            }
            break;
        case Intrinsic::gla_fFixedTransform:
            if (backEnd->decomposeIntrinsic(EDiFixedTransform)) {
                UnsupportedFunctionality("decomposition of gla_fFixedTransform");
                //changed = true;
            }
            break;

        case Intrinsic::gla_any:
            if (backEnd->decomposeIntrinsic(EDiAny)) {
                if (GetComponentCount(arg0) == 1)
                    UnsupportedFunctionality("any() on a scalar");

                newInst = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 0));
                for (int c = 1; c < GetComponentCount(arg0); ++c) {
                    llvm::Value* comp = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), c));
                    newInst = builder.CreateOr(newInst, comp);
                }
            }
            break;
        case Intrinsic::gla_all:
            if (backEnd->decomposeIntrinsic(EDiAll)) {
                if (GetComponentCount(arg0) == 1)
                    UnsupportedFunctionality("all() on a scalar");

                newInst = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 0));
                for (int c = 1; c < GetComponentCount(arg0); ++c) {
                    llvm::Value* comp = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), c));
                    newInst = builder.CreateAnd(newInst, comp);
                }
            }
            break;
        case Intrinsic::gla_not:
            if (backEnd->decomposeIntrinsic(EDiNot)) {
                if (GetComponentCount(arg0) == 1)
                    UnsupportedFunctionality("not() on a scalar");

                newInst = builder.CreateNot(arg0);
            }
            break;
        case Intrinsic::gla_fTextureSample:
        case Intrinsic::gla_fTextureSampleLodRefZ:
        case Intrinsic::gla_fTextureSampleLodRefZOffset:
        case Intrinsic::gla_fTextureSampleLodRefZOffsetGrad:
            if (backEnd->decomposeIntrinsic(EDiTextureProjection)) {

                // if projection flag is set, divide all coordinates (and refZ) by projection
                int texFlags = GetConstantInt(intrinsic->getArgOperand(GetTextureOpIndex(ETOFlag)));
                if (texFlags & ETFProjected) {

                    // insert before intrinsic since we are not replacing it
                    builder.SetInsertPoint(inst);

                    // turn off projected flag to reflect decomposition
                    texFlags &= ~ETFProjected;

                    llvm::Value* coords = intrinsic->getArgOperand(GetTextureOpIndex(ETOCoord));

                    // determine how many channels are live after decomposition
                    int newCoordWidth = 0;
                    switch (GetConstantInt(intrinsic->getArgOperand(gla::ETOSamplerType))) {
                    case gla::ESamplerBuffer:
                    case gla::ESampler1D:      newCoordWidth = 1;  break;
                    case gla::ESampler2D:
                    case gla::ESampler2DRect:
                    case gla::ESampler2DMS:    newCoordWidth = 2;  break;
                    case gla::ESampler3D:      newCoordWidth = 3;  break;
                    case gla::ESamplerCube:
                        gla::UnsupportedFunctionality("projection with cube sampler");
                        break;
                    default:
                        assert(0 && "Unknown sampler type");
                        break;
                    }

                    if (texFlags & gla::ETFArrayed)
                        gla::UnsupportedFunctionality("projection with arrayed sampler");

                    // projection resides in last component
                    llvm::Value* projIdx = MakeUnsignedConstant(module->getContext(), GetComponentCount(coords) - 1);
                    llvm::Value* divisor = builder.CreateExtractElement(coords, projIdx);

                    llvm::Type* newCoordType;
                    if (newCoordWidth > 1)
                        newCoordType = llvm::VectorType::get(GetBasicType(coords), newCoordWidth);
                    else
                        newCoordType = GetBasicType(coords);

                    // create space to hold results
                    llvm::Value* newCoords   = llvm::UndefValue::get(newCoordType);
                    llvm::Value* smearedProj = llvm::UndefValue::get(newCoordType);

                    if (newCoordWidth > 1) {
                        for (int i = 0; i < newCoordWidth; ++i) {
                            llvm::Value* idx = MakeUnsignedConstant(module->getContext(), i);

                            // smear projection
                            smearedProj = builder.CreateInsertElement(smearedProj, divisor, idx);

                            // shrink coordinates to remove projection component
                            llvm::Value* oldCoord = builder.CreateExtractElement(coords, idx);
                            newCoords = builder.CreateInsertElement(newCoords, oldCoord, idx);
                        }
                    } else {
                        smearedProj = divisor;
                        newCoords = builder.CreateExtractElement(coords, MakeUnsignedConstant(module->getContext(), 0));
                    }

                    // divide coordinates
                    newCoords = builder.CreateFDiv(newCoords, smearedProj);

                    //
                    // Remaining code declares new intrinsic and modifies call arguments
                    //

                    // build up argTypes for flexible parameters, including result
                    llvm::SmallVector<llvm::Type*, 5> types;

                    // result type
                    types.push_back(intrinsic->getType());

                    // use new coords to reflect shrink
                    types.push_back(newCoords->getType());

                    // add offset
                    switch (intrinsic->getIntrinsicID()) {
                    case Intrinsic::gla_fTextureSampleLodRefZOffset:
                    case Intrinsic::gla_fTextureSampleLodRefZOffsetGrad:
                        types.push_back(intrinsic->getArgOperand(ETOOffset)->getType());
                    default:
                        break;
                    }

                    // add gradients
                    switch (intrinsic->getIntrinsicID()) {
                    case Intrinsic::gla_fTextureSampleLodRefZOffsetGrad:
                        types.push_back(intrinsic->getArgOperand(ETODPdx)->getType());
                        types.push_back(intrinsic->getArgOperand(ETODPdy)->getType());
                    default:
                        break;
                    }

                    // declare the new intrinsic
                    // TODO: functionality: texturing correctness: is this getting the correct non-projective form?
                    Function* texture = Intrinsic::getDeclaration(module, intrinsic->getIntrinsicID(), types);

                    // modify arguments to match new intrinsic
                    intrinsic->setCalledFunction(texture);
                    intrinsic->setArgOperand(ETOFlag, MakeUnsignedConstant(module->getContext(), texFlags));
                    intrinsic->setArgOperand(ETOCoord, newCoords);

                    switch (intrinsic->getIntrinsicID()) {
                    case Intrinsic::gla_fTextureSampleLodRefZ:
                    case Intrinsic::gla_fTextureSampleLodRefZOffset:
                    case Intrinsic::gla_fTextureSampleLodRefZOffsetGrad:
                        intrinsic->setArgOperand(ETORefZ, builder.CreateFDiv(intrinsic->getArgOperand(ETORefZ), divisor));                        
                    default:
                        break;
                    }

                    // mark our change, but don't replace the intrinsic
                    changed = true;
                }
            }
            break;

        default:
            // The cases above need to be comprehensive in terms of checking
            // for which intrinsics to decompose.  If an intrinsic is not listed
            // there, the assumption is that it never needs to be decomposed.
            break;
        }

        if (newInst) {
            inst->replaceAllUsesWith(newInst);
            inst->dropAllReferences();
            inst->eraseFromParent();
            changed = true;
        }
    }
}