Esempio n. 1
0
File: cgxmm.c Progetto: davispuh/dmd
code *xmmpost(elem *e,regm_t *pretregs)
{
    elem *e1 = e->E1;
    elem *e2 = e->E2;
    tym_t ty1 = tybasic(e1->Ety);

    CodeBuilder cdb;

    regm_t retregs;
    unsigned reg;
    bool regvar = FALSE;
    if (config.flags4 & CFG4optimized)
    {
        // Be careful of cases like (x = x+x+x). We cannot evaluate in
        // x if x is in a register.
        unsigned varreg;
        regm_t varregm;
        if (isregvar(e1,&varregm,&varreg) &&    // if lvalue is register variable
            doinreg(e1->EV.sp.Vsym,e2)          // and we can compute directly into it
           )
        {
            regvar = TRUE;
            retregs = varregm;
            reg = varreg;                       // evaluate directly in target register
            cdb.append(getregs(retregs));       // destroy these regs
        }
    }

    code cs;
    if (!regvar)
    {
        code *c = getlvalue(&cs,e1,0);          // get EA
        cdb.append(c);
        retregs = XMMREGS & ~*pretregs;
        if (!retregs)
            retregs = XMMREGS;
        c = allocreg(&retregs,&reg,ty1);
        cdb.append(c);
        cs.Iop = xmmload(ty1, true);            // MOVSD xmm,xmm_m64
        code_newreg(&cs,reg - XMM0);
        cdb.gen(&cs);
        checkSetVex(cdb.last(), ty1);
    }

    // Result register
    regm_t resultregs = XMMREGS & *pretregs & ~retregs;
    if (!resultregs)
        resultregs = XMMREGS & ~retregs;
    unsigned resultreg;
    code *c = allocreg(&resultregs, &resultreg, ty1);
    cdb.append(c);

    cdb.gen2(xmmload(ty1,true),modregxrmx(3,resultreg-XMM0,reg-XMM0));   // MOVSS/D resultreg,reg
    checkSetVex(cdb.last(), ty1);

    regm_t rretregs = XMMREGS & ~(*pretregs | retregs | resultregs);
    if (!rretregs)
        rretregs = XMMREGS & ~(retregs | resultregs);
    c = codelem(e2,&rretregs,FALSE); // eval right leaf
    cdb.append(c);
    unsigned rreg = findreg(rretregs);

    unsigned op = xmmoperator(e1->Ety, e->Eoper);
    cdb.gen2(op,modregxrmx(3,reg-XMM0,rreg-XMM0));  // ADD reg,rreg
    checkSetVex(cdb.last(), e1->Ety);

    if (!regvar)
    {
        cs.Iop = xmmstore(ty1,true);      // reverse operand order of MOVS[SD]
        cdb.gen(&cs);
        checkSetVex(cdb.last(), ty1);
    }

    if (e1->Ecount ||                     // if lvalue is a CSE or
        regvar)                           // rvalue can't be a CSE
    {
        cdb.append(getregs_imm(retregs)); // necessary if both lvalue and
                                        //  rvalue are CSEs (since a reg
                                        //  can hold only one e at a time)
        cssave(e1,retregs,EOP(e1));     // if lvalue is a CSE
    }

    cdb.append(fixresult(e,resultregs,pretregs));
    freenode(e1);
    return cdb.finish();
}
Esempio n. 2
0
File: cgxmm.c Progetto: davispuh/dmd
/***************
 * Generate code for OPvecfill (broadcast).
 * OPvecfill takes the single value in e1 and
 * fills the vector type with it.
 */
code *cdvecfill(elem *e, regm_t *pretregs)
{
    //printf("cdvecfill(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));

    regm_t retregs = *pretregs & XMMREGS;
    if (!retregs)
        retregs = XMMREGS;

    CodeBuilder cdb;
    code *c;
    code cs;

    elem *e1 = e->E1;
#if 0
    if ((e1->Eoper == OPind && !e1->Ecount) || e1->Eoper == OPvar)
    {
        cr = getlvalue(&cs, e1, RMload | retregs);     // get addressing mode
    }
    else
    {
        unsigned rretregs = XMMREGS & ~retregs;
        cr = scodelem(op2, &rretregs, retregs, TRUE);
        unsigned rreg = findreg(rretregs) - XMM0;
        cs.Irm = modregrm(3,0,rreg & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (rreg & 8)
            cs.Irex |= REX_B;
    }
#endif

    unsigned reg;
    unsigned rreg;
    unsigned varreg;
    regm_t varregm;
    tym_t ty = tybasic(e->Ety);
    switch (ty)
    {
        case TYfloat4:
        case TYfloat8:
            if (config.avx &&
                ((e1->Eoper == OPind && !e1->Ecount) || e1->Eoper == OPvar && !isregvar(e1,&varregm,&varreg)) ||
                tysize(ty) == 32 && !isregvar(e1,&varregm,&varreg)
               )
            {
              Lint:
                if (e1->Eoper == OPvar)
                    e1->EV.sp.Vsym->Sflags &= ~GTregcand;

                // VBROADCASTSS XMM,MEM
                cdb.append(getlvalue(&cs, e1, 0));         // get addressing mode
                assert((cs.Irm & 0xC0) != 0xC0);           // AVX1 doesn't have register source operands
                cdb.append(allocreg(&retregs,&reg,ty));
                cs.Iop = VBROADCASTSS;
                cs.Irex &= ~REX_W;
                code_newreg(&cs,reg - XMM0);
                checkSetVex(&cs,ty);
                cdb.gen(&cs);
            }
            else
            {
                // SHUFPS XMM0,XMM0,0    0F C6 /r ib
                c = codelem(e1,&retregs,FALSE); // eval left leaf
                cdb.append(c);
                reg = findreg(retregs) - XMM0;
                cdb.append(getregs(retregs));
                cs.Iop = SHUFPS;
                cs.Irm = modregxrmx(3,reg,reg);
                cs.Iflags = 0;
                cs.IFL2 = FLconst;
                cs.IEV2.Vsize_t = 0;
                if (config.avx >= 2 || tysize(ty) == 32)
                {
                    // VBROADCASTSS XMM,XMM
                    cs.Iop = VBROADCASTSS;
                    checkSetVex(&cs, ty);
                }
                cdb.gen(&cs);
            }
            break;

        case TYdouble2:
        case TYdouble4:
            if (config.avx &&
                ((e1->Eoper == OPind && !e1->Ecount) || e1->Eoper == OPvar && !isregvar(e1,&varregm,&varreg)) ||
                tysize(ty) == 32 && !isregvar(e1,&varregm,&varreg)
               )
            {
                if (e1->Eoper == OPvar)
                    e1->EV.sp.Vsym->Sflags &= ~GTregcand;

                // VBROADCASTSD XMM,MEM
                cdb.append(getlvalue(&cs, e1, 0));         // get addressing mode
                assert((cs.Irm & 0xC0) != 0xC0);           // AVX1 doesn't have register source operands
                cdb.append(allocreg(&retregs,&reg,ty));
                cs.Iop = VBROADCASTSD;
                cs.Irex &= ~REX_W;
                code_newreg(&cs,reg - XMM0);
                checkSetVex(&cs,ty);
                cdb.gen(&cs);
            }
            else
            {
                // UNPCKLPD XMM0,XMM0     66 0F 14 /r
                c = codelem(e1,&retregs,FALSE); // eval left leaf
                cdb.append(c);
                reg = findreg(retregs) - XMM0;
                cdb.append(getregs(retregs));
                cs.Iop = UNPCKLPD;
                cs.Irm = modregxrmx(3,reg,reg);
                cs.Iflags = 0;
                if (config.avx >= 2 || tysize(ty) == 32)
                {
                    // VBROADCASTSD XMM,XMM
                    cs.Iop = VBROADCASTSD;
                    checkSetVex(&cs, ty);
                }
                cdb.gen(&cs);
            }
            break;

        case TYschar16:
        case TYuchar16:
        case TYschar32:
        case TYuchar32:
        {
            /* MOVD      XMM0,r
             * PUNPCKLBW XMM0,XMM0
             * PUNPCKLWD XMM0,XMM0
             * PSHUFD    XMM0,XMM0,0
             */
            regm_t regm = ALLREGS;
            c = codelem(e1,&regm,FALSE); // eval left leaf
            cdb.append(c);
            unsigned r = findreg(regm);

            c = allocreg(&retregs,&reg, e->Ety);
            cdb.append(c);
            reg -= XMM0;
            cdb.gen2(LODD,modregxrmx(3,reg,r));     // MOVD reg,r
            checkSetVex(cdb.last(),TYschar16);

            cs.Iop = PUNPCKLBW;
            cs.Irm = modregxrmx(3,reg,reg);
            cs.Iflags = 0;
            cdb.gen(&cs);
            cs.Iop = PUNPCKLWD;
            cdb.gen(&cs);

            cs.Iop = PSHUFD;
            cs.IFL2 = FLconst;
            cs.IEV2.Vsize_t = 0;
            checkSetVex(&cs,TYschar16);
            cdb.gen(&cs);
            if (tysize(ty) == 32)
            {
                // VINSERTF128 YMM0,YMM0,XMM0,1
                cs.Iop = VINSERTF128;
                cs.Irm = modregxrmx(3,reg,reg);
                cs.Iflags = 0;
                cs.IFL2 = FLconst;
                cs.IEV2.Vsize_t = 1;
                checkSetVex(&cs,ty);
                cdb.gen(&cs);
            }
            break;
        }

        case TYshort8:
        case TYushort8:
        case TYshort16:
        case TYushort16:
        {
            regm_t regm = ALLREGS;
            c = codelem(e1,&regm,FALSE); // eval left leaf
            cdb.append(c);
            unsigned r = findreg(regm);

            if (config.avx || tysize(ty) == 32)
            {
                /*
                 * VPXOR XMM0,XMM0,XMM0
                 * VPINSRW XMM0,XMM0,r,0
                 * VPINSRW XMM0,XMM0,r,1
                 * VPINSRW XMM0,XMM0,r,2
                 * VPINSRW XMM0,XMM0,r,3
                 */
                cdb.append(allocreg(&retregs,&reg, ty));
                cdb.gen2(PXOR,modregxrmx(3,reg-XMM0,reg-XMM0));
                checkSetVex(cdb.last(), TYshort8);
                for (int i = 0; i < tysize(ty) / 4; ++i)
                {
                    cdb.genc2(PINSRW,modregxrmx(3,reg-XMM0,r),i);
                    checkSetVex(cdb.last(), TYshort8);
                }
                if (tysize(ty) == 32)
                {
                    // VINSERTF128 YMM0,YMM0,XMM0,1
                    cs.Iop = VINSERTF128;
                    cs.Irm = modregxrmx(3,reg-XMM0,reg-XMM0);
                    cs.Iflags = 0;
                    cs.IFL2 = FLconst;
                    cs.IEV2.Vsize_t = 1;
                    checkSetVex(&cs,ty);
                    cdb.gen(&cs);
                }
                else
                {
                    // VPSHUFD XMM0,XMM0,0
                    cs.Iop = PSHUFD;
                    cs.Irm = modregxrmx(3,reg-XMM0,reg-XMM0);
                    cs.Iflags = 0;
                    cs.IFL2 = FLconst;
                    cs.IEV2.Vsize_t = 0;
                    checkSetVex(&cs,ty);
                    cdb.gen(&cs);
                }
            }
            else
            {
                /* MOVD      XMM0,r
                 * PUNPCKLWD XMM0,XMM0
                 * PSHUFD    XMM0,XMM0,0
                 */
                c = allocreg(&retregs,&reg, e->Ety);
                cdb.append(c);
                reg -= XMM0;
                cdb.gen2(LODD,modregxrmx(3,reg,r));     // MOVD reg,r
                checkSetVex(cdb.last(),e->Ety);

                cs.Iop = PUNPCKLWD;
                cs.Irm = modregxrmx(3,reg,reg);
                cs.Iflags = 0;
                cdb.gen(&cs);

                cs.Iop = PSHUFD;
                cs.IFL2 = FLconst;
                cs.IEV2.Vsize_t = 0;
                cdb.gen(&cs);
            }
            break;
        }

        case TYlong8:
        case TYulong8:
        case TYlong4:
        case TYulong4:
        {
            if (config.avx &&
                ((e1->Eoper == OPind && !e1->Ecount) || e1->Eoper == OPvar && !isregvar(e1,&varregm,&varreg)) ||
                tysize(ty) == 32 && !isregvar(e1,&varregm,&varreg))
            {
                goto Lint;
            }
            /* MOVD      XMM1,r
             * PSHUFD    XMM0,XMM1,0
             */
            regm_t regm = ALLREGS;
            c = codelem(e1,&regm,FALSE); // eval left leaf
            cdb.append(c);
            unsigned r = findreg(regm);

            c = allocreg(&retregs,&reg, e->Ety);
            cdb.append(c);
            reg -= XMM0;
            cdb.gen2(LODD,modregxrmx(3,reg,r));     // MOVD reg,r

            cs.Iop = PSHUFD;
            cs.Irm = modregxrmx(3,reg,reg);
            cs.Iflags = 0;
            cs.IFL2 = FLconst;
            cs.IEV2.Vsize_t = 0;
            if (config.avx >= 2 || tysize(ty) == 32)
            {
                // VBROADCASTSS XMM,XMM
                cs.Iop = VBROADCASTSS;
                checkSetVex(&cs, ty);
            }
            cdb.gen(&cs);
            break;
        }

        case TYllong2:
        case TYullong2:
        case TYllong4:
        case TYullong4:
            if (config.avx || tysize(ty) >= 32)
            {
                if (e1->Eoper == OPvar)
                    e1->EV.sp.Vsym->Sflags &= ~GTregcand;

                // VMOVDDUP XMM,MEM
                cdb.append(getlvalue(&cs, e1, 0));         // get addressing mode
                if ((cs.Irm & 0xC0) == 0xC0)
                {
                    unsigned sreg = ((cs.Irm & 7) | (cs.Irex & REX_B ? 8 : 0));
                    regm_t sregm = XMMREGS;
                    cdb.append(fixresult(e1, mask[sreg], &sregm));
                    unsigned rmreg = findreg(sregm);
                    cs.Irm = (cs.Irm & ~7) | ((rmreg - XMM0) & 7);
                    if ((rmreg - XMM0) & 8)
                        cs.Irex |= REX_B;
                    else
                        cs.Irex &= ~REX_B;
                }
                cdb.append(allocreg(&retregs,&reg,ty));
                if (config.avx >= 2 ||  tysize(ty) >= 32)
                {
                    cs.Iop = VBROADCASTSD;
                    cs.Irex &= ~REX_W;
                }
                else
                    cs.Iop = MOVDDUP;
                code_newreg(&cs,reg - XMM0);
                checkSetVex(&cs,ty);
                cdb.gen(&cs);
            }
            else
            {
                /* MOVQ XMM0,mem128
                 * PUNPCKLQDQ XMM0,XMM0
                 */
                c = codelem(e1,&retregs,FALSE); // eval left leaf
                cdb.append(c);
                unsigned reg = findreg(retregs);
                reg -= XMM0;
                //cdb.gen2(LODD,modregxrmx(3,reg,r));     // MOVQ reg,r

                cs.Iop = PUNPCKLQDQ;
                cs.Irm = modregxrmx(3,reg,reg);
                cs.Iflags = 0;
                cdb.gen(&cs);
            }
            break;

        default:
            assert(0);
    }

    c = fixresult(e,retregs,pretregs);
    cdb.append(c);
    return cdb.finish();
}
Esempio n. 3
0
File: cgxmm.c Progetto: davispuh/dmd
code *xmmeq(elem *e, unsigned op, elem *e1, elem *e2,regm_t *pretregs)
{
    tym_t tymll;
    unsigned reg;
    int i;
    code cs;
    elem *e11;
    bool regvar;                  /* TRUE means evaluate into register variable */
    regm_t varregm;
    unsigned varreg;
    targ_int postinc;

    //printf("xmmeq(e1 = %p, e2 = %p, *pretregs = %s)\n", e1, e2, regm_str(*pretregs));
    int e2oper = e2->Eoper;
    tym_t tyml = tybasic(e1->Ety);              /* type of lvalue               */
    regm_t retregs = *pretregs;

    if (!(retregs & XMMREGS))
        retregs = XMMREGS;              // pick any XMM reg

    bool aligned = xmmIsAligned(e1);
    cs.Iop = (op == OPeq) ? xmmstore(tyml, aligned) : op;
    regvar = FALSE;
    varregm = 0;
    if (config.flags4 & CFG4optimized)
    {
        // Be careful of cases like (x = x+x+x). We cannot evaluate in
        // x if x is in a register.
        if (isregvar(e1,&varregm,&varreg) &&    // if lvalue is register variable
            doinreg(e1->EV.sp.Vsym,e2) &&       // and we can compute directly into it
            varregm & XMMREGS
           )
        {   regvar = TRUE;
            retregs = varregm;
            reg = varreg;       /* evaluate directly in target register */
        }
    }
    if (*pretregs & mPSW && !EOP(e1))     // if evaluating e1 couldn't change flags
    {   // Be careful that this lines up with jmpopcode()
        retregs |= mPSW;
        *pretregs &= ~mPSW;
    }
    CodeBuilder cdb;
    cdb.append(scodelem(e2,&retregs,0,TRUE));    // get rvalue

    // Look for special case of (*p++ = ...), where p is a register variable
    if (e1->Eoper == OPind &&
        ((e11 = e1->E1)->Eoper == OPpostinc || e11->Eoper == OPpostdec) &&
        e11->E1->Eoper == OPvar &&
        e11->E1->EV.sp.Vsym->Sfl == FLreg
       )
    {
        postinc = e11->E2->EV.Vint;
        if (e11->Eoper == OPpostdec)
            postinc = -postinc;
        cdb.append(getlvalue(&cs,e11,RMstore | retregs));
        freenode(e11->E2);
    }
    else
    {   postinc = 0;
        cdb.append(getlvalue(&cs,e1,RMstore | retregs));       // get lvalue (cl == CNIL if regvar)
    }

    cdb.append(getregs_imm(regvar ? varregm : 0));

    reg = findreg(retregs & XMMREGS);
    cs.Irm |= modregrm(0,(reg - XMM0) & 7,0);
    if ((reg - XMM0) & 8)
        cs.Irex |= REX_R;

    // Do not generate mov from register onto itself
    if (!(regvar && reg == XMM0 + ((cs.Irm & 7) | (cs.Irex & REX_B ? 8 : 0))))
    {
        cdb.gen(&cs);         // MOV EA+offset,reg
        if (op == OPeq)
            checkSetVex(cdb.last(), tyml);
    }

    if (e1->Ecount ||                     // if lvalue is a CSE or
        regvar)                           // rvalue can't be a CSE
    {
        cdb.append(getregs_imm(retregs));        // necessary if both lvalue and
                                        //  rvalue are CSEs (since a reg
                                        //  can hold only one e at a time)
        cssave(e1,retregs,EOP(e1));     // if lvalue is a CSE
    }

    cdb.append(fixresult(e,retregs,pretregs));
Lp:
    if (postinc)
    {
        int reg = findreg(idxregm(&cs));
        if (*pretregs & mPSW)
        {   // Use LEA to avoid touching the flags
            unsigned rm = cs.Irm & 7;
            if (cs.Irex & REX_B)
                rm |= 8;
            cdb.genc1(0x8D,buildModregrm(2,reg,rm),FLconst,postinc);
            if (tysize(e11->E1->Ety) == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else if (I64)
        {
            cdb.genc2(0x81,modregrmx(3,0,reg),postinc);
            if (tysize(e11->E1->Ety) == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else
        {
            if (postinc == 1)
                cdb.gen1(0x40 + reg);         // INC reg
            else if (postinc == -(targ_int)1)
                cdb.gen1(0x48 + reg);         // DEC reg
            else
            {
                cdb.genc2(0x81,modregrm(3,0,reg),postinc);
            }
        }
    }
    freenode(e1);
    return cdb.finish();
}
Esempio n. 4
0
File: cgxmm.c Progetto: dsagal/dmd
code *xmmopass(elem *e,regm_t *pretregs)
{   elem *e1 = e->E1;
    elem *e2 = e->E2;
    tym_t ty1 = tybasic(e1->Ety);
    unsigned sz1 = tysize[ty1];
    regm_t rretregs = XMMREGS & ~*pretregs;
    if (!rretregs)
        rretregs = XMMREGS;

    code *cr = codelem(e2,&rretregs,FALSE); // eval right leaf
    unsigned rreg = findreg(rretregs);

    code cs;
    code *cl,*cg;

    regm_t retregs;
    unsigned reg;
    bool regvar = FALSE;
    if (config.flags4 & CFG4optimized)
    {
        // Be careful of cases like (x = x+x+x). We cannot evaluate in
        // x if x is in a register.
        unsigned varreg;
        regm_t varregm;
        if (isregvar(e1,&varregm,&varreg) &&    // if lvalue is register variable
            doinreg(e1->EV.sp.Vsym,e2)          // and we can compute directly into it
           )
        {   regvar = TRUE;
            retregs = varregm;
            reg = varreg;                       // evaluate directly in target register
            cl = NULL;
            cg = getregs(retregs);              // destroy these regs
        }
    }

    if (!regvar)
    {
        cl = getlvalue(&cs,e1,rretregs);        // get EA
        retregs = *pretregs & XMMREGS & ~rretregs;
        if (!retregs)
            retregs = XMMREGS & ~rretregs;
        cg = allocreg(&retregs,&reg,ty1);
        cs.Iop = xmmload(ty1);                  // MOVSD xmm,xmm_m64
        code_newreg(&cs,reg - XMM0);
        cg = gen(cg,&cs);
    }

    unsigned op = xmmoperator(e1->Ety, e->Eoper);
    code *co = gen2(CNIL,op,modregxrmx(3,reg-XMM0,rreg-XMM0));

    if (!regvar)
    {
        cs.Iop = xmmstore(ty1);           // reverse operand order of MOVS[SD]
        gen(co,&cs);
    }

    if (e1->Ecount ||                     // if lvalue is a CSE or
        regvar)                           // rvalue can't be a CSE
    {
        cl = cat(cl,getregs_imm(retregs));        // necessary if both lvalue and
                                        //  rvalue are CSEs (since a reg
                                        //  can hold only one e at a time)
        cssave(e1,retregs,EOP(e1));     // if lvalue is a CSE
    }

    co = cat(co,fixresult(e,retregs,pretregs));
    freenode(e1);
    return cat4(cr,cl,cg,co);
}