Beispiel #1
0
/******************************
 * Generate code for a binary operator whose two operands are
 * evaluated into XMM registers.
 *
 * Params:
 *      e        = binary operator node; e->E1 and e->E2 are its leaves
 *      pretregs = register mask requested by the caller; its XMM part
 *                 steers where the left leaf is evaluated, and it is
 *                 handed to fixresult() for the non-relational cases
 * Returns:
 *      the generated code sequence
 */
code *orthxmm(elem *e, regm_t *pretregs)
{
    elem *lhs = e->E1;
    elem *rhs = e->E2;

    // Left leaf: use the XMM registers the caller asked for, or any
    // XMM register when none were requested.
    regm_t lregs = *pretregs & XMMREGS;
    if (lregs == 0)
        lregs = XMMREGS;
    code *cleft = codelem(lhs, &lregs, FALSE);
    unsigned lreg = findreg(lregs);

    // Right leaf: evaluated into an XMM register not used by the left leaf.
    regm_t rregs = XMMREGS & ~lregs;
    code *cright = scodelem(rhs, &rregs, lregs, TRUE);

    unsigned opcode = xmmoperator(lhs->Ety, e->Eoper);
    unsigned rreg = findreg(rregs);

    // real +/- imaginary (in either order) is not actually an addition:
    // both operand registers together form the result.
    if ((e->Eoper == OPadd || e->Eoper == OPmin) &&
        ((tyreal(lhs->Ety) && tyimaginary(rhs->Ety)) ||
         (tyreal(rhs->Ety) && tyimaginary(lhs->Ety))))
    {
        code *cseq = cat(cleft, cright);
        lregs |= rregs;

        if (e->Eoper == OPmin)
        {
            // Subtraction: flip the sign bit of the right operand with
            //     XORPD/S rreg,signreg
            unsigned spareregs = XMMREGS & ~lregs;
            unsigned signreg;                   // holds the sign bit constant
            unsigned sz = tysize[tybasic(lhs->Ety)];
            cseq = cat(cseq, allocreg(&spareregs, &signreg, rhs->Ety));

            targ_size_t signbit = 0x80000000;
            if (sz == 8)
                signbit = 0x8000000000000000LL;
            cseq = cat(cseq, movxmmconst(signreg, sz, signbit, 0));
            cseq = cat(cseq, getregs(spareregs));

            unsigned xorop = (sz == 8) ? XORPD : XORPS;
            cseq = cat(cseq, gen2(CNIL, xorop, modregxrmx(3, rreg - XMM0, signreg - XMM0)));
        }

        if (lregs != *pretregs)
            cseq = cat(cseq, fixresult(e, lregs, pretregs));
        return cseq;
    }

    /* Memory addressing modes for OP XMM,MEM could be exploited here,
     * but currently both operands are always brought into registers.
     */
    if (OTrel(e->Eoper))
    {
        // Relational operator: emit the instruction with the right
        // operand's register in the reg field; no fixresult() is done.
        code *ccmp = gen2(CNIL, opcode, modregxrmx(3, rreg - XMM0, lreg - XMM0));
        return cat4(cleft, cright, CNIL, ccmp);
    }

    // Arithmetic operator: the left register is overwritten with the result.
    code *cfree = getregs(lregs);
    code *cop = gen2(CNIL, opcode, modregxrmx(3, lreg - XMM0, rreg - XMM0));
    if (lregs != *pretregs)
        cop = cat(cop, fixresult(e, lregs, pretregs));

    return cat4(cleft, cright, cfree, cop);
}
Beispiel #2
0
/******************************
 * Generate code to negate the value of e->E1 in an XMM register by
 * XORing it with a sign-bit constant.
 *
 * Emitted sequence:
 *      MOV valreg,e1
 *      MOV maskreg,signbit
 *      XOR valreg,maskreg
 *
 * Params:
 *      e        = negation node; e->E1 is the operand
 *      pretregs = requested result registers (must be non-zero)
 * Returns:
 *      the generated code sequence
 */
code *xmmneg(elem *e,regm_t *pretregs)
{
    assert(*pretregs);

    tym_t operandTy = tybasic(e->E1->Ety);
    int sz = _tysize[operandTy];
    const bool is8Bytes = (sz == 8);

    // Operand goes into a requested XMM register, or any XMM register
    // when the request contains none.
    regm_t valregs = *pretregs & XMMREGS;
    if (valregs == 0)
        valregs = XMMREGS;

    CodeBuilder cdb;
    cdb.append(codelem(e->E1, &valregs, FALSE));
    cdb.append(getregs(valregs));
    unsigned valreg = findreg(valregs);

    // A second, distinct XMM register receives the sign-bit constant.
    regm_t maskregs = XMMREGS & ~valregs;
    unsigned maskreg;
    cdb.append(allocreg(&maskregs, &maskreg, operandTy));

    targ_size_t signbit = 0x80000000;
    if (is8Bytes)
        signbit = 0x8000000000000000LL;
    cdb.append(movxmmconst(maskreg, sz, signbit, 0));

    cdb.append(getregs(valregs));
    unsigned xorop = is8Bytes ? XORPD : XORPS;          // XORPD/S valreg,maskreg
    cdb.gen2(xorop, modregxrmx(3, valreg - XMM0, maskreg - XMM0));

    cdb.append(fixresult(e, valregs, pretregs));
    return cdb.finish();
}
Beispiel #3
0
/******************************
 * Generate code to negate the value of e->E1 in an XMM register by
 * XORing it with a sign-bit constant.
 *
 * Emitted sequence:
 *      MOV valreg,e1
 *      MOV maskreg,signbit
 *      XOR valreg,maskreg
 *
 * Params:
 *      e        = negation node; e->E1 is the operand
 *      pretregs = requested result registers (must be non-zero)
 * Returns:
 *      the generated code sequence
 */
code *xmmneg(elem *e,regm_t *pretregs)
{
    assert(*pretregs);

    tym_t operandTy = tybasic(e->E1->Ety);
    int sz = tysize[operandTy];
    const bool is8Bytes = (sz == 8);

    // Operand goes into a requested XMM register, or any XMM register
    // when the request contains none.
    regm_t valregs = *pretregs & XMMREGS;
    if (valregs == 0)
        valregs = XMMREGS;

    code *cval = codelem(e->E1, &valregs, FALSE);
    cval = cat(cval, getregs(valregs));
    unsigned valreg = findreg(valregs);

    // A second, distinct XMM register receives the sign-bit constant.
    regm_t maskregs = XMMREGS & ~valregs;
    unsigned maskreg;
    cval = cat(cval, allocreg(&maskregs, &maskreg, operandTy));

    targ_size_t signbit = 0x80000000;
    if (is8Bytes)
        signbit = 0x8000000000000000LL;
    code *cmask = movxmmconst(maskreg, sz, signbit, 0);

    code *cfree = getregs(valregs);
    unsigned xorop = is8Bytes ? XORPD : XORPS;          // XORPD/S valreg,maskreg
    code *cxor = gen2(CNIL, xorop, modregxrmx(3, valreg - XMM0, maskreg - XMM0));
    cxor = cat(cxor, fixresult(e, valregs, pretregs));

    return cat4(cval, cmask, cfree, cxor);
}
Beispiel #4
0
Datei: nteh.c Projekt: spott/dmd
/******************************
 * Generate code for a call to setjmp, implemented as a call to the
 * runtime library routine RTLSYM_SETJMP3 (__setjmp3).
 *
 * Depending on the exception handling context of the current function,
 * extra arguments are pushed first (each push is 4 bytes and is tracked
 * in stackpush / genadjesp), followed by a flag giving the variant:
 *      3   C++ try block (SCPP builds only)
 *      2   NT SEH try block
 *      0   neither
 * The original argument(s) in e->E1 are then pushed via params(), the
 * call is made, and ESP is adjusted by the total number of bytes pushed.
 *
 * Params:
 *      e        = the setjmp call node; e->E1 holds the argument(s)
 *      pretregs = requested result registers, passed on to fixresult()
 * Returns:
 *      the generated code sequence
 */
code *cdsetjmp(elem *e,regm_t *pretregs)
{   code cs;
    code *c;
    regm_t retregs;
    unsigned stackpushsave;
    unsigned flag;

    c = NULL;
    stackpushsave = stackpush;          // remembered so the pushes can be undone after the call
#if SCPP
    if (CPP && (funcsym_p->Sfunc->Fflags3 & Fcppeh || usednteh & NTEHcpp))
    {
        /*  If in C++ try block
            If the frame that is calling setjmp has a try,catch block then
            the call to setjmp3 is as follows:
              __setjmp3(environment,3,__cpp_longjmp_unwind,trylevel,funcdata);

            __cpp_longjmp_unwind is a routine in the RTL. This is a
            stdcall routine that will deal with unwinding for CPP Frames.
            trylevel is the value that gets incremented at each catch,
            constructor invocation.
            funcdata is the same value that you put into EAX prior to
            cppframehandler getting called.
         */
        symbol *s;

        s = except_gensym();
        if (!s)
            goto L1;                    // no symbol: fall back to the plain (flag 0) form

        c = gencs(c,0x68,0,FLextern,s);                 // PUSH &scope_table
        stackpush += 4;
        genadjesp(c,4);

        c = genc1(c,0xFF,modregrm(1,6,BP),FLconst,(targ_uns)-4);
        // PUSH trylevel
        stackpush += 4;
        genadjesp(c,4);

        cs.Iop = 0x68;
        cs.Iflags = CFoff;
        cs.Irex = 0;
        cs.IFL2 = FLextern;
        cs.IEVsym2 = rtlsym[RTLSYM_CPP_LONGJMP];
        cs.IEVoffset2 = 0;
        c = gen(c,&cs);                         // PUSH &_cpp_longjmp_unwind
        stackpush += 4;
        genadjesp(c,4);

        flag = 3;
    }
    else
#endif
        if (funcsym_p->Sfunc->Fflags3 & Fnteh)
        {
            /*  If in NT SEH try block
                If the frame that is calling setjmp has a try, except block
                then the call to setjmp3 is as follows:
                  __setjmp3(environment,2,__seh_longjmp_unwind,trylevel);
                __seh_longjmp_unwind is supplied by the RTL and is a stdcall
                function. It is the name that MSOFT uses, we should
                probably use the same one.
                trylevel is the value that you increment at each try and
                decrement at the close of the try.  This corresponds to the
                index field of the ehrec.
             */
            int sindex_off;

            sindex_off = 20;                // offset of __context.sindex
            cs.Iop = 0xFF;
            cs.Irm = modregrm(2,6,BPRM);
            cs.Iflags = 0;
            cs.Irex = 0;
            cs.IFL1 = FLbprel;
            cs.IEVsym1 = nteh_contextsym();
            cs.IEVoffset1 = sindex_off;
            c = gen(c,&cs);                 // PUSH scope_index
            stackpush += 4;
            genadjesp(c,4);

            cs.Iop = 0x68;
            cs.Iflags = CFoff;
            cs.Irex = 0;
            cs.IFL2 = FLextern;
            cs.IEVsym2 = rtlsym[RTLSYM_LONGJMP];
            cs.IEVoffset2 = 0;
            c = gen(c,&cs);                 // PUSH &_seh_longjmp_unwind
            stackpush += 4;
            genadjesp(c,4);

            flag = 2;
        }
        else
        {
            /*  If the frame calling setjmp has neither a try..except, nor a
                try..catch, then call setjmp3 as follows:
                _setjmp3(environment,0)
             */
L1:         // also reached from the C++ branch when except_gensym() returns NULL
            flag = 0;
        }

    // Push the flag selecting which of the above forms is being used
    cs.Iop = 0x68;
    cs.Iflags = 0;
    cs.Irex = 0;
    cs.IFL2 = FLconst;
    cs.IEV2.Vint = flag;
    c = gen(c,&cs);                     // PUSH flag
    stackpush += 4;
    genadjesp(c,4);

    // Push the argument(s) of the original setjmp call
    c = cat(c,params(e->E1,REGSIZE));

    // Free every register in ALLREGS|mES that __setjmp3 is not marked as saving
    c = cat(c,getregs(~rtlsym[RTLSYM_SETJMP3]->Sregsaved & (ALLREGS | mES)));
    // NOTE(review): the return value is ignored; c is non-NULL here, so
    // presumably gencs() appends the CALL to the existing list - confirm.
    gencs(c,0xE8,0,FLfunc,rtlsym[RTLSYM_SETJMP3]);      // CALL __setjmp3

    // Remove everything pushed since entry; the amount is computed, not fixed
    c = genc2(c,0x81,modregrm(3,0,SP),stackpush - stackpushsave);       // ADD ESP,(stackpush - stackpushsave)
    genadjesp(c,-(stackpush - stackpushsave));

    stackpush = stackpushsave;
    retregs = regmask(e->Ety, TYnfunc);     // registers holding the function's return value
    return cat(c,fixresult(e,retregs,pretregs));
}
Beispiel #5
0
/******************************
 * Generate code for a binary operator whose two operands are
 * evaluated into XMM registers.
 *
 * Params:
 *      e        = binary operator node; e->E1 and e->E2 are its leaves
 *      pretregs = register mask requested by the caller; its XMM part
 *                 steers register selection, and it is handed to
 *                 fixresult() for the non-relational cases
 * Returns:
 *      the generated code sequence
 */
code *orthxmm(elem *e, regm_t *pretregs)
{
    elem *lhs = e->E1;
    elem *rhs = e->E2;

    // real +/- imaginary (in either order) is not actually an addition:
    // the two operands end up side by side in a register pair.
    if ((e->Eoper == OPadd || e->Eoper == OPmin) &&
        ((tyreal(lhs->Ety) && tyimaginary(rhs->Ety)) ||
         (tyreal(rhs->Ety) && tyimaginary(lhs->Ety))))
    {
        regm_t avail = *pretregs & XMMREGS;
        if (avail == 0)
            avail = XMMREGS;

        // The real operand gets the first register found in the mask,
        // the imaginary operand the next one.
        unsigned lreg;
        unsigned rreg;
        if (tyreal(lhs->Ety))
        {
            lreg = findreg(avail);
            rreg = findreg(avail & ~mask[lreg]);
        }
        else
        {
            rreg = findreg(avail);
            lreg = findreg(avail & ~mask[rreg]);
        }
        regm_t lregs = mask[lreg];
        regm_t rregs = mask[rreg];
        assert(lregs && rregs);

        CodeBuilder pair;
        pair.append(codelem(lhs, &lregs, FALSE));               // left leaf
        pair.append(scodelem(rhs, &rregs, lregs, TRUE));        // right leaf

        lregs |= rregs;
        if (e->Eoper == OPmin)
        {
            // Subtraction: flip the sign bit of the right operand with
            //     XORPD/S rreg,signreg
            unsigned spareregs = XMMREGS & ~lregs;
            unsigned signreg;                   // holds the sign bit constant
            unsigned sz = tysize(lhs->Ety);
            pair.append(allocreg(&spareregs, &signreg, rhs->Ety));

            targ_size_t signbit = 0x80000000;
            if (sz == 8)
                signbit = 0x8000000000000000LL;
            pair.append(movxmmconst(signreg, sz, signbit, 0));
            pair.append(getregs(spareregs));

            unsigned xorop = (sz == 8) ? XORPD : XORPS;
            pair.gen2(xorop, modregxrmx(3, rreg - XMM0, signreg - XMM0));
        }
        if (lregs != *pretregs)
            pair.append(fixresult(e, lregs, pretregs));
        return pair.finish();
    }

    // General case. Left leaf: a requested XMM register, or any XMM
    // register when none were requested.
    regm_t lregs = *pretregs & XMMREGS;
    if (lregs == 0)
        lregs = XMMREGS;

    CodeBuilder cdb;
    cdb.append(codelem(lhs, &lregs, FALSE));
    unsigned lreg = findreg(lregs);

    // Right leaf: an XMM register not used by the left leaf.
    regm_t rregs = XMMREGS & ~lregs;
    cdb.append(scodelem(rhs, &rregs, lregs, TRUE));
    unsigned rreg = findreg(rregs);

    unsigned opcode = xmmoperator(lhs->Ety, e->Eoper);

    /* Memory addressing modes for OP XMM,MEM could be exploited here,
     * but currently both operands are always brought into registers.
     */
    if (OTrel(e->Eoper))
    {
        // Relational operator: emit the instruction with the right
        // operand's register in the reg field; no fixresult() is done.
        cdb.gen2(opcode, modregxrmx(3, rreg - XMM0, lreg - XMM0));
        checkSetVex(cdb.last(), lhs->Ety);
        return cdb.finish();
    }

    // Arithmetic operator: the left register is overwritten with the result.
    cdb.append(getregs(lregs));
    cdb.gen2(opcode, modregxrmx(3, lreg - XMM0, rreg - XMM0));
    checkSetVex(cdb.last(), lhs->Ety);
    if (lregs != *pretregs)
        cdb.append(fixresult(e, lregs, pretregs));

    return cdb.finish();
}
Beispiel #6
0
/******************************
 * Generate code for a post-increment/decrement style operator on a
 * value handled in XMM registers: the old value of the lvalue is
 * copied to a result register, then the lvalue is updated with
 * (lvalue op e2).
 *
 * Params:
 *      e        = operator node; e->E1 is the lvalue, e->E2 the amount
 *      pretregs = requested result registers, passed on to fixresult()
 * Returns:
 *      the generated code sequence
 */
code *xmmpost(elem *e,regm_t *pretregs)
{
    elem *lvalue = e->E1;
    elem *amount = e->E2;
    tym_t lty = tybasic(lvalue->Ety);

    CodeBuilder cdb;

    regm_t workregs;            // register (mask) in which the update is computed
    unsigned workreg;
    bool inRegVar = FALSE;      // TRUE when computing directly in a register variable
    if (config.flags4 & CFG4optimized)
    {
        // Watch out for cases like (x = x+x+x): evaluating in x is not
        // possible when x itself lives in a register.
        unsigned varreg;
        regm_t varregm;
        if (isregvar(lvalue, &varregm, &varreg) &&      // lvalue is a register variable
            doinreg(lvalue->EV.sp.Vsym, amount))        // and can be computed into directly
        {
            inRegVar = TRUE;
            workregs = varregm;
            workreg = varreg;
            cdb.append(getregs(workregs));              // these registers get destroyed
        }
    }

    code cs;
    if (!inRegVar)
    {
        // Lvalue is accessed through an effective address: load it into
        // a working register, preferring one outside *pretregs.
        cdb.append(getlvalue(&cs, lvalue, 0));
        workregs = XMMREGS & ~*pretregs;
        if (workregs == 0)
            workregs = XMMREGS;
        cdb.append(allocreg(&workregs, &workreg, lty));
        cs.Iop = xmmload(lty, true);                    // MOVSD xmm,xmm_m64
        code_newreg(&cs, workreg - XMM0);
        cdb.gen(&cs);
        checkSetVex(cdb.last(), lty);
    }

    // Register for the returned (pre-update) value; must differ from workregs.
    regm_t outregs = XMMREGS & *pretregs & ~workregs;
    if (outregs == 0)
        outregs = XMMREGS & ~workregs;
    unsigned outreg;
    cdb.append(allocreg(&outregs, &outreg, lty));

    cdb.gen2(xmmload(lty, true), modregxrmx(3, outreg - XMM0, workreg - XMM0));  // MOVSS/D outreg,workreg
    checkSetVex(cdb.last(), lty);

    // Right leaf goes into yet another XMM register, avoiding *pretregs if possible.
    regm_t amountregs = XMMREGS & ~(*pretregs | workregs | outregs);
    if (amountregs == 0)
        amountregs = XMMREGS & ~(workregs | outregs);
    cdb.append(codelem(amount, &amountregs, FALSE));
    unsigned amountreg = findreg(amountregs);

    unsigned opcode = xmmoperator(lvalue->Ety, e->Eoper);
    cdb.gen2(opcode, modregxrmx(3, workreg - XMM0, amountreg - XMM0));      // OP workreg,amountreg
    checkSetVex(cdb.last(), lvalue->Ety);

    if (!inRegVar)
    {
        // Write the updated value back through the same effective address
        cs.Iop = xmmstore(lty, true);                   // MOVS[SD] with operands reversed
        cdb.gen(&cs);
        checkSetVex(cdb.last(), lty);
    }

    if (lvalue->Ecount ||               // lvalue is a CSE, or
        inRegVar)                       // rvalue can't be a CSE
    {
        // Needed when both lvalue and rvalue are CSEs, since a register
        // can hold only one e at a time.
        cdb.append(getregs_imm(workregs));
        cssave(lvalue, workregs, EOP(lvalue));          // if lvalue is a CSE
    }

    cdb.append(fixresult(e, outregs, pretregs));
    freenode(lvalue);
    return cdb.finish();
}
Beispiel #7
0
/******************************
 * Generate code for a conversion between integer and floating point,
 * or between float and double, using the CVT* instructions.
 *
 * There are no elem operators for integer <-> float conversions, but
 * there are instructions for them, so chained conversions through
 * double are fused where possible. Care is taken not to lose precision
 * for real to long.
 *
 * Params:
 *      e        = conversion node; e->E1 is the operand
 *      pretregs = requested result registers
 * Returns:
 *      the generated code sequence
 */
code *xmmcnvt(elem *e,regm_t *pretregs)
{
    unsigned opcode = 0;                // remains 0 when e->Eoper is not handled
    unsigned srcregs;                   // where the operand is evaluated
    tym_t resultTy;                     // type used to allocate the result register
    unsigned char rexbits = 0;
    bool zeroExtend = false;            // operand is a uint to be zero extended

    elem *src = e->E1;
    switch (e->Eoper)
    {
    case OPd_f:
    {
        // int -> double -> float can be done in one step with CVTSI2SS
        bool fromInt = true;
        if (src->Eoper == OPs32_d)
        {
        }
        else if (I64 && src->Eoper == OPs64_d)
            rexbits = REX_W;
        else if (I64 && src->Eoper == OPu32_d)
        {
            rexbits = REX_W;
            zeroExtend = true;
        }
        else
            fromInt = false;

        resultTy = TYfloat;
        if (fromInt)
        {
            srcregs = ALLREGS;
            src = src->E1;              // skip the inner conversion
            opcode = CVTSI2SS;
        }
        else
        {
            srcregs = XMMREGS;
            opcode = CVTSD2SS;
        }
        break;
    }

    case OPs32_d:
    case OPs64_d:
    case OPu32_d:
        if (e->Eoper != OPs32_d)
            rexbits = REX_W;
        if (e->Eoper == OPu32_d)
            zeroExtend = true;
        srcregs = ALLREGS;
        opcode = CVTSI2SD;
        resultTy = TYdouble;
        break;

    case OPd_s32:
    case OPd_u32:
    case OPd_s64:
        if (e->Eoper == OPd_s32)
            resultTy = TYint;
        else
        {
            resultTy = TYlong;
            if (e->Eoper == OPd_s64 || I64)
                rexbits = REX_W;
        }
        srcregs = XMMREGS;
        if (src->Eoper == OPf_d)
        {
            // float -> double -> int: convert straight from the float
            src = src->E1;
            opcode = CVTTSS2SI;
        }
        else
        {
            if (src->Eoper == OPld_d && e->Eoper == OPd_s64)
                return cnvt87(e,pretregs);      // precision
            opcode = CVTTSD2SI;
        }
        break;

    case OPf_d:
        srcregs = XMMREGS;
        opcode = CVTSS2SD;
        resultTy = TYdouble;
        break;

    default:
        break;
    }
    assert(opcode);

    CodeBuilder cdb;
    cdb.append(codelem(src, &srcregs, FALSE));
    unsigned srcreg = findreg(srcregs);
    if (srcreg >= XMM0)
        srcreg -= XMM0;
    else if (zeroExtend)
    {
        assert(I64);
        cdb.append(getregs(srcregs));
        cdb.append(genregs(CNIL,STO,srcreg,srcreg));    // MOV reg,reg to zero upper 32-bit
        code_orflag(cdb.last(),CFvolatile);
    }

    unsigned dstregs = *pretregs;
    if (tyxmmreg(resultTy))
    {
        // Target is XMM
        if (!(*pretregs & XMMREGS))
            dstregs = XMMREGS;
    }
    else
    {
        // Source is XMM
        assert(srcregs & XMMREGS);
        if (!(dstregs & ALLREGS))
            dstregs = ALLREGS;
    }

    unsigned dstreg;
    cdb.append(allocreg(&dstregs, &dstreg, resultTy));
    if (dstreg >= XMM0)
        dstreg -= XMM0;

    cdb.gen2(opcode, modregxrmx(3, dstreg, srcreg));
    assert(I64 || !rexbits);
    if (rexbits)
        code_orrex(cdb.last(), rexbits);

    if (*pretregs != dstregs)
        cdb.append(fixresult(e, dstregs, pretregs));
    return cdb.finish();
}
Beispiel #8
0
/******************************
 * Generate code for storing the XMM value of e2 into the lvalue e1.
 *
 * Params:
 *      e        = the node the result is reported for via fixresult()
 *      op       = OPeq to use the store opcode selected by xmmstore()
 *                 from the lvalue type and alignment; any other value is
 *                 used directly as the instruction opcode
 *      e1       = lvalue (freed with freenode() before returning)
 *      e2       = rvalue
 *      pretregs = requested result registers; the mPSW bit may be
 *                 cleared here (see below)
 * Returns:
 *      the generated code sequence
 */
code *xmmeq(elem *e, unsigned op, elem *e1, elem *e2,regm_t *pretregs)
{
    tym_t tymll;                  // NOTE(review): not used in this function
    unsigned reg;
    int i;                        // NOTE(review): not used in this function
    code cs;
    elem *e11;
    bool regvar;                  /* TRUE means evaluate into register variable */
    regm_t varregm;
    unsigned varreg;
    targ_int postinc;             // non-zero: pending adjustment for (*p++ = ...) / (*p-- = ...)

    //printf("xmmeq(e1 = %p, e2 = %p, *pretregs = %s)\n", e1, e2, regm_str(*pretregs));
    int e2oper = e2->Eoper;       // NOTE(review): not used in this function
    tym_t tyml = tybasic(e1->Ety);              /* type of lvalue               */
    regm_t retregs = *pretregs;

    if (!(retregs & XMMREGS))
        retregs = XMMREGS;              // pick any XMM reg

    bool aligned = xmmIsAligned(e1);
    cs.Iop = (op == OPeq) ? xmmstore(tyml, aligned) : op;
    regvar = FALSE;
    varregm = 0;
    if (config.flags4 & CFG4optimized)
    {
        // Be careful of cases like (x = x+x+x). We cannot evaluate in
        // x if x is in a register.
        if (isregvar(e1,&varregm,&varreg) &&    // if lvalue is register variable
            doinreg(e1->EV.sp.Vsym,e2) &&       // and we can compute directly into it
            varregm & XMMREGS
           )
        {   regvar = TRUE;
            retregs = varregm;
            reg = varreg;       /* evaluate directly in target register */
        }
    }
    if (*pretregs & mPSW && !EOP(e1))     // if evaluating e1 couldn't change flags
    {   // Be careful that this lines up with jmpopcode()
        // Ask for the flags while evaluating the rvalue and drop the
        // request from what is passed to fixresult() later.
        retregs |= mPSW;
        *pretregs &= ~mPSW;
    }
    CodeBuilder cdb;
    cdb.append(scodelem(e2,&retregs,0,TRUE));    // get rvalue

    // Look for special case of (*p++ = ...), where p is a register variable
    if (e1->Eoper == OPind &&
        ((e11 = e1->E1)->Eoper == OPpostinc || e11->Eoper == OPpostdec) &&
        e11->E1->Eoper == OPvar &&
        e11->E1->EV.sp.Vsym->Sfl == FLreg
       )
    {
        // Remember the amount; the pointer is adjusted after the store (below)
        postinc = e11->E2->EV.Vint;
        if (e11->Eoper == OPpostdec)
            postinc = -postinc;
        cdb.append(getlvalue(&cs,e11,RMstore | retregs));
        freenode(e11->E2);
    }
    else
    {   postinc = 0;
        cdb.append(getlvalue(&cs,e1,RMstore | retregs));       // get lvalue (cl == CNIL if regvar)
    }

    cdb.append(getregs_imm(regvar ? varregm : 0));

    // Encode the source XMM register into the reg field of the instruction
    // (low 3 bits in Irm, the fourth bit as REX_R)
    reg = findreg(retregs & XMMREGS);
    cs.Irm |= modregrm(0,(reg - XMM0) & 7,0);
    if ((reg - XMM0) & 8)
        cs.Irex |= REX_R;

    // Do not generate mov from register onto itself
    if (!(regvar && reg == XMM0 + ((cs.Irm & 7) | (cs.Irex & REX_B ? 8 : 0))))
    {
        cdb.gen(&cs);         // MOV EA+offset,reg
        if (op == OPeq)
            checkSetVex(cdb.last(), tyml);
    }

    if (e1->Ecount ||                     // if lvalue is a CSE or
        regvar)                           // rvalue can't be a CSE
    {
        cdb.append(getregs_imm(retregs));        // necessary if both lvalue and
                                        //  rvalue are CSEs (since a reg
                                        //  can hold only one e at a time)
        cssave(e1,retregs,EOP(e1));     // if lvalue is a CSE
    }

    cdb.append(fixresult(e,retregs,pretregs));
Lp:                                     // NOTE(review): no goto to this label is visible in this function
    if (postinc)
    {
        // Apply the deferred pointer adjustment to the index register of cs.
        // This local reg deliberately shadows the outer one.
        int reg = findreg(idxregm(&cs));
        if (*pretregs & mPSW)
        {   // Use LEA to avoid touching the flags
            unsigned rm = cs.Irm & 7;
            if (cs.Irex & REX_B)
                rm |= 8;
            cdb.genc1(0x8D,buildModregrm(2,reg,rm),FLconst,postinc);
            if (tysize(e11->E1->Ety) == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else if (I64)
        {
            // 0x81 /0 is ADD reg,imm
            cdb.genc2(0x81,modregrmx(3,0,reg),postinc);
            if (tysize(e11->E1->Ety) == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else
        {
            // Not I64: one-byte INC/DEC are used for +1 / -1
            if (postinc == 1)
                cdb.gen1(0x40 + reg);         // INC reg
            else if (postinc == -(targ_int)1)
                cdb.gen1(0x48 + reg);         // DEC reg
            else
            {
                cdb.genc2(0x81,modregrm(3,0,reg),postinc);
            }
        }
    }
    freenode(e1);
    return cdb.finish();
}
Beispiel #9
0
/***************
 * Generate code for OPvecfill (broadcast).
 * OPvecfill takes the single value in e1 and
 * fills the vector type with it.
 *
 * The instruction sequence is chosen by the basic type of e (the vector
 * type), by config.avx, and by whether the vector is 32 bytes in size.
 *
 * Params:
 *      e        = the OPvecfill node; e->E1 is the scalar to broadcast
 *      pretregs = requested result registers, passed on to fixresult()
 * Returns:
 *      the generated code sequence
 */
code *cdvecfill(elem *e, regm_t *pretregs)
{
    //printf("cdvecfill(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));

    // Result goes into a requested XMM register, or any XMM register
    regm_t retregs = *pretregs & XMMREGS;
    if (!retregs)
        retregs = XMMREGS;

    CodeBuilder cdb;
    code *c;
    code cs;

    elem *e1 = e->E1;
#if 0
    if ((e1->Eoper == OPind && !e1->Ecount) || e1->Eoper == OPvar)
    {
        cr = getlvalue(&cs, e1, RMload | retregs);     // get addressing mode
    }
    else
    {
        unsigned rretregs = XMMREGS & ~retregs;
        cr = scodelem(op2, &rretregs, retregs, TRUE);
        unsigned rreg = findreg(rretregs) - XMM0;
        cs.Irm = modregrm(3,0,rreg & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (rreg & 8)
            cs.Irex |= REX_B;
    }
#endif

    unsigned reg;
    unsigned rreg;              // NOTE(review): not used by the enabled code below
    unsigned varreg;
    regm_t varregm;
    tym_t ty = tybasic(e->Ety);
    switch (ty)
    {
        case TYfloat4:
        case TYfloat8:
            // Memory-operand form when (AVX and e1 is a non-CSE indirection or a
            // variable that is not in a register) or (32 byte vector and e1 is
            // not a register variable). Note && binds tighter than ||.
            if (config.avx &&
                ((e1->Eoper == OPind && !e1->Ecount) || e1->Eoper == OPvar && !isregvar(e1,&varregm,&varreg)) ||
                tysize(ty) == 32 && !isregvar(e1,&varregm,&varreg)
               )
            {
              Lint:             // the TYlong8/TYulong8/TYlong4/TYulong4 case also jumps here
                // Clear GTregcand on the variable's symbol (presumably so it
                // stays addressable in memory - confirm)
                if (e1->Eoper == OPvar)
                    e1->EV.sp.Vsym->Sflags &= ~GTregcand;

                // VBROADCASTSS XMM,MEM
                cdb.append(getlvalue(&cs, e1, 0));         // get addressing mode
                assert((cs.Irm & 0xC0) != 0xC0);           // AVX1 doesn't have register source operands
                cdb.append(allocreg(&retregs,&reg,ty));
                cs.Iop = VBROADCASTSS;
                cs.Irex &= ~REX_W;
                code_newreg(&cs,reg - XMM0);
                checkSetVex(&cs,ty);
                cdb.gen(&cs);
            }
            else
            {
                // SHUFPS XMM0,XMM0,0    0F C6 /r ib
                c = codelem(e1,&retregs,FALSE); // eval left leaf
                cdb.append(c);
                reg = findreg(retregs) - XMM0;
                cdb.append(getregs(retregs));
                cs.Iop = SHUFPS;
                cs.Irm = modregxrmx(3,reg,reg);
                cs.Iflags = 0;
                cs.IFL2 = FLconst;
                cs.IEV2.Vsize_t = 0;
                // With AVX2 or a 32 byte vector, switch to the broadcast opcode
                if (config.avx >= 2 || tysize(ty) == 32)
                {
                    // VBROADCASTSS XMM,XMM
                    cs.Iop = VBROADCASTSS;
                    checkSetVex(&cs, ty);
                }
                cdb.gen(&cs);
            }
            break;

        case TYdouble2:
        case TYdouble4:
            // Same selection as for the float vectors, with the double opcodes
            if (config.avx &&
                ((e1->Eoper == OPind && !e1->Ecount) || e1->Eoper == OPvar && !isregvar(e1,&varregm,&varreg)) ||
                tysize(ty) == 32 && !isregvar(e1,&varregm,&varreg)
               )
            {
                if (e1->Eoper == OPvar)
                    e1->EV.sp.Vsym->Sflags &= ~GTregcand;

                // VBROADCASTSD XMM,MEM
                cdb.append(getlvalue(&cs, e1, 0));         // get addressing mode
                assert((cs.Irm & 0xC0) != 0xC0);           // AVX1 doesn't have register source operands
                cdb.append(allocreg(&retregs,&reg,ty));
                cs.Iop = VBROADCASTSD;
                cs.Irex &= ~REX_W;
                code_newreg(&cs,reg - XMM0);
                checkSetVex(&cs,ty);
                cdb.gen(&cs);
            }
            else
            {
                // UNPCKLPD XMM0,XMM0     66 0F 14 /r
                c = codelem(e1,&retregs,FALSE); // eval left leaf
                cdb.append(c);
                reg = findreg(retregs) - XMM0;
                cdb.append(getregs(retregs));
                cs.Iop = UNPCKLPD;
                cs.Irm = modregxrmx(3,reg,reg);
                cs.Iflags = 0;
                if (config.avx >= 2 || tysize(ty) == 32)
                {
                    // VBROADCASTSD XMM,XMM
                    cs.Iop = VBROADCASTSD;
                    checkSetVex(&cs, ty);
                }
                cdb.gen(&cs);
            }
            break;

        case TYschar16:
        case TYuchar16:
        case TYschar32:
        case TYuchar32:
        {
            /* MOVD      XMM0,r
             * PUNPCKLBW XMM0,XMM0
             * PUNPCKLWD XMM0,XMM0
             * PSHUFD    XMM0,XMM0,0
             */
            // The scalar is first evaluated into a register from ALLREGS
            regm_t regm = ALLREGS;
            c = codelem(e1,&regm,FALSE); // eval left leaf
            cdb.append(c);
            unsigned r = findreg(regm);

            c = allocreg(&retregs,&reg, e->Ety);
            cdb.append(c);
            reg -= XMM0;
            cdb.gen2(LODD,modregxrmx(3,reg,r));     // MOVD reg,r
            checkSetVex(cdb.last(),TYschar16);

            // cs is reused for the following instructions; only Iop changes
            cs.Iop = PUNPCKLBW;
            cs.Irm = modregxrmx(3,reg,reg);
            cs.Iflags = 0;
            cdb.gen(&cs);
            cs.Iop = PUNPCKLWD;
            cdb.gen(&cs);

            cs.Iop = PSHUFD;
            cs.IFL2 = FLconst;
            cs.IEV2.Vsize_t = 0;
            checkSetVex(&cs,TYschar16);
            cdb.gen(&cs);
            if (tysize(ty) == 32)
            {
                // VINSERTF128 YMM0,YMM0,XMM0,1
                cs.Iop = VINSERTF128;
                cs.Irm = modregxrmx(3,reg,reg);
                cs.Iflags = 0;
                cs.IFL2 = FLconst;
                cs.IEV2.Vsize_t = 1;
                checkSetVex(&cs,ty);
                cdb.gen(&cs);
            }
            break;
        }

        case TYshort8:
        case TYushort8:
        case TYshort16:
        case TYushort16:
        {
            // The scalar is first evaluated into a register from ALLREGS
            regm_t regm = ALLREGS;
            c = codelem(e1,&regm,FALSE); // eval left leaf
            cdb.append(c);
            unsigned r = findreg(regm);

            if (config.avx || tysize(ty) == 32)
            {
                /*
                 * VPXOR XMM0,XMM0,XMM0
                 * VPINSRW XMM0,XMM0,r,0
                 * VPINSRW XMM0,XMM0,r,1
                 * VPINSRW XMM0,XMM0,r,2
                 * VPINSRW XMM0,XMM0,r,3
                 */
                cdb.append(allocreg(&retregs,&reg, ty));
                cdb.gen2(PXOR,modregxrmx(3,reg-XMM0,reg-XMM0));
                checkSetVex(cdb.last(), TYshort8);
                // One PINSRW per iteration, with immediates 0 .. tysize(ty)/4 - 1
                for (int i = 0; i < tysize(ty) / 4; ++i)
                {
                    cdb.genc2(PINSRW,modregxrmx(3,reg-XMM0,r),i);
                    checkSetVex(cdb.last(), TYshort8);
                }
                if (tysize(ty) == 32)
                {
                    // VINSERTF128 YMM0,YMM0,XMM0,1
                    cs.Iop = VINSERTF128;
                    cs.Irm = modregxrmx(3,reg-XMM0,reg-XMM0);
                    cs.Iflags = 0;
                    cs.IFL2 = FLconst;
                    cs.IEV2.Vsize_t = 1;
                    checkSetVex(&cs,ty);
                    cdb.gen(&cs);
                }
                else
                {
                    // VPSHUFD XMM0,XMM0,0
                    cs.Iop = PSHUFD;
                    cs.Irm = modregxrmx(3,reg-XMM0,reg-XMM0);
                    cs.Iflags = 0;
                    cs.IFL2 = FLconst;
                    cs.IEV2.Vsize_t = 0;
                    checkSetVex(&cs,ty);
                    cdb.gen(&cs);
                }
            }
            else
            {
                /* MOVD      XMM0,r
                 * PUNPCKLWD XMM0,XMM0
                 * PSHUFD    XMM0,XMM0,0
                 */
                c = allocreg(&retregs,&reg, e->Ety);
                cdb.append(c);
                reg -= XMM0;
                cdb.gen2(LODD,modregxrmx(3,reg,r));     // MOVD reg,r
                checkSetVex(cdb.last(),e->Ety);

                cs.Iop = PUNPCKLWD;
                cs.Irm = modregxrmx(3,reg,reg);
                cs.Iflags = 0;
                cdb.gen(&cs);

                cs.Iop = PSHUFD;
                cs.IFL2 = FLconst;
                cs.IEV2.Vsize_t = 0;
                cdb.gen(&cs);
            }
            break;
        }

        case TYlong8:
        case TYulong8:
        case TYlong4:
        case TYulong4:
        {
            // Same condition as the float case; when it holds, share that
            // case's memory-operand VBROADCASTSS code
            if (config.avx &&
                ((e1->Eoper == OPind && !e1->Ecount) || e1->Eoper == OPvar && !isregvar(e1,&varregm,&varreg)) ||
                tysize(ty) == 32 && !isregvar(e1,&varregm,&varreg))
            {
                goto Lint;
            }
            /* MOVD      XMM1,r
             * PSHUFD    XMM0,XMM1,0
             */
            regm_t regm = ALLREGS;
            c = codelem(e1,&regm,FALSE); // eval left leaf
            cdb.append(c);
            unsigned r = findreg(regm);

            c = allocreg(&retregs,&reg, e->Ety);
            cdb.append(c);
            reg -= XMM0;
            cdb.gen2(LODD,modregxrmx(3,reg,r));     // MOVD reg,r

            cs.Iop = PSHUFD;
            cs.Irm = modregxrmx(3,reg,reg);
            cs.Iflags = 0;
            cs.IFL2 = FLconst;
            cs.IEV2.Vsize_t = 0;
            if (config.avx >= 2 || tysize(ty) == 32)
            {
                // VBROADCASTSS XMM,XMM
                cs.Iop = VBROADCASTSS;
                checkSetVex(&cs, ty);
            }
            cdb.gen(&cs);
            break;
        }

        case TYllong2:
        case TYullong2:
        case TYllong4:
        case TYullong4:
            if (config.avx || tysize(ty) >= 32)
            {
                if (e1->Eoper == OPvar)
                    e1->EV.sp.Vsym->Sflags &= ~GTregcand;

                // VMOVDDUP XMM,MEM
                cdb.append(getlvalue(&cs, e1, 0));         // get addressing mode
                if ((cs.Irm & 0xC0) == 0xC0)
                {
                    // The addressing mode names a register: have fixresult()
                    // place the value in an XMM register and patch the rm
                    // field (and REX_B) to refer to that register
                    unsigned sreg = ((cs.Irm & 7) | (cs.Irex & REX_B ? 8 : 0));
                    regm_t sregm = XMMREGS;
                    cdb.append(fixresult(e1, mask[sreg], &sregm));
                    unsigned rmreg = findreg(sregm);
                    cs.Irm = (cs.Irm & ~7) | ((rmreg - XMM0) & 7);
                    if ((rmreg - XMM0) & 8)
                        cs.Irex |= REX_B;
                    else
                        cs.Irex &= ~REX_B;
                }
                cdb.append(allocreg(&retregs,&reg,ty));
                if (config.avx >= 2 ||  tysize(ty) >= 32)
                {
                    cs.Iop = VBROADCASTSD;
                    cs.Irex &= ~REX_W;
                }
                else
                    cs.Iop = MOVDDUP;
                code_newreg(&cs,reg - XMM0);
                checkSetVex(&cs,ty);
                cdb.gen(&cs);
            }
            else
            {
                /* MOVQ XMM0,mem128
                 * PUNPCKLQDQ XMM0,XMM0
                 */
                c = codelem(e1,&retregs,FALSE); // eval left leaf
                cdb.append(c);
                // This local reg shadows the function-level reg
                unsigned reg = findreg(retregs);
                reg -= XMM0;
                //cdb.gen2(LODD,modregxrmx(3,reg,r));     // MOVQ reg,r

                cs.Iop = PUNPCKLQDQ;
                cs.Irm = modregxrmx(3,reg,reg);
                cs.Iflags = 0;
                cdb.gen(&cs);
            }
            break;

        default:
            // Any other vector type is not handled here
            assert(0);
    }

    c = fixresult(e,retregs,pretregs);
    cdb.append(c);
    return cdb.finish();
}
Beispiel #10
0
/*****************************************
 * Generate code for a vector-instruction elem.
 *
 * e should look like one of:
 *    vector
 *      |
 *    param
 *    /   \
 *  param op2
 *  /   \
 * op   op1
 *
 * The parameter tree under e->E1 is flattened into an array:
 *      params[0]  opcode (a constant, read with el_tolong)
 *      params[1]  op1
 *      params[2]  op2   (only when n >= 3)
 *      params[3]  imm8  (only when n == 4)
 *
 * Params:
 *      e        = the vector elem; freenode()'d once a result has been generated
 *      pretregs = requested result registers; 0 means "evaluate the
 *                 parameters for their side effects only"
 * Returns:
 *      the generated code list
 */
code *cdvector(elem *e, regm_t *pretregs)
{
    if (!config.fpxmmregs)
    {   printf("SIMD operations not supported on this platform\n");
        exit(1);
    }

    // Flatten the param tree into a temporary array owned by this function.
    unsigned n = el_nparams(e->E1);
    elem **params = (elem **)malloc(n * sizeof(elem *));
    assert(params);
    elem **tmp = params;
    el_paramArray(&tmp, e->E1);

#if 0
    printf("cdvector()\n");
    for (int i = 0; i < n; i++)
    {
        printf("[%d]: ", i);
        elem_print(params[i]);
    }
#endif

    if (*pretregs == 0)
    {   /* Evaluate for side effects only
         */
        CodeBuilder cdb;
        for (unsigned i = 0; i < n; i++)
        {
            cdb.append(codelem(params[i], pretregs, FALSE));
            *pretregs = 0;      // in case they got set
        }
        free(params);           // the array is ours; do not leak it on this early exit
        return cdb.finish();
    }

    assert(n >= 2 && n <= 4);

    elem *eop = params[0];
    elem *op1 = params[1];
    elem *op2 = NULL;
    tym_t ty2 = 0;
    if (n >= 3)
    {   op2 = params[2];
        ty2 = tybasic(op2->Ety);
    }

    unsigned op = el_tolong(eop);
#ifdef DEBUG
    assert(!isXMMstore(op));
#endif
    tym_t ty1 = tybasic(op1->Ety);
    unsigned sz1 = _tysize[ty1];
//    assert(sz1 == 16);       // float or double

    regm_t retregs;
    CodeBuilder cdb;
    if (n == 3 && ty2 == TYuchar && op2->Eoper == OPconst)
    {   // Handle: op xmm,imm8

        retregs = *pretregs & XMMREGS;
        if (!retregs)
            retregs = XMMREGS;
        cdb.append(codelem(op1,&retregs,FALSE)); // eval left leaf
        unsigned reg = findreg(retregs);

        /* Map the shift opcode onto its immediate form: a replacement
         * opcode plus the value r that goes into the reg field of the
         * mod r/m byte.
         */
        int r;
        switch (op)
        {
            case PSLLD:  r = 6; op = 0x660F72;  break;
            case PSLLQ:  r = 6; op = 0x660F73;  break;
            case PSLLW:  r = 6; op = 0x660F71;  break;
            case PSRAD:  r = 4; op = 0x660F72;  break;
            case PSRAW:  r = 4; op = 0x660F71;  break;
            case PSRLD:  r = 2; op = 0x660F72;  break;
            case PSRLQ:  r = 2; op = 0x660F73;  break;
            case PSRLW:  r = 2; op = 0x660F71;  break;
            case PSRLDQ: r = 3; op = 0x660F73;  break;
            case PSLLDQ: r = 7; op = 0x660F73;  break;

            default:
                printf("op = x%x\n", op);
                assert(0);
                break;
        }
        cdb.append(getregs(retregs));
        cdb.genc2(op,modregrmx(3,r,reg-XMM0), el_tolong(op2));
    }
    else if (n == 2)
    {   /* Handle: op xmm,mem
         * where xmm is written only, not read
         */
        code cs;

        if ((op1->Eoper == OPind && !op1->Ecount) || op1->Eoper == OPvar)
        {
            cdb.append(getlvalue(&cs, op1, RMload));     // get addressing mode
        }
        else
        {
            // Operand is not directly addressable: evaluate it into an XMM
            // register and use a register-direct mod r/m instead.
            regm_t rretregs = XMMREGS;
            cdb.append(codelem(op1, &rretregs, FALSE));
            unsigned rreg = findreg(rretregs) - XMM0;
            cs.Irm = modregrm(3,0,rreg & 7);
            cs.Iflags = 0;
            cs.Irex = 0;
            if (rreg & 8)
                cs.Irex |= REX_B;
        }

        retregs = *pretregs & XMMREGS;
        if (!retregs)
            retregs = XMMREGS;
        unsigned reg;
        cdb.append(allocreg(&retregs, &reg, e->Ety));
        code_newreg(&cs, reg - XMM0);
        cs.Iop = op;
        cdb.gen(&cs);
    }
    else if (n == 3 || n == 4)
    {   /* Handle:
         *      op xmm,mem        // n = 3
         *      op xmm,mem,imm8   // n = 4
         * Both xmm and mem are operands, evaluate xmm first.
         */

        code cs;

        retregs = *pretregs & XMMREGS;
        if (!retregs)
            retregs = XMMREGS;
        cdb.append(codelem(op1,&retregs,FALSE)); // eval left leaf
        unsigned reg = findreg(retregs);

        if ((op2->Eoper == OPind && !op2->Ecount) || op2->Eoper == OPvar)
        {
            cdb.append(getlvalue(&cs, op2, RMload | retregs));     // get addressing mode
        }
        else
        {
            // Evaluate op2 into a different XMM register than op1's result.
            unsigned rretregs = XMMREGS & ~retregs;
            cdb.append(scodelem(op2, &rretregs, retregs, TRUE));
            unsigned rreg = findreg(rretregs) - XMM0;
            cs.Irm = modregrm(3,0,rreg & 7);
            cs.Iflags = 0;
            cs.Irex = 0;
            if (rreg & 8)
                cs.Irex |= REX_B;
        }

        cdb.append(getregs(retregs));
        if (n == 4)
        {
            // Only these opcodes are accepted with a trailing imm8 operand.
            switch (op)
            {
                case CMPPD:   case CMPSS:   case CMPSD:   case CMPPS:
                case PSHUFD:  case PSHUFHW: case PSHUFLW:
                case BLENDPD: case BLENDPS: case DPPD:    case DPPS:
                case MPSADBW: case PBLENDW:
                case ROUNDPD: case ROUNDPS: case ROUNDSD: case ROUNDSS:
                case SHUFPD:  case SHUFPS:
                    break;
                default:
                    printf("op = x%x\n", op);
                    assert(0);
                    break;
            }
            elem *imm8 = params[3];
            cs.IFL2 = FLconst;
            cs.IEV2.Vsize_t = el_tolong(imm8);
        }
        code_newreg(&cs, reg - XMM0);
        cs.Iop = op;
        cdb.gen(&cs);
    }
    else
        assert(0);
    cdb.append(fixresult(e,retregs,pretregs));
    free(params);
    freenode(e);
    return cdb.finish();
}
Beispiel #11
0
/*****************************************
 * Generate code for e1 op= e2 using XMM registers.
 *
 * e2 is evaluated into an XMM register first. e1 is then either operated
 * on directly in its own register (when it is a register variable and
 * optimization is enabled), or loaded from its addressing mode into a
 * scratch XMM register, combined with e2, and stored back.
 *
 * Params:
 *      e        = the operator elem; e->E1 is the lvalue, e->E2 the rvalue
 *      pretregs = requested result registers, passed on to fixresult()
 * Returns:
 *      the generated code list
 */
code *xmmopass(elem *e,regm_t *pretregs)
{
    elem *lhs = e->E1;
    elem *rhs = e->E2;
    tym_t lhsTy = tybasic(lhs->Ety);
    unsigned lhsSize = tysize[lhsTy];       // not used below; retained from the original

    // Prefer a register for the right operand that is outside the requested
    // result set; fall back to any XMM register if none is left.
    regm_t rhsRegs = XMMREGS & ~*pretregs;
    if (!rhsRegs)
        rhsRegs = XMMREGS;

    code *rhsCode = codelem(rhs,&rhsRegs,FALSE);    // eval right leaf
    unsigned rhsReg = findreg(rhsRegs);

    code ea;                    // addressing mode of the lvalue (memory path only)
    code *lvalCode;
    code *loadCode;

    regm_t retregs;
    unsigned reg;

    // Be careful of cases like (x = x+x+x). We cannot evaluate in
    // x if x is in a register.
    unsigned varreg;
    regm_t varregm;
    bool inRegister;
    if ((config.flags4 & CFG4optimized) &&
        isregvar(lhs,&varregm,&varreg) &&       // lvalue is a register variable
        doinreg(lhs->EV.sp.Vsym,rhs))           // and we can compute directly into it
    {
        inRegister = TRUE;
        retregs = varregm;
        reg = varreg;                           // evaluate directly in target register
        lvalCode = NULL;
        loadCode = getregs(retregs);            // destroy these regs
    }
    else
    {
        inRegister = FALSE;
        lvalCode = getlvalue(&ea,lhs,rhsRegs);  // get EA
        retregs = *pretregs & XMMREGS & ~rhsRegs;
        if (!retregs)
            retregs = XMMREGS & ~rhsRegs;
        loadCode = allocreg(&retregs,&reg,lhsTy);
        ea.Iop = xmmload(lhsTy);                // load lvalue into reg
        code_newreg(&ea,reg - XMM0);
        loadCode = gen(loadCode,&ea);
    }

    // reg = reg op rhsReg
    unsigned op = xmmoperator(lhs->Ety, e->Eoper);
    code *opCode = gen2(CNIL,op,modregxrmx(3,reg-XMM0,rhsReg-XMM0));

    if (!inRegister)
    {
        // Write the result back through the same addressing mode, using the
        // store form of the move.
        ea.Iop = xmmstore(lhsTy);
        gen(opCode,&ea);
    }

    if (inRegister || lhs->Ecount)      // rvalue can't be a CSE, or lvalue is a CSE
    {
        // necessary if both lvalue and rvalue are CSEs (since a reg
        // can hold only one e at a time)
        lvalCode = cat(lvalCode,getregs_imm(retregs));
        cssave(lhs,retregs,EOP(lhs));   // if lvalue is a CSE
    }

    opCode = cat(opCode,fixresult(e,retregs,pretregs));
    freenode(lhs);
    return cat4(rhsCode,lvalCode,loadCode,opCode);
}