Пример #1
0
Файл: gen.c Проект: Godzil/osXdk
/*
 * Emit the code for one function definition.
 *
 *   f       symbol of the function being defined
 *   caller  parameters as seen by the caller
 *   callee  parameters as seen inside the function body
 *   ncalls  not used by this routine
 *
 * caller[] and callee[] are walked in parallel until either of them holds a
 * null entry.
 */
void function(sSymbol_t f, sSymbol_t caller[], sSymbol_t callee[], int ncalls) 
{
	int nparams;
	int t;

	/* Start from a clean per-function state. */
	nbregs = 0;
	tmpsize = 0;
	offset = 0;
	localsize = 0;
	funame = f->x.name;

	/* Give temporaries 8..31 the placeholder name. */
	for (t = 8; t < 32; t++)
		temp[t]->x.name = "******";

	/* Lay out the incoming arguments relative to (ap). */
	nparams = 0;
	while (caller[nparams] && callee[nparams])
	{
		sSymbol_t in = caller[nparams];
		sSymbol_t out = callee[nparams];
		int needs_slot;

		in->x.name = stringf("(ap),%d", offset);
		in->x.adrmode = 'A';
		offset += in->type->size;

		/* allocreg() is only attempted for REGISTER parameters when optimizing. */
		needs_slot = !(optimizelevel > 1 && out->sclass == REGISTER && allocreg(out));
		if (needs_slot)
		{
			/* The callee view shares the caller's argument slot. */
			out->x.adrmode = in->x.adrmode;
			out->x.name = in->x.name;
			out->sclass = AUTO;
		}
		nparams++;
	}

	localsize = 0;
	busy = 0;
	offset = 6;
	gencode(caller, callee);

	omit_frame = (nparams == 0 && localsize == 6);

	print("%s\n", funame);
	if (!(optimizelevel > 1 && omit_frame && nbregs == 0))
		print("\tENTER(%d,%d)\n", nbregs, localsize);
	if (isstruct(freturn(f->type)))
		print("\tMOVW_DI(op1,(fp),6)\n");
	emitcode();
}
Пример #2
0
/*
 * Generate code to negate the XMM operand e->E1 by XOR-ing it with a
 * register holding only the sign bit:
 *
 *    MOV dst,e1
 *    MOV scratch,signbit
 *    XOR dst,scratch
 */
code *xmmneg(elem *e,regm_t *pretregs)
{
    assert(*pretregs);

    const tym_t operandTy = tybasic(e->E1->Ety);
    const int size = _tysize[operandTy];

    // Prefer an XMM register the caller asked for; otherwise take any of them.
    regm_t dstMask = *pretregs & XMMREGS;
    if (dstMask == 0)
        dstMask = XMMREGS;

    CodeBuilder cdb;

    // Evaluate the operand into the destination register.
    cdb.append(codelem(e->E1,&dstMask,FALSE));
    cdb.append(getregs(dstMask));
    const unsigned dst = findreg(dstMask);

    // Allocate a second XMM register, distinct from dst, for the sign mask.
    regm_t scratchMask = XMMREGS & ~dstMask;
    unsigned scratch;
    cdb.append(allocreg(&scratchMask,&scratch,operandTy));

    const targ_size_t signbit = (size == 8) ? 0x8000000000000000LL : 0x80000000;
    cdb.append(movxmmconst(scratch, size, signbit, 0));

    cdb.append(getregs(dstMask));
    const unsigned xorOp = (size == 8) ? XORPD : XORPS;
    cdb.gen2(xorOp,modregxrmx(3,dst-XMM0,scratch-XMM0));   // XORPD/S dst,scratch

    cdb.append(fixresult(e,dstMask,pretregs));
    return cdb.finish();
}
Пример #3
0
Файл: cgxmm.c Проект: dsagal/dmd
/*
 * Generate code for a binary operator whose operands live in XMM registers.
 *
 * Both leaves are evaluated into distinct XMM registers. Three outcomes:
 *  - add/subtract mixing a real and an imaginary operand: no arithmetic is
 *    emitted; for subtraction the sign bit of the right operand is flipped,
 *    and the result is the pair of registers;
 *  - relational operator: the instruction is emitted with the operands
 *    swapped and no result fix-up is done;
 *  - otherwise: "op left,right", followed by fixresult() if needed.
 */
code *orthxmm(elem *e, regm_t *pretregs)
{
    elem *lhs = e->E1;
    elem *rhs = e->E2;

    regm_t retregs = *pretregs & XMMREGS;
    if (retregs == 0)
        retregs = XMMREGS;

    code *cleft = codelem(lhs,&retregs,FALSE);              // eval left leaf
    const unsigned reg = findreg(retregs);

    regm_t rretregs = XMMREGS & ~retregs;
    code *cright = scodelem(rhs, &rretregs, retregs, TRUE); // eval right leaf

    const unsigned op = xmmoperator(lhs->Ety, e->Eoper);
    const unsigned rreg = findreg(rretregs);

    // float + ifloat is not actually addition
    if ((e->Eoper == OPadd || e->Eoper == OPmin) &&
        ((tyreal(lhs->Ety) && tyimaginary(rhs->Ety)) ||
         (tyreal(rhs->Ety) && tyimaginary(lhs->Ety))))
    {
        retregs |= rretregs;
        code *c = cat(cleft, cright);

        if (e->Eoper == OPmin)
        {
            // Negate the right operand by XOR-ing it with a sign-bit mask.
            unsigned signMask = XMMREGS & ~retregs;
            unsigned signReg;
            unsigned size = tysize[tybasic(lhs->Ety)];

            c = cat(c,allocreg(&signMask,&signReg,rhs->Ety));

            targ_size_t signbit = (size == 8) ? 0x8000000000000000LL : 0x80000000;
            c = cat(c, movxmmconst(signReg, size, signbit, 0));
            c = cat(c, getregs(signMask));

            unsigned xorOp = (size == 8) ? XORPD : XORPS;
            c = cat(c, gen2(CNIL,xorOp,modregxrmx(3,rreg-XMM0,signReg-XMM0))); // XORPD/S rreg,signReg
        }

        if (retregs != *pretregs)
            c = cat(c, fixresult(e,retregs,pretregs));
        return c;
    }

    /* We should take advantage of mem addressing modes for OP XMM,MEM
     * but we do not at the moment.
     */
    if (OTrel(e->Eoper))
    {
        // Comparison: operands are swapped in the instruction encoding.
        retregs = mPSW;
        code *ccmp = gen2(CNIL,op,modregxrmx(3,rreg-XMM0,reg-XMM0));
        return cat4(cleft,cright,CNIL,ccmp);
    }

    code *cdestroy = getregs(retregs);
    code *cop = gen2(CNIL,op,modregxrmx(3,reg-XMM0,rreg-XMM0));
    if (retregs != *pretregs)
        cop = cat(cop,fixresult(e,retregs,pretregs));

    return cat4(cleft,cright,cdestroy,cop);
}
Пример #4
0
Файл: cgxmm.c Проект: dsagal/dmd
/*
 * Generate code to negate the XMM operand e->E1 by XOR-ing it with a
 * register holding only the sign bit:
 *
 *    MOV dst,e1
 *    MOV tmp,signbit
 *    XOR dst,tmp
 */
code *xmmneg(elem *e,regm_t *pretregs)
{
    assert(*pretregs);

    tym_t ty = tybasic(e->E1->Ety);
    int nbytes = tysize[ty];

    // Use an XMM register the caller asked for when there is one.
    regm_t dstregs = *pretregs & XMMREGS;
    if (dstregs == 0)
        dstregs = XMMREGS;

    // Build a single code list, appending each piece in emission order.
    code *c = codelem(e->E1,&dstregs,FALSE);
    c = cat(c,getregs(dstregs));
    unsigned dst = findreg(dstregs);

    // A second XMM register, distinct from dst, holds the sign mask.
    regm_t tmpregs = XMMREGS & ~dstregs;
    unsigned tmp;
    c = cat(c,allocreg(&tmpregs,&tmp,ty));

    targ_size_t signbit = (nbytes == 8) ? 0x8000000000000000LL : 0x80000000;
    c = cat(c,movxmmconst(tmp, nbytes, signbit, 0));

    c = cat(c,getregs(dstregs));
    unsigned xorop = (nbytes == 8) ? XORPD : XORPS;
    c = cat(c,gen2(CNIL,xorop,modregxrmx(3,dst-XMM0,tmp-XMM0)));   // XORPD/S dst,tmp
    c = cat(c,fixresult(e,dstregs,pretregs));
    return c;
}
Пример #5
0
Файл: gen.c Проект: Godzil/osXdk
/*
 * Assign storage to the local symbol p.
 *
 * When optimizing, a REGISTER symbol is first offered to allocreg(); if that
 * succeeds nothing else is done. Otherwise, unless p already carries a name
 * that does not start with '*', p gets a fresh "(fp),offset" slot and
 * offset advances by the size of p's type.
 */
void local(sSymbol_t p) 
{
	int try_register = (optimizelevel > 1) && (p->sclass == REGISTER);

	if (try_register && allocreg(p))
		return;

	/* No name yet, or a name starting with '*': allocate a frame slot. */
	if (!p->x.name || p->x.name[0] == '*')
	{
		p->x.name = stringf("(fp),%d", offset);
		p->x.adrmode = 'A';
		p->sclass = AUTO;
		offset += p->type->size;
	}
}
Пример #6
0
/*
 * Generate code to load the constant `value` (sz bytes, 4 or 8) into the
 * XMM register xreg by way of a general purpose register:
 *
 *    MOV reg,value
 *    MOV xreg,reg
 *
 * Not so efficient. We should at least do a PXOR for 0.
 * The `flags` parameter is not used.
 */
code *movxmmconst(unsigned xreg, unsigned sz, targ_size_t value, regm_t flags)
{
    assert(mask[xreg] & XMMREGS);
    assert(sz == 4 || sz == 8);

    CodeBuilder cdb;

    if (!(I32 && sz == 8))
    {
        // The constant fits in one general purpose register: MOVD/MOVQ it over.
        unsigned gpr;
        const int regwidth = (sz == 8) ? 64 : 0;
        cdb.append(regwithvalue(CNIL,ALLREGS,value,&gpr,regwidth));
        cdb.gen2(LODD,modregxrmx(3,xreg-XMM0,gpr));     // MOVD xreg,gpr
        if (sz == 8)
            code_orrex(cdb.last(), REX_W);
        checkSetVex(cdb.last(), TYulong);
        return cdb.finish();
    }

    // 32-bit target with an 8 byte constant: store it to floatreg in two
    // halves through a scratch register, then load floatreg into xreg.
    unsigned scratch;
    regm_t scratchMask = ALLREGS;
    cdb.append(allocreg(&scratchMask,&scratch,TYint));

    union { targ_size_t s; targ_long l[2]; } halves;
    halves.l[1] = 0;
    halves.s = value;

    cdb.append(movregconst(CNIL,scratch,halves.l[0],0));
    cdb.genfltreg(STO,scratch,0);                       // MOV floatreg,scratch
    cdb.append(movregconst(CNIL,scratch,halves.l[1],0));
    cdb.genfltreg(STO,scratch,4);                       // MOV floatreg+4,scratch

    const unsigned loadOp = xmmload(TYdouble, true);
    cdb.genxmmreg(loadOp,xreg,0,TYdouble);              // MOVSD XMMreg,floatreg

    return cdb.finish();
}
Пример #7
0
Файл: cgxmm.c Проект: dsagal/dmd
/*
 * Generate code to load the constant `value` (sz bytes, 4 or 8) into the
 * XMM register xreg by way of a general purpose register:
 *
 *    MOV reg,value
 *    MOV xreg,reg
 *
 * Not so efficient. We should at least do a PXOR for 0.
 * The `flags` parameter is not used.
 */
code *movxmmconst(unsigned xreg, unsigned sz, targ_size_t value, regm_t flags)
{
    assert(mask[xreg] & XMMREGS);
    assert(sz == 4 || sz == 8);

    if (!(I32 && sz == 8))
    {
        // The constant fits in one general purpose register: MOVD/MOVQ it over.
        unsigned gpr;
        const int regwidth = (sz == 8) ? 64 : 0;
        code *cmov = regwithvalue(CNIL,ALLREGS,value,&gpr,regwidth);
        cmov = gen2(cmov,LODD,modregxrmx(3,xreg-XMM0,gpr));     // MOVD xreg,gpr
        if (sz == 8)
            code_orrex(cmov, REX_W);
        return cmov;
    }

    // 32-bit target with an 8 byte constant: store it to floatreg in two
    // halves through a scratch register, then load floatreg into xreg.
    unsigned scratch;
    regm_t scratchMask = ALLREGS;
    code *c = allocreg(&scratchMask,&scratch,TYint);

    union { targ_size_t s; targ_long l[2]; } halves;
    halves.l[1] = 0;
    halves.s = value;

    c = movregconst(c,scratch,halves.l[0],0);
    c = genfltreg(c,0x89,scratch,0);                // MOV floatreg,scratch
    c = movregconst(c,scratch,halves.l[1],0);
    c = genfltreg(c,0x89,scratch,4);                // MOV floatreg+4,scratch

    const unsigned loadOp = xmmload(TYdouble);
    c = genfltreg(c,loadOp,xreg - XMM0,0);          // MOVSD XMMreg,floatreg
    return c;
}
Пример #8
0
/*
 * Loads an ELF 32 executable.
 *
 * Reads the ELF file header and the program header table from the first
 * block of the file, then allocates, attaches and loads one memory region
 * for every PT_LOAD segment.
 *
 * Returns the program entry point on success. On failure, returns 0 and
 * stores a negated error code in curr_proc->errno.
 */
PRIVATE addr_t load_elf32(struct inode *inode)
{
	int i;                  /* Loop index.                      */
	int err;                /* Error code for the failure path. */
	addr_t addr;            /* Region address.                  */
	addr_t entry;           /* Program entry point.             */
	struct elf32_fhdr *elf; /* ELF file header.                 */
	struct elf32_phdr *seg; /* ELF Program header.              */
	block_t blk;            /* Working block number.            */
	buffer_t header;        /* File headers block buffer.       */
	struct region *reg;     /* Working memory region.           */
	struct pregion *preg;   /* Working process memory region.   */
	unsigned long phoff;    /* Program header table offset.     */
	unsigned long phnum;    /* Number of program headers.       */
	unsigned long room;     /* Bytes left in block after phoff. */
	
	blk = block_map(inode, 0, 0);
	
	/* Empty file. */
	if (blk == BLOCK_NULL)
	{
		curr_proc->errno = -ENOEXEC;
		return (0);
	}
	
	/* Read ELF file header. */
	header = bread(inode->dev, blk);
	elf = buffer_data(header);
	
	/* Bad ELF file. */
	if (!is_elf(elf))
	{
		err = -ENOEXEC;
		goto error;
	}
	
	/*
	 * Bad ELF file: the program header table must lie entirely inside
	 * the block that was just read. The check is done with divisions so
	 * that a crafted e_phoff/e_phnum/e_phentsize cannot wrap the sum
	 * around and slip past it. The table is validated both with the
	 * entry size the file declares and with the entry size that is
	 * actually used to index it below (sizeof(struct elf32_phdr)).
	 */
	phoff = elf->e_phoff;
	phnum = elf->e_phnum;
	if (phoff > (unsigned long)BLOCK_SIZE)
	{
		err = -ENOEXEC;
		goto error;
	}
	room = (unsigned long)BLOCK_SIZE - phoff;
	if (phnum > 0)
	{
		if (((unsigned long)elf->e_phentsize > room/phnum) ||
			((unsigned long)sizeof(struct elf32_phdr) > room/phnum))
		{
			err = -ENOEXEC;
			goto error;
		}
	}
	
	seg = (struct elf32_phdr *)((char *)buffer_data(header) + elf->e_phoff);
	
	/* Load segments. */
	for (i = 0; i < elf->e_phnum; i++)
	{
		/* Not loadable. */
		if (seg[i].p_type != PT_LOAD)
			continue;
		
		/* Broken executable. */
		if (seg[i].p_filesz > seg[i].p_memsz)
		{
			kprintf("broken executable");
			
			err = -ENOEXEC;
			goto error;
		}
		
		addr = ALIGN(seg[i].p_vaddr, seg[i].p_align);
		
		/* Text section: flags are exactly read + execute. */
		if (seg[i].p_flags == (PF_R | PF_X))
		{
			preg = TEXT(curr_proc);
			reg = allocreg(S_IRUSR | S_IXUSR, seg[i].p_memsz, 0);
		}
		
		/* Data section: any other combination of flags. */
		else
		{
			preg = DATA(curr_proc);
			reg = allocreg(S_IRUSR | S_IWUSR, seg[i].p_memsz, 0);
		}
		
		/* Failed to allocate region. */
		if (reg == NULL)
		{
			err = -ENOMEM;
			goto error;
		}
		
		/* Attach memory region. */
		if (attachreg(curr_proc, preg, addr, reg))
		{
			freereg(reg);
			err = -ENOMEM;
			goto error;
		}
		
		/*
		 * NOTE(review): nothing returned by loadreg() is checked here;
		 * confirm whether it can fail and needs handling.
		 */
		loadreg(inode, reg, seg[i].p_offset, seg[i].p_filesz);
		
		unlockreg(reg);	
	}
	
	entry = elf->e_entry;
	
	brelse(header);
	
	return (entry);

error:
	/* Common failure exit: release the header block and report err. */
	brelse(header);
	curr_proc->errno = err;
	return (0);
}
Пример #9
0
/*
 * Executes a program.
 *
 * The argument/environment image is built in a kernel buffer first, because
 * the calling process' memory regions are detached before the new
 * executable is loaded. Once the regions have been detached there is no way
 * back: any later failure kills the process.
 *
 * Returns an error code if the request is rejected before the old image is
 * torn down. On success control is transferred to user mode.
 */
PUBLIC int sys_execve(const char *filename, const char **argv, const char **envp)
{
	int n;                /* Loop index.                         */
	struct inode *inode;  /* File inode.                         */
	struct region *reg;   /* Process region.                     */
	addr_t entry;         /* Program entry point.                */
	addr_t sp;            /* User stack pointer.                 */
	char *name;           /* File name.                          */
	char stack[ARG_MAX];  /* Image copied to the new user stack. */

	/* Get file name. */
	name = getname(filename);
	if (name == NULL)
		return (curr_proc->errno);

	/* Build arguments before freeing user memory. */
	kmemset(stack, 0, ARG_MAX);
	sp = buildargs(stack, ARG_MAX, argv, envp);
	if (sp == 0)
	{
		putname(name);
		return (curr_proc->errno);
	}

	/* Get file's inode. */
	inode = inode_name(name);
	if (inode == NULL)
	{
		putname(name);
		return (curr_proc->errno);
	}

	/* Must be a regular file that the process is allowed to execute. */
	if (!S_ISREG(inode->mode) ||
		!permission(inode->mode, inode->uid, inode->gid, curr_proc, MAY_EXEC, 0))
	{
		putname(name);
		inode_put(inode);
		return (-EACCES);
	}

	/* Close the file descriptors flagged in the process' close mask. */
	for (n = 0; n < OPEN_MAX; n++)
	{
		if (curr_proc->close & (1 << n))
			do_close(n);
	}

	/* Detach process memory regions. */
	for (n = 0; n < NR_PREGIONS; n++)
		detachreg(curr_proc, &curr_proc->pregs[n]);
	
	/* Reset signal handlers: ignored signals stay ignored, the rest go default. */
	curr_proc->restorer = NULL;
	for (n = 0; n < NR_SIGNALS; n++)
	{
		if (curr_proc->handlers[n] == SIG_DFL)
			continue;
		if (curr_proc->handlers[n] == SIG_IGN)
			continue;
		
		curr_proc->handlers[n] = SIG_DFL;
	}
	
	/* Load executable. */
	entry = load_elf32(inode);
	if (entry == 0)
		goto die0;

	/* Attach stack region. */
	reg = allocreg(S_IRUSR | S_IWUSR, PAGE_SIZE, REGION_DOWNWARDS);
	if (reg == NULL)
		goto die0;
	if (attachreg(curr_proc, STACK(curr_proc), USTACK_ADDR - 1, reg))
		goto die1;
	unlockreg(reg);

	/* Attach heap region. */
	reg = allocreg(S_IRUSR | S_IWUSR, PAGE_SIZE, REGION_UPWARDS);
	if (reg == NULL)
		goto die0;
	if (attachreg(curr_proc, HEAP(curr_proc), UHEAP_ADDR, reg))
		goto die1;
	unlockreg(reg);
	
	inode_put(inode);
	putname(name);

	/* Place the prepared argument image at the top of the user stack. */
	kmemcpy((void *)(USTACK_ADDR - ARG_MAX), stack, ARG_MAX);
	
	user_mode(entry, sp);
	
	/* Will not return. */
	return (0);

die1:
	/* A region was allocated but could not be attached. */
	unlockreg(reg);
	freereg(reg);
die0:
	/* The old image is already gone: release resources and kill the process. */
	inode_put(inode);
	putname(name);
	die(((SIGSEGV & 0xff) << 16) | (1 << 9));
	return (-1);
}
Пример #10
0
/*
 * Generate code for a binary operator whose operands live in XMM registers.
 *
 * Three outcomes:
 *  - add/subtract mixing a real and an imaginary operand: the operands are
 *    evaluated into two registers picked from the requested set and no
 *    arithmetic is emitted; for subtraction the sign bit of the right
 *    operand is flipped;
 *  - relational operator: the instruction is emitted with the operands
 *    swapped and no result fix-up is done;
 *  - otherwise: "op left,right", followed by fixresult() if needed.
 */
code *orthxmm(elem *e, regm_t *pretregs)
{
    elem *lhs = e->E1;
    elem *rhs = e->E2;

    regm_t retregs = *pretregs & XMMREGS;
    if (retregs == 0)
        retregs = XMMREGS;

    CodeBuilder cdb;

    // float + ifloat is not actually addition
    if ((e->Eoper == OPadd || e->Eoper == OPmin) &&
        ((tyreal(lhs->Ety) && tyimaginary(rhs->Ety)) ||
         (tyreal(rhs->Ety) && tyimaginary(lhs->Ety))))
    {
        // Pick two registers out of the candidate set. The real operand gets
        // the first pick and the imaginary operand the second one.
        const unsigned first = findreg(retregs);
        const unsigned second = findreg(retregs & ~mask[first]);

        unsigned reg;
        unsigned rreg;
        if (tyreal(lhs->Ety))
        {
            reg = first;
            rreg = second;
        }
        else
        {
            reg = second;
            rreg = first;
        }
        retregs = mask[reg];
        regm_t rretregs = mask[rreg];
        assert(retregs && rretregs);

        cdb.append(codelem(lhs,&retregs,FALSE));              // eval left leaf
        cdb.append(scodelem(rhs, &rretregs, retregs, TRUE));  // eval right leaf

        retregs |= rretregs;
        if (e->Eoper == OPmin)
        {
            // Negate the right operand by XOR-ing it with a sign-bit mask.
            unsigned signMask = XMMREGS & ~retregs;
            unsigned signReg;
            unsigned size = tysize(lhs->Ety);

            cdb.append(allocreg(&signMask,&signReg,rhs->Ety));

            targ_size_t signbit = (size == 8) ? 0x8000000000000000LL : 0x80000000;
            cdb.append(movxmmconst(signReg, size, signbit, 0));
            cdb.append(getregs(signMask));

            unsigned xorOp = (size == 8) ? XORPD : XORPS;
            cdb.gen2(xorOp,modregxrmx(3,rreg-XMM0,signReg-XMM0));   // XORPD/S rreg,signReg
        }

        if (retregs != *pretregs)
            cdb.append(fixresult(e,retregs,pretregs));
        return cdb.finish();
    }

    cdb.append(codelem(lhs,&retregs,FALSE));                  // eval left leaf
    const unsigned reg = findreg(retregs);

    regm_t rretregs = XMMREGS & ~retregs;
    cdb.append(scodelem(rhs, &rretregs, retregs, TRUE));      // eval right leaf
    const unsigned rreg = findreg(rretregs);

    const unsigned op = xmmoperator(lhs->Ety, e->Eoper);

    /* We should take advantage of mem addressing modes for OP XMM,MEM
     * but we do not at the moment.
     */
    if (OTrel(e->Eoper))
    {
        // Comparison: operands are swapped in the instruction encoding.
        retregs = mPSW;
        cdb.gen2(op,modregxrmx(3,rreg-XMM0,reg-XMM0));
        checkSetVex(cdb.last(), lhs->Ety);
        return cdb.finish();
    }

    cdb.append(getregs(retregs));
    cdb.gen2(op,modregxrmx(3,reg-XMM0,rreg-XMM0));
    checkSetVex(cdb.last(), lhs->Ety);

    if (retregs != *pretregs)
        cdb.append(fixresult(e,retregs,pretregs));
    return cdb.finish();
}
Пример #11
0
/*
 * Generate code for a "post" operation on an XMM lvalue: the operator
 * selected by xmmoperator() for e->Eoper is applied to e->E1 with operand
 * e->E2, while the value handed back to the caller is a copy of e->E1 taken
 * before the operator is applied.
 *
 * Params:
 *      e        = expression; E1 is the lvalue, E2 the right operand
 *      pretregs = registers the caller wants the result in
 * Returns:
 *      the generated code list
 */
code *xmmpost(elem *e,regm_t *pretregs)
{
    elem *e1 = e->E1;
    elem *e2 = e->E2;
    tym_t ty1 = tybasic(e1->Ety);

    CodeBuilder cdb;

    regm_t retregs;
    unsigned reg;
    bool regvar = FALSE;                // TRUE when e1 is operated on directly in its register
    if (config.flags4 & CFG4optimized)
    {
        // Be careful of cases like (x = x+x+x). We cannot evaluate in
        // x if x is in a register.
        unsigned varreg;
        regm_t varregm;
        if (isregvar(e1,&varregm,&varreg) &&    // if lvalue is register variable
            doinreg(e1->EV.sp.Vsym,e2)          // and we can compute directly into it
           )
        {
            regvar = TRUE;
            retregs = varregm;
            reg = varreg;                       // evaluate directly in target register
            cdb.append(getregs(retregs));       // destroy these regs
        }
    }

    // cs holds the addressing mode of e1; it is used for the load here and
    // reused for the store further down (only when !regvar).
    code cs;
    if (!regvar)
    {
        code *c = getlvalue(&cs,e1,0);          // get EA
        cdb.append(c);
        // Work register: prefer one outside the caller's requested set
        retregs = XMMREGS & ~*pretregs;
        if (!retregs)
            retregs = XMMREGS;
        c = allocreg(&retregs,&reg,ty1);
        cdb.append(c);
        cs.Iop = xmmload(ty1, true);            // MOVSD xmm,xmm_m64
        code_newreg(&cs,reg - XMM0);
        cdb.gen(&cs);
        checkSetVex(cdb.last(), ty1);
    }

    // Result register
    regm_t resultregs = XMMREGS & *pretregs & ~retregs;
    if (!resultregs)
        resultregs = XMMREGS & ~retregs;
    unsigned resultreg;
    code *c = allocreg(&resultregs, &resultreg, ty1);
    cdb.append(c);

    // Copy the current value into the result register before it is modified
    cdb.gen2(xmmload(ty1,true),modregxrmx(3,resultreg-XMM0,reg-XMM0));   // MOVSS/D resultreg,reg
    checkSetVex(cdb.last(), ty1);

    // Right operand goes into a register distinct from both reg and resultreg
    regm_t rretregs = XMMREGS & ~(*pretregs | retregs | resultregs);
    if (!rretregs)
        rretregs = XMMREGS & ~(retregs | resultregs);
    c = codelem(e2,&rretregs,FALSE); // eval right leaf
    cdb.append(c);
    unsigned rreg = findreg(rretregs);

    unsigned op = xmmoperator(e1->Ety, e->Eoper);
    cdb.gen2(op,modregxrmx(3,reg-XMM0,rreg-XMM0));  // ADD reg,rreg
    checkSetVex(cdb.last(), e1->Ety);

    if (!regvar)
    {
        // Write the updated value back using the addressing mode saved in cs
        cs.Iop = xmmstore(ty1,true);      // reverse operand order of MOVS[SD]
        cdb.gen(&cs);
        checkSetVex(cdb.last(), ty1);
    }

    if (e1->Ecount ||                     // if lvalue is a CSE or
        regvar)                           // rvalue can't be a CSE
    {
        cdb.append(getregs_imm(retregs)); // necessary if both lvalue and
                                        //  rvalue are CSEs (since a reg
                                        //  can hold only one e at a time)
        cssave(e1,retregs,EOP(e1));     // if lvalue is a CSE
    }

    // The caller receives the copy made before the operator was applied
    cdb.append(fixresult(e,resultregs,pretregs));
    freenode(e1);
    return cdb.finish();
}
Пример #12
0
/*
 * Generate code for a conversion operator using the CVT* instructions.
 *
 * The switch selects, from e->Eoper (and in some cases from the operator of
 * the operand e->E1), the instruction `op`, the register class `regs` the
 * source is evaluated into, the result type `ty`, and whether a REX.W prefix
 * and/or zero extension of the source is needed.
 *
 * Params:
 *      e        = conversion expression
 *      pretregs = registers the caller wants the result in
 * Returns:
 *      the generated code list
 */
code *xmmcnvt(elem *e,regm_t *pretregs)
{
    unsigned op=0, regs;
    tym_t ty;
    unsigned char rex = 0;
    bool zx = false; // zero extend uint

    /* There are no ops for integer <-> float/real conversions
     * but there are instructions for them. In order to use these
     * try to fuse chained conversions. Be careful not to lose
     * precision for real to long.
     */
    elem *e1 = e->E1;
    switch (e->Eoper)
    {
    case OPd_f:
        // If the operand is itself an int -> double conversion, skip the
        // intermediate double and convert the integer directly to float.
        if (e1->Eoper == OPs32_d)
            ;
        else if (I64 && e1->Eoper == OPs64_d)
            rex = REX_W;
        else if (I64 && e1->Eoper == OPu32_d)
        {   rex = REX_W;
            zx = true;
        }
        else
        {   regs = XMMREGS;
            op = CVTSD2SS;
            ty = TYfloat;
            break;
        }
        // directly use si2ss
        regs = ALLREGS;
        e1 = e1->E1;
        op = CVTSI2SS;
        ty = TYfloat;
        break;

    case OPs32_d:              goto Litod;
    case OPs64_d: rex = REX_W; goto Litod;
    case OPu32_d: rex = REX_W; zx = true; goto Litod;
    Litod:
        // integer -> double: source in a general purpose register
        regs = ALLREGS;
        op = CVTSI2SD;
        ty = TYdouble;
        break;

    case OPd_s32: ty = TYint;  goto Ldtoi;
    case OPd_u32: ty = TYlong; if (I64) rex = REX_W; goto Ldtoi;
    case OPd_s64: ty = TYlong; rex = REX_W; goto Ldtoi;
    Ldtoi:
        // double -> integer: source in an XMM register
        regs = XMMREGS;
        switch (e1->Eoper)
        {
        case OPf_d:
            // operand is a float -> double conversion: convert the float directly
            e1 = e1->E1;
            op = CVTTSS2SI;
            break;
        case OPld_d:
            if (e->Eoper == OPd_s64)
                return cnvt87(e,pretregs); // precision
            /* FALL-THROUGH */
        default:
            op = CVTTSD2SI;
            break;
        }
        break;

    case OPf_d:
        regs = XMMREGS;
        op = CVTSS2SD;
        ty = TYdouble;
        break;
    }
    // Any operator not handled by the switch leaves op == 0
    assert(op);

    CodeBuilder cdb;
    cdb.append(codelem(e1, &regs, FALSE));
    unsigned reg = findreg(regs);
    if (reg >= XMM0)
        reg -= XMM0;                    // make the register number relative to XMM0
    else if (zx)
    {   assert(I64);
        cdb.append(getregs(regs));
        cdb.append(genregs(CNIL,STO,reg,reg)); // MOV reg,reg to zero upper 32-bit
        code_orflag(cdb.last(),CFvolatile);
    }

    unsigned retregs = *pretregs;
    if (tyxmmreg(ty)) // target is XMM
    {   if (!(*pretregs & XMMREGS))
            retregs = XMMREGS;
    }
    else              // source is XMM
    {   assert(regs & XMMREGS);
        if (!(retregs & ALLREGS))
            retregs = ALLREGS;
    }

    unsigned rreg;
    cdb.append(allocreg(&retregs,&rreg,ty));
    if (rreg >= XMM0)
        rreg -= XMM0;                   // make the register number relative to XMM0

    cdb.gen2(op, modregxrmx(3,rreg,reg));
    assert(I64 || !rex);                // rex is only ever set for 64 bit code
    if (rex)
        code_orrex(cdb.last(), rex);

    if (*pretregs != retregs)
        cdb.append(fixresult(e,retregs,pretregs));
    return cdb.finish();
}
Пример #13
0
/***************
 * Generate code for OPvecfill (broadcast).
 * OPvecfill takes the single value in e1 and
 * fills the vector type with it.
 *
 * The instruction sequence is selected by the basic type of e (the vector
 * type), by config.avx, and by whether the vector is 32 bytes in size.
 *
 * Params:
 *      e        = OPvecfill expression; e->E1 is the value to broadcast
 *      pretregs = registers the caller wants the result in
 * Returns:
 *      the generated code list
 */
code *cdvecfill(elem *e, regm_t *pretregs)
{
    //printf("cdvecfill(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));

    // Use an XMM register acceptable to the caller, else any XMM register
    regm_t retregs = *pretregs & XMMREGS;
    if (!retregs)
        retregs = XMMREGS;

    CodeBuilder cdb;
    code *c;
    code cs;            // instruction template, filled in and emitted via cdb.gen(&cs)

    elem *e1 = e->E1;
    // Disabled code: it refers to `cr` and `op2`, which are not declared in
    // this function.
#if 0
    if ((e1->Eoper == OPind && !e1->Ecount) || e1->Eoper == OPvar)
    {
        cr = getlvalue(&cs, e1, RMload | retregs);     // get addressing mode
    }
    else
    {
        unsigned rretregs = XMMREGS & ~retregs;
        cr = scodelem(op2, &rretregs, retregs, TRUE);
        unsigned rreg = findreg(rretregs) - XMM0;
        cs.Irm = modregrm(3,0,rreg & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (rreg & 8)
            cs.Irex |= REX_B;
    }
#endif

    unsigned reg;
    unsigned rreg;
    unsigned varreg;
    regm_t varregm;
    tym_t ty = tybasic(e->Ety);
    switch (ty)
    {
        case TYfloat4:
        case TYfloat8:
            // NOTE(review): && binds tighter than ||, so this reads as
            // (config.avx && (ind || (var && !regvar))) || (32 byte && !regvar)
            if (config.avx &&
                ((e1->Eoper == OPind && !e1->Ecount) || e1->Eoper == OPvar && !isregvar(e1,&varregm,&varreg)) ||
                tysize(ty) == 32 && !isregvar(e1,&varregm,&varreg)
               )
            {
              // Also entered via goto from the TYlong*/TYulong* cases below
              Lint:
                if (e1->Eoper == OPvar)
                    e1->EV.sp.Vsym->Sflags &= ~GTregcand;

                // VBROADCASTSS XMM,MEM
                cdb.append(getlvalue(&cs, e1, 0));         // get addressing mode
                assert((cs.Irm & 0xC0) != 0xC0);           // AVX1 doesn't have register source operands
                cdb.append(allocreg(&retregs,&reg,ty));
                cs.Iop = VBROADCASTSS;
                cs.Irex &= ~REX_W;
                code_newreg(&cs,reg - XMM0);
                checkSetVex(&cs,ty);
                cdb.gen(&cs);
            }
            else
            {
                // SHUFPS XMM0,XMM0,0    0F C6 /r ib
                c = codelem(e1,&retregs,FALSE); // eval left leaf
                cdb.append(c);
                reg = findreg(retregs) - XMM0;
                cdb.append(getregs(retregs));
                cs.Iop = SHUFPS;
                cs.Irm = modregxrmx(3,reg,reg);
                cs.Iflags = 0;
                cs.IFL2 = FLconst;
                cs.IEV2.Vsize_t = 0;
                if (config.avx >= 2 || tysize(ty) == 32)
                {
                    // VBROADCASTSS XMM,XMM
                    cs.Iop = VBROADCASTSS;
                    checkSetVex(&cs, ty);
                }
                cdb.gen(&cs);
            }
            break;

        case TYdouble2:
        case TYdouble4:
            // Same condition as for the float vectors above
            if (config.avx &&
                ((e1->Eoper == OPind && !e1->Ecount) || e1->Eoper == OPvar && !isregvar(e1,&varregm,&varreg)) ||
                tysize(ty) == 32 && !isregvar(e1,&varregm,&varreg)
               )
            {
                if (e1->Eoper == OPvar)
                    e1->EV.sp.Vsym->Sflags &= ~GTregcand;

                // VBROADCASTSD XMM,MEM
                cdb.append(getlvalue(&cs, e1, 0));         // get addressing mode
                assert((cs.Irm & 0xC0) != 0xC0);           // AVX1 doesn't have register source operands
                cdb.append(allocreg(&retregs,&reg,ty));
                cs.Iop = VBROADCASTSD;
                cs.Irex &= ~REX_W;
                code_newreg(&cs,reg - XMM0);
                checkSetVex(&cs,ty);
                cdb.gen(&cs);
            }
            else
            {
                // UNPCKLPD XMM0,XMM0     66 0F 14 /r
                c = codelem(e1,&retregs,FALSE); // eval left leaf
                cdb.append(c);
                reg = findreg(retregs) - XMM0;
                cdb.append(getregs(retregs));
                cs.Iop = UNPCKLPD;
                cs.Irm = modregxrmx(3,reg,reg);
                cs.Iflags = 0;
                if (config.avx >= 2 || tysize(ty) == 32)
                {
                    // VBROADCASTSD XMM,XMM
                    cs.Iop = VBROADCASTSD;
                    checkSetVex(&cs, ty);
                }
                cdb.gen(&cs);
            }
            break;

        case TYschar16:
        case TYuchar16:
        case TYschar32:
        case TYuchar32:
        {
            /* MOVD      XMM0,r
             * PUNPCKLBW XMM0,XMM0
             * PUNPCKLWD XMM0,XMM0
             * PSHUFD    XMM0,XMM0,0
             */
            regm_t regm = ALLREGS;
            c = codelem(e1,&regm,FALSE); // eval left leaf
            cdb.append(c);
            unsigned r = findreg(regm);

            c = allocreg(&retregs,&reg, e->Ety);
            cdb.append(c);
            reg -= XMM0;
            cdb.gen2(LODD,modregxrmx(3,reg,r));     // MOVD reg,r
            checkSetVex(cdb.last(),TYschar16);

            cs.Iop = PUNPCKLBW;
            cs.Irm = modregxrmx(3,reg,reg);
            cs.Iflags = 0;
            cdb.gen(&cs);
            // Same operands as before, only the opcode changes
            cs.Iop = PUNPCKLWD;
            cdb.gen(&cs);

            cs.Iop = PSHUFD;
            cs.IFL2 = FLconst;
            cs.IEV2.Vsize_t = 0;
            checkSetVex(&cs,TYschar16);
            cdb.gen(&cs);
            if (tysize(ty) == 32)
            {
                // VINSERTF128 YMM0,YMM0,XMM0,1
                cs.Iop = VINSERTF128;
                cs.Irm = modregxrmx(3,reg,reg);
                cs.Iflags = 0;
                cs.IFL2 = FLconst;
                cs.IEV2.Vsize_t = 1;
                checkSetVex(&cs,ty);
                cdb.gen(&cs);
            }
            break;
        }

        case TYshort8:
        case TYushort8:
        case TYshort16:
        case TYushort16:
        {
            regm_t regm = ALLREGS;
            c = codelem(e1,&regm,FALSE); // eval left leaf
            cdb.append(c);
            unsigned r = findreg(regm);

            if (config.avx || tysize(ty) == 32)
            {
                /*
                 * VPXOR XMM0,XMM0,XMM0
                 * VPINSRW XMM0,XMM0,r,0
                 * VPINSRW XMM0,XMM0,r,1
                 * VPINSRW XMM0,XMM0,r,2
                 * VPINSRW XMM0,XMM0,r,3
                 */
                cdb.append(allocreg(&retregs,&reg, ty));
                cdb.gen2(PXOR,modregxrmx(3,reg-XMM0,reg-XMM0));
                checkSetVex(cdb.last(), TYshort8);
                // One PINSRW per index i, for i in [0, tysize(ty) / 4)
                for (int i = 0; i < tysize(ty) / 4; ++i)
                {
                    cdb.genc2(PINSRW,modregxrmx(3,reg-XMM0,r),i);
                    checkSetVex(cdb.last(), TYshort8);
                }
                if (tysize(ty) == 32)
                {
                    // VINSERTF128 YMM0,YMM0,XMM0,1
                    cs.Iop = VINSERTF128;
                    cs.Irm = modregxrmx(3,reg-XMM0,reg-XMM0);
                    cs.Iflags = 0;
                    cs.IFL2 = FLconst;
                    cs.IEV2.Vsize_t = 1;
                    checkSetVex(&cs,ty);
                    cdb.gen(&cs);
                }
                else
                {
                    // VPSHUFD XMM0,XMM0,0
                    cs.Iop = PSHUFD;
                    cs.Irm = modregxrmx(3,reg-XMM0,reg-XMM0);
                    cs.Iflags = 0;
                    cs.IFL2 = FLconst;
                    cs.IEV2.Vsize_t = 0;
                    checkSetVex(&cs,ty);
                    cdb.gen(&cs);
                }
            }
            else
            {
                /* MOVD      XMM0,r
                 * PUNPCKLWD XMM0,XMM0
                 * PSHUFD    XMM0,XMM0,0
                 */
                c = allocreg(&retregs,&reg, e->Ety);
                cdb.append(c);
                reg -= XMM0;
                cdb.gen2(LODD,modregxrmx(3,reg,r));     // MOVD reg,r
                checkSetVex(cdb.last(),e->Ety);

                cs.Iop = PUNPCKLWD;
                cs.Irm = modregxrmx(3,reg,reg);
                cs.Iflags = 0;
                cdb.gen(&cs);

                cs.Iop = PSHUFD;
                cs.IFL2 = FLconst;
                cs.IEV2.Vsize_t = 0;
                cdb.gen(&cs);
            }
            break;
        }

        case TYlong8:
        case TYulong8:
        case TYlong4:
        case TYulong4:
        {
            // Same condition as for the float vectors; when it holds, the
            // VBROADCASTSS-from-memory path at Lint is reused.
            if (config.avx &&
                ((e1->Eoper == OPind && !e1->Ecount) || e1->Eoper == OPvar && !isregvar(e1,&varregm,&varreg)) ||
                tysize(ty) == 32 && !isregvar(e1,&varregm,&varreg))
            {
                goto Lint;
            }
            /* MOVD      XMM1,r
             * PSHUFD    XMM0,XMM1,0
             */
            regm_t regm = ALLREGS;
            c = codelem(e1,&regm,FALSE); // eval left leaf
            cdb.append(c);
            unsigned r = findreg(regm);

            c = allocreg(&retregs,&reg, e->Ety);
            cdb.append(c);
            reg -= XMM0;
            cdb.gen2(LODD,modregxrmx(3,reg,r));     // MOVD reg,r

            cs.Iop = PSHUFD;
            cs.Irm = modregxrmx(3,reg,reg);
            cs.Iflags = 0;
            cs.IFL2 = FLconst;
            cs.IEV2.Vsize_t = 0;
            if (config.avx >= 2 || tysize(ty) == 32)
            {
                // VBROADCASTSS XMM,XMM
                cs.Iop = VBROADCASTSS;
                checkSetVex(&cs, ty);
            }
            cdb.gen(&cs);
            break;
        }

        case TYllong2:
        case TYullong2:
        case TYllong4:
        case TYullong4:
            if (config.avx || tysize(ty) >= 32)
            {
                if (e1->Eoper == OPvar)
                    e1->EV.sp.Vsym->Sflags &= ~GTregcand;

                // VMOVDDUP XMM,MEM
                cdb.append(getlvalue(&cs, e1, 0));         // get addressing mode
                if ((cs.Irm & 0xC0) == 0xC0)
                {
                    // The operand came back as a register: move it into an XMM
                    // register and patch the r/m field (and REX_B) to name it.
                    unsigned sreg = ((cs.Irm & 7) | (cs.Irex & REX_B ? 8 : 0));
                    regm_t sregm = XMMREGS;
                    cdb.append(fixresult(e1, mask[sreg], &sregm));
                    unsigned rmreg = findreg(sregm);
                    cs.Irm = (cs.Irm & ~7) | ((rmreg - XMM0) & 7);
                    if ((rmreg - XMM0) & 8)
                        cs.Irex |= REX_B;
                    else
                        cs.Irex &= ~REX_B;
                }
                cdb.append(allocreg(&retregs,&reg,ty));
                if (config.avx >= 2 ||  tysize(ty) >= 32)
                {
                    cs.Iop = VBROADCASTSD;
                    cs.Irex &= ~REX_W;
                }
                else
                    cs.Iop = MOVDDUP;
                code_newreg(&cs,reg - XMM0);
                checkSetVex(&cs,ty);
                cdb.gen(&cs);
            }
            else
            {
                /* MOVQ XMM0,mem128
                 * PUNPCKLQDQ XMM0,XMM0
                 */
                c = codelem(e1,&retregs,FALSE); // eval left leaf
                cdb.append(c);
                // Note: this declaration shadows the function-level `reg`
                unsigned reg = findreg(retregs);
                reg -= XMM0;
                //cdb.gen2(LODD,modregxrmx(3,reg,r));     // MOVQ reg,r

                cs.Iop = PUNPCKLQDQ;
                cs.Irm = modregxrmx(3,reg,reg);
                cs.Iflags = 0;
                cdb.gen(&cs);
            }
            break;

        default:
            // Any other vector type is not handled here
            assert(0);
    }

    // Move the result to where the caller asked for it
    c = fixresult(e,retregs,pretregs);
    cdb.append(c);
    return cdb.finish();
}
Пример #14
0
/**
 * Generate code for a vector (SIMD) element whose opcode and operands are
 * packed into a parameter tree hanging off e->E1.
 *
 * e should look like one of:
 *    vector
 *      |
 *    param
 *    /   \
 *  param op2
 *  /   \
 * op   op1
 *
 * The tree is flattened by el_paramArray() into params[]:
 *   params[0]  constant giving the instruction opcode
 *   params[1]  op1, first operand (evaluated into an XMM register)
 *   params[2]  op2, optional second operand (register/memory or imm8)
 *   params[3]  optional imm8 for the 4-parameter form
 *
 * Params:
 *   e        = the vector element; released with freenode() on the normal path
 *   pretregs = in: mask of acceptable result registers (0 means evaluate for
 *              side effects only); passed on to fixresult() for the result
 * Returns:
 *   the generated code sequence (cdb.finish())
 *
 * Exits the process with status 1 if config.fpxmmregs is not set.
 */
code *cdvector(elem *e, regm_t *pretregs)
{
    if (!config.fpxmmregs)
    {   printf("SIMD operations not supported on this platform\n");
        exit(1);
    }

    unsigned n = el_nparams(e->E1);
    elem **params = (elem **)malloc(n * sizeof(elem *));
    assert(params);
    elem **tmp = params;                // el_paramArray() advances its cursor, keep params intact
    el_paramArray(&tmp, e->E1);

#if 0
    printf("cdvector()\n");
    for (int i = 0; i < n; i++)
    {
        printf("[%d]: ", i);
        elem_print(params[i]);
    }
#endif

    if (*pretregs == 0)
    {   /* Evaluate for side effects only
         */
        CodeBuilder cdb;
        for (unsigned i = 0; i < n; i++)
        {
            cdb.append(codelem(params[i], pretregs, FALSE));
            *pretregs = 0;      // in case they got set
        }
        // The parameter array is owned by this function; release it on this
        // early exit too, otherwise it leaks.
        // NOTE(review): unlike the normal path, e is not passed to freenode()
        // here - confirm that is intended.
        free(params);
        return cdb.finish();
    }

    assert(n >= 2 && n <= 4);

    elem *eop = params[0];
    elem *op1 = params[1];
    elem *op2 = NULL;
    tym_t ty2 = 0;
    if (n >= 3)
    {   op2 = params[2];
        ty2 = tybasic(op2->Ety);
    }

    unsigned op = el_tolong(eop);       // instruction opcode
#ifdef DEBUG
    assert(!isXMMstore(op));
#endif

    regm_t retregs;
    CodeBuilder cdb;
    if (n == 3 && ty2 == TYuchar && op2->Eoper == OPconst)
    {   // Handle: op xmm,imm8

        retregs = *pretregs & XMMREGS;
        if (!retregs)
            retregs = XMMREGS;
        cdb.append(codelem(op1,&retregs,FALSE)); // eval left leaf
        unsigned reg = findreg(retregs);

        /* Translate the shift opcode into its immediate form: a shared
         * opcode (0x660F71/72/73) plus a selector r that goes into the
         * reg field of the mod/rm byte.
         */
        int r;
        switch (op)
        {
            case PSLLD:  r = 6; op = 0x660F72;  break;
            case PSLLQ:  r = 6; op = 0x660F73;  break;
            case PSLLW:  r = 6; op = 0x660F71;  break;
            case PSRAD:  r = 4; op = 0x660F72;  break;
            case PSRAW:  r = 4; op = 0x660F71;  break;
            case PSRLD:  r = 2; op = 0x660F72;  break;
            case PSRLQ:  r = 2; op = 0x660F73;  break;
            case PSRLW:  r = 2; op = 0x660F71;  break;
            case PSRLDQ: r = 3; op = 0x660F73;  break;
            case PSLLDQ: r = 7; op = 0x660F73;  break;

            default:
                printf("op = x%x\n", op);
                assert(0);
                break;
        }
        cdb.append(getregs(retregs));
        cdb.genc2(op,modregrmx(3,r,reg-XMM0), el_tolong(op2));
    }
    else if (n == 2)
    {   /* Handle: op xmm,mem
         * where xmm is written only, not read
         */
        code cs;

        if ((op1->Eoper == OPind && !op1->Ecount) || op1->Eoper == OPvar)
        {
            cdb.append(getlvalue(&cs, op1, RMload));     // get addressing mode
        }
        else
        {
            // Operand is not directly addressable: evaluate it into an XMM
            // register and use a register-direct mod/rm instead.
            regm_t rretregs = XMMREGS;
            cdb.append(codelem(op1, &rretregs, FALSE));
            unsigned rreg = findreg(rretregs) - XMM0;
            cs.Irm = modregrm(3,0,rreg & 7);
            cs.Iflags = 0;
            cs.Irex = 0;
            if (rreg & 8)
                cs.Irex |= REX_B;       // register number needs the extension bit
        }

        retregs = *pretregs & XMMREGS;
        if (!retregs)
            retregs = XMMREGS;
        unsigned reg;
        cdb.append(allocreg(&retregs, &reg, e->Ety));
        code_newreg(&cs, reg - XMM0);
        cs.Iop = op;
        cdb.gen(&cs);
    }
    else if (n == 3 || n == 4)
    {   /* Handle:
         *      op xmm,mem        // n = 3
         *      op xmm,mem,imm8   // n = 4
         * Both xmm and mem are operands, evaluate xmm first.
         */

        code cs;

        retregs = *pretregs & XMMREGS;
        if (!retregs)
            retregs = XMMREGS;
        cdb.append(codelem(op1,&retregs,FALSE)); // eval left leaf
        unsigned reg = findreg(retregs);

        if ((op2->Eoper == OPind && !op2->Ecount) || op2->Eoper == OPvar)
        {
            cdb.append(getlvalue(&cs, op2, RMload | retregs));     // get addressing mode
        }
        else
        {
            // Evaluate op2 into an XMM register distinct from the one
            // holding op1, and address it register-direct.
            regm_t rretregs = XMMREGS & ~retregs;
            cdb.append(scodelem(op2, &rretregs, retregs, TRUE));
            unsigned rreg = findreg(rretregs) - XMM0;
            cs.Irm = modregrm(3,0,rreg & 7);
            cs.Iflags = 0;
            cs.Irex = 0;
            if (rreg & 8)
                cs.Irex |= REX_B;       // register number needs the extension bit
        }

        cdb.append(getregs(retregs));
        if (n == 4)
        {
            // Only these opcodes are accepted with a trailing imm8
            switch (op)
            {
                case CMPPD:   case CMPSS:   case CMPSD:   case CMPPS:
                case PSHUFD:  case PSHUFHW: case PSHUFLW:
                case BLENDPD: case BLENDPS: case DPPD:    case DPPS:
                case MPSADBW: case PBLENDW:
                case ROUNDPD: case ROUNDPS: case ROUNDSD: case ROUNDSS:
                case SHUFPD:  case SHUFPS:
                    break;
                default:
                    printf("op = x%x\n", op);
                    assert(0);
                    break;
            }
            elem *imm8 = params[3];
            cs.IFL2 = FLconst;
            cs.IEV2.Vsize_t = el_tolong(imm8);
        }
        code_newreg(&cs, reg - XMM0);
        cs.Iop = op;
        cdb.gen(&cs);
    }
    else
        assert(0);
    cdb.append(fixresult(e,retregs,pretregs));
    free(params);
    freenode(e);
    return cdb.finish();
}
Пример #15
0
Файл: cgxmm.c Проект: dsagal/dmd
/**
 * Generate code for an XMM operator-assignment: the left operand (e->E1) is
 * combined with the right operand (e->E2) using the instruction chosen by
 * xmmoperator(), and the result ends up in the left operand.
 *
 * Two strategies are used:
 *   - when optimizing and the lvalue is a register variable that can be
 *     computed into directly, the operation is done in that register;
 *   - otherwise the lvalue is loaded into a scratch XMM register, operated
 *     on, and stored back through the same addressing mode.
 *
 * Params:
 *   e        = the operator-assignment element
 *   pretregs = mask of acceptable result registers, handed to fixresult()
 * Returns:
 *   the concatenated code: right operand, lvalue addressing, load, op/store
 */
code *xmmopass(elem *e,regm_t *pretregs)
{
    elem *lhs = e->E1;
    elem *rhs = e->E2;
    tym_t tylhs = tybasic(lhs->Ety);

    // Prefer to put the right operand outside the requested result registers
    regm_t rhsmask = XMMREGS & ~*pretregs;
    if (rhsmask == 0)
        rhsmask = XMMREGS;

    code *crhs = codelem(rhs,&rhsmask,FALSE);   // eval right leaf
    unsigned rhsreg = findreg(rhsmask);

    code cs;
    code *clval;
    code *cload;
    regm_t retregs;
    unsigned reg;

    // Be careful of cases like (x = x+x+x). We cannot evaluate in
    // x if x is in a register.
    unsigned varreg;
    regm_t varregm;
    const bool inregvar =
        (config.flags4 & CFG4optimized) &&
        isregvar(lhs,&varregm,&varreg) &&       // lvalue is a register variable
        doinreg(lhs->EV.sp.Vsym,rhs);           // and we can compute directly into it

    if (inregvar)
    {
        retregs = varregm;
        reg = varreg;                           // evaluate directly in target register
        clval = NULL;
        cload = getregs(retregs);               // destroy these regs
    }
    else
    {
        clval = getlvalue(&cs,lhs,rhsmask);     // get EA
        retregs = *pretregs & XMMREGS & ~rhsmask;
        if (retregs == 0)
            retregs = XMMREGS & ~rhsmask;
        cload = allocreg(&retregs,&reg,tylhs);
        cs.Iop = xmmload(tylhs);                // MOVSD xmm,xmm_m64
        code_newreg(&cs,reg - XMM0);
        cload = gen(cload,&cs);
    }

    const unsigned op = xmmoperator(lhs->Ety, e->Eoper);
    code *cop = gen2(CNIL,op,modregxrmx(3,reg-XMM0,rhsreg-XMM0));

    if (!inregvar)
    {
        // Write the result back: same EA, reverse operand order of MOVS[SD]
        cs.Iop = xmmstore(tylhs);
        gen(cop,&cs);
    }

    if (inregvar || lhs->Ecount)        // rvalue can't be a CSE, or lvalue is a CSE
    {
        // necessary if both lvalue and rvalue are CSEs
        // (since a reg can hold only one e at a time)
        clval = cat(clval,getregs_imm(retregs));
        cssave(lhs,retregs,EOP(lhs));   // if lvalue is a CSE
    }

    cop = cat(cop,fixresult(e,retregs,pretregs));
    freenode(lhs);
    return cat4(crhs,clval,cload,cop);
}