static void emit_load_R32G32( struct aos_compilation *cp, struct x86_reg data, struct x86_reg src_ptr ) { sse_movups(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) ); sse_movlps(cp->func, data, src_ptr); }
static void emit_load4f_2( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 ) { /* Initialize from identity, then pull in low two words: */ sse_movups(&p->func, dest, get_identity(p)); sse_movlps(&p->func, dest, arg0); }
static void emit_load4f_3( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 ) { /* Have to jump through some hoops: * * c 0 0 0 * c 0 0 1 * 0 0 c 1 * a b c 1 */ sse_movss(&p->func, dest, x86_make_disp(arg0, 8)); sse_shufps(&p->func, dest, get_identity(p), SHUF(X,Y,Z,W) ); sse_shufps(&p->func, dest, dest, SHUF(Y,Z,X,W) ); sse_movlps(&p->func, dest, arg0); }
static void emit_store3f( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 ) { if (p->outputs_safe) { /* Emit the extra dword anyway. This may hurt writecombining, * may cause other problems. */ sse_movups(&p->func, dest, arg0); } else { /* Alternate strategy - emit two, shuffle, emit one. */ sse_movlps(&p->func, dest, arg0); sse_shufps(&p->func, arg0, arg0, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */ sse_movss(&p->func, x86_make_disp(dest,8), arg0); } }
static void emit_load3f_3( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 ) { /* Over-reads by 1 dword - potential SEGV if input is a vertex * array. */ if (p->inputs_safe) { sse_movups(&p->func, dest, arg0); } else { /* c 0 0 0 * c c c c * a b c c */ sse_movss(&p->func, dest, x86_make_disp(arg0, 8)); sse_shufps(&p->func, dest, dest, SHUF(X,X,X,X)); sse_movlps(&p->func, dest, arg0); } }
static void emit_load_R32G32B32( struct aos_compilation *cp, struct x86_reg data, struct x86_reg src_ptr ) { #if 1 sse_movss(cp->func, data, x86_make_disp(src_ptr, 8)); /* data = z ? ? ? */ sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) ); /* data = z ? 0 1 */ sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) ); /* data = ? 0 z 1 */ sse_movlps(cp->func, data, src_ptr); /* data = x y z 1 */ #else sse_movups(cp->func, data, src_ptr); /* data = x y z ? */ sse2_pshufd(cp->func, data, data, SHUF(W,X,Y,Z) ); /* data = ? x y z */ sse_movss(cp->func, data, aos_get_internal_xmm( cp, IMM_ONES ) ); /* data = 1 x y z */ sse2_pshufd(cp->func, data, data, SHUF(Y,Z,W,X) ); /* data = x y z 1 */ #endif }
/**
 * Emit a single movlps that writes two floats from \p arg0 to \p dest.
 *
 * \param p     program being generated; the instruction goes to p->func
 * \param dest  operand the floats are written to (presumably a memory
 *              operand - confirm against callers)
 * \param arg0  source register; movlps transfers its low two lanes
 */
static void
emit_store2f(struct x86_program *p,
             struct x86_reg dest,
             struct x86_reg arg0)
{
   sse_movlps(&p->func, dest, arg0);
}
/**
 * Emit a single movlps that reads two floats from \p arg0 into \p dest.
 *
 * No other instruction is emitted, so nothing initialises the upper two
 * lanes of \p dest here.
 *
 * \param p     program being generated; the instruction goes to p->func
 * \param dest  destination register; movlps writes its low two lanes
 * \param arg0  operand the floats are read from (presumably a memory
 *              operand - confirm against callers)
 */
static void
emit_load2f_2(struct x86_program *p,
              struct x86_reg dest,
              struct x86_reg arg0)
{
   sse_movlps(&p->func, dest, arg0);
}