/**
 * Exercises SSE high-half loads and packed-float arithmetic.
 *
 * First calls `_mm_loadh_pi` and `__builtin_ia32_loadhps` (results are
 * discarded, as in the original), then computes e = (a * b) + c with the
 * portable `_mm_mul_ps` / `_mm_add_ps` intrinsics and hands each vector to
 * `printf4vector` (declared elsewhere in the project).
 *
 * @return 0 on completion.
 */
int main() {
    // Both loads below read 64 bits through their pointer argument, so the
    // pointer must refer to real storage. The previous version passed a null
    // pointer (and an uninitialized `x`), which is undefined behaviour and
    // faults whenever the load is not optimized away.
    __v4sf x = {0.0f, 0.0f, 0.0f, 0.0f};
    const __v2sf upper = {0.0f, 0.0f};
    const __v2sf* cx = &upper;

    // Results are intentionally unused: these calls only check that both
    // spellings of the high-half load are accepted and execute.
    _mm_loadh_pi(x, reinterpret_cast<const __m64*>(cx));
    __builtin_ia32_loadhps(x, cx);

    {
        __v4sf a = {1, 2, 3, 4};
        __v4sf b = {5, 6, 7, 8};
        __v4sf c = {9, 10, 11, 12};

        printf4vector(&a);
        printf4vector(&b);
        printf4vector(&c);

        // builtin "ia32_mulps" not supported by clang -- use _mm_mul_ps
        __v4sf tmp = _mm_mul_ps(a, b);  // a * b
        printf4vector(&tmp);

        // builtin "ia32_addps" not supported by clang -- use _mm_add_ps
        __v4sf e = _mm_add_ps(tmp, c);  // e = (a * b) + c
        printf4vector(&e);
    }

    return 0;
}
int main() { union f4vector a, b, c; a.f[0] = 1.1; a.f[1] = 2; a.f[2] = 3; a.f[3] = 4.0; b.f[0] = 5.0; b.f[1] = 6; b.f[2] = 7; b.f[3] = 8.0; c.f[0] = 9.1; c.f[1] = 10; c.f[2] = 11; c.f[3] = 12.3; v4sf tmp = __builtin_ia32_mulps (a.v, b.v); v4sf foo = __builtin_ia32_addps(tmp, c.v); std::cout << "calculate foo = (a * b) + c:\n"; std::cout << "a = "; printf4vector(&a.v); std::cout << "b = "; printf4vector(&b.v); std::cout << "c = "; printf4vector(&c.v); std::cout << "foo = "; printf4vector(&foo); v4si A ={1,2,3,4}; v4si B ={5,6,7,8}; v4si C; std::cout << "A : "; printi4vector(&A); std::cout << "B : "; printi4vector(&B); C = A + B; std::cout << "C=A+B : "; printi4vector(&C); C = A - B; std::cout << "C=A-B : "; printi4vector(&C); C = B - A; std::cout << "C=B-A : "; printi4vector(&C); C = A * B; std::cout << "C=A*B : "; printi4vector(&C); C = B / A; std::cout << "C=B/A : "; printi4vector(&C); C = A / B; std::cout << "C=A/B : "; printi4vector(&C); v4sf fA ={1,2,3,4.5}; v4sf fB ={5,6,7,8.5}; v4sf fC; std::cout << "fA : "; printf4vector(&fA); std::cout << "fB : "; printf4vector(&fB); fC = fA + fB; std::cout << "fC=fA+fB : "; printf4vector(&fC); fC = fA - fB; std::cout << "fC=fA-fB : "; printf4vector(&fC); fC = fB - fA; std::cout << "fC=fB-fA : "; printf4vector(&fC); fC = fA * fB; std::cout << "fC=fA*fB : "; printf4vector(&fC); fC = fA * fA; std::cout << "fC=fA*fA : "; printf4vector(&fC); fC = fB * fB; std::cout << "fC=fB*fB : "; printf4vector(&fC); fC = fA / fB; std::cout << "fC=fA/fB : "; printf4vector(&fC); fC = fB / fA; std::cout << "fC=fB/fA : "; printf4vector(&fC); /* ... */ extern void vectorcode(); vectorcode(); // seems to execute --- no segfault. How about that? }