/* Fill d[col], for col = 0..3, with
     a[0]*b[0][col] + a[1]*b[1][col] + a[2]*b[2][col] + a[3]*b[3][col],
   handling two products per step with paired-single (v2sf) arithmetic.
   a, b and d are file-scope objects declared outside this function.  */
void matrix_multiply2()
{
  int col, row;
  v2sf lhs, rhs;
  v2sf acc, folded;

  for (col = 0; col < 4; col++)
    {
      /* Start each column with both partial sums cleared.  */
      acc = (v2sf) {0.0, 0.0};

      row = 0;
      while (row < 4)
        {
          /* Two consecutive elements of a, and the two elements of
             column COL of b that sit in the same rows.  */
          lhs = (v2sf) {a[row], a[row + 1]};
          rhs = (v2sf) {b[row][col], b[row + 1][col]};

          /* Element-wise multiply, then accumulate.  */
          acc = acc + lhs * rhs;

          row += 2;
        }

      /* Reduction add of the two partial sums; the total is then read
         out of the lower half of the pair.  */
      folded = __builtin_mips_addr_ps (acc, acc);
      d[col] = __builtin_mips_cvt_s_pl (folded);
    }
}
void matrix_multiply4() { v2sf m1, m2; v2sf n1, n2, n3, n4, n5, n6, n7, n8; v2sf temp1, temp2, temp3, temp4; v2sf result1, result2; /* Load a[0] a[1] values into m1 Load a[2] a[3] values into m2 */ m1 = (v2sf) {a[0], a[1]}; m2 = (v2sf) {a[2], a[3]}; /* Load b[0][0] b[1][0] values into n1 Load b[2][0] b[3][0] values into n2 Load b[0][1] b[1][1] values into n3 Load b[2][1] b[3][1] values into n4 Load b[0][2] b[1][2] values into n5 Load b[2][2] b[3][2] values into n6 Load b[0][3] b[1][3] values into n7 Load b[2][3] b[3][3] values into n8 */ n1 = (v2sf) {b[0][0], b[1][0]}; n2 = (v2sf) {b[2][0], b[3][0]}; n3 = (v2sf) {b[0][1], b[1][1]}; n4 = (v2sf) {b[2][1], b[3][1]}; n5 = (v2sf) {b[0][2], b[1][2]}; n6 = (v2sf) {b[2][2], b[3][2]}; n7 = (v2sf) {b[0][3], b[1][3]}; n8 = (v2sf) {b[2][3], b[3][3]}; temp1 = m1 * n1 + m2 * n2; temp2 = m1 * n3 + m2 * n4; temp3 = m1 * n5 + m2 * n6; temp4 = m1 * n7 + m2 * n8; result1 = __builtin_mips_addr_ps (temp1, temp2); result2 = __builtin_mips_addr_ps (temp3, temp4); f[0] = __builtin_mips_cvt_s_pu (result1); f[1] = __builtin_mips_cvt_s_pl (result1); f[2] = __builtin_mips_cvt_s_pu (result2); f[3] = __builtin_mips_cvt_s_pl (result2); }
void matrix_multiply3() { int i; v2sf m1, m2, n1, n2; v2sf result, temp; m1 = (v2sf) {a[0], a[1]}; m2 = (v2sf) {a[2], a[3]}; for (i = 0; i < 4; i++) { n1 = (v2sf) {b[0][i], b[1][i]}; n2 = (v2sf) {b[2][i], b[3][i]}; /* Multiply and add */ result = m1 * n1 + m2 * n2; /* Reduction add at the end */ temp = __builtin_mips_addr_ps (result, result); e[i] = __builtin_mips_cvt_s_pl (temp); } }
/* Run-time checks for the paired-single builtins.  Each section builds
   its inputs, calls one builtin and compares the result with the value
   expected for the detected byte order; any mismatch calls abort ().
   Prints "Test Passes" and exits with status 0 when every check holds.  */
NOMIPS16 int main ()
{
  union { long long wide; int words[2]; } probe;
  int is_le;
  v2sf x, y, got, want;
  float scalar_a, scalar_b;
  int shift;

  /* Store 1 in the wide member and look at the first int: it is
     non-zero only when the low-order part comes first in memory.  */
  probe.wide = 1;
  is_le = probe.words[0];

  /* pll.ps */
  x = (v2sf) {1, 2};
  y = (v2sf) {3, 4};
  got = __builtin_mips_pll_ps (x, y);
  if (is_le)
    {
      want = (v2sf) {3, 1};
    }
  else
    {
      want = (v2sf) {2, 4};
    }
  if (!(__builtin_mips_upper_c_eq_ps (got, want)
        && __builtin_mips_lower_c_eq_ps (got, want)))
    abort ();

  /* pul.ps */
  x = (v2sf) {1, 2};
  y = (v2sf) {3, 4};
  got = __builtin_mips_pul_ps (x, y);
  if (is_le)
    {
      want = (v2sf) {3, 2};
    }
  else
    {
      want = (v2sf) {1, 4};
    }
  if (!(__builtin_mips_upper_c_eq_ps (got, want)
        && __builtin_mips_lower_c_eq_ps (got, want)))
    abort ();

  /* plu.ps */
  x = (v2sf) {1, 2};
  y = (v2sf) {3, 4};
  got = __builtin_mips_plu_ps (x, y);
  if (is_le)
    {
      want = (v2sf) {4, 1};
    }
  else
    {
      want = (v2sf) {2, 3};
    }
  if (!(__builtin_mips_upper_c_eq_ps (got, want)
        && __builtin_mips_lower_c_eq_ps (got, want)))
    abort ();

  /* puu.ps */
  x = (v2sf) {1, 2};
  y = (v2sf) {3, 4};
  got = __builtin_mips_puu_ps (x, y);
  if (is_le)
    {
      want = (v2sf) {4, 2};
    }
  else
    {
      want = (v2sf) {1, 3};
    }
  if (!(__builtin_mips_upper_c_eq_ps (got, want)
        && __builtin_mips_lower_c_eq_ps (got, want)))
    abort ();

  /* cvt.ps.s: two scalars packed into one pair.  */
  scalar_a = 3.4;
  scalar_b = 4.5;
  got = __builtin_mips_cvt_ps_s (scalar_a, scalar_b);
  if (is_le)
    {
      want = (v2sf) {4.5, 3.4};
    }
  else
    {
      want = (v2sf) {3.4, 4.5};
    }
  if (!(__builtin_mips_upper_c_eq_ps (got, want)
        && __builtin_mips_lower_c_eq_ps (got, want)))
    abort ();

  /* cvt.s.pl: scalar_a is the extracted value, scalar_b the expected
     one.  */
  x = (v2sf) {35.1, 120.2};
  scalar_a = __builtin_mips_cvt_s_pl (x);
  if (is_le)
    {
      scalar_b = 35.1;
    }
  else
    {
      scalar_b = 120.2;
    }
  if (scalar_a != scalar_b)
    abort ();

  /* cvt.s.pu */
  x = (v2sf) {30.0, 100.0};
  scalar_a = __builtin_mips_cvt_s_pu (x);
  if (is_le)
    {
      scalar_b = 100.0;
    }
  else
    {
      scalar_b = 30.0;
    }
  if (scalar_a != scalar_b)
    abort ();

  /* abs.ps: the expected value is the same for both byte orders.  */
  x = (v2sf) {-3.4, -5.8};
  got = __builtin_mips_abs_ps (x);
  want = (v2sf) {3.4, 5.8};
  if (!(__builtin_mips_upper_c_eq_ps (got, want)
        && __builtin_mips_lower_c_eq_ps (got, want)))
    abort ();

  /* alnv.ps with rs = 4 */
  x = (v2sf) {1, 2};
  y = (v2sf) {3, 4};
  shift = 4;
  got = __builtin_mips_alnv_ps (x, y, shift);
  want = (v2sf) {2, 3};
  if (!(__builtin_mips_upper_c_eq_ps (got, want)
        && __builtin_mips_lower_c_eq_ps (got, want)))
    abort ();

  /* alnv.ps with rs = 0: the first operand is expected back
     unchanged.  */
  x = (v2sf) {5, 6};
  y = (v2sf) {7, 8};
  shift = 0;
  got = __builtin_mips_alnv_ps (x, y, shift);
  want = (v2sf) {5, 6};
  if (!(__builtin_mips_upper_c_eq_ps (got, want)
        && __builtin_mips_lower_c_eq_ps (got, want)))
    abort ();

  printf ("Test Passes\n");
  exit (0);
}