void exec_vceq_p8(void) { DECL_VARIABLE(vector, poly, 8, 8); DECL_VARIABLE(vector, poly, 8, 16); DECL_VARIABLE(vector2, poly, 8, 8); DECL_VARIABLE(vector2, poly, 8, 16); DECL_VARIABLE(vector_res, uint, 8, 8); DECL_VARIABLE(vector_res, uint, 8, 16); clean_results (); VLOAD(vector, buffer, , poly, p, 8, 8); VLOAD(vector, buffer, q, poly, p, 8, 16); VDUP(vector2, , poly, p, 8, 8, 0xF3); VDUP(vector2, q, poly, p, 8, 16, 0xF4); TEST_VCOMP(INSN_NAME, , poly, p, uint, 8, 8); TEST_VCOMP(INSN_NAME, q, poly, p, uint, 8, 16); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_p8, "p8"); CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_q_p8, "p8"); }
void exec_vld1 (void) { /* Basic test vec=vld1(buffer); then store vec: vst1(result, vector) */ /* This test actually tests vdl1 and vst1 at the same time */ #define TEST_VLD1(VAR, BUF, Q, T1, T2, W, N) \ VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N)); \ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N)) /* With ARM RVCT, we need to declare variables before any executable statement */ DECL_VARIABLE_ALL_VARIANTS(vector); #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) DECL_VARIABLE(vector, float, 16, 4); DECL_VARIABLE(vector, float, 16, 8); #endif clean_results (); TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1, vector, buffer); TEST_VLD1(vector, buffer, , float, f, 32, 2); TEST_VLD1(vector, buffer, q, float, f, 32, 4); #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_VLD1(vector, buffer, , float, f, 16, 4); TEST_VLD1(vector, buffer, q, float, f, 16, 8); #endif dump_results_hex (TEST_MSG); }
void exec_vmovn (void) { /* Basic test: vec64=vmovn(vec128), then store the result. */ #define TEST_VMOVN(T1, T2, W, W2, N) \ VECT_VAR(vector64, T1, W2, N) = \ vmovn_##T2##W(VECT_VAR(vector128, T1, W, N)); \ vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector64, T1, W2, N)) DECL_VARIABLE_64BITS_VARIANTS(vector64); DECL_VARIABLE_128BITS_VARIANTS(vector128); TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vector128, buffer); clean_results (); TEST_VMOVN(int, s, 16, 8, 8); TEST_VMOVN(int, s, 32, 16, 4); TEST_VMOVN(int, s, 64, 32, 2); TEST_VMOVN(uint, u, 16, 8, 8); TEST_VMOVN(uint, u, 32, 16, 4); TEST_VMOVN(uint, u, 64, 32, 2); CHECK(TEST_MSG, int, 8, 8, PRIx32, expected, ""); CHECK(TEST_MSG, int, 16, 4, PRIx64, expected, ""); CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); CHECK(TEST_MSG, uint, 8, 8, PRIx32, expected, ""); CHECK(TEST_MSG, uint, 16, 4, PRIx64, expected, ""); CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); }
void vsli_extra(void) { /* Test cases with maximum shift amount (this amount is different from vsri). */ DECL_VARIABLE_ALL_VARIANTS(vector); DECL_VARIABLE_ALL_VARIANTS(vector2); DECL_VARIABLE_ALL_VARIANTS(vector_res); clean_results (); /* Initialize input "vector" from "buffer". */ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); /* Fill input vector2 with arbitrary values. */ VDUP(vector2, , int, s, 8, 8, 2); VDUP(vector2, , int, s, 16, 4, -4); VDUP(vector2, , int, s, 32, 2, 3); VDUP(vector2, , int, s, 64, 1, 100); VDUP(vector2, , uint, u, 8, 8, 20); VDUP(vector2, , uint, u, 16, 4, 30); VDUP(vector2, , uint, u, 32, 2, 40); VDUP(vector2, , uint, u, 64, 1, 2); VDUP(vector2, , poly, p, 8, 8, 20); VDUP(vector2, , poly, p, 16, 4, 30); VDUP(vector2, q, int, s, 8, 16, -10); VDUP(vector2, q, int, s, 16, 8, -20); VDUP(vector2, q, int, s, 32, 4, -30); VDUP(vector2, q, int, s, 64, 2, 24); VDUP(vector2, q, uint, u, 8, 16, 12); VDUP(vector2, q, uint, u, 16, 8, 3); VDUP(vector2, q, uint, u, 32, 4, 55); VDUP(vector2, q, uint, u, 64, 2, 3); VDUP(vector2, q, poly, p, 8, 16, 12); VDUP(vector2, q, poly, p, 16, 8, 3); /* Use maximum allowed shift amount. 
*/ TEST_VSXI_N(INSN_NAME, , int, s, 8, 8, 7); TEST_VSXI_N(INSN_NAME, , int, s, 16, 4, 15); TEST_VSXI_N(INSN_NAME, , int, s, 32, 2, 31); TEST_VSXI_N(INSN_NAME, , int, s, 64, 1, 63); TEST_VSXI_N(INSN_NAME, , uint, u, 8, 8, 7); TEST_VSXI_N(INSN_NAME, , uint, u, 16, 4, 15); TEST_VSXI_N(INSN_NAME, , uint, u, 32, 2, 31); TEST_VSXI_N(INSN_NAME, , uint, u, 64, 1, 63); TEST_VSXI_N(INSN_NAME, , poly, p, 8, 8, 7); TEST_VSXI_N(INSN_NAME, , poly, p, 16, 4, 15); TEST_VSXI_N(INSN_NAME, q, int, s, 8, 16, 7); TEST_VSXI_N(INSN_NAME, q, int, s, 16, 8, 15); TEST_VSXI_N(INSN_NAME, q, int, s, 32, 4, 31); TEST_VSXI_N(INSN_NAME, q, int, s, 64, 2, 63); TEST_VSXI_N(INSN_NAME, q, uint, u, 8, 16, 7); TEST_VSXI_N(INSN_NAME, q, uint, u, 16, 8, 15); TEST_VSXI_N(INSN_NAME, q, uint, u, 32, 4, 31); TEST_VSXI_N(INSN_NAME, q, uint, u, 64, 2, 63); TEST_VSXI_N(INSN_NAME, q, poly, p, 8, 16, 7); TEST_VSXI_N(INSN_NAME, q, poly, p, 16, 8, 15); CHECK_RESULTS_NAMED (TEST_MSG, expected_max_shift, "(max shift amount)"); }
void exec_vdup_lane (void) { /* Basic test: vec1=vdup_lane(vec2, lane), then store the result. */ #define TEST_VDUP_LANE(Q, T1, T2, W, N, N2, L) \ VECT_VAR(vector_res, T1, W, N) = \ vdup##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N2), L); \ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) /* Input vector can only have 64 bits. */ DECL_VARIABLE_64BITS_VARIANTS(vector); DECL_VARIABLE_ALL_VARIANTS(vector_res); clean_results (); TEST_MACRO_64BITS_VARIANTS_2_5(VLOAD, vector, buffer); #if defined (FP16_SUPPORTED) VLOAD(vector, buffer, , float, f, 16, 4); #endif VLOAD(vector, buffer, , float, f, 32, 2); /* Choose lane arbitrarily. */ TEST_VDUP_LANE(, int, s, 8, 8, 8, 1); TEST_VDUP_LANE(, int, s, 16, 4, 4, 2); TEST_VDUP_LANE(, int, s, 32, 2, 2, 1); TEST_VDUP_LANE(, int, s, 64, 1, 1, 0); TEST_VDUP_LANE(, uint, u, 8, 8, 8, 7); TEST_VDUP_LANE(, uint, u, 16, 4, 4, 3); TEST_VDUP_LANE(, uint, u, 32, 2, 2, 1); TEST_VDUP_LANE(, uint, u, 64, 1, 1, 0); TEST_VDUP_LANE(, poly, p, 8, 8, 8, 7); TEST_VDUP_LANE(, poly, p, 16, 4, 4, 3); #if defined (FP16_SUPPORTED) TEST_VDUP_LANE(, float, f, 16, 4, 4, 3); #endif TEST_VDUP_LANE(, float, f, 32, 2, 2, 1); TEST_VDUP_LANE(q, int, s, 8, 16, 8, 2); TEST_VDUP_LANE(q, int, s, 16, 8, 4, 3); TEST_VDUP_LANE(q, int, s, 32, 4, 2, 1); TEST_VDUP_LANE(q, int, s, 64, 2, 1, 0); TEST_VDUP_LANE(q, uint, u, 8, 16, 8, 5); TEST_VDUP_LANE(q, uint, u, 16, 8, 4, 1); TEST_VDUP_LANE(q, uint, u, 32, 4, 2, 0); TEST_VDUP_LANE(q, uint, u, 64, 2, 1, 0); TEST_VDUP_LANE(q, poly, p, 8, 16, 8, 5); TEST_VDUP_LANE(q, poly, p, 16, 8, 4, 1); #if defined (FP16_SUPPORTED) TEST_VDUP_LANE(q, float, f, 16, 8, 4, 3); #endif TEST_VDUP_LANE(q, float, f, 32, 4, 2, 1); #if defined (FP16_SUPPORTED) CHECK_RESULTS (TEST_MSG, ""); #else CHECK_RESULTS_NO_FP16 (TEST_MSG, ""); #endif }
void exec_vfma (void) { /* Basic test: v4=vfma(v1,v2), then store the result. */ #define TEST_VFMA(Q, T1, T2, W, N) \ VECT_VAR(vector_res, T1, W, N) = \ vfma##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ VECT_VAR(vector2, T1, W, N), \ VECT_VAR(vector3, T1, W, N)); \ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) #define CHECK_VFMA_RESULTS(test_name,comment) \ { \ CHECK_FP(test_name, float, 32, 2, PRIx32, expected, comment); \ CHECK_FP(test_name, float, 32, 4, PRIx32, expected, comment); \ CHECK_FP(test_name, float, 64, 2, PRIx64, expected, comment); \ } #define DECL_VABD_VAR(VAR) \ DECL_VARIABLE(VAR, float, 32, 2); \ DECL_VARIABLE(VAR, float, 32, 4); \ DECL_VARIABLE(VAR, float, 64, 2); DECL_VABD_VAR(vector1); DECL_VABD_VAR(vector2); DECL_VABD_VAR(vector3); DECL_VABD_VAR(vector_res); clean_results (); /* Initialize input "vector1" from "buffer". */ VLOAD(vector1, buffer, , float, f, 32, 2); VLOAD(vector1, buffer, q, float, f, 32, 4); VLOAD(vector1, buffer, q, float, f, 64, 2); /* Choose init value arbitrarily. */ VDUP(vector2, , float, f, 32, 2, 9.3f); VDUP(vector2, q, float, f, 32, 4, 29.7f); VDUP(vector2, q, float, f, 64, 2, 15.8f); /* Choose init value arbitrarily. */ VDUP(vector3, , float, f, 32, 2, 81.2f); VDUP(vector3, q, float, f, 32, 4, 36.8f); VDUP(vector3, q, float, f, 64, 2, 51.7f); /* Execute the tests. */ TEST_VFMA(, float, f, 32, 2); TEST_VFMA(q, float, f, 32, 4); TEST_VFMA(q, float, f, 64, 2); CHECK_VFMA_RESULTS (TEST_MSG, ""); }
void FNNAME (INSN_NAME) (void) { /* Basic test: y=vqdmull(x,x), then store the result. */ #define TEST_VQDMULL2(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ VECT_VAR(vector_res, T1, W2, N) = \ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ VECT_VAR(vector2, T1, W, N)); \ vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), \ VECT_VAR(vector_res, T1, W2, N)); \ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) /* Two auxliary macros are necessary to expand INSN. */ #define TEST_VQDMULL1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ TEST_VQDMULL2(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) #define TEST_VQDMULL(T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \ TEST_VQDMULL1(INSN_NAME, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) DECL_VARIABLE(vector, int, 16, 4); DECL_VARIABLE(vector, int, 32, 2); DECL_VARIABLE(vector2, int, 16, 4); DECL_VARIABLE(vector2, int, 32, 2); DECL_VARIABLE(vector_res, int, 32, 4); DECL_VARIABLE(vector_res, int, 64, 2); clean_results (); VLOAD(vector, buffer, , int, s, 16, 4); VLOAD(vector, buffer, , int, s, 32, 2); VLOAD(vector2, buffer, , int, s, 16, 4); VLOAD(vector2, buffer, , int, s, 32, 2); TEST_VQDMULL(int, s, 16, 32, 4, expected_cumulative_sat, ""); TEST_VQDMULL(int, s, 32, 64, 2, expected_cumulative_sat, ""); CHECK (TEST_MSG, int, 32, 4, PRIx32, expected, ""); CHECK (TEST_MSG, int, 64, 2, PRIx64, expected, ""); VDUP(vector, , int, s, 16, 4, 0x8000); VDUP(vector2, , int, s, 16, 4, 0x8000); VDUP(vector, , int, s, 32, 2, 0x80000000); VDUP(vector2, , int, s, 32, 2, 0x80000000); #define TEST_MSG2 "with saturation" TEST_VQDMULL(int, s, 16, 32, 4, expected_cumulative_sat2, TEST_MSG2); TEST_VQDMULL(int, s, 32, 64, 2, expected_cumulative_sat2, TEST_MSG2); CHECK (TEST_MSG, int, 32, 4, PRIx32, expected2, TEST_MSG2); CHECK (TEST_MSG, int, 64, 2, PRIx64, expected2, TEST_MSG2); }
void exec_vmull (void) { /* Basic test: y=vmull(x,x), then store the result. */ #define TEST_VMULL(T1, T2, W, W2, N) \ VECT_VAR(vector_res, T1, W2, N) = \ vmull_##T2##W(VECT_VAR(vector, T1, W, N), \ VECT_VAR(vector, T1, W, N)); \ vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) DECL_VARIABLE(vector, int, 8, 8); DECL_VARIABLE(vector, int, 16, 4); DECL_VARIABLE(vector, int, 32, 2); DECL_VARIABLE(vector, uint, 8, 8); DECL_VARIABLE(vector, uint, 16, 4); DECL_VARIABLE(vector, uint, 32, 2); DECL_VARIABLE(vector, poly, 8, 8); DECL_VARIABLE(vector_res, int, 16, 8); DECL_VARIABLE(vector_res, int, 32, 4); DECL_VARIABLE(vector_res, int, 64, 2); DECL_VARIABLE(vector_res, uint, 16, 8); DECL_VARIABLE(vector_res, uint, 32, 4); DECL_VARIABLE(vector_res, uint, 64, 2); DECL_VARIABLE(vector_res, poly, 16, 8); clean_results (); VLOAD(vector, buffer, , int, s, 8, 8); VLOAD(vector, buffer, , int, s, 16, 4); VLOAD(vector, buffer, , int, s, 32, 2); VLOAD(vector, buffer, , uint, u, 8, 8); VLOAD(vector, buffer, , uint, u, 16, 4); VLOAD(vector, buffer, , uint, u, 32, 2); VLOAD(vector, buffer, , poly, p, 8, 8); TEST_VMULL(int, s, 8, 16, 8); TEST_VMULL(int, s, 16, 32, 4); TEST_VMULL(int, s, 32, 64, 2); TEST_VMULL(uint, u, 8, 16, 8); TEST_VMULL(uint, u, 16, 32, 4); TEST_VMULL(uint, u, 32, 64, 2); TEST_VMULL(poly, p, 8, 16, 8); CHECK(TEST_MSG, int, 16, 8, PRIx64, expected, ""); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); CHECK(TEST_MSG, int, 64, 2, PRIx32, expected, ""); CHECK(TEST_MSG, uint, 16, 8, PRIx64, expected, ""); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); CHECK(TEST_MSG, uint, 64, 2, PRIx32, expected, ""); CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected, ""); }
void exec_vmull_lane (void) { /* vector_res = vmull_lane(vector,vector2,lane), then store the result. */ #define TEST_VMULL_LANE(T1, T2, W, W2, N, L) \ VECT_VAR(vector_res, T1, W2, N) = \ vmull##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ VECT_VAR(vector2, T1, W, N), \ L); \ vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) DECL_VARIABLE(vector, int, 16, 4); DECL_VARIABLE(vector, int, 32, 2); DECL_VARIABLE(vector, uint, 16, 4); DECL_VARIABLE(vector, uint, 32, 2); DECL_VARIABLE(vector2, int, 16, 4); DECL_VARIABLE(vector2, int, 32, 2); DECL_VARIABLE(vector2, uint, 16, 4); DECL_VARIABLE(vector2, uint, 32, 2); DECL_VARIABLE(vector_res, int, 32, 4); DECL_VARIABLE(vector_res, int, 64, 2); DECL_VARIABLE(vector_res, uint, 32, 4); DECL_VARIABLE(vector_res, uint, 64, 2); clean_results (); /* Initialize vector. */ VDUP(vector, , int, s, 16, 4, 0x1000); VDUP(vector, , int, s, 32, 2, 0x1000); VDUP(vector, , uint, u, 16, 4, 0x1000); VDUP(vector, , uint, u, 32, 2, 0x1000); /* Initialize vector2. */ VDUP(vector2, , int, s, 16, 4, 0x4); VDUP(vector2, , int, s, 32, 2, 0x2); VDUP(vector2, , uint, u, 16, 4, 0x4); VDUP(vector2, , uint, u, 32, 2, 0x2); /* Choose lane arbitrarily. */ TEST_VMULL_LANE(int, s, 16, 32, 4, 2); TEST_VMULL_LANE(int, s, 32, 64, 2, 1); TEST_VMULL_LANE(uint, u, 16, 32, 4, 2); TEST_VMULL_LANE(uint, u, 32, 64, 2, 1); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); CHECK(TEST_MSG, int, 64, 2, PRIx32, expected, ""); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); CHECK(TEST_MSG, uint, 64, 2, PRIx32, expected, ""); }
void vqabs_extra() { /* No need for 64 bits variants. */ DECL_VARIABLE(vector, int, 8, 8); DECL_VARIABLE(vector, int, 16, 4); DECL_VARIABLE(vector, int, 32, 2); DECL_VARIABLE(vector, int, 8, 16); DECL_VARIABLE(vector, int, 16, 8); DECL_VARIABLE(vector, int, 32, 4); DECL_VARIABLE(vector_res, int, 8, 8); DECL_VARIABLE(vector_res, int, 16, 4); DECL_VARIABLE(vector_res, int, 32, 2); DECL_VARIABLE(vector_res, int, 8, 16); DECL_VARIABLE(vector_res, int, 16, 8); DECL_VARIABLE(vector_res, int, 32, 4); clean_results (); /* Initialize input "vector" with min negative values to check saturation. */ VDUP(vector, , int, s, 8, 8, 0x80); VDUP(vector, , int, s, 16, 4, 0x8000); VDUP(vector, , int, s, 32, 2, 0x80000000); VDUP(vector, q, int, s, 8, 16, 0x80); VDUP(vector, q, int, s, 16, 8, 0x8000); VDUP(vector, q, int, s, 32, 4, 0x80000000); #define MSG "min negative input" TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 8, 8, expected_cumulative_sat_min_neg, MSG); TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 16, 4, expected_cumulative_sat_min_neg, MSG); TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 32, 2, expected_cumulative_sat_min_neg, MSG); TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 8, 16, expected_cumulative_sat_min_neg, MSG); TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 16, 8, expected_cumulative_sat_min_neg, MSG); TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 32, 4, expected_cumulative_sat_min_neg, MSG); CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_min_neg, MSG); CHECK(TEST_MSG, int, 16, 4, PRIx8, expected_min_neg, MSG); CHECK(TEST_MSG, int, 32, 2, PRIx8, expected_min_neg, MSG); CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_min_neg, MSG); CHECK(TEST_MSG, int, 16, 8, PRIx8, expected_min_neg, MSG); CHECK(TEST_MSG, int, 32, 4, PRIx8, expected_min_neg, MSG); }
void exec_vget_low (void) { /* Basic test: vec64=vget_low(vec128), then store the result. */ #define TEST_VGET_LOW(T1, T2, W, N, N2) \ VECT_VAR(vector64, T1, W, N) = \ vget_low_##T2##W(VECT_VAR(vector128, T1, W, N2)); \ vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector64, T1, W, N)) DECL_VARIABLE_64BITS_VARIANTS(vector64); DECL_VARIABLE_128BITS_VARIANTS(vector128); TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vector128, buffer); VLOAD(vector128, buffer, q, float, f, 32, 4); clean_results (); /* Execute the tests. */ TEST_VGET_LOW(int, s, 8, 8, 16); TEST_VGET_LOW(int, s, 16, 4, 8); TEST_VGET_LOW(int, s, 32, 2, 4); TEST_VGET_LOW(int, s, 64, 1, 2); TEST_VGET_LOW(uint, u, 8, 8, 16); TEST_VGET_LOW(uint, u, 16, 4, 8); TEST_VGET_LOW(uint, u, 32, 2, 4); TEST_VGET_LOW(uint, u, 64, 1, 2); TEST_VGET_LOW(poly, p, 8, 8, 16); TEST_VGET_LOW(poly, p, 16, 4, 8); TEST_VGET_LOW(float, f, 32, 2, 4); CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, ""); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, ""); CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, ""); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ""); }
void exec_vld1 (void) { /* Basic test vec=vld1(buffer); then store vec: vst1(result, vector). */ /* This test actually tests vdl1 and vst1 at the same time. */ #define TEST_VLD1(VAR, BUF, Q, T1, T2, W, N) \ VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N)); \ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N)) DECL_VARIABLE_ALL_VARIANTS(vector); clean_results (); TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1, vector, buffer); #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) TEST_VLD1(vector, buffer, , float, f, 16, 4); TEST_VLD1(vector, buffer, q, float, f, 16, 8); #endif TEST_VLD1(vector, buffer, , float, f, 32, 2); TEST_VLD1(vector, buffer, q, float, f, 32, 4); CHECK_RESULTS (TEST_MSG, ""); }
void exec_vstX_lane (void) { /* In this case, input variables are arrays of vectors. */ #define DECL_VSTX_LANE(T1, W, N, X) \ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \ VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N] /* We need to use a temporary result buffer (result_bis), because the one used for other tests is not large enough. A subset of the result data is moved from result_bis to result, and it is this subset which is used to check the actual behavior. The next macro enables to move another chunk of data from result_bis to result. */ /* We also use another extra input buffer (buffer_src), which we fill with 0xAA, and which it used to load a vector from which we read a given lane. */ #define TEST_VSTX_LANE(Q, T1, T2, W, N, X, L) \ memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \ sizeof(VECT_VAR(buffer_src, T1, W, N))); \ memset (VECT_VAR(result_bis_##X, T1, W, N), 0, \ sizeof(VECT_VAR(result_bis_##X, T1, W, N))); \ \ VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \ vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \ \ VECT_ARRAY_VAR(vector, T1, W, N, X) = \ /* Use dedicated init buffer, of size X. */ \ vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \ VECT_ARRAY_VAR(vector_src, T1, W, N, X), \ L); \ vst##X##Q##_lane_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \ VECT_ARRAY_VAR(vector, T1, W, N, X), \ L); \ memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \ sizeof(VECT_VAR(result, T1, W, N))); /* Overwrite "result" with the contents of "result_bis"[Y]. */ #define TEST_EXTRA_CHUNK(T1, W, N, X, Y) \ memcpy(VECT_VAR(result, T1, W, N), \ &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \ sizeof(VECT_VAR(result, T1, W, N))); /* We need all variants in 64 bits, but there is no 64x2 variant, nor 128 bits vectors of int8/uint8/poly8. 
*/ #define DECL_ALL_VSTX_LANE_NO_FP16(X) \ DECL_VSTX_LANE(int, 8, 8, X); \ DECL_VSTX_LANE(int, 16, 4, X); \ DECL_VSTX_LANE(int, 32, 2, X); \ DECL_VSTX_LANE(uint, 8, 8, X); \ DECL_VSTX_LANE(uint, 16, 4, X); \ DECL_VSTX_LANE(uint, 32, 2, X); \ DECL_VSTX_LANE(poly, 8, 8, X); \ DECL_VSTX_LANE(poly, 16, 4, X); \ DECL_VSTX_LANE(float, 32, 2, X); \ DECL_VSTX_LANE(int, 16, 8, X); \ DECL_VSTX_LANE(int, 32, 4, X); \ DECL_VSTX_LANE(uint, 16, 8, X); \ DECL_VSTX_LANE(uint, 32, 4, X); \ DECL_VSTX_LANE(poly, 16, 8, X); \ DECL_VSTX_LANE(float, 32, 4, X) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) #define DECL_ALL_VSTX_LANE(X) \ DECL_ALL_VSTX_LANE_NO_FP16(X); \ DECL_VSTX_LANE(float, 16, 4, X); \ DECL_VSTX_LANE(float, 16, 8, X) #else #define DECL_ALL_VSTX_LANE(X) DECL_ALL_VSTX_LANE_NO_FP16(X) #endif #define DUMMY_ARRAY(V, T, W, N, L) VECT_VAR_DECL(V,T,W,N)[N*L] /* Use the same lanes regardless of the size of the array (X), for simplicity. */ #define TEST_ALL_VSTX_LANE_NO_FP16(X) \ TEST_VSTX_LANE(, int, s, 8, 8, X, 7); \ TEST_VSTX_LANE(, int, s, 16, 4, X, 2); \ TEST_VSTX_LANE(, int, s, 32, 2, X, 0); \ TEST_VSTX_LANE(, float, f, 32, 2, X, 0); \ TEST_VSTX_LANE(, uint, u, 8, 8, X, 4); \ TEST_VSTX_LANE(, uint, u, 16, 4, X, 3); \ TEST_VSTX_LANE(, uint, u, 32, 2, X, 1); \ TEST_VSTX_LANE(, poly, p, 8, 8, X, 4); \ TEST_VSTX_LANE(, poly, p, 16, 4, X, 3); \ TEST_VSTX_LANE(q, int, s, 16, 8, X, 6); \ TEST_VSTX_LANE(q, int, s, 32, 4, X, 2); \ TEST_VSTX_LANE(q, uint, u, 16, 8, X, 5); \ TEST_VSTX_LANE(q, uint, u, 32, 4, X, 0); \ TEST_VSTX_LANE(q, poly, p, 16, 8, X, 5); \ TEST_VSTX_LANE(q, float, f, 32, 4, X, 2) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) #define TEST_ALL_VSTX_LANE(X) \ TEST_ALL_VSTX_LANE_NO_FP16(X); \ TEST_VSTX_LANE(, float, f, 16, 4, X, 2); \ TEST_VSTX_LANE(q, float, f, 16, 8, X, 6) #else #define TEST_ALL_VSTX_LANE(X) TEST_ALL_VSTX_LANE_NO_FP16(X) #endif #define TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y) \ 
TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \ TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \ TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \ TEST_EXTRA_CHUNK(uint, 8, 8, X, Y); \ TEST_EXTRA_CHUNK(uint, 16, 4, X, Y); \ TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \ TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \ TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \ TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \ TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \ TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \ TEST_EXTRA_CHUNK(uint, 16, 8, X, Y); \ TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \ TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \ TEST_EXTRA_CHUNK(float, 32, 4, X, Y) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) #define TEST_ALL_EXTRA_CHUNKS(X,Y) \ TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y); \ TEST_EXTRA_CHUNK(float, 16, 4, X, Y); \ TEST_EXTRA_CHUNK(float, 16, 8, X, Y) #else #define TEST_ALL_EXTRA_CHUNKS(X,Y) TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y) #endif /* Declare the temporary buffers / variables. */ DECL_ALL_VSTX_LANE(2); DECL_ALL_VSTX_LANE(3); DECL_ALL_VSTX_LANE(4); /* Define dummy input arrays, large enough for x4 vectors. */ DUMMY_ARRAY(buffer_src, int, 8, 8, 4); DUMMY_ARRAY(buffer_src, int, 16, 4, 4); DUMMY_ARRAY(buffer_src, int, 32, 2, 4); DUMMY_ARRAY(buffer_src, uint, 8, 8, 4); DUMMY_ARRAY(buffer_src, uint, 16, 4, 4); DUMMY_ARRAY(buffer_src, uint, 32, 2, 4); DUMMY_ARRAY(buffer_src, poly, 8, 8, 4); DUMMY_ARRAY(buffer_src, poly, 16, 4, 4); #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) DUMMY_ARRAY(buffer_src, float, 16, 4, 4); #endif DUMMY_ARRAY(buffer_src, float, 32, 2, 4); DUMMY_ARRAY(buffer_src, int, 16, 8, 4); DUMMY_ARRAY(buffer_src, int, 32, 4, 4); DUMMY_ARRAY(buffer_src, uint, 16, 8, 4); DUMMY_ARRAY(buffer_src, uint, 32, 4, 4); DUMMY_ARRAY(buffer_src, poly, 16, 8, 4); #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) DUMMY_ARRAY(buffer_src, float, 16, 8, 4); #endif DUMMY_ARRAY(buffer_src, float, 32, 4, 4); /* Check vst2_lane/vst2q_lane. 
*/ clean_results (); #define TEST_MSG "VST2_LANE/VST2Q_LANE" TEST_ALL_VSTX_LANE(2); #define CMT " (chunk 0)" CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st2_0, CMT); CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st2_0, CMT); CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st2_0, CMT); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st2_0, CMT); CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st2_0, CMT); CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st2_0, CMT); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st2_0, CMT); CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st2_0, CMT); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st2_0, CMT); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st2_0, CMT); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st2_0, CMT); CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st2_0, CMT); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st2_0, CMT); CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st2_0, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st2_0, CMT); #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st2_0, CMT); CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st2_0, CMT); #endif TEST_ALL_EXTRA_CHUNKS(2, 1); #undef CMT #define CMT " chunk 1" CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st2_1, CMT); CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st2_1, CMT); CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st2_1, CMT); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st2_1, CMT); CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st2_1, CMT); CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st2_1, CMT); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st2_1, CMT); CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st2_1, CMT); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st2_1, CMT); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st2_1, CMT); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st2_1, CMT); CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st2_1, CMT); 
CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st2_1, CMT); CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st2_1, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st2_1, CMT); #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st2_1, CMT); CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st2_1, CMT); #endif /* Check vst3_lane/vst3q_lane. */ clean_results (); #undef TEST_MSG #define TEST_MSG "VST3_LANE/VST3Q_LANE" TEST_ALL_VSTX_LANE(3); #undef CMT #define CMT " (chunk 0)" CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st3_0, CMT); CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st3_0, CMT); CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st3_0, CMT); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st3_0, CMT); CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st3_0, CMT); CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_0, CMT); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_0, CMT); CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_0, CMT); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_0, CMT); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_0, CMT); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_0, CMT); CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st3_0, CMT); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st3_0, CMT); CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st3_0, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st3_0, CMT); #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st3_0, CMT); CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st3_0, CMT); #endif TEST_ALL_EXTRA_CHUNKS(3, 1); #undef CMT #define CMT " (chunk 1)" CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st3_1, CMT); CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st3_1, CMT); CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st3_1, CMT); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st3_1, CMT); CHECK(TEST_MSG, uint, 16, 4, 
PRIx16, expected_st3_1, CMT); CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_1, CMT); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_1, CMT); CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_1, CMT); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_1, CMT); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_1, CMT); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_1, CMT); CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st3_1, CMT); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st3_1, CMT); CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st3_1, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st3_1, CMT); #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st3_1, CMT); CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st3_1, CMT); #endif TEST_ALL_EXTRA_CHUNKS(3, 2); #undef CMT #define CMT " (chunk 2)" CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st3_2, CMT); CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st3_2, CMT); CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st3_2, CMT); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st3_2, CMT); CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st3_2, CMT); CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_2, CMT); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_2, CMT); CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_2, CMT); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_2, CMT); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_2, CMT); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_2, CMT); CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st3_2, CMT); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st3_2, CMT); CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st3_2, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st3_2, CMT); #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st3_2, CMT); CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, 
expected_st3_2, CMT); #endif /* Check vst4_lane/vst4q_lane. */ clean_results (); #undef TEST_MSG #define TEST_MSG "VST4_LANE/VST4Q_LANE" TEST_ALL_VSTX_LANE(4); #undef CMT #define CMT " (chunk 0)" CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st4_0, CMT); CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st4_0, CMT); CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st4_0, CMT); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st4_0, CMT); CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st4_0, CMT); CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_0, CMT); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_0, CMT); CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_0, CMT); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_0, CMT); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_0, CMT); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_0, CMT); CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st4_0, CMT); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st4_0, CMT); CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_0, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st4_0, CMT); #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st4_0, CMT); CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st4_0, CMT); #endif TEST_ALL_EXTRA_CHUNKS(4, 1); #undef CMT #define CMT " (chunk 1)" CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st4_1, CMT); CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st4_1, CMT); CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st4_1, CMT); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st4_1, CMT); CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st4_1, CMT); CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_1, CMT); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_1, CMT); CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_1, CMT); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_1, CMT); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_1, CMT); CHECK(TEST_MSG, int, 32, 4, 
PRIx32, expected_st4_1, CMT); CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st4_1, CMT); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st4_1, CMT); CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_1, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st4_1, CMT); #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st4_1, CMT); CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st4_1, CMT); #endif TEST_ALL_EXTRA_CHUNKS(4, 2); #undef CMT #define CMT " (chunk 2)" CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st4_2, CMT); CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st4_2, CMT); CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st4_2, CMT); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st4_2, CMT); CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st4_2, CMT); CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_2, CMT); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_2, CMT); CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_2, CMT); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_2, CMT); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_2, CMT); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_2, CMT); CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st4_2, CMT); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st4_2, CMT); CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_2, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st4_2, CMT); #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st4_2, CMT); CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st4_2, CMT); #endif TEST_ALL_EXTRA_CHUNKS(4, 3); #undef CMT #define CMT " (chunk 3)" CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st4_3, CMT); CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st4_3, CMT); CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st4_3, CMT); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st4_3, CMT); CHECK(TEST_MSG, uint, 16, 4, PRIx16, 
expected_st4_3, CMT); CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_3, CMT); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_3, CMT); CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_3, CMT); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_3, CMT); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_3, CMT); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_3, CMT); CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st4_3, CMT); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st4_3, CMT); CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_3, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st4_3, CMT); #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st4_3, CMT); CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st4_3, CMT); #endif }
void FNNAME (INSN_NAME) (void) { /* Basic test: v3=vshl(v1,v2), then store the result. */ #define TEST_VSHL(T3, Q, T1, T2, W, N) \ VECT_VAR(vector_res, T1, W, N) = \ vshl##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ VECT_VAR(vector_shift, T3, W, N)); \ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) DECL_VARIABLE_ALL_VARIANTS(vector); DECL_VARIABLE_ALL_VARIANTS(vector_res); DECL_VARIABLE_SIGNED_VARIANTS(vector_shift); clean_results (); /* Initialize input "vector" from "buffer". */ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); /* Choose init value arbitrarily, will be used as shift amount. */ VDUP(vector_shift, , int, s, 8, 8, 1); VDUP(vector_shift, , int, s, 16, 4, 3); VDUP(vector_shift, , int, s, 32, 2, 8); VDUP(vector_shift, , int, s, 64, 1, 3); VDUP(vector_shift, q, int, s, 8, 16, 5); VDUP(vector_shift, q, int, s, 16, 8, 12); VDUP(vector_shift, q, int, s, 32, 4, 30); VDUP(vector_shift, q, int, s, 64, 2, 63); /* Execute the tests. */ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int); CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, ""); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, ""); CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); /* Test large shift amount (larger or equal to the type width. 
*/ VDUP(vector_shift, , int, s, 8, 8, 8); VDUP(vector_shift, , int, s, 16, 4, 16); VDUP(vector_shift, , int, s, 32, 2, 32); VDUP(vector_shift, , int, s, 64, 1, 64); VDUP(vector_shift, q, int, s, 8, 16, 8); VDUP(vector_shift, q, int, s, 16, 8, 17); VDUP(vector_shift, q, int, s, 32, 4, 33); VDUP(vector_shift, q, int, s, 64, 2, 65); /* Execute the tests. */ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int); #define COMMENT1 "(large shift amount)" CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_large_shift, COMMENT1); CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_large_shift, COMMENT1); CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_large_shift, COMMENT1); CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_large_shift, COMMENT1); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_large_shift, COMMENT1); CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_large_shift, COMMENT1); CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_large_shift, COMMENT1); CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_large_shift, COMMENT1); CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_large_shift, COMMENT1); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_large_shift, COMMENT1); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_large_shift, COMMENT1); CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_large_shift, COMMENT1); CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_large_shift, COMMENT1); CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_large_shift, COMMENT1); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_large_shift, COMMENT1); CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_large_shift, COMMENT1); /* Test negative shift amount. */ VDUP(vector_shift, , int, s, 8, 8, -1); VDUP(vector_shift, , int, s, 16, 4, -1); VDUP(vector_shift, , int, s, 32, 2, -2); VDUP(vector_shift, , int, s, 64, 1, -4); VDUP(vector_shift, q, int, s, 8, 16, -2); VDUP(vector_shift, q, int, s, 16, 8, -5); VDUP(vector_shift, q, int, s, 32, 4, -3); VDUP(vector_shift, q, int, s, 64, 2, -5); /* Execute the tests. 
*/ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int); #define COMMENT2 "(negative shift amount)" CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_negative_shift, COMMENT2); CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_negative_shift, COMMENT2); CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_negative_shift, COMMENT2); CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_negative_shift, COMMENT2); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_negative_shift, COMMENT2); CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_negative_shift, COMMENT2); CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_negative_shift, COMMENT2); CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_negative_shift, COMMENT2); CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_negative_shift, COMMENT2); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_negative_shift, COMMENT2); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_negative_shift, COMMENT2); CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_negative_shift, COMMENT2); CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_negative_shift, COMMENT2); CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_negative_shift, COMMENT2); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_negative_shift, COMMENT2); CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_negative_shift, COMMENT2); }
void exec_vdup_vmov (void) { int i; /* Basic test: vec=vdup(x), then store the result. */ #undef TEST_VDUP #define TEST_VDUP(Q, T1, T2, W, N) \ VECT_VAR(vector, T1, W, N) = \ vdup##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]); \ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N)) /* Basic test: vec=vmov(x), then store the result. */ #define TEST_VMOV(Q, T1, T2, W, N) \ VECT_VAR(vector, T1, W, N) = \ vmov##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]); \ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N)) DECL_VARIABLE_ALL_VARIANTS(vector); /* Try to read different places from the input buffer. */ for (i=0; i< 3; i++) { clean_results (); TEST_VDUP(, int, s, 8, 8); TEST_VDUP(, int, s, 16, 4); TEST_VDUP(, int, s, 32, 2); TEST_VDUP(, int, s, 64, 1); TEST_VDUP(, uint, u, 8, 8); TEST_VDUP(, uint, u, 16, 4); TEST_VDUP(, uint, u, 32, 2); TEST_VDUP(, uint, u, 64, 1); TEST_VDUP(, poly, p, 8, 8); TEST_VDUP(, poly, p, 16, 4); TEST_VDUP(, float, f, 32, 2); TEST_VDUP(q, int, s, 8, 16); TEST_VDUP(q, int, s, 16, 8); TEST_VDUP(q, int, s, 32, 4); TEST_VDUP(q, int, s, 64, 2); TEST_VDUP(q, uint, u, 8, 16); TEST_VDUP(q, uint, u, 16, 8); TEST_VDUP(q, uint, u, 32, 4); TEST_VDUP(q, uint, u, 64, 2); TEST_VDUP(q, poly, p, 8, 16); TEST_VDUP(q, poly, p, 16, 8); TEST_VDUP(q, float, f, 32, 4); switch (i) { case 0: CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, ""); break; case 1: CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected1, ""); break; case 2: CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected2, ""); break; default: abort(); } } /* Do the same tests with vmov. Use the same expected results. 
*/ #undef TEST_MSG #define TEST_MSG "VMOV/VMOVQ" for (i=0; i< 3; i++) { clean_results (); TEST_VMOV(, int, s, 8, 8); TEST_VMOV(, int, s, 16, 4); TEST_VMOV(, int, s, 32, 2); TEST_VMOV(, int, s, 64, 1); TEST_VMOV(, uint, u, 8, 8); TEST_VMOV(, uint, u, 16, 4); TEST_VMOV(, uint, u, 32, 2); TEST_VMOV(, uint, u, 64, 1); TEST_VMOV(, poly, p, 8, 8); TEST_VMOV(, poly, p, 16, 4); TEST_VMOV(, float, f, 32, 2); TEST_VMOV(q, int, s, 8, 16); TEST_VMOV(q, int, s, 16, 8); TEST_VMOV(q, int, s, 32, 4); TEST_VMOV(q, int, s, 64, 2); TEST_VMOV(q, uint, u, 8, 16); TEST_VMOV(q, uint, u, 16, 8); TEST_VMOV(q, uint, u, 32, 4); TEST_VMOV(q, uint, u, 64, 2); TEST_VMOV(q, poly, p, 8, 16); TEST_VMOV(q, poly, p, 16, 8); TEST_VMOV(q, float, f, 32, 4); switch (i) { case 0: CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, ""); break; case 1: CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected1, ""); break; case 2: CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected2, ""); break; default: abort(); } } }
void FNNAME (INSN_NAME) (void) { /* In this case, output variables are arrays of vectors. */ #define DECL_VZIP(T1, W, N) \ VECT_ARRAY_TYPE(T1, W, N, 2) VECT_ARRAY_VAR(result_vec, T1, W, N, 2); \ VECT_VAR_DECL(result_bis, T1, W, N)[2 * N] /* We need to use a temporary result buffer (result_bis), because the one used for other tests is not large enough. A subset of the result data is moved from result_bis to result, and it is this subset which is used to check the actual behaviour. The next macro enables to move another chunk of data from result_bis to result. */ #define TEST_VZIP(INSN, Q, T1, T2, W, N) \ VECT_ARRAY_VAR(result_vec, T1, W, N, 2) = \ INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ VECT_VAR(vector2, T1, W, N)); \ vst2##Q##_##T2##W(VECT_VAR(result_bis, T1, W, N), \ VECT_ARRAY_VAR(result_vec, T1, W, N, 2)); \ memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis, T1, W, N), \ sizeof(VECT_VAR(result, T1, W, N))); /* Overwrite "result" with the contents of "result_bis"[X]. */ #define TEST_EXTRA_CHUNK(T1, W, N, X) \ memcpy(VECT_VAR(result, T1, W, N), &(VECT_VAR(result_bis, T1, W, N)[X*N]), \ sizeof(VECT_VAR(result, T1, W, N))); DECL_VARIABLE_ALL_VARIANTS(vector1); DECL_VARIABLE_ALL_VARIANTS(vector2); /* We don't need 64 bits variants. */ #define DECL_ALL_VZIP() \ DECL_VZIP(int, 8, 8); \ DECL_VZIP(int, 16, 4); \ DECL_VZIP(int, 32, 2); \ DECL_VZIP(uint, 8, 8); \ DECL_VZIP(uint, 16, 4); \ DECL_VZIP(uint, 32, 2); \ DECL_VZIP(poly, 8, 8); \ DECL_VZIP(poly, 16, 4); \ DECL_VZIP(float, 32, 2); \ DECL_VZIP(int, 8, 16); \ DECL_VZIP(int, 16, 8); \ DECL_VZIP(int, 32, 4); \ DECL_VZIP(uint, 8, 16); \ DECL_VZIP(uint, 16, 8); \ DECL_VZIP(uint, 32, 4); \ DECL_VZIP(poly, 8, 16); \ DECL_VZIP(poly, 16, 8); \ DECL_VZIP(float, 32, 4) DECL_ALL_VZIP(); /* Initialize input "vector" from "buffer". */ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer); VLOAD(vector1, buffer, , float, f, 32, 2); VLOAD(vector1, buffer, q, float, f, 32, 4); /* Choose arbitrary initialization values. 
*/ VDUP(vector2, , int, s, 8, 8, 0x11); VDUP(vector2, , int, s, 16, 4, 0x22); VDUP(vector2, , int, s, 32, 2, 0x33); VDUP(vector2, , uint, u, 8, 8, 0x55); VDUP(vector2, , uint, u, 16, 4, 0x66); VDUP(vector2, , uint, u, 32, 2, 0x77); VDUP(vector2, , poly, p, 8, 8, 0x55); VDUP(vector2, , poly, p, 16, 4, 0x66); VDUP(vector2, , float, f, 32, 2, 33.6f); VDUP(vector2, q, int, s, 8, 16, 0x11); VDUP(vector2, q, int, s, 16, 8, 0x22); VDUP(vector2, q, int, s, 32, 4, 0x33); VDUP(vector2, q, uint, u, 8, 16, 0x55); VDUP(vector2, q, uint, u, 16, 8, 0x66); VDUP(vector2, q, uint, u, 32, 4, 0x77); VDUP(vector2, q, poly, p, 8, 16, 0x55); VDUP(vector2, q, poly, p, 16, 8, 0x66); VDUP(vector2, q, float, f, 32, 4, 33.8f); #define TEST_ALL_VZIP(INSN) \ TEST_VZIP(INSN, , int, s, 8, 8); \ TEST_VZIP(INSN, , int, s, 16, 4); \ TEST_VZIP(INSN, , int, s, 32, 2); \ TEST_VZIP(INSN, , uint, u, 8, 8); \ TEST_VZIP(INSN, , uint, u, 16, 4); \ TEST_VZIP(INSN, , uint, u, 32, 2); \ TEST_VZIP(INSN, , poly, p, 8, 8); \ TEST_VZIP(INSN, , poly, p, 16, 4); \ TEST_VZIP(INSN, , float, f, 32, 2); \ TEST_VZIP(INSN, q, int, s, 8, 16); \ TEST_VZIP(INSN, q, int, s, 16, 8); \ TEST_VZIP(INSN, q, int, s, 32, 4); \ TEST_VZIP(INSN, q, uint, u, 8, 16); \ TEST_VZIP(INSN, q, uint, u, 16, 8); \ TEST_VZIP(INSN, q, uint, u, 32, 4); \ TEST_VZIP(INSN, q, poly, p, 8, 16); \ TEST_VZIP(INSN, q, poly, p, 16, 8); \ TEST_VZIP(INSN, q, float, f, 32, 4) #define TEST_ALL_EXTRA_CHUNKS() \ TEST_EXTRA_CHUNK(int, 8, 8, 1); \ TEST_EXTRA_CHUNK(int, 16, 4, 1); \ TEST_EXTRA_CHUNK(int, 32, 2, 1); \ TEST_EXTRA_CHUNK(uint, 8, 8, 1); \ TEST_EXTRA_CHUNK(uint, 16, 4, 1); \ TEST_EXTRA_CHUNK(uint, 32, 2, 1); \ TEST_EXTRA_CHUNK(poly, 8, 8, 1); \ TEST_EXTRA_CHUNK(poly, 16, 4, 1); \ TEST_EXTRA_CHUNK(float, 32, 2, 1); \ TEST_EXTRA_CHUNK(int, 8, 16, 1); \ TEST_EXTRA_CHUNK(int, 16, 8, 1); \ TEST_EXTRA_CHUNK(int, 32, 4, 1); \ TEST_EXTRA_CHUNK(uint, 8, 16, 1); \ TEST_EXTRA_CHUNK(uint, 16, 8, 1); \ TEST_EXTRA_CHUNK(uint, 32, 4, 1); \ TEST_EXTRA_CHUNK(poly, 8, 16, 
1); \ TEST_EXTRA_CHUNK(poly, 16, 8, 1); \ TEST_EXTRA_CHUNK(float, 32, 4, 1) clean_results (); /* Execute the tests. */ TEST_ALL_VZIP(INSN_NAME); CHECK_RESULTS_NAMED (TEST_MSG, expected0, "(chunk 0)"); TEST_ALL_EXTRA_CHUNKS(); CHECK_RESULTS_NAMED (TEST_MSG, expected1, "(chunk 1)"); }
void exec_vldX_lane (void) { /* In this case, input variables are arrays of vectors. */ #define DECL_VLDX_LANE(T1, W, N, X) \ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \ VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N] /* We need to use a temporary result buffer (result_bis), because the one used for other tests is not large enough. A subset of the result data is moved from result_bis to result, and it is this subset which is used to check the actual behaviour. The next macro enables to move another chunk of data from result_bis to result. */ /* We also use another extra input buffer (buffer_src), which we fill with 0xAA, and which it used to load a vector from which we read a given lane. */ #define TEST_VLDX_LANE(Q, T1, T2, W, N, X, L) \ memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \ sizeof(VECT_VAR(buffer_src, T1, W, N))); \ \ VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \ vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \ \ VECT_ARRAY_VAR(vector, T1, W, N, X) = \ /* Use dedicated init buffer, of size. X */ \ vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \ VECT_ARRAY_VAR(vector_src, T1, W, N, X), \ L); \ vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \ VECT_ARRAY_VAR(vector, T1, W, N, X)); \ memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \ sizeof(VECT_VAR(result, T1, W, N))) /* Overwrite "result" with the contents of "result_bis"[Y]. */ #define TEST_EXTRA_CHUNK(T1, W, N, X, Y) \ memcpy(VECT_VAR(result, T1, W, N), \ &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \ sizeof(VECT_VAR(result, T1, W, N))); /* We need all variants in 64 bits, but there is no 64x2 variant. 
*/ #define DECL_ALL_VLDX_LANE(X) \ DECL_VLDX_LANE(int, 8, 8, X); \ DECL_VLDX_LANE(int, 16, 4, X); \ DECL_VLDX_LANE(int, 32, 2, X); \ DECL_VLDX_LANE(uint, 8, 8, X); \ DECL_VLDX_LANE(uint, 16, 4, X); \ DECL_VLDX_LANE(uint, 32, 2, X); \ DECL_VLDX_LANE(poly, 8, 8, X); \ DECL_VLDX_LANE(poly, 16, 4, X); \ DECL_VLDX_LANE(int, 16, 8, X); \ DECL_VLDX_LANE(int, 32, 4, X); \ DECL_VLDX_LANE(uint, 16, 8, X); \ DECL_VLDX_LANE(uint, 32, 4, X); \ DECL_VLDX_LANE(poly, 16, 8, X); \ DECL_VLDX_LANE(float, 32, 2, X); \ DECL_VLDX_LANE(float, 32, 4, X) /* Add some padding to try to catch out of bound accesses. */ #define ARRAY1(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[1]={42} #define DUMMY_ARRAY(V, T, W, N, L) \ VECT_VAR_DECL(V,T,W,N)[N*L]={0}; \ ARRAY1(V##_pad,T,W,N) /* Use the same lanes regardless of the size of the array (X), for simplicity. */ #define TEST_ALL_VLDX_LANE(X) \ TEST_VLDX_LANE(, int, s, 8, 8, X, 7); \ TEST_VLDX_LANE(, int, s, 16, 4, X, 2); \ TEST_VLDX_LANE(, int, s, 32, 2, X, 0); \ TEST_VLDX_LANE(, uint, u, 8, 8, X, 4); \ TEST_VLDX_LANE(, uint, u, 16, 4, X, 3); \ TEST_VLDX_LANE(, uint, u, 32, 2, X, 1); \ TEST_VLDX_LANE(, poly, p, 8, 8, X, 4); \ TEST_VLDX_LANE(, poly, p, 16, 4, X, 3); \ TEST_VLDX_LANE(q, int, s, 16, 8, X, 6); \ TEST_VLDX_LANE(q, int, s, 32, 4, X, 2); \ TEST_VLDX_LANE(q, uint, u, 16, 8, X, 5); \ TEST_VLDX_LANE(q, uint, u, 32, 4, X, 0); \ TEST_VLDX_LANE(q, poly, p, 16, 8, X, 5); \ TEST_VLDX_LANE(, float, f, 32, 2, X, 0); \ TEST_VLDX_LANE(q, float, f, 32, 4, X, 2) #define TEST_ALL_EXTRA_CHUNKS(X, Y) \ TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \ TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \ TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \ TEST_EXTRA_CHUNK(uint, 8, 8, X, Y); \ TEST_EXTRA_CHUNK(uint, 16, 4, X, Y); \ TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \ TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \ TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \ TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \ TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \ TEST_EXTRA_CHUNK(uint, 16, 8, X, Y); \ TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \ 
TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \ TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \ TEST_EXTRA_CHUNK(float, 32, 4, X, Y) /* vldX_lane supports only a subset of all variants. */ #define CHECK_RESULTS_VLDX_LANE(test_name,EXPECTED,comment) \ { \ CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \ CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \ CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment); \ CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment); \ CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment); \ CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \ CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \ CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \ CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment); \ CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment); \ CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \ CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \ CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \ CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \ } \ /* Declare the temporary buffers / variables. */ DECL_ALL_VLDX_LANE(2); DECL_ALL_VLDX_LANE(3); DECL_ALL_VLDX_LANE(4); /* Define dummy input arrays, large enough for x4 vectors. */ DUMMY_ARRAY(buffer_src, int, 8, 8, 4); DUMMY_ARRAY(buffer_src, int, 16, 4, 4); DUMMY_ARRAY(buffer_src, int, 32, 2, 4); DUMMY_ARRAY(buffer_src, uint, 8, 8, 4); DUMMY_ARRAY(buffer_src, uint, 16, 4, 4); DUMMY_ARRAY(buffer_src, uint, 32, 2, 4); DUMMY_ARRAY(buffer_src, poly, 8, 8, 4); DUMMY_ARRAY(buffer_src, poly, 16, 4, 4); DUMMY_ARRAY(buffer_src, int, 16, 8, 4); DUMMY_ARRAY(buffer_src, int, 32, 4, 4); DUMMY_ARRAY(buffer_src, uint, 16, 8, 4); DUMMY_ARRAY(buffer_src, uint, 32, 4, 4); DUMMY_ARRAY(buffer_src, poly, 16, 8, 4); DUMMY_ARRAY(buffer_src, float, 32, 2, 4); DUMMY_ARRAY(buffer_src, float, 32, 4, 4); /* Check vld2_lane/vld2q_lane. 
*/ clean_results (); #define TEST_MSG "VLD2_LANE/VLD2Q_LANE" TEST_ALL_VLDX_LANE(2); CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld2_0, " chunk 0"); TEST_ALL_EXTRA_CHUNKS(2, 1); CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld2_1, " chunk 1"); /* Check vld3_lane/vld3q_lane. */ clean_results (); #undef TEST_MSG #define TEST_MSG "VLD3_LANE/VLD3Q_LANE" TEST_ALL_VLDX_LANE(3); CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld3_0, " chunk 0"); TEST_ALL_EXTRA_CHUNKS(3, 1); CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld3_1, " chunk 1"); TEST_ALL_EXTRA_CHUNKS(3, 2); CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld3_2, " chunk 2"); /* Check vld4_lane/vld4q_lane. */ clean_results (); #undef TEST_MSG #define TEST_MSG "VLD4_LANE/VLD4Q_LANE" TEST_ALL_VLDX_LANE(4); CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld4_0, " chunk 0"); TEST_ALL_EXTRA_CHUNKS(4, 1); CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld4_1, " chunk 1"); TEST_ALL_EXTRA_CHUNKS(4, 2); CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld4_2, " chunk 2"); TEST_ALL_EXTRA_CHUNKS(4, 3); CHECK_RESULTS_VLDX_LANE (TEST_MSG, expected_vld4_3, " chunk 3"); }
void exec_vext (void) { /* vector_res = vext(vector1,vector2,offset), then store the result. */ #define TEST_VEXT(Q, T1, T2, W, N, V) \ VECT_VAR(vector_res, T1, W, N) = \ vext##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ VECT_VAR(vector2, T1, W, N), \ V); \ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) DECL_VARIABLE_ALL_VARIANTS(vector1); DECL_VARIABLE_ALL_VARIANTS(vector2); DECL_VARIABLE_ALL_VARIANTS(vector_res); clean_results (); TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer); VLOAD(vector1, buffer, , float, f, 32, 2); VLOAD(vector1, buffer, q, float, f, 32, 4); /* Choose arbitrary initialization values. */ VDUP(vector2, , int, s, 8, 8, 0x11); VDUP(vector2, , int, s, 16, 4, 0x22); VDUP(vector2, , int, s, 32, 2, 0x33); VDUP(vector2, , int, s, 64, 1, 0x44); VDUP(vector2, , uint, u, 8, 8, 0x55); VDUP(vector2, , uint, u, 16, 4, 0x66); VDUP(vector2, , uint, u, 32, 2, 0x77); VDUP(vector2, , uint, u, 64, 1, 0x88); VDUP(vector2, , poly, p, 8, 8, 0x55); VDUP(vector2, , poly, p, 16, 4, 0x66); VDUP(vector2, , float, f, 32, 2, 33.6f); VDUP(vector2, q, int, s, 8, 16, 0x11); VDUP(vector2, q, int, s, 16, 8, 0x22); VDUP(vector2, q, int, s, 32, 4, 0x33); VDUP(vector2, q, int, s, 64, 2, 0x44); VDUP(vector2, q, uint, u, 8, 16, 0x55); VDUP(vector2, q, uint, u, 16, 8, 0x66); VDUP(vector2, q, uint, u, 32, 4, 0x77); VDUP(vector2, q, uint, u, 64, 2, 0x88); VDUP(vector2, q, poly, p, 8, 16, 0x55); VDUP(vector2, q, poly, p, 16, 8, 0x66); VDUP(vector2, q, float, f, 32, 4, 33.2f); /* Choose arbitrary extract offsets. 
*/ TEST_VEXT(, int, s, 8, 8, 7); TEST_VEXT(, int, s, 16, 4, 3); TEST_VEXT(, int, s, 32, 2, 1); TEST_VEXT(, int, s, 64, 1, 0); TEST_VEXT(, uint, u, 8, 8, 6); TEST_VEXT(, uint, u, 16, 4, 2); TEST_VEXT(, uint, u, 32, 2, 1); TEST_VEXT(, uint, u, 64, 1, 0); TEST_VEXT(, poly, p, 8, 8, 6); TEST_VEXT(, poly, p, 16, 4, 2); TEST_VEXT(, float, f, 32, 2, 1); TEST_VEXT(q, int, s, 8, 16, 14); TEST_VEXT(q, int, s, 16, 8, 7); TEST_VEXT(q, int, s, 32, 4, 3); TEST_VEXT(q, int, s, 64, 2, 1); TEST_VEXT(q, uint, u, 8, 16, 12); TEST_VEXT(q, uint, u, 16, 8, 6); TEST_VEXT(q, uint, u, 32, 4, 3); TEST_VEXT(q, uint, u, 64, 2, 1); TEST_VEXT(q, poly, p, 8, 16, 12); TEST_VEXT(q, poly, p, 16, 8, 6); TEST_VEXT(q, float, f, 32, 4, 3); CHECK_RESULTS (TEST_MSG, ""); }
void exec_vrshrn_n (void) { /* Basic test: v2=vrshrn_n(v1,v), then store the result. */ #define TEST_VRSHRN_N(T1, T2, W, N, W2, V) \ VECT_VAR(vector_res, T1, W2, N) = \ vrshrn_n_##T2##W(VECT_VAR(vector, T1, W, N), \ V); \ vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) /* vector is twice as large as vector_res. */ DECL_VARIABLE(vector, int, 16, 8); DECL_VARIABLE(vector, int, 32, 4); DECL_VARIABLE(vector, int, 64, 2); DECL_VARIABLE(vector, uint, 16, 8); DECL_VARIABLE(vector, uint, 32, 4); DECL_VARIABLE(vector, uint, 64, 2); DECL_VARIABLE(vector_res, int, 8, 8); DECL_VARIABLE(vector_res, int, 16, 4); DECL_VARIABLE(vector_res, int, 32, 2); DECL_VARIABLE(vector_res, uint, 8, 8); DECL_VARIABLE(vector_res, uint, 16, 4); DECL_VARIABLE(vector_res, uint, 32, 2); clean_results (); /* Fill input vector with 0, to check behavior on limits. */ VDUP(vector, q, int, s, 16, 8, 0); VDUP(vector, q, int, s, 32, 4, 0); VDUP(vector, q, int, s, 64, 2, 0); VDUP(vector, q, uint, u, 16, 8, 0); VDUP(vector, q, uint, u, 32, 4, 0); VDUP(vector, q, uint, u, 64, 2, 0); /* Choose shift amount arbitrarily. */ TEST_VRSHRN_N(int, s, 16, 8, 8, 1); TEST_VRSHRN_N(int, s, 32, 4, 16, 1); TEST_VRSHRN_N(int, s, 64, 2, 32, 2); TEST_VRSHRN_N(uint, u, 16, 8, 8, 2); TEST_VRSHRN_N(uint, u, 32, 4, 16, 3); TEST_VRSHRN_N(uint, u, 64, 2, 32, 3); #define CMT " (with input = 0)" CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0, CMT); CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0, CMT); CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0, CMT); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0, CMT); CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0, CMT); CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0, CMT); /* Test again, with predefined input values. 
*/ VLOAD(vector, buffer, q, int, s, 16, 8); VLOAD(vector, buffer, q, int, s, 32, 4); VLOAD(vector, buffer, q, int, s, 64, 2); VLOAD(vector, buffer, q, uint, u, 16, 8); VLOAD(vector, buffer, q, uint, u, 32, 4); VLOAD(vector, buffer, q, uint, u, 64, 2); /* Choose shift amount arbitrarily. */ TEST_VRSHRN_N(int, s, 16, 8, 8, 1); TEST_VRSHRN_N(int, s, 32, 4, 16, 1); TEST_VRSHRN_N(int, s, 64, 2, 32, 2); TEST_VRSHRN_N(uint, u, 16, 8, 8, 2); TEST_VRSHRN_N(uint, u, 32, 4, 16, 3); TEST_VRSHRN_N(uint, u, 64, 2, 32, 3); #undef CMT #define CMT "" CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT); CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT); CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT); CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT); CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); /* Fill input arbitrary values. */ VDUP(vector, q, int, s, 16, 8, 30); VDUP(vector, q, int, s, 32, 4, 0); VDUP(vector, q, int, s, 64, 2, 0); VDUP(vector, q, uint, u, 16, 8, 0xFFF0); VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFF0); VDUP(vector, q, uint, u, 64, 2, 0); /* Choose large shift amount arbitrarily. */ TEST_VRSHRN_N(int, s, 16, 8, 8, 7); TEST_VRSHRN_N(int, s, 32, 4, 16, 14); TEST_VRSHRN_N(int, s, 64, 2, 32, 31); TEST_VRSHRN_N(uint, u, 16, 8, 8, 7); TEST_VRSHRN_N(uint, u, 32, 4, 16, 16); TEST_VRSHRN_N(uint, u, 64, 2, 32, 3); #undef CMT #define CMT " (with large shift amount)" CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_sh_large, CMT); CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_sh_large, CMT); CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_sh_large, CMT); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh_large, CMT); CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh_large, CMT); CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh_large, CMT); }
int main (void) { DECL_VARIABLE_128BITS_VARIANTS(vreint_vector); DECL_VARIABLE(vreint_vector, poly, 64, 2); DECL_VARIABLE_128BITS_VARIANTS(vreint_vector_res); DECL_VARIABLE(vreint_vector_res, poly, 64, 2); clean_results (); TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vreint_vector, buffer); VLOAD(vreint_vector, buffer, q, poly, p, 64, 2); VLOAD(vreint_vector, buffer, q, float, f, 16, 8); VLOAD(vreint_vector, buffer, q, float, f, 32, 4); /* vreinterpretq_p128_* tests. */ #undef TEST_MSG #define TEST_MSG "VREINTERPRETQ_P128_*" /* Since there is no way to store a poly128_t value, convert to poly64x2_t before storing. This means that we are not able to test vreinterpretq_p128* alone, and that errors in vreinterpretq_p64_p128 could compensate for errors in vreinterpretq_p128*. */ #define TEST_VREINTERPRET128(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \ VECT_VAR(vreint_vector_res, poly, 64, 2) = vreinterpretq_p64_p128( \ vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vreint_vector, TS1, WS, NS))); \ vst1##Q##_##T2##64(VECT_VAR(result, poly, 64, 2), \ VECT_VAR(vreint_vector_res, poly, 64, 2)); \ CHECK(TEST_MSG, T1, 64, 2, PRIx##64, EXPECTED, ""); TEST_VREINTERPRET128(q, poly, p, 128, 1, int, s, 8, 16, vreint_expected_q_p128_s8); TEST_VREINTERPRET128(q, poly, p, 128, 1, int, s, 16, 8, vreint_expected_q_p128_s16); TEST_VREINTERPRET128(q, poly, p, 128, 1, int, s, 32, 4, vreint_expected_q_p128_s32); TEST_VREINTERPRET128(q, poly, p, 128, 1, int, s, 64, 2, vreint_expected_q_p128_s64); TEST_VREINTERPRET128(q, poly, p, 128, 1, uint, u, 8, 16, vreint_expected_q_p128_u8); TEST_VREINTERPRET128(q, poly, p, 128, 1, uint, u, 16, 8, vreint_expected_q_p128_u16); TEST_VREINTERPRET128(q, poly, p, 128, 1, uint, u, 32, 4, vreint_expected_q_p128_u32); TEST_VREINTERPRET128(q, poly, p, 128, 1, uint, u, 64, 2, vreint_expected_q_p128_u64); TEST_VREINTERPRET128(q, poly, p, 128, 1, poly, p, 8, 16, vreint_expected_q_p128_p8); TEST_VREINTERPRET128(q, poly, p, 128, 1, poly, p, 16, 8, 
vreint_expected_q_p128_p16); TEST_VREINTERPRET128(q, poly, p, 128, 1, float, f, 16, 8, vreint_expected_q_p128_f16); TEST_VREINTERPRET128(q, poly, p, 128, 1, float, f, 32, 4, vreint_expected_q_p128_f32); /* vreinterpretq_*_p128 tests. */ #undef TEST_MSG #define TEST_MSG "VREINTERPRETQ_*_P128" /* Since there is no way to load a poly128_t value, load a poly64x2_t and convert it to poly128_t. This means that we are not able to test vreinterpretq_*_p128 alone, and that errors in vreinterpretq_p128_p64 could compensate for errors in vreinterpretq_*_p128*. */ #define TEST_VREINTERPRET_FROM_P128(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \ VECT_VAR(vreint_vector_res, T1, W, N) = \ vreinterpret##Q##_##T2##W##_##TS2##WS( \ vreinterpretq_p128_p64(VECT_VAR(vreint_vector, TS1, 64, 2))); \ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ VECT_VAR(vreint_vector_res, T1, W, N)); \ CHECK(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, ""); #define TEST_VREINTERPRET_FP_FROM_P128(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \ VECT_VAR(vreint_vector_res, T1, W, N) = \ vreinterpret##Q##_##T2##W##_##TS2##WS( \ vreinterpretq_p128_p64(VECT_VAR(vreint_vector, TS1, 64, 2))); \ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ VECT_VAR(vreint_vector_res, T1, W, N)); \ CHECK_FP(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, ""); TEST_VREINTERPRET_FROM_P128(q, int, s, 8, 16, poly, p, 128, 1, vreint_expected_q_s8_p128); TEST_VREINTERPRET_FROM_P128(q, int, s, 16, 8, poly, p, 128, 1, vreint_expected_q_s16_p128); TEST_VREINTERPRET_FROM_P128(q, int, s, 32, 4, poly, p, 128, 1, vreint_expected_q_s32_p128); TEST_VREINTERPRET_FROM_P128(q, int, s, 64, 2, poly, p, 128, 1, vreint_expected_q_s64_p128); TEST_VREINTERPRET_FROM_P128(q, uint, u, 8, 16, poly, p, 128, 1, vreint_expected_q_u8_p128); TEST_VREINTERPRET_FROM_P128(q, uint, u, 16, 8, poly, p, 128, 1, vreint_expected_q_u16_p128); TEST_VREINTERPRET_FROM_P128(q, uint, u, 32, 4, poly, p, 128, 1, vreint_expected_q_u32_p128); TEST_VREINTERPRET_FROM_P128(q, uint, u, 64, 2, 
poly, p, 128, 1, vreint_expected_q_u64_p128); TEST_VREINTERPRET_FROM_P128(q, poly, p, 8, 16, poly, p, 128, 1, vreint_expected_q_p8_p128); TEST_VREINTERPRET_FROM_P128(q, poly, p, 16, 8, poly, p, 128, 1, vreint_expected_q_p16_p128); TEST_VREINTERPRET_FP_FROM_P128(q, float, f, 16, 8, poly, p, 128, 1, vreint_expected_q_f16_p128); TEST_VREINTERPRET_FP_FROM_P128(q, float, f, 32, 4, poly, p, 128, 1, vreint_expected_q_f32_p128); return 0; }
void FNNAME (INSN_NAME) (void) { /* vector_res = vqdmulh_lane(vector,vector2,lane), then store the result. */ #define TEST_VQDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ VECT_VAR(vector_res, T1, W, N) = \ INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ VECT_VAR(vector2, T1, W, N2), \ L); \ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ VECT_VAR(vector_res, T1, W, N)); \ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) /* Two auxliary macros are necessary to expand INSN. */ #define TEST_VQDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \ TEST_VQDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) #define TEST_VQDMULH_LANE(Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \ TEST_VQDMULH_LANE1(INSN_NAME, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) DECL_VARIABLE(vector, int, 16, 4); DECL_VARIABLE(vector, int, 32, 2); DECL_VARIABLE(vector, int, 16, 8); DECL_VARIABLE(vector, int, 32, 4); DECL_VARIABLE(vector_res, int, 16, 4); DECL_VARIABLE(vector_res, int, 32, 2); DECL_VARIABLE(vector_res, int, 16, 8); DECL_VARIABLE(vector_res, int, 32, 4); /* vector2: vqdmulh_lane and vqdmulhq_lane have a 2nd argument with the same number of elements, so we need only one variable of each type. */ DECL_VARIABLE(vector2, int, 16, 4); DECL_VARIABLE(vector2, int, 32, 2); clean_results (); VLOAD(vector, buffer, , int, s, 16, 4); VLOAD(vector, buffer, , int, s, 32, 2); VLOAD(vector, buffer, q, int, s, 16, 8); VLOAD(vector, buffer, q, int, s, 32, 4); /* Initialize vector2. */ VDUP(vector2, , int, s, 16, 4, 0x55); VDUP(vector2, , int, s, 32, 2, 0xBB); /* Choose lane arbitrarily. 
*/ TEST_VQDMULH_LANE(, int, s, 16, 4, 4, 2, expected_cumulative_sat, ""); TEST_VQDMULH_LANE(, int, s, 32, 2, 2, 1, expected_cumulative_sat, ""); TEST_VQDMULH_LANE(q, int, s, 16, 8, 4, 3, expected_cumulative_sat, ""); TEST_VQDMULH_LANE(q, int, s, 32, 4, 2, 0, expected_cumulative_sat, ""); CHECK (TEST_MSG, int, 16, 4, PRIx16, expected, ""); CHECK (TEST_MSG, int, 32, 2, PRIx32, expected, ""); CHECK (TEST_MSG, int, 16, 8, PRIx16, expected, ""); CHECK (TEST_MSG, int, 32, 4, PRIx32, expected, ""); /* Choose input values to trigger saturation. */ VDUP(vector, , int, s, 16, 4, 0x8000); VDUP(vector, , int, s, 32, 2, 0x80000000); VDUP(vector, q, int, s, 16, 8, 0x8000); VDUP(vector, q, int, s, 32, 4, 0x80000000); VDUP(vector2, , int, s, 16, 4, 0x8000); VDUP(vector2, , int, s, 32, 2, 0x80000000); #define TEST_MSG2 " (check mul cumulative saturation)" TEST_VQDMULH_LANE(, int, s, 16, 4, 4, 3, expected_cumulative_sat2, TEST_MSG2); TEST_VQDMULH_LANE(, int, s, 32, 2, 2, 1, expected_cumulative_sat2, TEST_MSG2); TEST_VQDMULH_LANE(q, int, s, 16, 8, 4, 2, expected_cumulative_sat2, TEST_MSG2); TEST_VQDMULH_LANE(q, int, s, 32, 4, 2, 1, expected_cumulative_sat2, TEST_MSG2); CHECK (TEST_MSG, int, 16, 4, PRIx16, expected2, TEST_MSG2); CHECK (TEST_MSG, int, 32, 2, PRIx32, expected2, TEST_MSG2); CHECK (TEST_MSG, int, 16, 8, PRIx16, expected2, TEST_MSG2); CHECK (TEST_MSG, int, 32, 4, PRIx32, expected2, TEST_MSG2); }
/* Test of the table lookup intrinsics vtbl1..vtbl4 and vtbx1..vtbx4 on
   8 x 8-bit int, uint and poly vectors.  A 32-byte lookup table is filled
   once, an index vector is prepared once, and each intrinsic is then run
   and its stored result compared with the matching expected_vtbl<N> /
   expected_vtbx<N> data.  */
void exec_vtbX (void)
{
  int i;

  /* In this case, input variables are arrays of vectors.  */
#define DECL_VTBX(T1, W, N, X) \
  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(table_vector, T1, W, N, X)

  /* The vtbl1 variant is different from vtbl{2,3,4} because it takes a
     vector as 1st param, instead of an array of vectors.  The table is
     (re)loaded from lookup_table on every invocation; T3 selects the type
     of the index vector.  */
#define TEST_VTBL1(T1, T2, T3, W, N) \
  VECT_VAR(table_vector, T1, W, N) = \
    vld1##_##T2##W((T1##W##_t *)lookup_table); \
  \
  VECT_VAR(vector_res, T1, W, N) = \
    vtbl1_##T2##W(VECT_VAR(table_vector, T1, W, N), \
                  VECT_VAR(vector, T3, W, N)); \
  vst1_##T2##W(VECT_VAR(result, T1, W, N), \
               VECT_VAR(vector_res, T1, W, N));

  /* Same as TEST_VTBL1, but for the variants whose table is an array of X
     vectors, loaded with vld<X>.  */
#define TEST_VTBLX(T1, T2, T3, W, N, X) \
  VECT_ARRAY_VAR(table_vector, T1, W, N, X) = \
    vld##X##_##T2##W((T1##W##_t *)lookup_table); \
  \
  VECT_VAR(vector_res, T1, W, N) = \
    vtbl##X##_##T2##W(VECT_ARRAY_VAR(table_vector, T1, W, N, X), \
                      VECT_VAR(vector, T3, W, N)); \
  vst1_##T2##W(VECT_VAR(result, T1, W, N), \
               VECT_VAR(vector_res, T1, W, N));

  /* We need to define a lookup table.  */
  uint8_t lookup_table[32];

  /* Index vectors.  */
  DECL_VARIABLE(vector, int, 8, 8);
  DECL_VARIABLE(vector, uint, 8, 8);
  DECL_VARIABLE(vector, poly, 8, 8);
  DECL_VARIABLE(vector_res, int, 8, 8);
  DECL_VARIABLE(vector_res, uint, 8, 8);
  DECL_VARIABLE(vector_res, poly, 8, 8);

  /* For vtbl1.  */
  DECL_VARIABLE(table_vector, int, 8, 8);
  DECL_VARIABLE(table_vector, uint, 8, 8);
  DECL_VARIABLE(table_vector, poly, 8, 8);

  /* For vtbx*.  */
  DECL_VARIABLE(default_vector, int, 8, 8);
  DECL_VARIABLE(default_vector, uint, 8, 8);
  DECL_VARIABLE(default_vector, poly, 8, 8);

  /* We need only 8 bits variants.  */
#define DECL_ALL_VTBLX(X) \
  DECL_VTBX(int, 8, 8, X); \
  DECL_VTBX(uint, 8, 8, X); \
  DECL_VTBX(poly, 8, 8, X)

  /* Note that the poly variants are indexed with the uint vector (T3 is
     "uint" in the poly invocations below).  */
#define TEST_ALL_VTBL1() \
  TEST_VTBL1(int, s, int, 8, 8); \
  TEST_VTBL1(uint, u, uint, 8, 8); \
  TEST_VTBL1(poly, p, uint, 8, 8)

#define TEST_ALL_VTBLX(X) \
  TEST_VTBLX(int, s, int, 8, 8, X); \
  TEST_VTBLX(uint, u, uint, 8, 8, X); \
  TEST_VTBLX(poly, p, uint, 8, 8, X)

  /* Declare the temporary buffers / variables.  */
  DECL_ALL_VTBLX(2);
  DECL_ALL_VTBLX(3);
  DECL_ALL_VTBLX(4);

  /* Fill the lookup table.  The value i-15 is negative for i < 15 and is
     converted to uint8_t on assignment.  */
  for (i=0; i<32; i++) {
    lookup_table[i] = i-15;
  }

  /* Choose init value arbitrarily, will be used as table index.  */
  VDUP(vector, , int, s, 8, 8, 1);
  VDUP(vector, , uint, u, 8, 8, 2);
  VDUP(vector, , poly, p, 8, 8, 2);

  /* To ensure coverage, override some lanes with indexes larger than 8,
     16 and 32: lane 0 is set to 10, lane 4 to 20 and lane 5 to 40.  */
  VSET_LANE(vector, , int, s, 8, 8, 0, 10);
  VSET_LANE(vector, , int, s, 8, 8, 4, 20);
  VSET_LANE(vector, , int, s, 8, 8, 5, 40);
  VSET_LANE(vector, , uint, u, 8, 8, 0, 10);
  VSET_LANE(vector, , uint, u, 8, 8, 4, 20);
  VSET_LANE(vector, , uint, u, 8, 8, 5, 40);
  VSET_LANE(vector, , poly, p, 8, 8, 0, 10);
  VSET_LANE(vector, , poly, p, 8, 8, 4, 20);
  VSET_LANE(vector, , poly, p, 8, 8, 5, 40);

  /* Check vtbl1.  */
  clean_results ();
#define TEST_MSG "VTBL1"
  TEST_ALL_VTBL1();

  CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbl1, "");
  CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbl1, "");
  CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl1, "");

  /* Check vtbl2.  */
  clean_results ();
#undef TEST_MSG
#define TEST_MSG "VTBL2"
  TEST_ALL_VTBLX(2);

  CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbl2, "");
  CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbl2, "");
  CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl2, "");

  /* Check vtbl3.  */
  clean_results ();
#undef TEST_MSG
#define TEST_MSG "VTBL3"
  TEST_ALL_VTBLX(3);

  CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbl3, "");
  CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbl3, "");
  CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl3, "");

  /* Check vtbl4.  */
  clean_results ();
#undef TEST_MSG
#define TEST_MSG "VTBL4"
  TEST_ALL_VTBLX(4);

  CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbl4, "");
  CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbl4, "");
  CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl4, "");


  /* Now test VTBX.  */

  /* The vtbx1 variant is different from vtbx{2,3,4} because it takes a
     vector as 1st param, instead of an array of vectors.  Compared with
     the vtbl macros, default_vector is passed as an extra first
     argument.  */
#define TEST_VTBX1(T1, T2, T3, W, N) \
  VECT_VAR(table_vector, T1, W, N) = \
    vld1##_##T2##W((T1##W##_t *)lookup_table); \
  \
  VECT_VAR(vector_res, T1, W, N) = \
    vtbx1_##T2##W(VECT_VAR(default_vector, T1, W, N), \
                  VECT_VAR(table_vector, T1, W, N), \
                  VECT_VAR(vector, T3, W, N)); \
  vst1_##T2##W(VECT_VAR(result, T1, W, N), \
               VECT_VAR(vector_res, T1, W, N));

#define TEST_VTBXX(T1, T2, T3, W, N, X) \
  VECT_ARRAY_VAR(table_vector, T1, W, N, X) = \
    vld##X##_##T2##W((T1##W##_t *)lookup_table); \
  \
  VECT_VAR(vector_res, T1, W, N) = \
    vtbx##X##_##T2##W(VECT_VAR(default_vector, T1, W, N), \
                      VECT_ARRAY_VAR(table_vector, T1, W, N, X), \
                      VECT_VAR(vector, T3, W, N)); \
  vst1_##T2##W(VECT_VAR(result, T1, W, N), \
               VECT_VAR(vector_res, T1, W, N));

#define TEST_ALL_VTBX1() \
  TEST_VTBX1(int, s, int, 8, 8); \
  TEST_VTBX1(uint, u, uint, 8, 8); \
  TEST_VTBX1(poly, p, uint, 8, 8)

#define TEST_ALL_VTBXX(X) \
  TEST_VTBXX(int, s, int, 8, 8, X); \
  TEST_VTBXX(uint, u, uint, 8, 8, X); \
  TEST_VTBXX(poly, p, uint, 8, 8, X)

  /* Choose init value arbitrarily, will be used as default value.  */
  VDUP(default_vector, , int, s, 8, 8, 0x33);
  VDUP(default_vector, , uint, u, 8, 8, 0xCC);
  VDUP(default_vector, , poly, p, 8, 8, 0xCC);

  /* Check vtbx1.  */
  clean_results ();
#undef TEST_MSG
#define TEST_MSG "VTBX1"
  TEST_ALL_VTBX1();

  CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbx1, "");
  CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbx1, "");
  CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx1, "");

  /* Check vtbx2.  */
  clean_results ();
#undef TEST_MSG
#define TEST_MSG "VTBX2"
  TEST_ALL_VTBXX(2);

  CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbx2, "");
  CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbx2, "");
  CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx2, "");

  /* Check vtbx3.  */
  clean_results ();
#undef TEST_MSG
#define TEST_MSG "VTBX3"
  TEST_ALL_VTBXX(3);

  CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbx3, "");
  CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbx3, "");
  CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx3, "");

  /* Check vtbx4.  */
  clean_results ();
#undef TEST_MSG
#define TEST_MSG "VTBX4"
  TEST_ALL_VTBXX(4);

  CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbx4, "");
  CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbx4, "");
  CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx4, "");
}
void exec_vrsqrte(void) { int i; /* Basic test: y=vrsqrte(x), then store the result. */ #define TEST_VRSQRTE(Q, T1, T2, W, N) \ VECT_VAR(vector_res, T1, W, N) = \ vrsqrte##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ VECT_VAR(vector_res, T1, W, N)) DECL_VARIABLE(vector, uint, 32, 2); DECL_VARIABLE(vector, float, 32, 2); DECL_VARIABLE(vector, uint, 32, 4); DECL_VARIABLE(vector, float, 32, 4); DECL_VARIABLE(vector_res, uint, 32, 2); DECL_VARIABLE(vector_res, float, 32, 2); DECL_VARIABLE(vector_res, uint, 32, 4); DECL_VARIABLE(vector_res, float, 32, 4); clean_results (); /* Choose init value arbitrarily. */ VDUP(vector, , uint, u, 32, 2, 0x12345678); VDUP(vector, , float, f, 32, 2, 25.799999f); VDUP(vector, q, uint, u, 32, 4, 0xABCDEF10); VDUP(vector, q, float, f, 32, 4, 18.2f); /* Apply the operator. */ TEST_VRSQRTE(, uint, u, 32, 2); TEST_VRSQRTE(, float, f, 32, 2); TEST_VRSQRTE(q, uint, u, 32, 4); TEST_VRSQRTE(q, float, f, 32, 4); #define CMT "" CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, CMT); /* Don't test FP variants with negative inputs. */ /* Use input with various values of bits 30 and 31. */ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); VDUP(vector, q, uint, u, 32, 4, 0x89081234); /* Apply the operator. */ TEST_VRSQRTE(, uint, u, 32, 2); TEST_VRSQRTE(q, uint, u, 32, 4); #undef CMT #define CMT " (large uint #1)" CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_1, CMT); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_1, CMT); /* Choose init value arbitrarily. */ VDUP(vector, , uint, u, 32, 2, 0x80000000); VDUP(vector, q, uint, u, 32, 4, 0x4ABCDEF0); /* Apply the operator. 
*/ TEST_VRSQRTE(, uint, u, 32, 2); TEST_VRSQRTE(q, uint, u, 32, 4); #undef CMT #define CMT " (large uint #2)" CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_2, CMT); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_2, CMT); /* Test FP variants with special input values (NaNs, ...). */ VDUP(vector, , float, f, 32, 2, NAN); VDUP(vector, q, float, f, 32, 4, 0.0f); /* Apply the operator. */ TEST_VRSQRTE(, float, f, 32, 2); TEST_VRSQRTE(q, float, f, 32, 4); #undef CMT #define CMT " FP special (NaN, 0)" CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp1, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp1, CMT); /* Test FP variants with special input values (negative, infinity). */ VDUP(vector, , float, f, 32, 2, -1.0f); VDUP(vector, q, float, f, 32, 4, HUGE_VALF); /* Apply the operator. */ TEST_VRSQRTE(, float, f, 32, 2); TEST_VRSQRTE(q, float, f, 32, 4); #undef CMT #define CMT " FP special (negative, infinity)" CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp2, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp2, CMT); /* Test FP variants with special input values (-0, -infinity). */ VDUP(vector, , float, f, 32, 2, -0.0f); VDUP(vector, q, float, f, 32, 4, -HUGE_VALF); /* Apply the operator. */ TEST_VRSQRTE(, float, f, 32, 2); TEST_VRSQRTE(q, float, f, 32, 4); #undef CMT #define CMT " FP special (-0, -infinity)" CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp3, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp3, CMT); }
void exec_vabal (void) { /* Basic test: v4=vabal(v1,v2,v3), then store the result. */ #define TEST_VABAL(T1, T2, W, W2, N) \ VECT_VAR(vector_res, T1, W2, N) = \ vabal_##T2##W(VECT_VAR(vector1, T1, W2, N), \ VECT_VAR(vector2, T1, W, N), \ VECT_VAR(vector3, T1, W, N)); \ vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) #define DECL_VABAL_VAR_LONG(VAR) \ DECL_VARIABLE(VAR, int, 16, 8); \ DECL_VARIABLE(VAR, int, 32, 4); \ DECL_VARIABLE(VAR, int, 64, 2); \ DECL_VARIABLE(VAR, uint, 16, 8); \ DECL_VARIABLE(VAR, uint, 32, 4); \ DECL_VARIABLE(VAR, uint, 64, 2) #define DECL_VABAL_VAR_SHORT(VAR) \ DECL_VARIABLE(VAR, int, 8, 8); \ DECL_VARIABLE(VAR, int, 16, 4); \ DECL_VARIABLE(VAR, int, 32, 2); \ DECL_VARIABLE(VAR, uint, 8, 8); \ DECL_VARIABLE(VAR, uint, 16, 4); \ DECL_VARIABLE(VAR, uint, 32, 2) DECL_VABAL_VAR_LONG(vector1); DECL_VABAL_VAR_SHORT(vector2); DECL_VABAL_VAR_SHORT(vector3); DECL_VABAL_VAR_LONG(vector_res); clean_results (); /* Initialize input "vector1" from "buffer". */ VLOAD(vector1, buffer, q, int, s, 16, 8); VLOAD(vector1, buffer, q, int, s, 32, 4); VLOAD(vector1, buffer, q, int, s, 64, 2); VLOAD(vector1, buffer, q, uint, u, 16, 8); VLOAD(vector1, buffer, q, uint, u, 32, 4); VLOAD(vector1, buffer, q, uint, u, 64, 2); /* Choose init value arbitrarily. */ VDUP(vector2, , int, s, 8, 8, 1); VDUP(vector2, , int, s, 16, 4, -13); VDUP(vector2, , int, s, 32, 2, 8); VDUP(vector2, , uint, u, 8, 8, 1); VDUP(vector2, , uint, u, 16, 4, 13); VDUP(vector2, , uint, u, 32, 2, 8); /* Choose init value arbitrarily. */ VDUP(vector3, , int, s, 8, 8, -5); VDUP(vector3, , int, s, 16, 4, 25); VDUP(vector3, , int, s, 32, 2, -40); VDUP(vector3, , uint, u, 8, 8, 100); VDUP(vector3, , uint, u, 16, 4, 2340); VDUP(vector3, , uint, u, 32, 2, 0xffffffff); /* Execute the tests. 
*/ TEST_VABAL(int, s, 8, 16, 8); TEST_VABAL(int, s, 16, 32, 4); TEST_VABAL(int, s, 32, 64, 2); TEST_VABAL(uint, u, 8, 16, 8); TEST_VABAL(uint, u, 16, 32, 4); TEST_VABAL(uint, u, 32, 64, 2); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); /* Use values that could lead to overflow intermediate * calculations. */ VDUP(vector2, , int, s, 8, 8, 0x80); VDUP(vector2, , int, s, 16, 4, 0x8000); VDUP(vector2, , int, s, 32, 2, 0x80000000); VDUP(vector2, , uint, u, 8, 8, 1); VDUP(vector2, , uint, u, 16, 4, 13); VDUP(vector2, , uint, u, 32, 2, 8); VDUP(vector3, , int, s, 8, 8, 0x7f); VDUP(vector3, , int, s, 16, 4, 0x7fff); VDUP(vector3, , int, s, 32, 2, 0x7fffffff); VDUP(vector3, , uint, u, 8, 8, 0xff); VDUP(vector3, , uint, u, 16, 4, 0xffff); VDUP(vector3, , uint, u, 32, 2, 0xffffffff); TEST_VABAL(int, s, 8, 16, 8); TEST_VABAL(int, s, 16, 32, 4); TEST_VABAL(int, s, 32, 64, 2); TEST_VABAL(uint, u, 8, 16, 8); TEST_VABAL(uint, u, 16, 32, 4); TEST_VABAL(uint, u, 32, 64, 2); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected2, " test intermediate overflow"); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected2, " test intermediate overflow"); CHECK(TEST_MSG, int, 64, 2, PRIx64, expected2, " test intermediate overflow"); CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected2, " test intermediate overflow"); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected2, " test intermediate overflow"); CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected2, " test intermediate overflow"); }
void exec_vaba (void) { /* Basic test: v4=vaba(v1,v2,v3), then store the result. */ #define TEST_VABA(Q, T1, T2, W, N) \ VECT_VAR(vector_res, T1, W, N) = \ vaba##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ VECT_VAR(vector2, T1, W, N), \ VECT_VAR(vector3, T1, W, N)); \ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) #define DECL_VABA_VAR(VAR) \ DECL_VARIABLE(VAR, int, 8, 8); \ DECL_VARIABLE(VAR, int, 16, 4); \ DECL_VARIABLE(VAR, int, 32, 2); \ DECL_VARIABLE(VAR, uint, 8, 8); \ DECL_VARIABLE(VAR, uint, 16, 4); \ DECL_VARIABLE(VAR, uint, 32, 2); \ DECL_VARIABLE(VAR, int, 8, 16); \ DECL_VARIABLE(VAR, int, 16, 8); \ DECL_VARIABLE(VAR, int, 32, 4); \ DECL_VARIABLE(VAR, uint, 8, 16); \ DECL_VARIABLE(VAR, uint, 16, 8); \ DECL_VARIABLE(VAR, uint, 32, 4) DECL_VABA_VAR(vector1); DECL_VABA_VAR(vector2); DECL_VABA_VAR(vector3); DECL_VABA_VAR(vector_res); clean_results (); /* Initialize input "vector1" from "buffer". */ VLOAD(vector1, buffer, , int, s, 8, 8); VLOAD(vector1, buffer, , int, s, 16, 4); VLOAD(vector1, buffer, , int, s, 32, 2); VLOAD(vector1, buffer, , uint, u, 8, 8); VLOAD(vector1, buffer, , uint, u, 16, 4); VLOAD(vector1, buffer, , uint, u, 32, 2); VLOAD(vector1, buffer, q, int, s, 8, 16); VLOAD(vector1, buffer, q, int, s, 16, 8); VLOAD(vector1, buffer, q, int, s, 32, 4); VLOAD(vector1, buffer, q, uint, u, 8, 16); VLOAD(vector1, buffer, q, uint, u, 16, 8); VLOAD(vector1, buffer, q, uint, u, 32, 4); /* Choose init value arbitrarily. */ VDUP(vector2, , int, s, 8, 8, 1); VDUP(vector2, , int, s, 16, 4, -13); VDUP(vector2, , int, s, 32, 2, 8); VDUP(vector2, , uint, u, 8, 8, 1); VDUP(vector2, , uint, u, 16, 4, 13); VDUP(vector2, , uint, u, 32, 2, 8); VDUP(vector2, q, int, s, 8, 16, 10); VDUP(vector2, q, int, s, 16, 8, -12); VDUP(vector2, q, int, s, 32, 4, 32); VDUP(vector2, q, uint, u, 8, 16, 10); VDUP(vector2, q, uint, u, 16, 8, 12); VDUP(vector2, q, uint, u, 32, 4, 32); /* Choose init value arbitrarily. 
*/ VDUP(vector3, , int, s, 8, 8, -5); VDUP(vector3, , int, s, 16, 4, 25); VDUP(vector3, , int, s, 32, 2, -40); VDUP(vector3, , uint, u, 8, 8, 100); VDUP(vector3, , uint, u, 16, 4, 2340); VDUP(vector3, , uint, u, 32, 2, 0xffffffff); VDUP(vector3, q, int, s, 8, 16, -100); VDUP(vector3, q, int, s, 16, 8, -3000); VDUP(vector3, q, int, s, 32, 4, 10000); VDUP(vector3, q, uint, u, 8, 16, 2); VDUP(vector3, q, uint, u, 16, 8, 3); VDUP(vector3, q, uint, u, 32, 4, 4); /* Execute the tests. */ TEST_VABA(, int, s, 8, 8); TEST_VABA(, int, s, 16, 4); TEST_VABA(, int, s, 32, 2); TEST_VABA(, uint, u, 8, 8); TEST_VABA(, uint, u, 16, 4); TEST_VABA(, uint, u, 32, 2); TEST_VABA(q, int, s, 8, 16); TEST_VABA(q, int, s, 16, 8); TEST_VABA(q, int, s, 32, 4); TEST_VABA(q, uint, u, 8, 16); TEST_VABA(q, uint, u, 16, 8); TEST_VABA(q, uint, u, 32, 4); CHECK_RESULTS (TEST_MSG, ""); }
void FNNAME (INSN_NAME) (void) { #define DECL_VMUL(VAR) \ DECL_VARIABLE(VAR, int, 16, 4); \ DECL_VARIABLE(VAR, int, 32, 2); \ DECL_VARIABLE(VAR, uint, 16, 4); \ DECL_VARIABLE(VAR, uint, 32, 2); \ DECL_VARIABLE(VAR, float, 32, 2); \ DECL_VARIABLE(VAR, int, 16, 8); \ DECL_VARIABLE(VAR, int, 32, 4); \ DECL_VARIABLE(VAR, uint, 16, 8); \ DECL_VARIABLE(VAR, uint, 32, 4); \ DECL_VARIABLE(VAR, float, 32, 4) /* vector_res = vmul_n(vector,val), then store the result. */ #define TEST_VMUL_N(Q, T1, T2, W, N, L) \ VECT_VAR(vector_res, T1, W, N) = \ vmul##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ L); \ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ VECT_VAR(vector_res, T1, W, N)) DECL_VMUL(vector); DECL_VMUL(vector_res); clean_results (); /* Initialize vector from pre-initialized values. */ VLOAD(vector, buffer, , int, s, 16, 4); VLOAD(vector, buffer, , int, s, 32, 2); VLOAD(vector, buffer, , uint, u, 16, 4); VLOAD(vector, buffer, , uint, u, 32, 2); VLOAD(vector, buffer, , float, f, 32, 2); VLOAD(vector, buffer, q, int, s, 16, 8); VLOAD(vector, buffer, q, int, s, 32, 4); VLOAD(vector, buffer, q, uint, u, 16, 8); VLOAD(vector, buffer, q, uint, u, 32, 4); VLOAD(vector, buffer, q, float, f, 32, 4); /* Choose multiplier arbitrarily. 
*/ TEST_VMUL_N(, int, s, 16, 4, 0x11); TEST_VMUL_N(, int, s, 32, 2, 0x22); TEST_VMUL_N(, uint, u, 16, 4, 0x33); TEST_VMUL_N(, uint, u, 32, 2, 0x44); TEST_VMUL_N(, float, f, 32, 2, 22.3f); TEST_VMUL_N(q, int, s, 16, 8, 0x55); TEST_VMUL_N(q, int, s, 32, 4, 0x66); TEST_VMUL_N(q, uint, u, 16, 8, 0x77); TEST_VMUL_N(q, uint, u, 32, 4, 0x88); TEST_VMUL_N(q, float, f, 32, 4, 88.9f); CHECK(TEST_MSG, int, 16, 4, PRIx64, expected, ""); CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); CHECK(TEST_MSG, uint, 16, 4, PRIx64, expected, ""); CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ""); CHECK(TEST_MSG, int, 16, 8, PRIx64, expected, ""); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); CHECK(TEST_MSG, uint, 16, 8, PRIx64, expected, ""); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, ""); }
void exec_vrecps(void) { int i; /* Basic test: y=vrecps(x), then store the result. */ #define TEST_VRECPS(Q, T1, T2, W, N) \ VECT_VAR(vector_res, T1, W, N) = \ vrecps##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ VECT_VAR(vector2, T1, W, N)); \ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ VECT_VAR(vector_res, T1, W, N)) /* No need for integer variants. */ #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) DECL_VARIABLE(vector, float, 16, 4); DECL_VARIABLE(vector, float, 16, 8); #endif DECL_VARIABLE(vector, float, 32, 2); DECL_VARIABLE(vector, float, 32, 4); #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) DECL_VARIABLE(vector2, float, 16, 4); DECL_VARIABLE(vector2, float, 16, 8); #endif DECL_VARIABLE(vector2, float, 32, 2); DECL_VARIABLE(vector2, float, 32, 4); #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) DECL_VARIABLE(vector_res, float, 16, 4); DECL_VARIABLE(vector_res, float, 16, 8); #endif DECL_VARIABLE(vector_res, float, 32, 2); DECL_VARIABLE(vector_res, float, 32, 4); clean_results (); /* Choose init value arbitrarily. */ #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) VDUP(vector, , float, f, 16, 4, 12.9f); VDUP(vector, q, float, f, 16, 8, 9.2f); #endif VDUP(vector, , float, f, 32, 2, 12.9f); VDUP(vector, q, float, f, 32, 4, 9.2f); #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) VDUP(vector2, , float, f, 16, 4, 8.9f); VDUP(vector2, q, float, f, 16, 8, 3.2f); #endif VDUP(vector2, , float, f, 32, 2, 8.9f); VDUP(vector2, q, float, f, 32, 4, 3.2f); /* Apply the operator. 
*/ #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) TEST_VRECPS(, float, f, 16, 4); TEST_VRECPS(q, float, f, 16, 8); #endif TEST_VRECPS(, float, f, 32, 2); TEST_VRECPS(q, float, f, 32, 4); #define CMT " (positive input)" #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, CMT); CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, CMT); #endif CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, CMT); /* Test FP variants with special input values (NaN). */ #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) VDUP(vector, , float, f, 16, 4, NAN); VDUP(vector2, q, float, f, 16, 8, NAN); #endif VDUP(vector, , float, f, 32, 2, NAN); VDUP(vector2, q, float, f, 32, 4, NAN); /* Apply the operator. */ #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) TEST_VRECPS(, float, f, 16, 4); TEST_VRECPS(q, float, f, 16, 8); #endif TEST_VRECPS(, float, f, 32, 2); TEST_VRECPS(q, float, f, 32, 4); #undef CMT #define CMT " FP special (NaN)" #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp1, CMT); CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp1, CMT); #endif CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp1, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp1, CMT); /* Test FP variants with special input values (infinity, 0). */ #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) VDUP(vector, , float, f, 16, 4, HUGE_VALF); VDUP(vector, q, float, f, 16, 8, 0.0f); VDUP(vector2, q, float, f, 16, 8, 3.2f); /* Restore a normal value. */ #endif VDUP(vector, , float, f, 32, 2, HUGE_VALF); VDUP(vector, q, float, f, 32, 4, 0.0f); VDUP(vector2, q, float, f, 32, 4, 3.2f); /* Restore a normal value. */ /* Apply the operator. 
*/ #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) TEST_VRECPS(, float, f, 16, 4); TEST_VRECPS(q, float, f, 16, 8); #endif TEST_VRECPS(, float, f, 32, 2); TEST_VRECPS(q, float, f, 32, 4); #undef CMT #define CMT " FP special (infinity, 0) and normal value" #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp2, CMT); CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp2, CMT); #endif CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp2, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp2, CMT); /* Test FP variants with only special input values (infinity, 0). */ #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) VDUP(vector, , float, f, 16, 4, HUGE_VALF); VDUP(vector, q, float, f, 16, 8, 0.0f); VDUP(vector2, , float, f, 16, 4, 0.0f); VDUP(vector2, q, float, f, 16, 8, HUGE_VALF); #endif VDUP(vector, , float, f, 32, 2, HUGE_VALF); VDUP(vector, q, float, f, 32, 4, 0.0f); VDUP(vector2, , float, f, 32, 2, 0.0f); VDUP(vector2, q, float, f, 32, 4, HUGE_VALF); /* Apply the operator */ #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) TEST_VRECPS(, float, f, 16, 4); TEST_VRECPS(q, float, f, 16, 8); #endif TEST_VRECPS(, float, f, 32, 2); TEST_VRECPS(q, float, f, 32, 4); #undef CMT #define CMT " FP special (infinity, 0)" #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp3, CMT); CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp3, CMT); #endif CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp3, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp3, CMT); }
void exec_vrev (void) { /* Basic test: y=vrev(x), then store the result. */ #define TEST_VREV(Q, T1, T2, W, N, W2) \ VECT_VAR(vector_res, T1, W, N) = \ vrev##W2##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) /* With ARM RVCT, we need to declare variables before any executable statement */ DECL_VARIABLE_ALL_VARIANTS(vector); DECL_VARIABLE_ALL_VARIANTS(vector_res); clean_results (); /* Initialize input "vector" from "buffer" */ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); VLOAD(vector, buffer, , float, f, 32, 2); VLOAD(vector, buffer, q, float, f, 32, 4); /* Check vrev in each of the existing combinations */ #define TEST_MSG "VREV16" TEST_VREV(, int, s, 8, 8, 16); TEST_VREV(, uint, u, 8, 8, 16); TEST_VREV(, poly, p, 8, 8, 16); TEST_VREV(q, int, s, 8, 16, 16); TEST_VREV(q, uint, u, 8, 16, 16); TEST_VREV(q, poly, p, 8, 16, 16); dump_results_hex (TEST_MSG); #undef TEST_MSG #define TEST_MSG "VREV32" TEST_VREV(, int, s, 8, 8, 32); TEST_VREV(, int, s, 16, 4, 32); TEST_VREV(, uint, u, 8, 8, 32); TEST_VREV(, uint, u, 16, 4, 32); TEST_VREV(, poly, p, 8, 8, 32); TEST_VREV(, poly, p, 16, 4, 32); TEST_VREV(q, int, s, 8, 16, 32); TEST_VREV(q, int, s, 16, 8, 32); TEST_VREV(q, uint, u, 8, 16, 32); TEST_VREV(q, uint, u, 16, 8, 32); TEST_VREV(q, poly, p, 8, 16, 32); TEST_VREV(q, poly, p, 16, 8, 32); dump_results_hex (TEST_MSG); #undef TEST_MSG #define TEST_MSG "VREV64" TEST_VREV(, int, s, 8, 8, 64); TEST_VREV(, int, s, 16, 4, 64); TEST_VREV(, int, s, 32, 2, 64); TEST_VREV(, uint, u, 8, 8, 64); TEST_VREV(, uint, u, 16, 4, 64); TEST_VREV(, uint, u, 32, 2, 64); TEST_VREV(, poly, p, 8, 8, 64); TEST_VREV(, poly, p, 16, 4, 64); TEST_VREV(q, int, s, 8, 16, 64); TEST_VREV(q, int, s, 16, 8, 64); TEST_VREV(q, int, s, 32, 4, 64); TEST_VREV(q, uint, u, 8, 16, 64); TEST_VREV(q, uint, u, 16, 8, 64); TEST_VREV(q, uint, u, 32, 4, 64); TEST_VREV(q, poly, p, 8, 16, 64); TEST_VREV(q, poly, p, 16, 8, 64); TEST_VREV(, 
float, f, 32, 2, 64); TEST_VREV(q, float, f, 32, 4, 64); dump_results_hex (TEST_MSG); }
/* Test of vrshl / vrshlq.  The function runs a series of passes; each pass
   sets the input vectors and/or the shift amounts, applies the intrinsic
   to all variants through TEST_MACRO_ALL_VARIANTS_1_5 and dumps the
   results with a label describing the pass.  "vector" and "vector_shift"
   keep their values from one pass to the next unless they are explicitly
   re-initialised, so the order of the passes matters.  */
void exec_vrshl (void)
{
  /* Basic test: v3=vrshl(v1,v2), then store the result.  T3 is the type
     of the shift vector, T1 the type of the input and result.  */
#define TEST_VRSHL(T3, Q, T1, T2, W, N) \
  VECT_VAR(vector_res, T1, W, N) = \
    vrshl##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \
                       VECT_VAR(vector_shift, T3, W, N)); \
  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))

  /* With ARM RVCT, we need to declare variables before any executable
     statement */
  DECL_VARIABLE_ALL_VARIANTS(vector);
  DECL_VARIABLE_ALL_VARIANTS(vector_res);

  /* The shift amounts are only declared in signed variants.  */
  DECL_VARIABLE_SIGNED_VARIANTS(vector_shift);

  clean_results ();

  /* Fill input vector with 0, to check behavior on limits */
  VDUP(vector, , int, s, 8, 8, 0);
  VDUP(vector, , int, s, 16, 4, 0);
  VDUP(vector, , int, s, 32, 2, 0);
  VDUP(vector, , int, s, 64, 1, 0);
  VDUP(vector, , uint, u, 8, 8, 0);
  VDUP(vector, , uint, u, 16, 4, 0);
  VDUP(vector, , uint, u, 32, 2, 0);
  VDUP(vector, , uint, u, 64, 1, 0);
  VDUP(vector, q, int, s, 8, 16, 0);
  VDUP(vector, q, int, s, 16, 8, 0);
  VDUP(vector, q, int, s, 32, 4, 0);
  VDUP(vector, q, int, s, 64, 2, 0);
  VDUP(vector, q, uint, u, 8, 16, 0);
  VDUP(vector, q, uint, u, 16, 8, 0);
  VDUP(vector, q, uint, u, 32, 4, 0);
  VDUP(vector, q, uint, u, 64, 2, 0);

  /* Choose init value arbitrarily, will be used as shift amount */
  /* Use values equal to one-less-than the type width to check
     behaviour on limits */
  VDUP(vector_shift, , int, s, 8, 8, 7);
  VDUP(vector_shift, , int, s, 16, 4, 15);
  VDUP(vector_shift, , int, s, 32, 2, 31);
  VDUP(vector_shift, , int, s, 64, 1, 63);
  VDUP(vector_shift, q, int, s, 8, 16, 7);
  VDUP(vector_shift, q, int, s, 16, 8, 15);
  VDUP(vector_shift, q, int, s, 32, 4, 31);
  VDUP(vector_shift, q, int, s, 64, 2, 63);

  TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int);

  dump_results_hex2 (TEST_MSG, " (with input = 0)");

  /* Use negative shift amounts (the input vectors still hold 0) */
  VDUP(vector_shift, , int, s, 8, 8, -1);
  VDUP(vector_shift, , int, s, 16, 4, -2);
  VDUP(vector_shift, , int, s, 32, 2, -3);
  VDUP(vector_shift, , int, s, 64, 1, -4);
  VDUP(vector_shift, q, int, s, 8, 16, -7);
  VDUP(vector_shift, q, int, s, 16, 8, -11);
  VDUP(vector_shift, q, int, s, 32, 4, -13);
  VDUP(vector_shift, q, int, s, 64, 2, -20);

  TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int);

  dump_results_hex2 (TEST_MSG, " (input 0 and negative shift amount)");

  /* Test again, with predefined input values */
  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);

  /* Choose init value arbitrarily, will be used as shift amount */
  VDUP(vector_shift, , int, s, 8, 8, 1);
  VDUP(vector_shift, , int, s, 16, 4, 3);
  VDUP(vector_shift, , int, s, 32, 2, 8);
  VDUP(vector_shift, , int, s, 64, 1, -3);
  VDUP(vector_shift, q, int, s, 8, 16, 10);
  VDUP(vector_shift, q, int, s, 16, 8, 12);
  VDUP(vector_shift, q, int, s, 32, 4, 32);
  VDUP(vector_shift, q, int, s, 64, 2, 63);

  TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int);

  dump_results_hex (TEST_MSG);

  /* Use negative shift amounts */
  VDUP(vector_shift, , int, s, 8, 8, -1);
  VDUP(vector_shift, , int, s, 16, 4, -2);
  VDUP(vector_shift, , int, s, 32, 2, -3);
  VDUP(vector_shift, , int, s, 64, 1, -4);
  VDUP(vector_shift, q, int, s, 8, 16, -7);
  VDUP(vector_shift, q, int, s, 16, 8, -11);
  VDUP(vector_shift, q, int, s, 32, 4, -13);
  VDUP(vector_shift, q, int, s, 64, 2, -20);

  TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int);

  dump_results_hex2 (TEST_MSG, " (negative shift amount)");

  /* Fill input vector with max value, to check behavior on limits */
  VDUP(vector, , int, s, 8, 8, 0x7F);
  VDUP(vector, , int, s, 16, 4, 0x7FFF);
  VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF);
  VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL);
  VDUP(vector, , uint, u, 8, 8, 0xFF);
  VDUP(vector, , uint, u, 16, 4, 0xFFFF);
  VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF);
  VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL);
  VDUP(vector, q, int, s, 8, 16, 0x7F);
  VDUP(vector, q, int, s, 16, 8, 0x7FFF);
  VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF);
  VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL);
  VDUP(vector, q, uint, u, 8, 16, 0xFF);
  VDUP(vector, q, uint, u, 16, 8, 0xFFFF);
  VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF);
  VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL);

  /* Use -1 shift amount to check overflow with round_const */
  VDUP(vector_shift, , int, s, 8, 8, -1);
  VDUP(vector_shift, , int, s, 16, 4, -1);
  VDUP(vector_shift, , int, s, 32, 2, -1);
  VDUP(vector_shift, , int, s, 64, 1, -1);
  VDUP(vector_shift, q, int, s, 8, 16, -1);
  VDUP(vector_shift, q, int, s, 16, 8, -1);
  VDUP(vector_shift, q, int, s, 32, 4, -1);
  VDUP(vector_shift, q, int, s, 64, 2, -1);

  TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int);

  dump_results_hex2 (TEST_MSG, " (checking round_const overflow: shift by -1)");

  /* Use -3 shift amount to check overflow with round_const */
  VDUP(vector_shift, , int, s, 8, 8, -3);
  VDUP(vector_shift, , int, s, 16, 4, -3);
  VDUP(vector_shift, , int, s, 32, 2, -3);
  VDUP(vector_shift, , int, s, 64, 1, -3);
  VDUP(vector_shift, q, int, s, 8, 16, -3);
  VDUP(vector_shift, q, int, s, 16, 8, -3);
  VDUP(vector_shift, q, int, s, 32, 4, -3);
  VDUP(vector_shift, q, int, s, 64, 2, -3);

  TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int);

  dump_results_hex2 (TEST_MSG, " (checking round_const overflow: shift by -3)");

  /* Use negative shift amount as large as input vector width */
  VDUP(vector_shift, , int, s, 8, 8, -8);
  VDUP(vector_shift, , int, s, 16, 4, -16);
  VDUP(vector_shift, , int, s, 32, 2, -32);
  VDUP(vector_shift, , int, s, 64, 1, -64);
  VDUP(vector_shift, q, int, s, 8, 16, -8);
  VDUP(vector_shift, q, int, s, 16, 8, -16);
  VDUP(vector_shift, q, int, s, 32, 4, -32);
  VDUP(vector_shift, q, int, s, 64, 2, -64);

  TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int);

  dump_results_hex2 (TEST_MSG, " (checking negative shift amount as large as input vector width)");

  /* Test large shift amount */
  VDUP(vector_shift, , int, s, 8, 8, 10);
  VDUP(vector_shift, , int, s, 16, 4, 20);
  VDUP(vector_shift, , int, s, 32, 2, 33);
  VDUP(vector_shift, , int, s, 64, 1, 65);
  VDUP(vector_shift, q, int, s, 8, 16, 9);
  VDUP(vector_shift, q, int, s, 16, 8, 16);
  VDUP(vector_shift, q, int, s, 32, 4, 32);
  VDUP(vector_shift, q, int, s, 64, 2, 64);

  TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int);

  dump_results_hex2 (TEST_MSG, " (large shift amount)");

  /* Test large negative shift amount */
  VDUP(vector_shift, , int, s, 8, 8, -10);
  VDUP(vector_shift, , int, s, 16, 4, -20);
  VDUP(vector_shift, , int, s, 32, 2, -33);
  VDUP(vector_shift, , int, s, 64, 1, -65);
  VDUP(vector_shift, q, int, s, 8, 16, -9);
  VDUP(vector_shift, q, int, s, 16, 8, -16);
  VDUP(vector_shift, q, int, s, 32, 4, -32);
  VDUP(vector_shift, q, int, s, 64, 2, -64);

  TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int);

  dump_results_hex2 (TEST_MSG, " (large negative shift amount)");
}
void exec_vrecpe(void) { int i; /* Basic test: y=vrecpe(x), then store the result. */ #define TEST_VRECPE(Q, T1, T2, W, N) \ VECT_VAR(vector_res, T1, W, N) = \ vrecpe##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ VECT_VAR(vector_res, T1, W, N)) /* No need for 64 bits variants. */ DECL_VARIABLE(vector, uint, 32, 2); DECL_VARIABLE(vector, uint, 32, 4); DECL_VARIABLE(vector, float, 32, 2); DECL_VARIABLE(vector, float, 32, 4); DECL_VARIABLE(vector_res, uint, 32, 2); DECL_VARIABLE(vector_res, uint, 32, 4); DECL_VARIABLE(vector_res, float, 32, 2); DECL_VARIABLE(vector_res, float, 32, 4); clean_results (); /* Choose init value arbitrarily, positive. */ VDUP(vector, , uint, u, 32, 2, 0x12345678); VDUP(vector, , float, f, 32, 2, 1.9f); VDUP(vector, q, uint, u, 32, 4, 0xABCDEF10); VDUP(vector, q, float, f, 32, 4, 125.0f); /* Apply the operator. */ TEST_VRECPE(, uint, u, 32, 2); TEST_VRECPE(, float, f, 32, 2); TEST_VRECPE(q, uint, u, 32, 4); TEST_VRECPE(q, float, f, 32, 4); #define CMT " (positive input)" CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_positive, CMT); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_positive, CMT); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_positive, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_positive, CMT); /* Choose init value arbitrarily,negative. */ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); VDUP(vector, , float, f, 32, 2, -10.0f); VDUP(vector, q, uint, u, 32, 4, 0x89081234); VDUP(vector, q, float, f, 32, 4, -125.0f); /* Apply the operator. 
*/ TEST_VRECPE(, uint, u, 32, 2); TEST_VRECPE(, float, f, 32, 2); TEST_VRECPE(q, uint, u, 32, 4); TEST_VRECPE(q, float, f, 32, 4); #undef CMT #define CMT " (negative input)" CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_negative, CMT); CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_negative, CMT); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_negative, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_negative, CMT); /* Test FP variants with special input values (NaN, infinity). */ VDUP(vector, , float, f, 32, 2, NAN); VDUP(vector, q, float, f, 32, 4, HUGE_VALF); /* Apply the operator. */ TEST_VRECPE(, float, f, 32, 2); TEST_VRECPE(q, float, f, 32, 4); #undef CMT #define CMT " FP special (NaN, infinity)" CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp1, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp1, CMT); /* Test FP variants with special input values (zero, large value). */ VDUP(vector, , float, f, 32, 2, 0.0f); VDUP(vector, q, float, f, 32, 4, 8.97229e37f /*9.0e37f*/); /* Apply the operator. */ TEST_VRECPE(, float, f, 32, 2); TEST_VRECPE(q, float, f, 32, 4); #undef CMT #define CMT " FP special (zero, large value)" CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp2, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp2, CMT); /* Test FP variants with special input values (-0, -infinity). */ VDUP(vector, , float, f, 32, 2, -0.0f); VDUP(vector, q, float, f, 32, 4, -HUGE_VALF); /* Apply the operator. */ TEST_VRECPE(, float, f, 32, 2); TEST_VRECPE(q, float, f, 32, 4); #undef CMT #define CMT " FP special (-0, -infinity)" CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp3, CMT); CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp3, CMT); /* Test FP variants with special input values (large negative value). */ VDUP(vector, , float, f, 32, 2, -9.0e37f); /* Apply the operator. */ TEST_VRECPE(, float, f, 32, 2); #undef CMT #define CMT " FP special (large negative value)" CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp4, CMT); }