/*
 * Compute an aligned centre point for the n points in set and store it in res.
 *
 * Each point is accumulated relative to box->min, the accumulated value is
 * divided by n, and box->min is added back. Every component of that mean is
 * then rounded up to a whole multiple of the matching vox_voxel component.
 *
 * Aligned SSE loads and stores are used throughout, so set, box->min,
 * vox_voxel and res must all be 16-byte aligned. With n == 0 the division
 * is 0/0 and NaN is stored in res.
 */
static void find_center (const vox_dot set[], size_t n, const struct vox_box *box, vox_dot res)
{
    const __m128 origin = _mm_load_ps (box->min);
    const __m128 cell = _mm_load_ps (vox_voxel);
    const __m128 count = _mm_set1_ps ((float) n);
    __m128 acc = _mm_setzero_ps ();

    /*
     * Accumulate offsets from the bounding box minimum rather than raw
     * coordinates to reduce computational error; the minimum is added
     * back once the average has been taken.
     */
    for (size_t idx = 0; idx != n; ++idx)
    {
        acc = _mm_add_ps (acc, _mm_sub_ps (_mm_load_ps (set[idx]), origin));
    }

    const __m128 mean = _mm_add_ps (_mm_div_ps (acc, count), origin);

    /*
     * Snap the dividing point to the voxel grid so that any voxel belongs
     * to only one subspace entirely (voxel faces may be the exception).
     */
    const __m128 snapped = _mm_mul_ps (_mm_ceil_ps (_mm_div_ps (mean, cell)), cell);

    _mm_store_ps (res, snapped);
}
/*
 * Exercises one SSE4.1 floating-point intrinsic (_mm_ceil_ps) and one SSE4.1
 * integer intrinsic (_mm_mullo_epi32) and returns 0.
 * NOTE(review): this looks like a build-system feature probe; confirm against
 * whatever consumes it.
 *
 * The parameters are named (and explicitly ignored) because omitting parameter
 * names in a function definition is not valid C before C23.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    /* volatile keeps the compiler from folding the intrinsic calls away. */
    volatile __m128 a = _mm_setzero_ps();

    /* Keep the rounded value so the call is not discarded as dead code. */
    volatile __m128 rounded = _mm_ceil_ps(a);
    (void)rounded;

    volatile __m128i result = _mm_mullo_epi32(_mm_set1_epi32(42), _mm_set1_epi32(64));
    (void)result;

    return 0;
}
/*
 * Applies the packed-float roundscale, ceil and floor intrinsics, plus the
 * merge-masked (_mask_) and zero-masked (_maskz_) roundscale forms, first in
 * their 256-bit (_mm256_*) variants on x1 and then in their 128-bit (_mm_*)
 * variants on x2. Each statement reads the variable and writes the result
 * back to it.
 *
 * x1 and x2 are declared outside this excerpt.
 * NOTE(review): the statement order and the one-intrinsic-per-statement form
 * are presumably what the surrounding test relies on; do not merge or reorder.
 * NOTE(review): per the Intel intrinsics guide, imm8[7:4] of the roundscale
 * immediate selects the number of fraction bits and imm8[1:0] the rounding
 * mode; confirm 0x42 is the intended encoding.
 */
void extern
avx512vl_test (void)
{
  /* 256-bit forms operating on x1. */
  x1 = _mm256_roundscale_ps (x1, 0x42);
  x1 = _mm256_ceil_ps (x1);
  x1 = _mm256_floor_ps (x1);
  /* Masked forms: the literal 2 is the mask operand. */
  x1 = _mm256_mask_roundscale_ps (x1, 2, x1, 0x42);
  x1 = _mm256_maskz_roundscale_ps (2, x1, 0x42);

  /* 128-bit forms operating on x2, in the same order as above. */
  x2 = _mm_roundscale_ps (x2, 0x42);
  x2 = _mm_ceil_ps (x2);
  x2 = _mm_floor_ps (x2);
  x2 = _mm_mask_roundscale_ps (x2, 2, x2, 0x42);
  x2 = _mm_maskz_roundscale_ps (2, x2, 0x42);
}
// Codegen test wrapper: returns _mm_ceil_ps applied to x.
//
// The comment lines inside the body are FileCheck-style directives that are
// matched against the compiler's IR and assembly output. Each one must stay
// on its own line: if they are joined with the code, the first line comment
// swallows the return statement and the closing brace.
__m128 test_mm_ceil_ps(__m128 x) {
  // CHECK-LABEL: test_mm_ceil_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ps
  // CHECK-ASM: roundps $2, %xmm{{.*}}, %xmm{{.*}}
  return _mm_ceil_ps(x);
}
// Codegen test wrapper: returns _mm_ceil_ps applied to x.
//
// The comment lines inside the body are FileCheck-style directives that are
// matched against the compiler's IR output; the second one expects the
// SSE4.1 round intrinsic to be called with an immediate of 2. Each directive
// must stay on its own line: if they are joined with the code, the first
// line comment swallows the return statement and the closing brace.
__m128 test_mm_ceil_ps(__m128 x) {
  // CHECK-LABEL: test_mm_ceil_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 2)
  return _mm_ceil_ps(x);
}