/* Builds a SIMD4 value from the contents of v.
 *
 * GMX_SIMD4_WIDTH elements are written to a temporary buffer and loaded
 * with gmx_simd4_load_r(). When v holds fewer elements than that, its
 * contents are repeated cyclically to fill the full width.
 *
 * An empty v produces all-zero elements. Without that guard the index
 * expression "i % v.size()" would be a modulo by zero, which is undefined
 * behaviour.
 */
gmx_simd4_real_t vector2Simd4Real(const std::vector<real> &v)
{
    // Twice the SIMD4 width, so that gmx_simd4_align_r() has room to pick a
    // start address inside the buffer (presumably the alignment that the
    // aligned load below requires).
    real  mem[GMX_SIMD4_WIDTH*2];
    real *p = gmx_simd4_align_r(mem);

    // Evaluated once; also keeps v.size() from being used as a divisor
    // when it is zero.
    const bool isEmpty = v.empty();

    for (int i = 0; i < GMX_SIMD4_WIDTH; i++)
    {
        // Repeat the vector contents to fill the SIMD width.
        p[i] = isEmpty ? static_cast<real>(0) : v[i % v.size()];
    }

    return gmx_simd4_load_r(p);
}
struct pme_spline_work *make_pme_spline_work(int gmx_unused order) { struct pme_spline_work *work; #ifdef PME_SIMD4_SPREAD_GATHER real tmp[GMX_SIMD4_WIDTH*3], *tmp_aligned; gmx_simd4_real_t zero_S; gmx_simd4_real_t real_mask_S0, real_mask_S1; int of, i; snew_aligned(work, 1, SIMD4_ALIGNMENT); tmp_aligned = gmx_simd4_align_r(tmp); zero_S = gmx_simd4_setzero_r(); /* Generate bit masks to mask out the unused grid entries, * as we only operate on order of the 8 grid entries that are * load into 2 SIMD registers. */ for (of = 0; of < 2*GMX_SIMD4_WIDTH-(order-1); of++) { for (i = 0; i < 2*GMX_SIMD4_WIDTH; i++) { tmp_aligned[i] = (i >= of && i < of+order ? -1.0 : 1.0); } real_mask_S0 = gmx_simd4_load_r(tmp_aligned); real_mask_S1 = gmx_simd4_load_r(tmp_aligned+GMX_SIMD4_WIDTH); work->mask_S0[of] = gmx_simd4_cmplt_r(real_mask_S0, zero_S); work->mask_S1[of] = gmx_simd4_cmplt_r(real_mask_S1, zero_S); } #else work = NULL; #endif return work; }