Example #1
0
/* Load the contents of a std::vector into a SIMD4 real variable.
 *
 * The GMX_SIMD4_WIDTH lanes are filled from v in order; when v holds fewer
 * elements than the SIMD4 width its contents are repeated cyclically, and
 * when it holds more, only the first GMX_SIMD4_WIDTH elements are used.
 * An empty vector has nothing to repeat, so all lanes are set to zero
 * (indexing with i % 0 would be undefined behavior).
 *
 * v       Values to place in the SIMD4 lanes.
 * return  The result of gmx_simd4_load_r() on the filled buffer.
 */
gmx_simd4_real_t
vector2Simd4Real(const std::vector<real> &v)
{
    // mem is twice the SIMD4 width, presumably so that gmx_simd4_align_r()
    // can return a suitably aligned pointer inside it.
    real                                 mem[GMX_SIMD4_WIDTH*2];
    real *                               p = gmx_simd4_align_r(mem);
    const std::vector<real>::size_type   n = v.size();

    for (int i = 0; i < GMX_SIMD4_WIDTH; i++)
    {
        if (n > 0)
        {
            p[i] = v[i % n];  // repeat vector contents to fill simd width
        }
        else
        {
            p[i] = 0;         // empty input: avoid modulo by zero
        }
    }
    return gmx_simd4_load_r(p);
}
Example #2
0
/* Allocate and fill the SIMD4 mask tables for the given spline order.
 *
 * When PME_SIMD4_SPREAD_GATHER is defined, an aligned pme_spline_work is
 * allocated with snew_aligned() and, for every grid offset, a pair of masks
 * covering 2*GMX_SIMD4_WIDTH consecutive grid entries is stored in
 * mask_S0[offset] and mask_S1[offset]. Without PME_SIMD4_SPREAD_GATHER no
 * work data is needed and NULL is returned (order is then unused).
 *
 * order   Spline order: the number of consecutive grid entries in use.
 * return  The newly allocated work structure, or NULL.
 */
struct pme_spline_work *make_pme_spline_work(int gmx_unused order)
{
    struct pme_spline_work *work = NULL;

#ifdef PME_SIMD4_SPREAD_GATHER
    real              buffer[GMX_SIMD4_WIDTH*3];
    real             *aligned;
    gmx_simd4_real_t  zero_S;
    gmx_simd4_real_t  sign_S0, sign_S1;
    int               num_offsets, offset, entry;

    snew_aligned(work, 1, SIMD4_ALIGNMENT);

    aligned = gmx_simd4_align_r(buffer);
    zero_S  = gmx_simd4_setzero_r();

    /* Generate bit masks to mask out the unused grid entries,
     * as we only operate on order of the 8 grid entries that are
     * loaded into 2 SIMD registers.
     */
    num_offsets = 2*GMX_SIMD4_WIDTH - (order - 1);
    for (offset = 0; offset < num_offsets; offset++)
    {
        /* Mark the order entries starting at offset with a negative value
         * and all other entries with a positive one.
         */
        for (entry = 0; entry < 2*GMX_SIMD4_WIDTH; entry++)
        {
            if (entry >= offset && entry < offset + order)
            {
                aligned[entry] = -1.0;
            }
            else
            {
                aligned[entry] = 1.0;
            }
        }

        /* Turn the signs into masks by comparing against zero,
         * one SIMD4 register for each half of the entries.
         */
        sign_S0               = gmx_simd4_load_r(aligned);
        sign_S1               = gmx_simd4_load_r(aligned + GMX_SIMD4_WIDTH);
        work->mask_S0[offset] = gmx_simd4_cmplt_r(sign_S0, zero_S);
        work->mask_S1[offset] = gmx_simd4_cmplt_r(sign_S1, zero_S);
    }
#endif

    return work;
}