Пример #1
0
/*
 * Expand 16 doubles from inv into 64 doubles in outv.
 *
 * For every k in 0..15 the four consecutive slots outv[4*k .. 4*k+3] receive
 * the single value inv[(k % 4) * 4 + k / 4]; i.e. inv[0..15] is walked in
 * transposed order when viewed as a 4x4 row-major array, and each element is
 * replicated four times.
 *
 * inv   source array; elements 0..15 are read.
 * outv  destination array; elements 0..63 are written with _mm512_store_pd,
 *       an aligned store, so outv is expected to be 64-byte aligned (every
 *       store lands on a multiple of 8 doubles from outv).
 */
__inline void mic_broadcast16x64(const double* inv, double* outv)
{
    /* Write-masks for the lower and upper four double lanes of a vector. */
    __mmask8 lower_lanes = _mm512_int2mask(0x0F);
    __mmask8 upper_lanes = _mm512_int2mask(0xF0);

    /* Each pass handles two consecutive k values: an even one and its successor. */
    for (int even = 0; even < 16; even += 2)
    {
        int odd = even + 1;
        const double *lower_src = inv + ((even % 4) * 4 + even / 4);
        const double *upper_src = inv + ((odd % 4) * 4 + odd / 4);

        /* Lower half <- broadcast of *lower_src; the other lanes start at zero. */
        __m512d packed = _mm512_mask_extload_pd(_mm512_setzero_pd(), lower_lanes, lower_src,
                                                _MM_UPCONV_PD_NONE, _MM_BROADCAST_1X8, _MM_HINT_NONE);
        /* Upper half <- broadcast of *upper_src; the lower half is kept. */
        packed = _mm512_mask_extload_pd(packed, upper_lanes, upper_src,
                                        _MM_UPCONV_PD_NONE, _MM_BROADCAST_1X8, _MM_HINT_NONE);

        _mm512_store_pd(outv + even * 4, packed);
    }
}
Пример #2
0
/*
 * Runs foo and then foo_r on the same eight-element double array and, after
 * each call, compares the returned 512-bit value against that array with
 * check_union512d.  The process is aborted as soon as a comparison reports a
 * non-zero result.
 */
static void
avx512f_test (void)
{
  double values[8] = { -3.3, 2.6, 1.48, 9.104, -23.9, -173.37, -13.48, 69.78 };
  union512d out;

  /* First variant.  */
  out.x = foo (values);
  if (check_union512d (out, values) != 0)
    abort ();

  /* Reset the result between the two variants (kept from the original;
     the assignment that follows overwrites it in full).  */
  out.x = _mm512_setzero_pd ();

  /* Second variant.  */
  out.x = foo_r (values);
  if (check_union512d (out, values) != 0)
    abort ();
}
Пример #3
0
/*
 * Runs foo and then foo_r on the scalar 34.5 and, after each call, compares
 * the returned 512-bit value with check_union512d against an eight-element
 * array in which every element equals that scalar.  The process is aborted
 * as soon as a comparison reports a non-zero result.
 */
static void
avx512f_test (void)
{
  double scalar = 34.5;
  double expected[8];
  union512d out;

  /* Reference data: the scalar repeated in all eight slots.  */
  for (int lane = 0; lane < 8; ++lane)
    expected[lane] = scalar;

  /* First variant.  */
  out.x = foo (scalar);
  if (check_union512d (out, expected) != 0)
    abort ();

  /* Reset the result between the two variants (kept from the original;
     the assignment that follows overwrites it in full).  */
  out.x = _mm512_setzero_pd ();

  /* Second variant.  */
  out.x = foo_r (scalar);
  if (check_union512d (out, expected) != 0)
    abort ();
}
Пример #4
0
#include <stdlib.h>
#include <string.h>

#include "AVX512_DJACV.h"

/* Byte count passed as the alignment argument to __builtin_assume_aligned in
   avx512_ddots; 64 bytes equals the width of the __m512d vectors used there. */
#define VSIZE_B 64u
/* Presumably the size of one double in bytes -- TODO confirm that
   sizeof(double) == 8 holds on every supported target. */
#define DBLE_SZ 8u
/* VSIZE_B / DBLE_SZ = 8; used in avx512_ddots as the element stride between
   successive _mm512_load_pd calls. */
#define NDBLE_V (VSIZE_B / DBLE_SZ)
/* Alias of NDBLE_V.  NOTE(review): not referenced in the visible part of the
   file; presumably a count of pairs per vector -- confirm against its users. */
#define NPAIR_V NDBLE_V

static inline __m256d avx512_ddots(const unsigned m, const double *const restrict Gp, const double *const restrict Gq)
{
  register const double *const Gp_i = (const double*)__builtin_assume_aligned(Gp, VSIZE_B);
  register const double *const Gq_i = (const double*)__builtin_assume_aligned(Gq, VSIZE_B);

  register __m512d Gpp = _mm512_setzero_pd();
  register __m512d Gqq = _mm512_setzero_pd();
  register __m512d Gpq = _mm512_setzero_pd();

  for (register unsigned i = 0u; i < m; i += NDBLE_V) {
    register const __m512d Gpi = _mm512_load_pd(Gp_i + i);
    register const __m512d Gqi = _mm512_load_pd(Gq_i + i);

    Gpp = _mm512_fmadd_pd(Gpi, Gpi, Gpp);
    Gqq = _mm512_fmadd_pd(Gqi, Gqi, Gqq);
    Gpq = _mm512_fmadd_pd(Gpi, Gqi, Gpq);
  }

  register const double pq = _mm512_reduce_add_pd(Gpq);
  /* out[0] = Gpp; out[1] = Gqq; out[2] = Gpq; out[3] = |Gpq|; */
  return _mm256_set_pd(fabs(pq), pq, _mm512_reduce_add_pd(Gqq), _mm512_reduce_add_pd(Gpp));