Example #1
0
int
main (int argc, char *argv[])
{
  mpfr_t x, y, z;
  unsigned long k, n;
  volatile double d;
  double dd;

  tests_start_mpfr ();
  mpfr_test_init ();

#ifndef MPFR_DOUBLE_SPEC
  printf ("Warning! The MPFR_DOUBLE_SPEC macro is not defined. This means\n"
          "that you do not have a conforming C implementation and problems\n"
          "may occur with conversions between MPFR numbers and standard\n"
          "floating-point types. Please contact the MPFR team.\n");
#elif MPFR_DOUBLE_SPEC == 0
  /*
  printf ("The type 'double' of your C implementation does not seem to\n"
          "correspond to the IEEE-754 double precision. Though code has\n"
          "been written to support such implementations, tests have been\n"
          "done only on IEEE-754 double-precision implementations and\n"
          "conversions between MPFR numbers and standard floating-point\n"
          "types may be inaccurate. You may wish to contact the MPFR team\n"
          "for further testing.\n");
  */
  printf ("The type 'double' of your C implementation does not seem to\n"
          "correspond to the IEEE-754 double precision. Such particular\n"
          "implementations are not supported yet, and conversions between\n"
          "MPFR numbers and standard floating-point types may be very\n"
          "inaccurate.\n");
  printf ("FLT_RADIX    = %ld\n", (long) FLT_RADIX);
  printf ("DBL_MANT_DIG = %ld\n", (long) DBL_MANT_DIG);
  printf ("DBL_MIN_EXP  = %ld\n", (long) DBL_MIN_EXP);
  printf ("DBL_MAX_EXP  = %ld\n", (long) DBL_MAX_EXP);
#endif

  mpfr_init (x);

  mpfr_set_nan (x);
  d = mpfr_get_d (x, MPFR_RNDN);
  if (! DOUBLE_ISNAN (d))
    {
      printf ("ERROR for NAN (1)\n");
#ifdef MPFR_NANISNAN
      printf ("The reason is that NAN == NAN. Please look at the configure "
              "output\nand Section \"In case of problem\" of the INSTALL "
              "file.\n");
#endif
      exit (1);
    }
  mpfr_set_ui (x, 0, MPFR_RNDN);
  mpfr_set_d (x, d, MPFR_RNDN);
  if (! mpfr_nan_p (x))
    {
      printf ("ERROR for NAN (2)\n");
#ifdef MPFR_NANISNAN
      printf ("The reason is that NAN == NAN. Please look at the configure "
              "output\nand Section \"In case of problem\" of the INSTALL "
              "file.\n");
#endif
      exit (1);
    }

  d = 0.0;
  mpfr_set_d (x, d, MPFR_RNDN);
  MPFR_ASSERTN(mpfr_cmp_ui (x, 0) == 0 && MPFR_IS_POS(x));
  d = -d;
  mpfr_set_d (x, d, MPFR_RNDN);
  if (mpfr_cmp_ui (x, 0) != 0 || MPFR_IS_POS(x))
    {
      printf ("Error in mpfr_set_d on -0\n");
      exit (1);
    }

  mpfr_set_inf (x, 1);
  d = mpfr_get_d (x, MPFR_RNDN);
  mpfr_set_ui (x, 0, MPFR_RNDN);
  mpfr_set_d (x, d, MPFR_RNDN);
  MPFR_ASSERTN(mpfr_inf_p (x) && mpfr_sgn (x) > 0);

  mpfr_set_inf (x, -1);
  d = mpfr_get_d (x, MPFR_RNDN);
  mpfr_set_ui (x, 0, MPFR_RNDN);
  mpfr_set_d (x, d, MPFR_RNDN);
  MPFR_ASSERTN(mpfr_inf_p (x) && mpfr_sgn (x) < 0);

  mpfr_set_prec (x, 2);

  /* checks that denormalized are not flushed to zero */
  d = DBL_MIN; /* 2^(-1022) */
  for (n=0; n<52; n++, d /= 2.0)
    if (d != 0.0) /* should be 2^(-1022-n) */
      {
        mpfr_set_d (x, d, MPFR_RNDN);
        if (mpfr_cmp_ui_2exp (x, 1, -1022-n))
          {
            printf ("Wrong result for d=2^(%ld), ", -1022-n);
            printf ("got ");
            mpfr_out_str (stdout, 10, 10, x, MPFR_RNDN);
            printf ("\n");
            mpfr_print_binary (x);
            puts ("");
            exit (1);
          }
      }

   /* checks that rounds to nearest sets the last
     bit to zero in case of equal distance */
   mpfr_set_d (x, 5.0, MPFR_RNDN);
   if (mpfr_cmp_ui (x, 4))
     {
       printf ("Error in tset_d: expected 4.0, got ");
       mpfr_print_binary (x); putchar('\n');
       exit (1);
     }
   mpfr_set_d (x, -5.0, MPFR_RNDN);
   if (mpfr_cmp_si (x, -4))
     {
       printf ("Error in tset_d: expected -4.0, got ");
       mpfr_print_binary (x); putchar('\n');
       exit (1);
     }

   mpfr_set_d (x, 9.84891017624509146344e-01, MPFR_RNDU);
   if (mpfr_cmp_ui (x, 1))
     {
       printf ("Error in tset_d: expected 1.0, got ");
       mpfr_print_binary (x); putchar('\n');
       exit (1);
     }

  mpfr_init2 (z, 32);
  mpfr_set_d (z, 1.0, (mpfr_rnd_t) 0);
  if (mpfr_cmp_ui (z, 1))
    {
      mpfr_print_binary (z); puts ("");
      printf ("Error: 1.0 != 1.0\n");
      exit (1);
    }
  mpfr_set_prec (x, 53);
  mpfr_init2 (y, 53);
  mpfr_set_d (x, d=-1.08007920352320089721e+150, (mpfr_rnd_t) 0);
  if (mpfr_get_d1 (x) != d)
    {
      mpfr_print_binary (x); puts ("");
      printf ("Error: get_d o set_d <> identity for d = %1.20e %1.20e\n",
              d, mpfr_get_d1 (x));
      exit (1);
    }

  mpfr_set_d (x, 8.06294740693074521573e-310, (mpfr_rnd_t) 0);
  d = -6.72658901114033715233e-165;
  mpfr_set_d (x, d, (mpfr_rnd_t) 0);
  if (d != mpfr_get_d1 (x))
    {
      mpfr_print_binary (x);
      puts ("");
      printf ("Error: get_d o set_d <> identity for d = %1.20e %1.20e\n",
              d, mpfr_get_d1 (x));
      exit (1);
    }

  n = (argc==1) ? 500000 : atoi(argv[1]);
  for (k = 1; k <= n; k++)
    {
      do
        {
          d = DBL_RAND ();
        }
#ifdef HAVE_DENORMS
      while (0);
#else
      while (ABS(d) < DBL_MIN);
#endif
      mpfr_set_d (x, d, (mpfr_rnd_t) 0);
      dd = mpfr_get_d1 (x);
      if (d != dd && !(Isnan(d) && Isnan(dd)))
        {
          printf ("Mismatch on : %1.18g != %1.18g\n", d, mpfr_get_d1 (x));
          mpfr_print_binary (x);
          puts ("");
          exit (1);
        }
    }

  mpfr_clear (x);
  mpfr_clear (y);
  mpfr_clear (z);

  tests_end_mpfr ();
  return 0;
}
int main()
{
#ifndef __EMSCRIPTEN__
	_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
#endif

	printf ("{ \"workload\": %u, \"results\": [\n", N);
	assert(N%4 == 0); // Don't care about the tail for now.
	float *src = get_src();//(float*)aligned_alloc(16, N*sizeof(float));
	for(int i = 0; i < N; ++i)
		src[i] = (float)rand() / RAND_MAX;
	float *src2 = get_src2();//(float*)aligned_alloc(16, N*sizeof(float));
	for(int i = 0; i < N; ++i)
		src2[i] = (float)rand() / RAND_MAX;
	float *dst = get_dst();//(float*)aligned_alloc(16, N*sizeof(float));

	float scalarTime;
	SETCHART("load");
	START();
		for(int i = 0; i < N; ++i)
			dst[i] = src[i];
	ENDSCALAR(checksum_dst(dst), "scalar");

	LS_TEST("_mm_load_ps", _mm_load_ps, 0, _mm_store_ps, 0);
	LS_TEST("_mm_load_ps1", _mm_load_ps1, 1, _mm_store_ps, 0);
	LS_TEST("_mm_load_ss", _mm_load_ss, 1, _mm_store_ps, 0);
	LS_TEST("_mm_load1_ps", _mm_load1_ps, 1, _mm_store_ps, 0);
	// _mm_loadh_pi
	// _mm_loadl_pi
	LS_TEST("_mm_loadr_ps", _mm_loadr_ps, 0, _mm_store_ps, 0);
	LS_TEST("_mm_loadu_ps", _mm_loadu_ps, 1, _mm_store_ps, 0);

	SETCHART("set");
	SS_TEST("_mm_set_ps", _mm_set_ps(src[i+2], src[i+1], src[i+5], src[i+0]));
	SS_TEST("_mm_set_ps1", _mm_set_ps1(src[i]));
	SS_TEST("_mm_set_ss", _mm_set_ss(src[i]));
	SS_TEST("_mm_set1_ps", _mm_set1_ps(src[i]));
	SS_TEST("_mm_setr_ps", _mm_set_ps(src[i+2], src[i+1], src[i+5], src[i+0]));
	SS_TEST("_mm_setzero_ps", _mm_setzero_ps());

	SETCHART("move");
	SS_TEST("_mm_move_ss", _mm_move_ss(_mm_load_ps(src+i), _mm_load_ps(src2+i)));
	SS_TEST("_mm_movehl_ps", _mm_movehl_ps(_mm_load_ps(src+i), _mm_load_ps(src2+i)));
	SS_TEST("_mm_movelh_ps", _mm_movelh_ps(_mm_load_ps(src+i), _mm_load_ps(src2+i)));

	SETCHART("store");
	LS_TEST("_mm_store_ps", _mm_load_ps, 0, _mm_store_ps, 0);
	LS_TEST("_mm_store_ps1", _mm_load_ps, 0, _mm_store_ps1, 0);
	LS_TEST("_mm_store_ss", _mm_load_ps, 0, _mm_store_ss, 1);
	LS64_TEST("_mm_storeh_pi", _mm_load_ps, 0, _mm_storeh_pi, 1);
	LS64_TEST("_mm_storel_pi", _mm_load_ps, 0, _mm_storel_pi, 1);
	LS_TEST("_mm_storer_ps", _mm_load_ps, 0, _mm_storer_ps, 0);
	LS_TEST("_mm_storeu_ps", _mm_load_ps, 0, _mm_storeu_ps, 1);
	LS_TEST("_mm_stream_ps", _mm_load_ps, 0, _mm_stream_ps, 0);

	SETCHART("arithmetic");
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] += src2[0]; dst[1] += src2[1]; dst[2] += src2[2]; dst[3] += src2[3]; } ENDSCALAR(checksum_dst(dst), "scalar add");
	BINARYOP_TEST("_mm_add_ps", _mm_add_ps, _mm_load_ps(src), _mm_load_ps(src2));
	BINARYOP_TEST("_mm_add_ss", _mm_add_ss, _mm_load_ps(src), _mm_load_ps(src2));
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] /= src2[0]; dst[1] /= src2[1]; dst[2] /= src2[2]; dst[3] /= src2[3]; } ENDSCALAR(checksum_dst(dst), "scalar div");
	BINARYOP_TEST("_mm_div_ps", _mm_div_ps, _mm_load_ps(src), _mm_load_ps(src2));
	BINARYOP_TEST("_mm_div_ss", _mm_div_ss, _mm_load_ps(src), _mm_load_ps(src2));
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] *= src2[0]; dst[1] *= src2[1]; dst[2] *= src2[2]; dst[3] *= src2[3]; } ENDSCALAR(checksum_dst(dst), "scalar mul");
	BINARYOP_TEST("_mm_mul_ps", _mm_mul_ps, _mm_load_ps(src), _mm_load_ps(src2));
	BINARYOP_TEST("_mm_mul_ss", _mm_mul_ss, _mm_load_ps(src), _mm_load_ps(src2));
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] -= src2[0]; dst[1] -= src2[1]; dst[2] -= src2[2]; dst[3] -= src2[3]; } ENDSCALAR(checksum_dst(dst), "scalar sub");
	BINARYOP_TEST("_mm_sub_ps", _mm_sub_ps, _mm_load_ps(src), _mm_load_ps(src2));
	BINARYOP_TEST("_mm_sub_ss", _mm_sub_ss, _mm_load_ps(src), _mm_load_ps(src2));

	SETCHART("roots");
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = 1.f / dst[0]; dst[1] = 1.f / dst[1]; dst[2] = 1.f / dst[2]; dst[3] = 1.f / dst[3]; } ENDSCALAR(checksum_dst(dst), "scalar rcp");
	UNARYOP_TEST("_mm_rcp_ps", _mm_rcp_ps, _mm_load_ps(src));
	UNARYOP_TEST("_mm_rcp_ss", _mm_rcp_ss, _mm_load_ps(src));

	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = 1.f / sqrtf(dst[0]); dst[1] = 1.f / sqrtf(dst[1]); dst[2] = 1.f / sqrtf(dst[2]); dst[3] = 1.f / sqrtf(dst[3]); } ENDSCALAR(checksum_dst(dst), "scalar rsqrt");
	UNARYOP_TEST("_mm_rsqrt_ps", _mm_rsqrt_ps, _mm_load_ps(src));
	UNARYOP_TEST("_mm_rsqrt_ss", _mm_rsqrt_ss, _mm_load_ps(src));

	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = sqrtf(dst[0]); dst[1] = sqrtf(dst[1]); dst[2] = sqrtf(dst[2]); dst[3] = sqrtf(dst[3]); } ENDSCALAR(checksum_dst(dst), "scalar sqrt");
	UNARYOP_TEST("_mm_sqrt_ps", _mm_sqrt_ps, _mm_load_ps(src));
	UNARYOP_TEST("_mm_sqrt_ss", _mm_sqrt_ss, _mm_load_ps(src));

	SETCHART("logical");
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = ucastf(fcastu(dst[0]) & fcastu(src2[0])); dst[1] = ucastf(fcastu(dst[1]) & fcastu(src2[1])); dst[2] = ucastf(fcastu(dst[2]) & fcastu(src2[2])); dst[3] = ucastf(fcastu(dst[3]) & fcastu(src2[3])); } ENDSCALAR(checksum_dst(dst), "scalar and");
	BINARYOP_TEST("_mm_and_ps", _mm_and_ps, _mm_load_ps(src), _mm_load_ps(src2));
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = ucastf((~fcastu(dst[0])) & fcastu(src2[0])); dst[1] = ucastf((~fcastu(dst[1])) & fcastu(src2[1])); dst[2] = ucastf((~fcastu(dst[2])) & fcastu(src2[2])); dst[3] = ucastf((~fcastu(dst[3])) & fcastu(src2[3])); } ENDSCALAR(checksum_dst(dst), "scalar andnot");
	BINARYOP_TEST("_mm_andnot_ps", _mm_andnot_ps, _mm_load_ps(src), _mm_load_ps(src2));
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = ucastf(fcastu(dst[0]) | fcastu(src2[0])); dst[1] = ucastf(fcastu(dst[1]) | fcastu(src2[1])); dst[2] = ucastf(fcastu(dst[2]) | fcastu(src2[2])); dst[3] = ucastf(fcastu(dst[3]) | fcastu(src2[3])); } ENDSCALAR(checksum_dst(dst), "scalar or");
	BINARYOP_TEST("_mm_or_ps", _mm_or_ps, _mm_load_ps(src), _mm_load_ps(src2));
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = ucastf(fcastu(dst[0]) ^ fcastu(src2[0])); dst[1] = ucastf(fcastu(dst[1]) ^ fcastu(src2[1])); dst[2] = ucastf(fcastu(dst[2]) ^ fcastu(src2[2])); dst[3] = ucastf(fcastu(dst[3]) ^ fcastu(src2[3])); } ENDSCALAR(checksum_dst(dst), "scalar xor");
	BINARYOP_TEST("_mm_xor_ps", _mm_xor_ps, _mm_load_ps(src), _mm_load_ps(src2));

	SETCHART("cmp");
#ifndef __EMSCRIPTEN__ // TODO: Disabled due to https://github.com/kripken/emscripten/issues/2841
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = (dst[0] == src2[0]) ? ucastf(0xFFFFFFFFU) : 0.f; dst[1] = (dst[1] == src2[1]) ? ucastf(0xFFFFFFFFU) : 0.f; dst[2] = (dst[2] == src2[2]) ? ucastf(0xFFFFFFFFU) : 0.f; dst[3] = (dst[3] == src2[3]) ? ucastf(0xFFFFFFFFU) : 0.f; } ENDSCALAR(checksum_dst(dst), "scalar cmp==");
	BINARYOP_TEST("_mm_cmpeq_ps", _mm_cmpeq_ps, _mm_load_ps(src), _mm_load_ps(src2));
	BINARYOP_TEST("_mm_cmpeq_ss", _mm_cmpeq_ss, _mm_load_ps(src), _mm_load_ps(src2));
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = (dst[0] >= src2[0]) ? ucastf(0xFFFFFFFFU) : 0.f; dst[1] = (dst[1] >= src2[1]) ? ucastf(0xFFFFFFFFU) : 0.f; dst[2] = (dst[2] >= src2[2]) ? ucastf(0xFFFFFFFFU) : 0.f; dst[3] = (dst[3] >= src2[3]) ? ucastf(0xFFFFFFFFU) : 0.f; } ENDSCALAR(checksum_dst(dst), "scalar cmp>=");
	BINARYOP_TEST("_mm_cmpge_ps", _mm_cmpge_ps, _mm_load_ps(src), _mm_load_ps(src2));
	BINARYOP_TEST("_mm_cmpge_ss", _mm_cmpge_ss, _mm_load_ps(src), _mm_load_ps(src2));
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = (dst[0] > src2[0]) ? ucastf(0xFFFFFFFFU) : 0.f; dst[1] = (dst[1] > src2[1]) ? ucastf(0xFFFFFFFFU) : 0.f; dst[2] = (dst[2] > src2[2]) ? ucastf(0xFFFFFFFFU) : 0.f; dst[3] = (dst[3] > src2[3]) ? ucastf(0xFFFFFFFFU) : 0.f; } ENDSCALAR(checksum_dst(dst), "scalar cmp>");
	BINARYOP_TEST("_mm_cmpgt_ps", _mm_cmpgt_ps, _mm_load_ps(src), _mm_load_ps(src2));
	BINARYOP_TEST("_mm_cmpgt_ss", _mm_cmpgt_ss, _mm_load_ps(src), _mm_load_ps(src2));
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = (dst[0] <= src2[0]) ? ucastf(0xFFFFFFFFU) : 0.f; dst[1] = (dst[1] <= src2[1]) ? ucastf(0xFFFFFFFFU) : 0.f; dst[2] = (dst[2] <= src2[2]) ? ucastf(0xFFFFFFFFU) : 0.f; dst[3] = (dst[3] <= src2[3]) ? ucastf(0xFFFFFFFFU) : 0.f; } ENDSCALAR(checksum_dst(dst), "scalar cmp<=");
	BINARYOP_TEST("_mm_cmple_ps", _mm_cmple_ps, _mm_load_ps(src), _mm_load_ps(src2));
	BINARYOP_TEST("_mm_cmple_ss", _mm_cmple_ss, _mm_load_ps(src), _mm_load_ps(src2));
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = (dst[0] < src2[0]) ? ucastf(0xFFFFFFFFU) : 0.f; dst[1] = (dst[1] < src2[1]) ? ucastf(0xFFFFFFFFU) : 0.f; dst[2] = (dst[2] < src2[2]) ? ucastf(0xFFFFFFFFU) : 0.f; dst[3] = (dst[3] < src2[3]) ? ucastf(0xFFFFFFFFU) : 0.f; } ENDSCALAR(checksum_dst(dst), "scalar cmp<");
	BINARYOP_TEST("_mm_cmplt_ps", _mm_cmplt_ps, _mm_load_ps(src), _mm_load_ps(src2));
	BINARYOP_TEST("_mm_cmplt_ss", _mm_cmplt_ss, _mm_load_ps(src), _mm_load_ps(src2));
#endif

	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = (!Isnan(dst[0]) && !Isnan(src2[0])) ? ucastf(0xFFFFFFFFU) : 0.f; dst[1] = (!Isnan(dst[1]) && !Isnan(src2[1])) ? ucastf(0xFFFFFFFFU) : 0.f; dst[2] = (!Isnan(dst[2]) && !Isnan(src2[2])) ? ucastf(0xFFFFFFFFU) : 0.f; dst[3] = (!Isnan(dst[3]) && !Isnan(src2[3])) ? ucastf(0xFFFFFFFFU) : 0.f; } ENDSCALAR(checksum_dst(dst), "scalar cmpord");
	BINARYOP_TEST("_mm_cmpord_ps", _mm_cmpord_ps, _mm_load_ps(src), _mm_load_ps(src2));
	BINARYOP_TEST("_mm_cmpord_ss", _mm_cmpord_ss, _mm_load_ps(src), _mm_load_ps(src2));
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = (Isnan(dst[0]) || Isnan(src2[0])) ? ucastf(0xFFFFFFFFU) : 0.f; dst[1] = (Isnan(dst[1]) || Isnan(src2[1])) ? ucastf(0xFFFFFFFFU) : 0.f; dst[2] = (Isnan(dst[2]) || Isnan(src2[2])) ? ucastf(0xFFFFFFFFU) : 0.f; dst[3] = (Isnan(dst[3]) || Isnan(src2[3])) ? ucastf(0xFFFFFFFFU) : 0.f; } ENDSCALAR(checksum_dst(dst), "scalar cmpunord");
	BINARYOP_TEST("_mm_cmpunord_ps", _mm_cmpunord_ps, _mm_load_ps(src), _mm_load_ps(src2));
	BINARYOP_TEST("_mm_cmpunord_ss", _mm_cmpunord_ss, _mm_load_ps(src), _mm_load_ps(src2));

	SETCHART("max");
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = Max(dst[0], src2[0]); dst[1] = Max(dst[1], src2[1]); dst[2] = Max(dst[2], src2[2]); dst[3] = Max(dst[3], src2[3]); } ENDSCALAR(checksum_dst(dst), "scalar max");
	BINARYOP_TEST("_mm_max_ps", _mm_max_ps, _mm_load_ps(src), _mm_load_ps(src2));
	BINARYOP_TEST("_mm_max_ss", _mm_max_ss, _mm_load_ps(src), _mm_load_ps(src2));
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = Min(dst[0], src2[0]); dst[1] = Min(dst[1], src2[1]); dst[2] = Min(dst[2], src2[2]); dst[3] = Min(dst[3], src2[3]); } ENDSCALAR(checksum_dst(dst), "scalar min");
	BINARYOP_TEST("_mm_min_ps", _mm_min_ps, _mm_load_ps(src), _mm_load_ps(src2));
	BINARYOP_TEST("_mm_min_ss", _mm_min_ss, _mm_load_ps(src), _mm_load_ps(src2));

	SETCHART("shuffle");
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[3] = dst[1]; dst[2] = dst[0]; dst[1] = src2[3]; dst[0] = src2[2]; } ENDSCALAR(checksum_dst(dst), "scalar shuffle");
//	BINARYOP_TEST("_mm_shuffle_ps", _mm_shuffle_ps, _mm_load_ps(src), _mm_load_ps(src2));
	START();
		__m128 o0 = _mm_load_ps(src);
		__m128 o1 = _mm_load_ps(src2);
		for(int i = 0; i < N; i += 4)
			o0 = _mm_shuffle_ps(o0, o1, _MM_SHUFFLE(1, 0, 3, 2));
		_mm_store_ps(dst, o0);
	END(checksum_dst(dst), "_mm_shuffle_ps");

	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = dst[2]; dst[1] = src2[2]; dst[2] = dst[3]; dst[3] = src2[3]; } ENDSCALAR(checksum_dst(dst), "scalar unpackhi_ps");
	BINARYOP_TEST("_mm_unpackhi_ps", _mm_unpackhi_ps, _mm_load_ps(src), _mm_load_ps(src2));
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[2] = dst[1]; dst[1] = dst[0]; dst[0] = src2[0]; dst[3] = src2[1]; } ENDSCALAR(checksum_dst(dst), "scalar unpacklo_ps");
	BINARYOP_TEST("_mm_unpacklo_ps", _mm_unpacklo_ps, _mm_load_ps(src), _mm_load_ps(src2));
	printf("]}\n");
/*
	printf("Finished!\n");
	printf("Total time spent in scalar intrinsics: %f msecs.\n", (double)scalarTotalTicks * 1000.0 / ticks_per_sec());
	printf("Total time spent in SSE1 intrinsics: %f msecs.\n", (double)simdTotalTicks * 1000.0 / ticks_per_sec());
	if (scalarTotalTicks > simdTotalTicks)
		printf("SSE1 was %.3fx faster than scalar!\n", (double)scalarTotalTicks / simdTotalTicks);
	else
		printf("SSE1 was %.3fx slower than scalar!\n", (double)simdTotalTicks / scalarTotalTicks);
*/
#ifdef __EMSCRIPTEN__
	fprintf(stderr,"User Agent: %s\n", emscripten_run_script_string("navigator.userAgent"));
	printf("/*Test finished! Now please close Firefox to continue with benchmark_sse1.py.*/\n");
#endif
	exit(0);
}
Example #3
0
int
main (void)
{
  double x, y;
  mpfr_t xx, yy;
  int c;
  long i;
  mpfr_prec_t p;

  tests_start_mpfr ();

  mpfr_init (xx);
  mpfr_init (yy);

  mpfr_set_prec (xx, 2);
  mpfr_set_prec (yy, 2);
  mpfr_set_str_binary(xx, "-0.10E0");
  mpfr_set_str_binary(yy, "-0.10E0");
  if ((mpfr_cmp) (xx, yy))
    {
      printf ("mpfr_cmp (xx, yy) returns non-zero for prec=2\n");
      exit (1);
    }

  mpfr_set_prec (xx, 65);
  mpfr_set_prec (yy, 65);
  mpfr_set_str_binary(xx, "0.10011010101000110101010000000011001001001110001011101011111011101E623");
  mpfr_set_str_binary(yy, "0.10011010101000110101010000000011001001001110001011101011111011100E623");
  p = 0;
  if (mpfr_cmp2 (xx, yy, &p) <= 0 || p != 64)
    {
      printf ("Error (1) in mpfr_cmp2\n");
      exit (1);
    }
  mpfr_set_str_binary(xx, "0.10100010001110110111000010001000010011111101000100011101000011100");
  mpfr_set_str_binary(yy, "0.10100010001110110111000010001000010011111101000100011101000011011");
  p = 0;
  if (mpfr_cmp2 (xx, yy, &p) <= 0 || p != 64)
    {
      printf ("Error (2) in mpfr_cmp2\n");
      exit (1);
    }

  mpfr_set_prec (xx, 160); mpfr_set_prec (yy, 160);
  mpfr_set_str_binary (xx, "0.1E1");
  mpfr_set_str_binary (yy, "0.1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111100000110001110100");
  p = 0;
  if (mpfr_cmp2 (xx, yy, &p) <= 0 || p != 144)
    {
      printf ("Error (3) in mpfr_cmp2\n");
      exit (1);
    }

  mpfr_set_prec (xx, 53);
  mpfr_set_prec (yy, 200);
  mpfr_set_ui (xx, 1, (mpfr_rnd_t) 0);
  mpfr_set_ui (yy, 1, (mpfr_rnd_t) 0);
  if (mpfr_cmp (xx, yy) != 0)
    {
      printf ("Error in mpfr_cmp: 1.0 != 1.0\n");
      exit (1);
    }
  mpfr_set_prec (yy, 31);
  mpfr_set_str (xx, "1.0000000002", 10, (mpfr_rnd_t) 0);
  mpfr_set_ui (yy, 1, (mpfr_rnd_t) 0);
  if (!(mpfr_cmp (xx,yy)>0))
    {
      printf ("Error in mpfr_cmp: not 1.0000000002 > 1.0\n");
      exit (1);
    }
  mpfr_set_prec (yy, 53);

  /* bug found by Gerardo Ballabio */
  mpfr_set_ui(xx, 0, MPFR_RNDN);
  mpfr_set_str (yy, "0.1", 10, MPFR_RNDN);
  if ((c = mpfr_cmp (xx, yy)) >= 0)
    {
      printf ("Error in mpfr_cmp(0.0, 0.1), gives %d\n", c);
      exit (1);
    }

  mpfr_set_inf (xx, 1);
  mpfr_set_str (yy, "-23489745.0329", 10, MPFR_RNDN);
  if ((c = mpfr_cmp (xx, yy)) <= 0)
    {
      printf ("Error in mpfr_cmp(Infp, 23489745.0329), gives %d\n", c);
      exit (1);
    }

  mpfr_set_inf (xx, 1);
  mpfr_set_inf (yy, -1);
  if ((c = mpfr_cmp (xx, yy)) <= 0)
    {
      printf ("Error in mpfr_cmp(Infp, Infm), gives %d\n", c);
      exit (1);
    }

  mpfr_set_inf (xx, -1);
  mpfr_set_inf (yy, 1);
  if ((c = mpfr_cmp (xx, yy)) >= 0)
    {
      printf ("Error in mpfr_cmp(Infm, Infp), gives %d\n", c);
      exit (1);
    }

  mpfr_set_inf (xx, 1);
  mpfr_set_inf (yy, 1);
  if ((c = mpfr_cmp (xx, yy)) != 0)
    {
      printf ("Error in mpfr_cmp(Infp, Infp), gives %d\n", c);
      exit (1);
    }

  mpfr_set_inf (xx, -1);
  mpfr_set_inf (yy, -1);
  if ((c = mpfr_cmp (xx, yy)) != 0)
    {
      printf ("Error in mpfr_cmp(Infm, Infm), gives %d\n", c);
      exit (1);
    }

  mpfr_set_inf (xx, -1);
  mpfr_set_str (yy, "2346.09234", 10, MPFR_RNDN);
  if ((c = mpfr_cmp (xx, yy)) >= 0)
    {
      printf ("Error in mpfr_cmp(Infm, 2346.09234), gives %d\n", c);
      exit (1);
    }

  mpfr_set_ui (xx, 0, MPFR_RNDN);
  mpfr_set_ui (yy, 1, MPFR_RNDN);
  if ((c = mpfr_cmp3 (xx, yy, 1)) >= 0)
    {
      printf ("Error: mpfr_cmp3 (0, 1, 1) gives %d instead of"
              " a negative value\n", c);
      exit (1);
    }
  if ((c = mpfr_cmp3 (xx, yy, -1)) <= 0)
    {
      printf ("Error: mpfr_cmp3 (0, 1, -1) gives %d instead of"
              " a positive value\n", c);
      exit (1);
    }

  for (i=0; i<500000; )
    {
      x = DBL_RAND ();
      y = DBL_RAND ();
      if (!Isnan(x) && !Isnan(y))
        {
          i++;
          mpfr_set_d (xx, x, MPFR_RNDN);
          mpfr_set_d (yy, y, MPFR_RNDN);
          c = mpfr_cmp (xx,yy);
          if ((c>0 && x<=y) || (c==0 && x!=y) || (c<0 && x>=y))
            {
              printf ("Error in mpfr_cmp with x=%1.20e, y=%1.20e"
                      " mpfr_cmp(x,y)=%d\n", x, y, c);
              exit (1);
            }
        }
    }

  /* Check for NAN */
  mpfr_set_nan (xx);
  mpfr_clear_erangeflag ();
  c = (mpfr_cmp) (xx, yy);
  if (c != 0 || !mpfr_erangeflag_p () )
    {
      printf ("NAN error (1)\n");
      exit (1);
    }
  mpfr_clear_erangeflag ();
  c = (mpfr_cmp) (yy, xx);
  if (c != 0 || !mpfr_erangeflag_p () )
    {
      printf ("NAN error (2)\n");
      exit (1);
    }
  mpfr_clear_erangeflag ();
  c = (mpfr_cmp) (xx, xx);
  if (c != 0 || !mpfr_erangeflag_p () )
    {
      printf ("NAN error (3)\n");
      exit (1);
    }

  mpfr_clear (xx);
  mpfr_clear (yy);

  tests_end_mpfr ();
  return 0;
}
int main()
{
#ifndef __EMSCRIPTEN__
	_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
#endif

	printf ("{ \"workload\": %u, \"results\": [\n", N);
	assert(N%2 == 0); // Don't care about the tail for now.
	double *src = get_src_d();//(float*)aligned_alloc(16, N*sizeof(float));
	for(int i = 0; i < N; ++i)
		src[i] = (double)rand() / RAND_MAX;
	double *src2 = get_src2_d();//(float*)aligned_alloc(16, N*sizeof(float));
	for(int i = 0; i < N; ++i)
		src2[i] = (double)rand() / RAND_MAX;
	double *dst = get_dst_d();//(float*)aligned_alloc(16, N*sizeof(float));

	float scalarTime;
	SETCHART("load");
	START();
		for(int i = 0; i < N; ++i)
			dst[i] = src[i];
	ENDSCALAR(checksum_dst(dst), "scalar");

	LS_TEST("_mm_load_pd", _mm_load_pd, 0, _mm_store_pd, double*, 0, 2);
	LS_TEST("_mm_load_pd1", _mm_load_pd1, 1, _mm_store_pd, double*, 0, 2);
	LS_TEST("_mm_load_sd", _mm_load_sd, 1, _mm_store_pd, double*, 0, 2);
	// _mm_load_si128
	LS_TEST("_mm_load1_pd", _mm_load1_pd, 1, _mm_store_pd, double*, 0, 2);

	__m128d tempReg = _mm_set_pd(1.0, 2.0);
	LSH_TEST("_mm_loadh_pd", tempReg, _mm_loadh_pd, double*, 1, _mm_store_pd, double*, 0, 2);
	// _mm_loadl_epi64
	LSH_TEST("_mm_loadl_pd", tempReg, _mm_loadh_pd, double*, 1, _mm_store_pd, double*, 0, 2);

	LS_TEST("_mm_loadr_pd", _mm_loadr_pd, 0, _mm_store_pd, double*, 0, 2);
	LS_TEST("_mm_loadu_pd", _mm_loadu_pd, 1, _mm_store_pd, double*, 0, 2);
	// _mm_loadu_si128

	SETCHART("set");
/*	_mm_set_epi16
	_mm_set_epi32
	_mm_set_epi64
	_mm_set_epi64x
	_mm_set_epi8 */
	SS_TEST_D("_mm_set_pd", _mm_set_pd(src[i+2], src[i+0]));
	//SS_TEST_D("_mm_set_pd1", _mm_set_pd1(src[i]));
	SS_TEST_D("_mm_set_sd", _mm_set_sd(src[i]));
/*	_mm_set1_epi16
	_mm_set1_epi32
	_mm_set1_epi64
	_mm_set1_epi64x
	_mm_set1_epi8 */
	SS_TEST_D("_mm_set1_pd", _mm_set1_pd(src[i]));
/* 	_mm_setr_epi16
	_mm_setr_epi32
	_mm_setr_epi64
	_mm_setr_epi8 */
	SS_TEST_D("_mm_setr_pd", _mm_set_pd(src[i+2], src[i+0]));
	SS_TEST_D("_mm_setzero_pd", _mm_setzero_pd());
//	_mm_setzero_si128

	SETCHART("move");
	// _mm_move_epi64
	SS_TEST_D("_mm_move_sd", _mm_move_sd(_mm_load_pd(src+i), _mm_load_pd(src2+i)));

	SETCHART("store");
	// _mm_maskmoveu_si128
	LS_TEST("_mm_store_pd", _mm_load_pd, 0, _mm_store_pd, double*, 0, 2);
//	LS_TEST("_mm_store_pd1", _mm_load_pd, 0, _mm_store_pd1, double*, 0);
	LS_TEST("_mm_store_sd", _mm_load_pd, 0, _mm_store_sd, double*, 1, 2);
	// _mm_store_si128
	// _mm_store1_pd
	LS64_TEST("_mm_storeh_pi", _mm_load_pd, 0, _mm_storeh_pi, 1, 2);
	// _mm_storel_epi64
	LS64_TEST("_mm_storel_pi", _mm_load_pd, 0, _mm_storel_pi, 1, 2);
	LS_TEST("_mm_storer_pd", _mm_load_pd, 0, _mm_storer_pd, double*, 0, 2);
	LS_TEST("_mm_storeu_pd", _mm_load_pd, 0, _mm_storeu_pd, double*, 1, 2);
	// _mm_storeu_si128
	LS_TEST("_mm_stream_pd", _mm_load_pd, 0, _mm_stream_pd, double*, 0, 2);
	// _mm_stream_si128
	// _mm_stream_si32
	// _mm_stream_si64

	SETCHART("arithmetic");
	// _mm_add_epi16
	// _mm_add_epi32
	// _mm_add_epi64
	// _mm_add_epi8
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] += src2[0]; dst[1] += src2[1]; dst[2] += src2[2]; dst[3] += src2[3]; } ENDSCALAR(checksum_dst(dst), "scalar add");
	BINARYOP_TEST_D("_mm_add_pd", _mm_add_pd, _mm_load_pd(src), _mm_load_pd(src2));
	BINARYOP_TEST_D("_mm_add_sd", _mm_add_sd, _mm_load_pd(src), _mm_load_pd(src2));
	// _mm_adds_epi16
	// _mm_adds_epi8
	// _mm_adds_epu16
	// _mm_adds_epu8
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] /= src2[0]; dst[1] /= src2[1]; dst[2] /= src2[2]; dst[3] /= src2[3]; } ENDSCALAR(checksum_dst(dst), "scalar div");
	BINARYOP_TEST_D("_mm_div_pd", _mm_div_pd, _mm_load_pd(src), _mm_load_pd(src2));
	BINARYOP_TEST_D("_mm_div_sd", _mm_div_sd, _mm_load_pd(src), _mm_load_pd(src2));
	// _mm_madd_epi16
	// _mm_mul_epu32
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] *= src2[0]; dst[1] *= src2[1]; dst[2] *= src2[2]; dst[3] *= src2[3]; } ENDSCALAR(checksum_dst(dst), "scalar mul");
	BINARYOP_TEST_D("_mm_mul_pd", _mm_mul_pd, _mm_load_pd(src), _mm_load_pd(src2));
	BINARYOP_TEST_D("_mm_mul_sd", _mm_mul_sd, _mm_load_pd(src), _mm_load_pd(src2));
	// _mm_mulhi_epi16
	// _mm_mulhi_epu16
	// _mm_mullo_epi16
	// _mm_sad_epu8
	// _mm_sub_epi16
	// _mm_sub_epi32
	// _mm_sub_epi64
	// _mm_sub_epi8
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] -= src2[0]; dst[1] -= src2[1]; dst[2] -= src2[2]; dst[3] -= src2[3]; } ENDSCALAR(checksum_dst(dst), "scalar sub");
	BINARYOP_TEST_D("_mm_sub_pd", _mm_sub_pd, _mm_load_pd(src), _mm_load_pd(src2));
	BINARYOP_TEST_D("_mm_sub_sd", _mm_sub_sd, _mm_load_pd(src), _mm_load_pd(src2));
	// _mm_subs_epi16
	// _mm_subs_epi8
	// _mm_subs_epu16
	// _mm_subs_epu8

	SETCHART("roots");
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = sqrt(dst[0]); dst[1] = sqrt(dst[1]); dst[2] = sqrt(dst[2]); dst[3] = sqrt(dst[3]); } ENDSCALAR(checksum_dst(dst), "scalar sqrt");
	UNARYOP_TEST_D("_mm_sqrt_pd", _mm_sqrt_pd, _mm_load_pd(src));
//	UNARYOP_TEST_D("_mm_sqrt_sd", _mm_sqrt_sd, _mm_load_pd(src));

	SETCHART("logical");
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = ucastd(dcastu(dst[0]) & dcastu(src2[0])); dst[1] = ucastd(dcastu(dst[1]) & dcastu(src2[1])); dst[2] = ucastd(dcastu(dst[2]) & dcastu(src2[2])); dst[3] = ucastd(dcastu(dst[3]) & dcastu(src2[3])); } ENDSCALAR(checksum_dst(dst), "scalar and");
	BINARYOP_TEST_D("_mm_and_pd", _mm_and_pd, _mm_load_pd(src), _mm_load_pd(src2));
	// _mm_and_si128
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = ucastd((~dcastu(dst[0])) & dcastu(src2[0])); dst[1] = ucastd((~dcastu(dst[1])) & dcastu(src2[1])); dst[2] = ucastd((~dcastu(dst[2])) & dcastu(src2[2])); dst[3] = ucastd((~dcastu(dst[3])) & dcastu(src2[3])); } ENDSCALAR(checksum_dst(dst), "scalar andnot");
	BINARYOP_TEST_D("_mm_andnot_pd", _mm_andnot_pd, _mm_load_pd(src), _mm_load_pd(src2));
	// _mm_andnot_si128
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = ucastd(dcastu(dst[0]) | dcastu(src2[0])); dst[1] = ucastd(dcastu(dst[1]) | dcastu(src2[1])); dst[2] = ucastd(dcastu(dst[2]) | dcastu(src2[2])); dst[3] = ucastd(dcastu(dst[3]) | dcastu(src2[3])); } ENDSCALAR(checksum_dst(dst), "scalar or");
	BINARYOP_TEST_D("_mm_or_pd", _mm_or_pd, _mm_load_pd(src), _mm_load_pd(src2));
	// _mm_or_si128
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = ucastd(dcastu(dst[0]) ^ dcastu(src2[0])); dst[1] = ucastd(dcastu(dst[1]) ^ dcastu(src2[1])); dst[2] = ucastd(dcastu(dst[2]) ^ dcastu(src2[2])); dst[3] = ucastd(dcastu(dst[3]) ^ dcastu(src2[3])); } ENDSCALAR(checksum_dst(dst), "scalar xor");
	BINARYOP_TEST_D("_mm_xor_pd", _mm_xor_pd, _mm_load_pd(src), _mm_load_pd(src2));
	// _mm_xor_si128

	SETCHART("cmp");
	// _mm_cmpeq_epi16
	// _mm_cmpeq_epi32
	// _mm_cmpeq_epi8
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = (dst[0] == src2[0]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[1] = (dst[1] == src2[1]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[2] = (dst[2] == src2[2]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[3] = (dst[3] == src2[3]) ? ucastd(0xFFFFFFFFU) : 0.f; } ENDSCALAR(checksum_dst(dst), "scalar cmp==");
	BINARYOP_TEST_D("_mm_cmpeq_pd", _mm_cmpeq_pd, _mm_load_pd(src), _mm_load_pd(src2));
	BINARYOP_TEST_D("_mm_cmpeq_sd", _mm_cmpeq_sd, _mm_load_pd(src), _mm_load_pd(src2));
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = (dst[0] >= src2[0]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[1] = (dst[1] >= src2[1]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[2] = (dst[2] >= src2[2]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[3] = (dst[3] >= src2[3]) ? ucastd(0xFFFFFFFFU) : 0.f; } ENDSCALAR(checksum_dst(dst), "scalar cmp>=");
	BINARYOP_TEST_D("_mm_cmpge_pd", _mm_cmpge_pd, _mm_load_pd(src), _mm_load_pd(src2));
	BINARYOP_TEST_D("_mm_cmpge_sd", _mm_cmpge_sd, _mm_load_pd(src), _mm_load_pd(src2));
	// _mm_cmpgt_epi16
	// _mm_cmpgt_epi32
	// _mm_cmpgt_epi8
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = (dst[0] > src2[0]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[1] = (dst[1] > src2[1]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[2] = (dst[2] > src2[2]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[3] = (dst[3] > src2[3]) ? ucastd(0xFFFFFFFFU) : 0.f; } ENDSCALAR(checksum_dst(dst), "scalar cmp>");
	BINARYOP_TEST_D("_mm_cmpgt_pd", _mm_cmpgt_pd, _mm_load_pd(src), _mm_load_pd(src2));
	BINARYOP_TEST_D("_mm_cmpgt_sd", _mm_cmpgt_sd, _mm_load_pd(src), _mm_load_pd(src2));
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = (dst[0] <= src2[0]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[1] = (dst[1] <= src2[1]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[2] = (dst[2] <= src2[2]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[3] = (dst[3] <= src2[3]) ? ucastd(0xFFFFFFFFU) : 0.f; } ENDSCALAR(checksum_dst(dst), "scalar cmp<=");
	BINARYOP_TEST_D("_mm_cmple_pd", _mm_cmple_pd, _mm_load_pd(src), _mm_load_pd(src2));
	BINARYOP_TEST_D("_mm_cmple_sd", _mm_cmple_sd, _mm_load_pd(src), _mm_load_pd(src2));
	// _mm_cmplt_epi16
	// _mm_cmplt_epi32
	// _mm_cmplt_epi8
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = (dst[0] < src2[0]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[1] = (dst[1] < src2[1]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[2] = (dst[2] < src2[2]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[3] = (dst[3] < src2[3]) ? ucastd(0xFFFFFFFFU) : 0.f; } ENDSCALAR(checksum_dst(dst), "scalar cmp<");
	BINARYOP_TEST_D("_mm_cmplt_pd", _mm_cmplt_pd, _mm_load_pd(src), _mm_load_pd(src2));
	BINARYOP_TEST_D("_mm_cmplt_sd", _mm_cmplt_sd, _mm_load_pd(src), _mm_load_pd(src2));

	/*_mm_cmpneq_pd
	_mm_cmpneq_sd
	_mm_cmpnge_pd
	_mm_cmpnge_sd
	_mm_cmpngt_pd
	_mm_cmpngt_sd
	_mm_cmpnle_pd
	_mm_cmpnle_sd
	_mm_cmpnlt_pd
	_mm_cmpnlt_sd*/

	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = (!Isnan(dst[0]) && !Isnan(src2[0])) ? ucastd(0xFFFFFFFFU) : 0.f; dst[1] = (!Isnan(dst[1]) && !Isnan(src2[1])) ? ucastd(0xFFFFFFFFU) : 0.f; dst[2] = (!Isnan(dst[2]) && !Isnan(src2[2])) ? ucastd(0xFFFFFFFFU) : 0.f; dst[3] = (!Isnan(dst[3]) && !Isnan(src2[3])) ? ucastd(0xFFFFFFFFU) : 0.f; } ENDSCALAR(checksum_dst(dst), "scalar cmpord");
	BINARYOP_TEST_D("_mm_cmpord_pd", _mm_cmpord_pd, _mm_load_pd(src), _mm_load_pd(src2));
	BINARYOP_TEST_D("_mm_cmpord_sd", _mm_cmpord_sd, _mm_load_pd(src), _mm_load_pd(src2));
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = (Isnan(dst[0]) || Isnan(src2[0])) ? ucastd(0xFFFFFFFFU) : 0.f; dst[1] = (Isnan(dst[1]) || Isnan(src2[1])) ? ucastd(0xFFFFFFFFU) : 0.f; dst[2] = (Isnan(dst[2]) || Isnan(src2[2])) ? ucastd(0xFFFFFFFFU) : 0.f; dst[3] = (Isnan(dst[3]) || Isnan(src2[3])) ? ucastd(0xFFFFFFFFU) : 0.f; } ENDSCALAR(checksum_dst(dst), "scalar cmpunord");
	BINARYOP_TEST_D("_mm_cmpunord_pd", _mm_cmpunord_pd, _mm_load_pd(src), _mm_load_pd(src2));
	BINARYOP_TEST_D("_mm_cmpunord_sd", _mm_cmpunord_sd, _mm_load_pd(src), _mm_load_pd(src2));

	SETCHART("max");
	// _mm_max_epi16
	// _mm_max_epu8
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = Max(dst[0], src2[0]); dst[1] = Max(dst[1], src2[1]); dst[2] = Max(dst[2], src2[2]); dst[3] = Max(dst[3], src2[3]); } ENDSCALAR(checksum_dst(dst), "scalar max");
	BINARYOP_TEST_D("_mm_max_pd", _mm_max_pd, _mm_load_pd(src), _mm_load_pd(src2));
	BINARYOP_TEST_D("_mm_max_sd", _mm_max_sd, _mm_load_pd(src), _mm_load_pd(src2));
	// _mm_min_epi16
	// _mm_min_epu8
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = Min(dst[0], src2[0]); dst[1] = Min(dst[1], src2[1]); dst[2] = Min(dst[2], src2[2]); dst[3] = Min(dst[3], src2[3]); } ENDSCALAR(checksum_dst(dst), "scalar min");
	BINARYOP_TEST_D("_mm_min_pd", _mm_min_pd, _mm_load_pd(src), _mm_load_pd(src2));
	BINARYOP_TEST_D("_mm_min_sd", _mm_min_sd, _mm_load_pd(src), _mm_load_pd(src2));

	SETCHART("shuffle");
	// _mm_extract_epi16
	// _mm_insert_epi16
	// _mm_shuffle_epi32
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[3] = dst[1]; dst[2] = dst[0]; dst[1] = src2[3]; dst[0] = src2[2]; } ENDSCALAR(checksum_dst(dst), "scalar shuffle");
//	BINARYOP_TEST_D("_mm_shuffle_pd", _mm_shuffle_pd, _mm_load_pd(src), _mm_load_pd(src2));
	START();
		__m128 o0 = _mm_load_pd(src);
		__m128 o1 = _mm_load_pd(src2);
		for(int i = 0; i < N; i += 4)
			o0 = _mm_shuffle_pd(o0, o1, _MM_SHUFFLE(1, 0, 3, 2));
		_mm_store_pd(dst, o0);
	END(checksum_dst(dst), "_mm_shuffle_pd");

	// _mm_shufflehi_epi16
	// _mm_shufflelo_epi16
	// _mm_unpackhi_epi16
	// _mm_unpackhi_epi32
	// _mm_unpackhi_epi64
	// _mm_unpackhi_epi8
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = dst[2]; dst[1] = src2[2]; dst[2] = dst[3]; dst[3] = src2[3]; } ENDSCALAR(checksum_dst(dst), "scalar unpackhi_pd");
	BINARYOP_TEST_D("_mm_unpackhi_pd", _mm_unpackhi_pd, _mm_load_pd(src), _mm_load_pd(src2));
	// _mm_unpacklo_epi16
	// _mm_unpacklo_epi32
	// _mm_unpacklo_epi64
	// _mm_unpacklo_epi8
	START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[2] = dst[1]; dst[1] = dst[0]; dst[0] = src2[0]; dst[3] = src2[1]; } ENDSCALAR(checksum_dst(dst), "scalar unpacklo_pd");
	BINARYOP_TEST_D("_mm_unpacklo_pd", _mm_unpacklo_pd, _mm_load_pd(src), _mm_load_pd(src2));
	printf("]}\n");
/*
	printf("Finished!\n");
	printf("Total time spent in scalar intrinsics: %f msecs.\n", (double)scalarTotalTicks * 1000.0 / ticks_per_sec());
	printf("Total time spent in SSE1 intrinsics: %f msecs.\n", (double)simdTotalTicks * 1000.0 / ticks_per_sec());
	if (scalarTotalTicks > simdTotalTicks)
		printf("SSE1 was %.3fx faster than scalar!\n", (double)scalarTotalTicks / simdTotalTicks);
	else
		printf("SSE1 was %.3fx slower than scalar!\n", (double)simdTotalTicks / scalarTotalTicks);
*/
#ifdef __EMSCRIPTEN__
	fprintf(stderr,"User Agent: %s\n", emscripten_run_script_string("navigator.userAgent"));
	printf("/*Test finished! Now please close Firefox to continue with benchmark_sse2.py.*/\n");
#endif
	exit(0);
}