Example #1
0
/*
 * Stores into *dest a sum built from the strided inputs src1 and src2,
 * processing n elements: an SSE2 accumulator handles elements four and
 * then two at a time, and a final scalar product covers an odd leftover.
 *
 * The per-pair work is done by MULTSUM_SSE2_STRIDED, which is defined
 * outside this view; presumably it multiplies two src1/src2 element pairs
 * starting at the given element index and adds them into sum.reg --
 * TODO confirm against the macro definition.
 *
 * sstr1/sstr2 are the per-element offsets handed to OIL_INCREMENT
 * (presumably byte strides -- confirm against the OIL_INCREMENT definition).
 */
static void
multsum_f64_sse2_unroll4(double *dest,
     const double *src1, int sstr1,
     const double *src2, int sstr2,
     int n)
{
  /* Not referenced directly below; presumably scratch registers used
   * inside MULTSUM_SSE2_STRIDED. */
  __m128d t1, t2;
  /* Overlays the SSE accumulator with two doubles so both lanes can be
   * read back individually when forming the final result. */
  union {
    __m128d reg;
    double vals[2];
  } sum;
  int i = 0;

  sum.reg = _mm_setzero_pd();
  /* Main loop: four elements per iteration (two macro invocations), then
   * both sources are advanced by four strides. */
  while (i < n-3) {
    MULTSUM_SSE2_STRIDED(0);
    MULTSUM_SSE2_STRIDED(2);

    OIL_INCREMENT(src1, 4*sstr1);
    OIL_INCREMENT(src2, 4*sstr2);
    i += 4;
  }
  /* Remainder of at least two elements: one macro invocation per pass,
   * advancing both sources by two strides. */
  while (i < n-1) {
    MULTSUM_SSE2_STRIDED(0);

    OIL_INCREMENT(src1, 2*sstr1);
    OIL_INCREMENT(src2, 2*sstr2);
    i+=2;
  }
  /* Horizontal add of the two accumulator lanes. */
  *dest = sum.vals[0] + sum.vals[1];
  /* One element left over (odd n): add its product in scalar code. */
  if (i < n) {
    *dest += (OIL_GET(src1,0,double)*OIL_GET(src2,0,double));
  }
Example #2
0
/*
 * Writes the 32-bit value *param to n destination slots, stepping dest
 * with OIL_INCREMENT(dest, dstr) after every store.
 *
 * An odd n is handled by emitting one store up front; the remaining
 * n >> 1 pairs are written two stores per loop pass.
 */
static void splat_u32_unroll2 (uint32_t *dest, int dstr, const uint32_t *param, int n)
{
  int pairs;

  /* Peel off the single leftover element when the count is odd. */
  if (n & 1) {
    *dest = *param;
    OIL_INCREMENT(dest, dstr);
  }

  /* Two stores per pass; *param is re-read before each store. */
  for (pairs = n >> 1; pairs > 0; pairs--) {
    *dest = *param;
    OIL_INCREMENT(dest, dstr);

    *dest = *param;
    OIL_INCREMENT(dest, dstr);
  }
}
Example #3
0
/*
 * Sums n strided doubles starting at src and stores the total in *dest.
 *
 * The first (n & 3) elements are folded into the first accumulator so
 * that the remaining count is a multiple of four; the rest is summed in
 * blocks of four, one accumulator per position within the block.
 * sstr is the per-element offset passed to OIL_INCREMENT / OIL_GET.
 */
static void
sum_f64_i10_unroll4 (double *dest, double *src, int sstr, int n)
{
	double acc0 = 0;
	double acc1 = 0;
	double acc2 = 0;
	double acc3 = 0;
	int head = n & 3;
	int blk;

	/* Consume the leading elements that do not fill a block of four. */
	n -= head;
	for (; head > 0; head--) {
		acc0 += *src;
		OIL_INCREMENT (src, sstr);
	}

	/* n is now a multiple of four; blk indexes the first element of each block. */
	blk = 0;
	while (blk < n) {
		const int base = sstr * blk;

		acc0 += OIL_GET(src, base, double);
		acc1 += OIL_GET(src, base + sstr, double);
		acc2 += OIL_GET(src, base + sstr*2, double);
		acc3 += OIL_GET(src, base + sstr*3, double);
		blk += 4;
	}

	/* Combine in the same left-to-right order to keep rounding unchanged. */
	*dest = acc0 + acc1 + acc2 + acc3;
}