Exemplo n.º 1
0
/*
 * Adds the element-wise products of u and v into *dest, one pair per
 * iteration. The running value is kept in *dest itself, so it is read from
 * and written back to memory on every pass. vec_length(u) pairs are used.
 */
void dotproduct3(vec_ptr u, vec_ptr v, data_t *dest)
{
    long int idx = 0;
    /* NOTE(review): the running value starts at 1.0, so the stored result is
       1.0 plus the sum of products -- confirm this seed is intended. */
    *dest = 1.0;
    int count = vec_length(u);
    data_t *lhs = get_vec_start(u);
    data_t *rhs = get_vec_start(v);

    while (idx < count) {
        *dest = *dest + lhs[idx] * rhs[idx];
        idx++;
    }
}
Exemplo n.º 2
0
/*
 * Sums the element-wise products of u and v in a local variable and stores
 * the total to *dest once, after the loop. vec_length(u) pairs are used.
 * NOTE(review): the local total starts at 1.0, so the stored result is 1.0
 * plus the sum of products -- confirm this seed is intended.
 */
void dotproduct4(vec_ptr u, vec_ptr v, data_t *dest)
{
    long int count = vec_length(u);
    data_t *lhs = get_vec_start(u);
    data_t *rhs = get_vec_start(v);
    data_t total = 1.0;
    long int idx = 0;

    while (idx < count) {
        total = total + lhs[idx] * rhs[idx];
        idx++;
    }
    *dest = total;
}
Exemplo n.º 3
0
/*
 * Sums the element-wise products of u and v in a local variable that starts
 * at 0, then stores the total to *dest. vec_length(u) pairs are used.
 */
void dotproduct4(vec_ptr u, vec_ptr v, data_t *dest)
{
    long int idx = 0;
    /* Early store; it is overwritten by the final assignment below. */
    *dest = 1.0;
    int count = vec_length(u);
    data_t *lhs = get_vec_start(u);
    data_t *rhs = get_vec_start(v);
    data_t total = 0;

    while (idx < count) {
        total = total + lhs[idx] * rhs[idx];
        idx++;
    }
    *dest = total;
}
Exemplo n.º 4
0
/*
 * Inner product of u and v: sums udata[k] * vdata[k] for k in
 * [0, vec_length(u)) into a local starting at 0, then writes the total to
 * *dest with a single store.
 */
void inner4(vec_ptr u, vec_ptr v, data_t *dest)
{
    int count = vec_length(u);
    data_t *lhs = get_vec_start(u);
    data_t *rhs = get_vec_start(v);
    data_t total = (data_t) 0;
    long int k = 0;

    /* Each iteration depends on the previous value of total. */
    while (k < count) {
        total = total + lhs[k] * rhs[k];
        k++;
    }

    /* Single store to the destination after the loop. */
    *dest = total;
}
Exemplo n.º 5
0
/*
 * Combines all elements of v with OP, starting from IDENT, and stores the
 * result in *dest. The main loop consumes 12 elements per pass: neighbours
 * are paired, pairs are merged into three groups of four, and the three
 * groups are merged before being folded into the accumulator.
 */
void unroll12aa_combine(vec_ptr v, data_t *dest)
{
    long int count = vec_length(v);
    long int last_full = count - 11;
    data_t *elems = get_vec_start(v);
    data_t total = IDENT;
    long int pos = 0;

    /* Full blocks of 12 */
    while (pos < last_full) {
        data_t *blk = elems + pos;
        data_t pair_a = blk[0] OP blk[1];
        data_t pair_b = blk[2] OP blk[3];
        data_t quad_0 = pair_a OP pair_b;
        data_t pair_c = blk[4] OP blk[5];
        data_t pair_d = blk[6] OP blk[7];
        data_t quad_1 = pair_c OP pair_d;
        data_t pair_e = blk[8] OP blk[9];
        data_t pair_f = blk[10] OP blk[11];
        data_t quad_2 = pair_e OP pair_f;
        total = total OP (quad_0 OP quad_1 OP quad_2);
        pos += 12;
    }

    /* Leftover elements, one at a time */
    while (pos < count) {
        total = total OP elems[pos];
        pos++;
    }

    *dest = total;
}
Exemplo n.º 6
0
/*
 * Combines all elements of v with OP, starting from IDENT, and stores the
 * result in *dest. The main loop consumes 7 elements per pass: three
 * neighbour pairs plus one single element are merged into two groups, which
 * are merged before being folded into the accumulator.
 */
void unroll7aa_combine(vec_ptr v, data_t *dest)
{
    long int count = vec_length(v);
    long int last_full = count - 6;
    data_t *elems = get_vec_start(v);
    data_t total = IDENT;
    long int pos = 0;

    /* Full blocks of 7 */
    while (pos < last_full) {
        data_t *blk = elems + pos;
        data_t pair_a = blk[0] OP blk[1];
        data_t pair_b = blk[2] OP blk[3];
        data_t group_0 = pair_a OP pair_b;
        data_t pair_c = blk[4] OP blk[5];
        data_t single = blk[6];
        data_t group_1 = pair_c OP single;
        total = total OP (group_0 OP group_1);
        pos += 7;
    }

    /* Leftover elements, one at a time */
    while (pos < count) {
        total = total OP elems[pos];
        pos++;
    }

    *dest = total;
}
Exemplo n.º 7
0
/*
 * Combines all elements of v with OP and stores the result in *dest.
 *
 * The main loop consumes 8 elements per pass, feeding each one into its own
 * accumulator (acc0..acc7, all starting at IDENT). Leftover elements are
 * folded into acc0, and the eight accumulators are merged at the end.
 *
 * Fix: element 5 used to be written as `acc6 = acc5 OP data[5]`, which left
 * acc5 stuck at IDENT and discarded everything acc6 had accumulated in
 * earlier passes. Each lane now updates its own accumulator.
 */
void unroll8x8_combine(vec_ptr v, data_t *dest)
{
    long int length = vec_length(v);
    data_t *data = get_vec_start(v);
    /* Main loop runs while at least 8 elements remain (data + 7 < end). */
    data_t *dend = data+length-7;
    data_t acc0 = IDENT;
    data_t acc1 = IDENT;
    data_t acc2 = IDENT;
    data_t acc3 = IDENT;
    data_t acc4 = IDENT;
    data_t acc5 = IDENT;
    data_t acc6 = IDENT;
    data_t acc7 = IDENT;

    while (data < dend) {
        acc0 = acc0 OP data[0];
        acc1 = acc1 OP data[1];
        acc2 = acc2 OP data[2];
        acc3 = acc3 OP data[3];
        acc4 = acc4 OP data[4];
        acc5 = acc5 OP data[5];
        acc6 = acc6 OP data[6];
        acc7 = acc7 OP data[7];
        data += 8;
    }

    /* Restore the true end pointer and finish the remaining elements. */
    dend += 7;
    while (data < dend) {
        acc0 = acc0 OP *data;
        data ++;
    }
    *dest = acc0 OP acc1 OP acc2 OP acc3 OP acc4 OP acc5 OP acc6 OP acc7;
}
Exemplo n.º 8
0
/*
 * Combines all elements of v with OP and stores the result in *dest.
 *
 * Elements are consumed one at a time until the data address is a multiple
 * of VBYTES, then two vec_t chunks (2*VSIZE elements) per pass are merged
 * with each other and folded into a vector accumulator. Any leftover
 * elements are handled one at a time, and finally the VSIZE lanes of the
 * vector accumulator are folded into the scalar result.
 */
void simd_v2a_combine(vec_ptr v, data_t *dest)
{
    long int lane;
    pack_t bridge;
    vec_t vtotal;
    data_t *cur = get_vec_start(v);
    int remaining = vec_length(v);
    data_t scalar = IDENT;

    /* Fill every lane with IDENT and use that as the vector seed */
    for (lane = 0; lane < VSIZE; lane++) {
        bridge.d[lane] = IDENT;
    }
    vtotal = bridge.v;

    /* Scalar steps until the address is a multiple of VBYTES */
    while (((long) cur) % VBYTES && remaining) {
        scalar = scalar OP *cur;
        cur++;
        remaining--;
    }

    /* Two vector chunks per pass, merged before touching the accumulator */
    for (; remaining >= 2*VSIZE; remaining -= 2*VSIZE) {
        vec_t first = *((vec_t *) cur);
        vec_t second = *((vec_t *) (cur+VSIZE));
        vtotal = vtotal OP (first OP second);
        cur += 2*VSIZE;
    }

    /* Scalar steps for whatever is left */
    for (; remaining; remaining--) {
        scalar = scalar OP *cur;
        cur++;
    }

    /* Fold the vector lanes into the scalar result */
    bridge.v = vtotal;
    for (lane = 0; lane < VSIZE; lane++) {
        scalar = scalar OP bridge.d[lane];
    }
    *dest = scalar;
}
Exemplo n.º 9
0
/*
 * Combines all elements of v with OP and stores the result in *dest.
 *
 * The main loop consumes 12 elements per pass using six accumulators (all
 * starting at IDENT); accumulator k takes elements k and k+6 of each block.
 * Leftover elements go into the first accumulator, and the six accumulators
 * are merged pairwise at the end.
 */
void unroll12x6a_combine(vec_ptr v, data_t *dest)
{
    long int count = vec_length(v);
    long int last_full = count - 11;
    data_t *elems = get_vec_start(v);
    data_t s0 = IDENT;
    data_t s1 = IDENT;
    data_t s2 = IDENT;
    data_t s3 = IDENT;
    data_t s4 = IDENT;
    data_t s5 = IDENT;
    long int pos = 0;

    /* Full blocks of 12 */
    while (pos < last_full) {
        data_t *blk = elems + pos;
        s0 = s0 OP blk[0];
        s0 = s0 OP blk[6];
        s1 = s1 OP blk[1];
        s1 = s1 OP blk[7];
        s2 = s2 OP blk[2];
        s2 = s2 OP blk[8];
        s3 = s3 OP blk[3];
        s3 = s3 OP blk[9];
        s4 = s4 OP blk[4];
        s4 = s4 OP blk[10];
        s5 = s5 OP blk[5];
        s5 = s5 OP blk[11];
        pos += 12;
    }

    /* Leftover elements, one at a time */
    while (pos < count) {
        s0 = s0 OP elems[pos];
        pos++;
    }
    *dest = (s0 OP s1) OP (s2 OP s3) OP (s4 OP s5);
}
Exemplo n.º 10
0
/*
 * Combines all elements of v with OP and stores the result in *dest.
 *
 * The main loop consumes 10 elements per pass, one per accumulator (ten
 * accumulators, all starting at IDENT). Leftover elements go into the first
 * accumulator, and the accumulators are merged pairwise at the end.
 */
void unroll10x10a_combine(vec_ptr v, data_t *dest)
{
    long int count = vec_length(v);
    long int last_full = count - 9;
    data_t *elems = get_vec_start(v);
    data_t s0 = IDENT;
    data_t s1 = IDENT;
    data_t s2 = IDENT;
    data_t s3 = IDENT;
    data_t s4 = IDENT;
    data_t s5 = IDENT;
    data_t s6 = IDENT;
    data_t s7 = IDENT;
    data_t s8 = IDENT;
    data_t s9 = IDENT;
    long int pos = 0;

    /* Full blocks of 10 */
    while (pos < last_full) {
        data_t *blk = elems + pos;
        s0 = s0 OP blk[0];
        s1 = s1 OP blk[1];
        s2 = s2 OP blk[2];
        s3 = s3 OP blk[3];
        s4 = s4 OP blk[4];
        s5 = s5 OP blk[5];
        s6 = s6 OP blk[6];
        s7 = s7 OP blk[7];
        s8 = s8 OP blk[8];
        s9 = s9 OP blk[9];
        pos += 10;
    }

    /* Leftover elements, one at a time */
    while (pos < count) {
        s0 = s0 OP elems[pos];
        pos++;
    }
    *dest = ((s0 OP s1) OP (s2 OP s3)) OP
        ((s4 OP s5) OP (s6 OP s7)) OP
        (s8 OP s9);
}
Exemplo n.º 11
0
/*
 * Combines all elements of v with OP, starting from IDENT, and stores the
 * result in *dest. The main loop consumes 16 elements per pass, two per
 * statement, all folded left-to-right into a single accumulator.
 */
void unroll16a_combine(vec_ptr v, data_t *dest)
{
    long int count = vec_length(v);
    long int last_full = count - 15;
    data_t *elems = get_vec_start(v);
    data_t total = IDENT;
    long int pos = 0;

    /* Full blocks of 16 */
    while (pos < last_full) {
        data_t *blk = elems + pos;
        total = total OP blk[0] OP blk[1];
        total = total OP blk[2] OP blk[3];
        total = total OP blk[4] OP blk[5];
        total = total OP blk[6] OP blk[7];
        total = total OP blk[8] OP blk[9];
        total = total OP blk[10] OP blk[11];
        total = total OP blk[12] OP blk[13];
        total = total OP blk[14] OP blk[15];
        pos += 16;
    }

    /* Leftover elements, one at a time */
    while (pos < count) {
        total = total OP elems[pos];
        pos++;
    }
    *dest = total;
}
Exemplo n.º 12
0
/*
 * Blocked successive over-relaxation over the grid held in v.
 *
 * The data is indexed as data[i*length + j] with length = get_vec_length(v).
 * Interior points (1 .. length-2 in both directions) are visited in b-by-b
 * blocks. Each point is moved by OMEGA times its difference from the average
 * of its four neighbours, and the absolute differences are summed into
 * mean_change.
 *
 * v          - grid to relax in place
 * iterations - receives the number of sweeps performed
 * b          - block edge length; values <= 0 are treated as 1
 *
 * Blocks are clamped to the interior, so (length-2) no longer has to be a
 * multiple of b; when it is, the visit order is the same as before.
 */
void SOR_blocked(vec_ptr v, int *iterations, int b)
{
  long int i, j, ii, jj;
  long int length = get_vec_length(v);
  data_t *data = get_vec_start(v);
  double change, mean_change = 100;
  int iters = 0;
  /* A non-positive step would never advance the block loops. */
  long int step = (b > 0) ? b : 1;

  /* NOTE(review): the `|| 1` makes this condition always true, so only the
     divergence check or MAX_ITERS ends the loop and the TOL test has no
     effect -- presumably deliberate for fixed-length runs; confirm. */
  while (((mean_change/(double)(length*length)) > (double)TOL) || 1) {
    iters++;
    mean_change = 0;
    for (ii = 1; ii < length-1; ii+=step) {
      /* Do not let a partial block run past the last interior row. */
      long int i_end = (ii + step < length-1) ? ii + step : length-1;
      for (jj = 1; jj < length-1; jj+=step) {
        /* Same clamp for the last interior column. */
        long int j_end = (jj + step < length-1) ? jj + step : length-1;
        for (i = ii; i < i_end; i++)
          for (j = jj; j < j_end; j++) {
            change = data[i*length+j] - .25 * (data[(i-1)*length+j] +
                                              data[(i+1)*length+j] +
                                              data[i*length+j+1] +
                                              data[i*length+j-1]);
            data[i*length+j] -= change * OMEGA;
            if (change < 0){
              change = -change;
            }
            mean_change += change;
          }
      }
    }

    /* Divergence probe. abs() is the integer function and would truncate
       the value, so the magnitude is taken by hand in double. */
    double probe = (double)data[(length-2)*(length-2)];
    if (probe < 0) {
      probe = -probe;
    }
    if (probe > 10.0*(MAXVAL - MINVAL)) {
      printf("\n PROBABLY DIVERGENCE iter = %d", iters);
      break;
    }
    if (iters == MAX_ITERS) break;
  }
  *iterations = iters;
}
Exemplo n.º 13
0
/*
 * Combines all elements of v with OP and stores the result in *dest.
 *
 * The main loop consumes 7 elements per pass, one per accumulator (seven
 * accumulators, all starting at IDENT). Leftover elements go into the first
 * accumulator, and the accumulators are merged at the end.
 */
void unroll7x7a_combine(vec_ptr v, data_t *dest)
{
    long int count = vec_length(v);
    long int last_full = count - 6;
    data_t *elems = get_vec_start(v);
    data_t s0 = IDENT;
    data_t s1 = IDENT;
    data_t s2 = IDENT;
    data_t s3 = IDENT;
    data_t s4 = IDENT;
    data_t s5 = IDENT;
    data_t s6 = IDENT;
    long int pos = 0;

    /* Full blocks of 7 */
    while (pos < last_full) {
        data_t *blk = elems + pos;
        s0 = s0 OP blk[0];
        s1 = s1 OP blk[1];
        s2 = s2 OP blk[2];
        s3 = s3 OP blk[3];
        s4 = s4 OP blk[4];
        s5 = s5 OP blk[5];
        s6 = s6 OP blk[6];
        pos += 7;
    }

    /* Leftover elements, one at a time */
    while (pos < count) {
        s0 = s0 OP elems[pos];
        pos++;
    }
    *dest = ((s0 OP s1) OP (s2 OP s3)) OP (s4 OP s5 OP s6);
}
Exemplo n.º 14
0
/*
 * Combines all elements of v with OP and stores the result in *dest.
 *
 * The main loop consumes 8 elements per pass using four accumulators (all
 * starting at IDENT); accumulator k takes elements k and k+4 of each block.
 * Leftover elements go into the first accumulator, and the accumulators are
 * merged left-to-right at the end.
 */
void unroll8x4a_combine(vec_ptr v, data_t *dest)
{
    long int count = vec_length(v);
    long int last_full = count - 7;
    data_t *elems = get_vec_start(v);
    data_t s0 = IDENT;
    data_t s1 = IDENT;
    data_t s2 = IDENT;
    data_t s3 = IDENT;
    long int pos = 0;

    /* Full blocks of 8 */
    while (pos < last_full) {
        data_t *blk = elems + pos;
        s0 = s0 OP blk[0];
        s1 = s1 OP blk[1];
        s2 = s2 OP blk[2];
        s3 = s3 OP blk[3];
        s0 = s0 OP blk[4];
        s1 = s1 OP blk[5];
        s2 = s2 OP blk[6];
        s3 = s3 OP blk[7];
        pos += 8;
    }

    /* Leftover elements, one at a time */
    while (pos < count) {
        s0 = s0 OP elems[pos];
        pos++;
    }
    *dest = s0 OP s1 OP s2 OP s3;
}
Exemplo n.º 15
0
/*
 * Combines all elements of v with OP and stores the result in *dest.
 *
 * Pointer-based loop that consumes 9 elements per pass, dealt round-robin to
 * three accumulators (all starting at IDENT). Leftover elements go into the
 * first accumulator, and the three are merged left-to-right at the end.
 */
void unroll9x3_combine(vec_ptr v, data_t *dest)
{
    long int count = vec_length(v);
    data_t *cur = get_vec_start(v);
    /* Main loop runs while cur + 8 is still inside the data. */
    data_t *stop = cur+count-8;
    data_t s0 = IDENT;
    data_t s1 = IDENT;
    data_t s2 = IDENT;

    for (; cur < stop; cur += 9) {
        s0 = s0 OP cur[0];
        s1 = s1 OP cur[1];
        s2 = s2 OP cur[2];
        s0 = s0 OP cur[3];
        s1 = s1 OP cur[4];
        s2 = s2 OP cur[5];
        s0 = s0 OP cur[6];
        s1 = s1 OP cur[7];
        s2 = s2 OP cur[8];
    }

    /* Move the stop pointer to the true end and finish one at a time. */
    stop += 8;
    for (; cur < stop; cur++) {
        s0 = s0 OP *cur;
    }
    *dest = s0 OP s1 OP s2;
}
Exemplo n.º 16
0
/*
 * Combines all elements of v with OP, starting from IDENT, and stores the
 * result in *dest. Pointer-based loop that consumes 16 elements per pass
 * into a single accumulator; the length % 16 leftover elements are handled
 * one at a time afterwards.
 */
void unroll16_combine(vec_ptr v, data_t *dest)
{
    long int count = vec_length(v);
    data_t *cur = get_vec_start(v);
    int leftover = count%16;
    /* End of the part that divides evenly into blocks of 16. */
    data_t *stop = cur+count-leftover;
    data_t total = IDENT;

    for (; cur < stop; cur += 16) {
        total = total OP cur[0];
        total = total OP cur[1];
        total = total OP cur[2];
        total = total OP cur[3];
        total = total OP cur[4];
        total = total OP cur[5];
        total = total OP cur[6];
        total = total OP cur[7];
        total = total OP cur[8];
        total = total OP cur[9];
        total = total OP cur[10];
        total = total OP cur[11];
        total = total OP cur[12];
        total = total OP cur[13];
        total = total OP cur[14];
        total = total OP cur[15];
    }

    /* Extend the stop pointer to the true end and finish one at a time. */
    stop += leftover;
    for (; cur < stop; cur++) {
        total = total OP *cur;
    }
    *dest = total;
}
Exemplo n.º 17
0
/*
 * SOR: successive over-relaxation over the grid held in v.
 *
 * The data is indexed as data[i*length + j] with length = get_vec_length(v).
 * Each sweep visits the interior points (1 .. length-2 in both directions),
 * moves each one by OMEGA times its difference from the average of its four
 * neighbours, and sums the absolute differences into mean_change. Sweeps
 * repeat while mean_change / (length*length) exceeds TOL, or until the
 * divergence probe fires or MAX_ITERS sweeps have run.
 *
 * v          - grid to relax in place
 * iterations - receives the number of sweeps performed
 */
void SOR(vec_ptr v, int *iterations)
{
  long int i, j;
  long int length = get_vec_length(v);
  data_t *data = get_vec_start(v);
  double change, mean_change = 100;   /* start with something big */
  int iters = 0;

  while ((mean_change/(double)(length*length)) > (double)TOL) {
    iters++;
    mean_change = 0;
    for (i = 1; i < length-1; i++)
      for (j = 1; j < length-1; j++) {
        change = data[i*length+j] - .25 * (data[(i-1)*length+j] +
                                          data[(i+1)*length+j] +
                                          data[i*length+j+1] +
                                          data[i*length+j-1]);
        data[i*length+j] -= change * OMEGA;
        if (change < 0){
          change = -change;
        }
        mean_change += change;
      }

    /* Divergence probe. abs() is the integer function and would truncate
       the value, so the magnitude is taken by hand in double. */
    double probe = (double)data[(length-2)*(length-2)];
    if (probe < 0) {
      probe = -probe;
    }
    if (probe > 10.0*(MAXVAL - MINVAL)) {
      /* iters is an int, so the conversion must be %d (was %ld). */
      printf("\n PROBABLY DIVERGENCE iter = %d", iters);
      break;
    }
    /* Limit the number of iterations; this adds a constant factor to the kernel */
    if(iters == MAX_ITERS) break;
  }
  *iterations = iters;
}
Exemplo n.º 18
0
/* $begin combine5px8-ans */
/*
 * Combines all elements of v with OPER, starting from IDENT, and stores the
 * result in *dest. The main loop folds 8 elements per pass, left-to-right,
 * into a single accumulator; leftover elements are folded one at a time.
 */
void combine5px8(vec_ptr v, data_t *dest)
{
    int count = vec_length(v);
    int last_start = count - 8;
    data_t *elems = get_vec_start(v);
    data_t total = IDENT;
    int pos = 0;

    /* Full blocks of 8: a block may start at any index up to count - 8 */
    while (pos <= last_start) {
        total = total OPER elems[pos]
            OPER elems[pos+1]
            OPER elems[pos+2]
            OPER elems[pos+3]
            OPER elems[pos+4]
            OPER elems[pos+5]
            OPER elems[pos+6]
            OPER elems[pos+7];
        pos += 8;
    }

    /* Leftover elements, one at a time */
    while (pos < count) {
        total = total OPER elems[pos];
        pos++;
    }
    *dest = total;
}
Exemplo n.º 19
0
/*
 * Inner product of u and v, unrolled by 4: sums udata[k] * vdata[k] for k in
 * [0, vec_length(u)) and writes the total to *dest.
 *
 * Fixes two defects in the previous loop structure:
 *  - the unrolled loop ran while i < length, so it read up to three elements
 *    past the end whenever length was not a multiple of 4;
 *  - the tail loop restarted at i - 4, adding the last four products a
 *    second time (and reading negative indices when length was 0).
 * The unrolled loop now stops while a full group of 4 still fits, and the
 * tail loop continues from where it stopped.
 */
void inner_ged(vec_ptr u, vec_ptr v, data_t *dest) {
    int i;
    int length = vec_length(u);
    int limit = length - 3;   /* last start index with 4 elements available */
    data_t *udata = get_vec_start(u);
    data_t *vdata = get_vec_start(v);
    data_t sum = (data_t) 0;

    /* Full groups of 4 */
    for (i = 0; i < limit; i += 4) {
        sum = sum + udata[i] * vdata[i];
        sum = sum + udata[i+1] * vdata[i+1];
        sum = sum + udata[i+2] * vdata[i+2];
        sum = sum + udata[i+3] * vdata[i+3];
    }

    /* Remaining 0..3 elements */
    for (; i < length; i++) {
        sum = sum + udata[i] * vdata[i];
    }
    *dest = sum;
}
Exemplo n.º 20
0
/*
 * Sums the element-wise products of u and v, two pairs per pass, into a
 * local that starts at 0, then stores the total to *dest. The two products
 * of a pass are added to each other before being added to the total.
 * vec_length(u) pairs are used.
 */
void dotproduct7(vec_ptr u, vec_ptr v, data_t *dest)
{
    long int idx = 0;
    /* Early store; it is overwritten by the final assignment below. */
    *dest = 1.0;
    int count = vec_length(u);
    int last_pair = count -1;
    data_t *lhs = get_vec_start(u);
    data_t *rhs = get_vec_start(v);
    data_t total = 0;

    /* Two products per pass */
    while (idx < last_pair) {
        total = total + (lhs[idx] * rhs[idx] + lhs[idx+1]*rhs[idx+1]);
        idx += 2;
    }

    /* At most one leftover product */
    while (idx < count) {
        total = total + lhs[idx] * rhs[idx];
        idx++;
    }
    *dest = total;
}
Exemplo n.º 21
0
/*
 * Combines all elements of v with OP and stores the result in *dest.
 *
 * Elements are consumed one at a time until the data address is a multiple
 * of VBYTES, then eight vec_t chunks (8*VSIZE elements) per pass are folded
 * into eight separate vector accumulators. Leftover elements are handled one
 * at a time; the eight accumulators are then merged and their VSIZE lanes
 * folded into the scalar result.
 */
void simd_v8_combine(vec_ptr v, data_t *dest)
{
    long int lane;
    pack_t bridge;
    vec_t va0, va1, va2, va3, va4, va5, va6, va7;
    data_t *cur = get_vec_start(v);
    int remaining = vec_length(v);
    data_t scalar = IDENT;

    /* Fill every lane with IDENT and seed all eight accumulators */
    for (lane = 0; lane < VSIZE; lane++) {
        bridge.d[lane] = IDENT;
    }
    va0 = bridge.v;
    va1 = bridge.v;
    va2 = bridge.v;
    va3 = bridge.v;
    va4 = bridge.v;
    va5 = bridge.v;
    va6 = bridge.v;
    va7 = bridge.v;

    /* Scalar steps until the address is a multiple of VBYTES */
    while (((long) cur) % VBYTES && remaining) {
        scalar = scalar OP *cur;
        cur++;
        remaining--;
    }

    /* Eight vector chunks per pass, one per accumulator */
    for (; remaining >= 8*VSIZE; remaining -= 8*VSIZE) {
        vec_t c0 = *((vec_t *) cur);
        vec_t c1 = *((vec_t *) (cur+VSIZE));
        vec_t c2 = *((vec_t *) (cur+2*VSIZE));
        vec_t c3 = *((vec_t *) (cur+3*VSIZE));
        vec_t c4 = *((vec_t *) (cur+4*VSIZE));
        vec_t c5 = *((vec_t *) (cur+5*VSIZE));
        vec_t c6 = *((vec_t *) (cur+6*VSIZE));
        vec_t c7 = *((vec_t *) (cur+7*VSIZE));
        va0 = va0 OP c0;
        va1 = va1 OP c1;
        va2 = va2 OP c2;
        va3 = va3 OP c3;
        va4 = va4 OP c4;
        va5 = va5 OP c5;
        va6 = va6 OP c6;
        va7 = va7 OP c7;
        cur += 8*VSIZE;
    }

    /* Scalar steps for whatever is left */
    for (; remaining; remaining--) {
        scalar = scalar OP *cur;
        cur++;
    }

    /* Merge the accumulators, then fold the lanes into the scalar result */
    bridge.v = (va0 OP va1) OP (va2 OP va3);
    bridge.v = bridge.v OP (va4 OP va5) OP (va6 OP va7);
    for (lane = 0; lane < VSIZE; lane++) {
        scalar = scalar OP bridge.d[lane];
    }
    *dest = scalar;
}
Exemplo n.º 22
0
/*
 * Combines all elements of v with OP, starting from IDENT, in a local
 * variable and stores the result in *dest after the loop.
 */
void process(vec_t *v, data_t *dest)
{
    int count = vec_length(v);
    data_t *elems = get_vec_start(v);
    data_t total = IDENT;
    int pos = 0;

    while (pos < count) {
        total = total OP elems[pos];
        pos++;
    }
    *dest = total;
}
Exemplo n.º 23
0
/*
 * Pointer version of the local-accumulator combine: walks from the start of
 * the data to start + vec_length(v), folding each element into a local with
 * OP (starting from IDENT), then stores the result in *dest.
 */
void combine4p(vec_ptr v, data_t *dest)
{
    long int count = vec_length(v);
    data_t *cur = get_vec_start(v);
    data_t *stop = cur+count;
    data_t total = IDENT;

    while (cur < stop) {
        total = total OP *cur;
        cur++;
    }
    *dest = total;
}
Exemplo n.º 24
0
/*
 * Combines all elements of v with OP, reading the data array directly.
 * The running value is kept in *dest itself: it is set to IDENT first and
 * then read and rewritten once per element.
 */
void combine3(vec_ptr v, data_t *dest)
{
    long int count = vec_length(v);
    data_t *elems = get_vec_start(v);
    long int pos = 0;

    *dest = IDENT;
    while (pos < count) {
        *dest = *dest OP elems[pos];
        pos++;
    }
}
Exemplo n.º 25
0
/*
 * Combines all elements of v with OP in a local variable that starts at
 * IDENT, then stores the result in *dest with a single write.
 */
void combine4(vec_ptr v, data_t *dest)
{
    long int count = vec_length(v);
    data_t *elems = get_vec_start(v);
    data_t total = IDENT;
    long int pos = 0;

    while (pos < count) {
        total = total OP elems[pos];
        pos++;
    }
    *dest = total;
}
Exemplo n.º 26
0
Arquivo: 18.c Projeto: diccooo/CSAPP
/*
 * Inner product of x and y, unrolled by 3: each pass computes three products,
 * adds them together, and adds that partial sum to the running total (which
 * starts at 0). Leftover products are added one at a time. The total is
 * stored in *dest. vec_length(x) pairs are used.
 */
void inner4b(vec_ptr x, vec_ptr y, data_t *dest)
{
  int count = vec_length(x);
  int last_triple = count - 2;
  data_t *lhs = get_vec_start(x);
  data_t *rhs = get_vec_start(y);
  data_t total = (data_t) 0;
  long int k = 0;

  /* Three products per pass */
  while (k < last_triple) {
    data_t p0 = lhs[k] * rhs[k];
    data_t p1 = lhs[k + 1] * rhs[k + 1];
    data_t p2 = lhs[k + 2] * rhs[k + 2];
    total += (p0 + p1 + p2);
    k += 3;
  }

  /* Remaining 0..2 products */
  while (k < count) {
    total += lhs[k] * rhs[k];
    k++;
  }
  *dest = total;
}
Exemplo n.º 27
0
/*
 * Unrolled-by-2 sum of element-wise products of u and v. The two products of
 * a pass are added to each other first and only then to the running total,
 * which is stored in *dest after the loops. vec_length(u) pairs are used.
 * NOTE(review): the running total starts at 1.0, so the stored result is 1.0
 * plus the sum of products -- confirm this seed is intended.
 */
void dotproduct7(vec_ptr u, vec_ptr v, data_t *dest)
{
    long int count = vec_length(u);
    long int last_pair = count - 1;
    data_t *lhs = get_vec_start(u);
    data_t *rhs = get_vec_start(v);
    data_t total = 1.0;
    long int idx = 0;

    /* Two products per pass */
    while (idx < last_pair) {
        total = total + (lhs[idx] * rhs[idx] + lhs[idx+1] * rhs[idx+1]);
        idx += 2;
    }

    /* At most one leftover product */
    while (idx < count) {
        total = total + (lhs[idx] * rhs[idx]);
        idx++;
    }
    *dest = total;
}
Exemplo n.º 28
0
/*
 * Combines all elements of v with OP and stores the result in *dest.
 *
 * Elements are consumed one at a time until the data address is a multiple
 * of VBYTES, then four vec_t chunks (4*VSIZE elements) per pass are folded
 * into four separate vector accumulators. Leftover elements are handled one
 * at a time; the four accumulators are then merged and their VSIZE lanes
 * folded into the scalar result.
 */
void simd_v4_combine(vec_ptr v, data_t *dest)
{
    long int lane;
    pack_t bridge;
    data_t *cur = get_vec_start(v);
    int remaining = vec_length(v);
    data_t scalar = IDENT;

    /* Four vector accumulators, every lane seeded with IDENT */
    vec_t va0, va1, va2, va3;
    for (lane = 0; lane < VSIZE; lane++) {
        bridge.d[lane] = IDENT;
    }
    va0 = bridge.v;
    va1 = bridge.v;
    va2 = bridge.v;
    va3 = bridge.v;

    /* Scalar steps until the address is a multiple of VBYTES */
    while (((long) cur) % VBYTES && remaining) {
        scalar = scalar OP *cur;
        cur++;
        remaining--;
    }

    /* $begin simd_v4_loop-c */
    /* Accumulate with 4x VSIZE parallelism */
    for (; remaining >= 4*VSIZE; remaining -= 4*VSIZE) {
        vec_t c0 = *((vec_t *) cur);
        vec_t c1 = *((vec_t *) (cur+VSIZE));
        vec_t c2 = *((vec_t *) (cur+2*VSIZE));
        vec_t c3 = *((vec_t *) (cur+3*VSIZE));
        va0 = va0 OP c0;
        va1 = va1 OP c1;
        va2 = va2 OP c2;
        va3 = va3 OP c3;
        cur += 4*VSIZE;
    }
    /* $end simd_v4_loop-c */

    /* Scalar steps for whatever is left */
    for (; remaining; remaining--) {
        scalar = scalar OP *cur;
        cur++;
    }

    /* $begin simd_v4_accum-c */
    /* Combine into single accumulator */
    bridge.v = (va0 OP va1) OP (va2 OP va3);

    /* Combine results from accumulators within vector */
    for (lane = 0; lane < VSIZE; lane++) {
        scalar = scalar OP bridge.d[lane];
    }
    /* $end simd_v4_accum-c */
    *dest = scalar;
}
Exemplo n.º 29
0
/*
 * Combines all elements of v with OP in a local variable (starting from
 * IDENT) and writes the current value to *dest after every element, so the
 * destination is updated on each iteration.
 */
void combine3w(vec_ptr v, data_t *dest)
{
    long int count = vec_length(v);
    data_t *elems = get_vec_start(v);
    data_t total = IDENT;
    long int pos = 0;

    /* Store the seed up front so *dest is set even if the loop never runs */
    *dest = total;

    while (pos < count) {
        total = total OP elems[pos];
        *dest = total;
        pos++;
    }
}
Exemplo n.º 30
0
/*
 * Pointer version of combine unrolled by 2: folds two elements per pass into
 * a single accumulator (starting from IDENT) with OP, then folds any
 * leftover element, and stores the result in *dest.
 */
void combine5p(vec_ptr v, data_t *dest)
{
    data_t *cur = get_vec_start(v);
    data_t *stop = cur+vec_length(v);
    data_t *pair_stop = stop-1;
    data_t total = IDENT;

    /* Combine 2 elements at a time */
    while (cur < pair_stop) {
        total = total OP cur[0] OP cur[1];
        cur += 2;
    }

    /* Finish any remaining elements */
    while (cur < stop) {
        total = total OP cur[0];
        cur++;
    }
    *dest = total;
}