/*
 * Reduce gathered per-rank data element-wise with KahanSum.
 *
 * commsize  number of contributions (ranks) stored in tempbuf
 * count     number of elements contributed by each rank
 * tempbuf   input of commsize*count floats; element j of rank i is read
 *           from tempbuf[i*count+j] (the rank-major layout an (all)gather
 *           of `count` elements per rank is expected to produce)
 * result    output of `count` floats; result[j] receives the KahanSum of
 *           element j over all commsize contributions
 */
void KahanWrapper(int commsize, int count, float * tempbuf, float * result)
{
    /* The Kahan algorithm reduces a contiguous vector to a scalar.
     * Since we do not get a contiguous vector from (all)gather
     * unless count=1,
     * we have to do one of the following:
     * (1) transpose the data, which is either hard in the case of in-place
     *     or uses twice the memory in the case of out-of-place, or
     * (2) form each contiguous vector as needed, or
     * (3) modify KahanSum to work with stride>1.
     * We currently do 2. */
    float * kahanbuf = (float *) malloc(commsize*sizeof(float));
    assert(kahanbuf!=NULL);

    for (int j=0; j<count; ++j) {
        for (int i=0; i<commsize; ++i) {
            /* Consecutive ranks are `count` elements apart, not `commsize`.
             * Index in size_t so large buffers do not overflow int. */
            kahanbuf[i] = tempbuf[(size_t)i*(size_t)count + (size_t)j];
        }
        result[j] = KahanSum(commsize, kahanbuf);
    }

    free(kahanbuf);
}
// =========================================================== double SxR::segmental_SxR( Signal X, Signal y, double (*func)(Signal, Signal, const int), const int length, const int step ) { const int m = length % step; const int n = length / step + 1; double *a = new double[n]; for(int i = 0; i < n - 1; i++){ a[i] = func (X + i * step, y + i * step, step); } double ret; if( m == 0){ ret = KahanSum(a, n-1) / (n-1); }else{ a[n - 1] = func (X + (n - 1) * step, y + (n - 1) * step, m); ret = KahanSum(a, n) / n; } return ret; }