/**
 * Finds the largest value inside a rectangular region of a row-major image,
 * testing four pixels at a time with AVX and falling back to scalar code for
 * the last pixels of each row.
 *
 * When AllowNegativeComponent (declared outside this block) is true, pixels are
 * compared by absolute value; otherwise by their signed value. A pixel only
 * becomes the peak when its (absolute) value is strictly greater than
 * std::numeric_limits<double>::min(), i.e. strictly positive and normalised.
 * On ties the first pixel in row-major scan order wins.
 *
 * @param image            Pixel data, addressed as image[yi*width + xi].
 * @param width            Number of pixels per row.
 * @param height           Number of rows.
 * @param x                Receives the column of the peak (0 when none is found).
 * @param y                Receives the row of the peak (0 when none is found).
 * @param startY           First row to search (inclusive).
 * @param endY             Row at which the search stops (exclusive).
 * @param horizontalBorder Columns skipped at the left and right edges.
 * @param verticalBorder   Rows skipped at the top and bottom edges.
 * @return The (signed) image value at the peak, or an empty optional when the
 *         search region is empty or holds no value above the threshold.
 */
boost::optional<double> SimpleClean::FindPeakAVX(const double *image, size_t width, size_t height, size_t& x, size_t& y, size_t startY, size_t endY, size_t horizontalBorder, size_t verticalBorder)
{
	// Note: numeric_limits<double>::min() is the smallest positive normalised
	// double, not the most negative one, so non-positive values never win.
	double peakMax = std::numeric_limits<double>::min();
	size_t peakIndex = 0;
	bool peakFound = false;
	__m256d mPeakMax = _mm256_set1_pd(peakMax);

	// Clamp the borders before subtracting: all quantities are unsigned, so a
	// border larger than the image would otherwise wrap to a huge end index.
	const size_t xiStart = std::min(horizontalBorder, width);
	const size_t xiEnd = std::max(xiStart, width - xiStart);
	const size_t yiLimit = height - std::min(verticalBorder, height);
	const size_t yiStart = std::max(startY, verticalBorder);
	const size_t yiEnd = std::max(yiStart, std::min(endY, yiLimit));

	for(size_t yi=yiStart; yi!=yiEnd; ++yi)
	{
		const size_t rowOffset = yi*width;
		size_t xi = xiStart;

		// Vector part: handle groups of four pixels that fit entirely in the row.
		for(; xi+4 <= xiEnd; xi+=4)
		{
			const double* const group = image + rowOffset + xi;
			__m256d val = _mm256_loadu_pd(group);
			if(AllowNegativeComponent)
			{
				// |v| computed as max(v, 0 - v)
				const __m256d negVal = _mm256_sub_pd(_mm256_set1_pd(0.0), val);
				val = _mm256_max_pd(val, negVal);
			}
			// Only inspect the individual lanes when at least one of them
			// exceeds the current peak.
			const int mask = _mm256_movemask_pd(_mm256_cmp_pd(val, mPeakMax, _CMP_GT_OQ));
			if(mask != 0)
			{
				for(size_t di=0; di!=4; ++di)
				{
					double value = group[di];
					if(AllowNegativeComponent)
						value = std::fabs(value);
					if(value > peakMax)
					{
						peakIndex = rowOffset + xi + di;
						// value is strictly positive here, so it equals its magnitude
						peakMax = value;
						mPeakMax = _mm256_set1_pd(peakMax);
						peakFound = true;
					}
				}
			}
		}

		// Scalar part: the remaining (fewer than four) pixels of the row.
		for(; xi!=xiEnd; ++xi)
		{
			double value = image[rowOffset + xi];
			if(AllowNegativeComponent)
				value = std::fabs(value);
			if(value > peakMax)
			{
				peakIndex = rowOffset + xi;
				peakMax = value;
				peakFound = true;
			}
		}
		// Keep the vector threshold in sync with peaks found by the scalar part.
		mPeakMax = _mm256_set1_pd(peakMax);
	}

	if(!peakFound)
	{
		// Nothing qualified: do not touch image[] (it may be empty) and do not
		// divide by width (it may be zero).
		x = 0;
		y = 0;
		return boost::optional<double>();
	}

	x = peakIndex % width;
	y = peakIndex / width;
	return image[peakIndex];
}
void static avx_test (void) { int i; int d; union256d s1; double source[4] = {-45, -3, -34.56, 35}; int e = 0; s1.x = _mm256_loadu_pd (source); d = _mm256_movemask_pd (s1.x); for (i = 0; i < 4; i++) if (source[i] < 0) e |= (1 << i); if (checkVi (&d, &e, 1)) abort (); }