/**
 * Updates a line of cells in vector chunks of C elements.
 *
 * Runs until hoodNew.index() reaches indexEnd; after every chunk hoodNew
 * is advanced by C and hoodOld is incremented once.
 *
 * Per chunk:
 *  - x, y, cReal and cImag are loaded from hoodOld,
 *  - the pair (cReal, cImag) is mapped ITERATIONS times via
 *    (re, im) -> (re * re - im * im, 2 * im * re),
 *  - for every entry of hoodOld.weights(0) the gathered x values times
 *    the entry's weights are added to cReal, and the gathered y values
 *    times the same weights are added to cImag,
 *  - all four vectors are written to hoodNew (x and y unchanged).
 *
 * @param hoodNew  target neighborhood/iterator, written to
 * @param indexEnd loop bound compared against hoodNew.index()
 * @param hoodOld  source neighborhood/iterator, read from
 */
inline static void updateLineX(HOOD_NEW& hoodNew, int indexEnd, HOOD_OLD& hoodOld, unsigned /* nanoStep */)
{
    using ShortVec = LibFlatArray::short_vec<double, C>;

    while (hoodNew.index() < indexEnd) {
        ShortVec posX = &hoodOld->x();
        ShortVec posY = &hoodOld->y();
        ShortVec real = &hoodOld->cReal();
        ShortVec imag = &hoodOld->cImag();

        for (int round = 0; round < ITERATIONS; ++round) {
            // The imaginary update needs the real part from before this round.
            ShortVec previousReal = real;
            real = real * real - imag * imag;
            imag = ShortVec(2.0) * imag * previousReal;
        }

        // NOTE(review): first() presumably yields the gather offsets and
        // second() the address of the matching weights -- confirm against
        // the iterator returned by weights(0).
        for (const auto& entry: hoodOld.weights(0)) {
            ShortVec entryWeights;
            ShortVec neighborX;
            ShortVec neighborY;

            entryWeights.load_aligned(entry.second());
            neighborX.gather(&hoodOld->x(), entry.first());
            neighborY.gather(&hoodOld->y(), entry.first());

            real += neighborX * entryWeights;
            imag += neighborY * entryWeights;
        }

        &hoodNew->x() << posX;
        &hoodNew->y() << posY;
        &hoodNew->cReal() << real;
        &hoodNew->cImag() << imag;

        hoodNew += C;
        ++hoodOld;
    }
}
/**
 * Accumulates weighted neighbor values into the sum member, one vector
 * chunk of C elements per iteration.
 *
 * The chunk counter starts at hoodOld.index() and runs up to (excluding)
 * indexEnd; hoodOld is incremented once per chunk. For each chunk the
 * current sums are loaded from hoodNew, every entry of hoodOld.weights(0)
 * contributes (gathered values * weights), and the result is stored back
 * to the same location.
 *
 * @param hoodNew  target neighborhood; its sum member is read and written
 * @param indexEnd exclusive upper bound for the chunk counter
 * @param hoodOld  source neighborhood providing weights and values
 */
static void updateLineX(HOOD_NEW& hoodNew, int indexEnd, HOOD_OLD& hoodOld, unsigned /* nanoStep */)
{
    int chunk = hoodOld.index();

    while (chunk < indexEnd) {
        // Load and store address the same C-element slice of sum.
        auto *sumChunk = &hoodNew->sum() + chunk * C;

        ShortVec accumulator;
        accumulator.load_aligned(sumChunk);

        // NOTE(review): first() presumably yields the gather offsets and
        // second() the address of the matching weights -- confirm against
        // the iterator returned by weights(0).
        for (const auto& entry: hoodOld.weights(0)) {
            ShortVec entryWeights;
            ShortVec neighborValues;

            entryWeights.load_aligned(entry.second());
            neighborValues.gather(&hoodOld->value(), entry.first());

            accumulator += neighborValues * entryWeights;
        }

        accumulator.store_aligned(sumChunk);

        ++chunk;
        ++hoodOld;
    }
}
/**
 * Accumulates weighted neighbor values into the sum member, one vector
 * chunk of C elements per iteration.
 *
 * The chunk counter i starts at hoodOld.index() and runs up to (excluding)
 * indexEnd; hoodOld is incremented once per chunk. For each chunk the
 * current sums are loaded from hoodNew, every entry of hoodOld.weights(0)
 * contributes (gathered values * weights), and the result is stored back
 * to the same location.
 *
 * @param hoodNew  target neighborhood; its sum member is read and written
 * @param indexEnd exclusive upper bound for the chunk counter
 * @param hoodOld  source neighborhood providing weights and values
 */
static void updateLineX(HOOD_NEW& hoodNew, int indexEnd, HOOD_OLD& hoodOld, unsigned /* nanoStep */)
{
    for (int i = hoodOld.index(); i < indexEnd; ++i, ++hoodOld) {
        ShortVec tmp;
        tmp.load_aligned(&hoodNew->sum() + i * C);

        for (const auto& j: hoodOld.weights(0)) {
            ShortVec weights;
            ShortVec values;
            weights.load_aligned(j.second());

            // The FIXME below must stay on lines of its own: a line
            // comment runs to the end of the physical line, so sharing a
            // line with code would disable the gather, the accumulation,
            // the store and the closing braces.
            //
            // FIXME: is this gather actually correct? Shouldn't we use
            // offset 0 for the gather? See also
            // hpxperformancetests/main.cpp.
            values.gather(&hoodOld->value(), j.first());
            tmp += values * weights;
        }

        tmp.store_aligned(&hoodNew->sum() + i * C);
    }
}