/**
 * Accumulates weighted neighbor values into the `sum` member of the
 * new grid, one ShortVec-sized chunk per iteration.
 *
 * Starting at hoodOld.index() and stopping before indexEnd, each
 * iteration loads the current partial sums at offset (chunk * C),
 * adds values * weights for every entry of hoodOld.weights(0), and
 * stores the result back to the same location. hoodOld is advanced
 * in lockstep with the chunk counter.
 *
 * NOTE(review): C is presumably the number of lanes per chunk --
 * confirm against the enclosing class.
 *
 * @param hoodNew   accessor for the target grid; its sum() is read and written
 * @param indexEnd  exclusive upper bound for the chunk index
 * @param hoodOld   accessor for the source grid; incremented once per chunk
 */
static void updateLineX(HOOD_NEW& hoodNew, int indexEnd, HOOD_OLD& hoodOld, unsigned /* nanoStep */)
{
    int chunk = hoodOld.index();

    while (chunk < indexEnd) {
        // Resume from whatever is already stored in the target chunk.
        ShortVec accumulator;
        accumulator.load_aligned(&hoodNew->sum() + chunk * C);

        for (const auto& entry: hoodOld.weights(0)) {
            ShortVec factors;
            ShortVec operands;

            // entry.second() supplies the weights, entry.first() the
            // gather offsets relative to &hoodOld->value().
            factors.load_aligned(entry.second());
            operands.gather(&hoodOld->value(), entry.first());

            accumulator += operands * factors;
        }

        accumulator.store_aligned(&hoodNew->sum() + chunk * C);

        ++chunk;
        ++hoodOld;
    }
}
/**
 * Adds the weighted sum of neighbor values to the `sum` member of the
 * new grid, processing one ShortVec chunk per outer iteration.
 *
 * For every chunk index i in [hoodOld.index(), indexEnd) the partial
 * sums at (&hoodNew->sum() + i * C) are loaded, incremented by
 * values * weights for each entry of hoodOld.weights(0), and written
 * back to the same address. hoodOld is incremented together with i.
 *
 * NOTE(review): C is presumably the chunk width in elements --
 * confirm against the enclosing class.
 *
 * @param hoodNew   accessor for the target grid; its sum() is read and written
 * @param indexEnd  exclusive upper bound for the chunk index
 * @param hoodOld   accessor for the source grid; incremented once per chunk
 */
static void updateLineX(HOOD_NEW& hoodNew, int indexEnd, HOOD_OLD& hoodOld, unsigned /* nanoStep */)
{
    for (int i = hoodOld.index(); i < indexEnd; ++i, ++hoodOld) {
        ShortVec tmp;
        tmp.load_aligned(&hoodNew->sum() + i * C);

        for (const auto& j: hoodOld.weights(0)) {
            ShortVec weights;
            ShortVec values;
            weights.load_aligned(j.second());

            /* FIXME: is this gather actually correct? Shouldn't we use
             * offset 0 for the gather? See also
             * hpxperformancetests/main.cpp. (Open question, not yet
             * resolved. Kept as a block comment so it can never
             * swallow the statements that follow it.) */
            values.gather(&hoodOld->value(), j.first());

            tmp += values * weights;
        }

        tmp.store_aligned(&hoodNew->sum() + i * C);
    }
}