Beispiel #1
0
    /**
     * Updates a line of cells in chunks of C until hoodNew.index()
     * reaches indexEnd.
     *
     * For every chunk the x and y members are carried over unchanged
     * from hoodOld. The complex value (cReal, cImag) is squared
     * ITERATIONS times and afterwards incremented by the weighted x
     * (real part) and y (imaginary part) members gathered through the
     * entries of hoodOld.weights(0). The trailing nanoStep parameter
     * is not used.
     */
    inline static void updateLineX(HOOD_NEW& hoodNew, int indexEnd, HOOD_OLD& hoodOld, unsigned /* nanoStep */)
    {
        using ShortVec = LibFlatArray::short_vec<double, C>;

        while (hoodNew.index() < indexEnd) {
            // Load the current chunk of all four members from the old grid.
            ShortVec posX = &hoodOld->x();
            ShortVec posY = &hoodOld->y();
            ShortVec re = &hoodOld->cReal();
            ShortVec im = &hoodOld->cImag();

            // Repeated complex squaring: (re + i*im)^2 = (re^2 - im^2) + i*(2*im*re).
            // Both components are computed from the previous values before
            // either one is overwritten.
            for (int round = 0; round < ITERATIONS; ++round) {
                ShortVec nextRe = re * re - im * im;
                ShortVec nextIm = ShortVec(2.0) * im * re;
                re = nextRe;
                im = nextIm;
            }

            // Accumulate the weighted contributions: second() supplies the
            // weights, first() is handed to gather() to select the x/y
            // values that get multiplied with them.
            for (const auto& entry: hoodOld.weights(0)) {
                ShortVec factors;
                ShortVec gatheredX;
                ShortVec gatheredY;
                factors.load_aligned(entry.second());
                gatheredX.gather(&hoodOld->x(), entry.first());
                gatheredY.gather(&hoodOld->y(), entry.first());
                re += gatheredX * factors;
                im += gatheredY * factors;
            }

            // Write the chunk to the new grid.
            &hoodNew->x() << posX;
            &hoodNew->y() << posY;
            &hoodNew->cReal() << re;
            &hoodNew->cImag() << im;

            // The new neighborhood advances by C, the old one by a single
            // increment, exactly as in the loop header this replaces.
            hoodNew += C;
            ++hoodOld;
        }
    }
Beispiel #2
0
 /**
  * Adds weighted neighbor values onto the sum member of the new grid.
  *
  * Starting at hoodOld.index() and running up to (excluding) indexEnd,
  * each pass loads the chunk of C sums located at offset i * C from
  * &hoodNew->sum(), adds value * weight for every entry of
  * hoodOld.weights(0) and stores the chunk back to the same location.
  * The trailing nanoStep parameter is not used.
  */
 static void updateLineX(HOOD_NEW& hoodNew, int indexEnd, HOOD_OLD& hoodOld, unsigned /* nanoStep */)
 {
     int chunk = hoodOld.index();

     while (chunk < indexEnd) {
         // Address of the sums handled in this pass; used for load and store.
         auto sumChunk = &hoodNew->sum() + chunk * C;

         ShortVec acc;
         acc.load_aligned(sumChunk);

         // second() supplies the weights, first() is handed to gather()
         // to select the matching entries of the value member.
         for (const auto& entry: hoodOld.weights(0)) {
             ShortVec factors;
             ShortVec gathered;
             factors.load_aligned(entry.second());
             gathered.gather(&hoodOld->value(), entry.first());
             acc += gathered * factors;
         }

         acc.store_aligned(sumChunk);

         ++chunk;
         ++hoodOld;
     }
 }
Beispiel #3
0
 /**
  * Accumulates weighted values from the old grid into the sum member
  * of the new grid.
  *
  * The loop counter starts at hoodOld.index() and stops before
  * indexEnd; hoodOld is incremented together with it. Each pass works
  * on the C sums found at &hoodNew->sum() + counter * C. The trailing
  * nanoStep parameter is not used.
  */
 static void updateLineX(HOOD_NEW& hoodNew, int indexEnd, HOOD_OLD& hoodOld, unsigned /* nanoStep */)
 {
     for (int pos = hoodOld.index(); pos < indexEnd; ++pos, ++hoodOld) {
         // Same address serves as source and destination of this pass.
         auto target = &hoodNew->sum() + pos * C;

         ShortVec total;
         total.load_aligned(target);

         for (const auto& pair: hoodOld.weights(0)) {
             ShortVec weightVec;
             ShortVec valueVec;
             weightVec.load_aligned(pair.second());
             // FIXME: is this gather actually correct? Shouldn't the gather
             // use offset 0? See also hpxperformancetests/main.cpp.
             valueVec.gather(&hoodOld->value(), pair.first());
             total += valueVec * weightVec;
         }

         total.store_aligned(target);
     }
 }