/*
 * Sample all per-CPU performance counters, compute deltas since the previous
 * sample, aggregate them into a `struct metrics`, add memory-controller
 * (GBOX) read/write byte counts, and emit the result via sample().
 *
 * ctrs     - array of per-CPU counter snapshots, updated by another thread.
 * duration - length of the sampling interval (same time unit as the clock
 *            event deltas; presumably TSC/clock ticks — TODO confirm).
 *
 * NOTE(review): the loop skips cpu 0 and the top three CPU numbers
 * (cpu = 1 .. ncpus-4); combined with the `% 4` grouping below this looks
 * like the Xeon Phi convention of 4 hardware threads per core with the last
 * core reserved for the OS — TODO confirm against the topology setup code.
 */
void printcounters(struct counter *ctrs, uint64_t duration) {
    struct metrics s = {0};
    s.timestamp = _rdtsc();
    s.duration = duration;
    // We skip the last core
    int corethreads = 0;  // active-thread count within the current core
    for (int cpu = 1; cpu < gbl.ncpus-3; ++cpu) {
        double delta[NEVENTS];
        // volatile because another thread is changing it.
        volatile struct counter *p = &ctrs[cpu];
        for (int i = 0; i < NEVENTS; ++i) {
            // 64-byte vector load grabs one event's 8-word record atomically
            // (w.r.t. the writer's matching 64-byte store — TODO confirm the
            // writer uses a single vector store too).
            union { __m512d c; uint64_t values[8]; } t;
            t.c = _mm512_load_pd((void *)&p->counts[i][0]);
            // Scale the raw reading against the previous snapshot to get the
            // increment for this interval.
            delta[i] = perf_scale_delta(t.values, lastctr[cpu].counts[i]);
            // Save the new snapshot with a non-temporal store (no cache fill).
            _mm512_storenrngo_pd((void *)&lastctr[cpu].counts[i][0], t.c);
            // Clamp: a torn/reset counter can yield a negative delta.
            if (delta[i] < 0)
                delta[i] = 0;
            sevents[i] += delta[i];  // global running totals per event
        }
        // Thread counts as "active" if it was clocked for more than half the
        // interval (2*clocks > duration  <=>  clocks/duration > 0.5).
        if (2*delta[clocks1] > duration) {
            s.nthreads += 1;
            corethreads += 1;
        }
        if ((cpu % 4) == 0) // Last thread on this core
        {
            // A core is active if any of its threads was active.
            if (corethreads)
                s.ncores += 1;
            corethreads = 0;
        }
        s.vpu_ea += delta[vpu_ea];
        s.instrs += delta[instrs];
        s.vinstrs += delta[vpu_ie];
    }
    // Memory bandwidth: sum read/write counters over all GBOXes (2 channels
    // each), then convert the interval's delta to bytes (64 B per transaction
    // — cache-line sized, presumably; TODO confirm against PMU docs).
    uint64_t nreads = 0, nwrites = 0;
    for (int i = 0; i < NGBOXES; ++i)
        for (int j = 0; j < 2; ++j) {
            nreads += pmu_rdctr(i, j, 0);
            nwrites += pmu_rdctr(i, j, 1);
        }
    s.rbytes = (nreads - prevnreads) * 64;
    s.wbytes = (nwrites - prevnwrites)* 64;
    prevnreads = nreads;
    prevnwrites = nwrites;
    sample(&s);
}
/*
 * Copy `size` bytes from sbuf to rbuf in 64-byte blocks using OpenMP-parallel
 * non-temporal ("no read, no globally ordered") stores, which bypass the cache.
 *
 * rbuf, sbuf - destination / source addresses as integers; both are assumed
 *              64-byte aligned (required by _mm512_load_pd — TODO confirm
 *              alignment at the callers).
 * size       - transfer length in bytes. Only full 64-byte blocks are copied;
 *              a tail of size % 64 bytes is silently dropped, so callers are
 *              presumably expected to pass multiples of 64 — TODO confirm.
 * recv_req, send_req - unused here; kept so the signature matches the other
 *              transfer_* variants.
 */
inline void transfer_omp_loop_nontemp(uintptr_t rbuf, uintptr_t sbuf, size_t size,
                                      HMPI_Request recv_req, HMPI_Request send_req)
{
    (void)recv_req;   /* silence unused-parameter warnings */
    (void)send_req;

    /* One __m512d vector covers 64 bytes (8 doubles). Offsets below are in
     * bytes because rbuf/sbuf are uintptr_t. (The old N_DOUBLES_PER_BLOCK name
     * was misleading: its value, 64, is bytes per block, not doubles.) */
    const size_t BYTES_PER_BLOCK = 64;
    const size_t total = size / BYTES_PER_BLOCK;

    /* size_t loop index: the previous `int i` compared signed against the
     * unsigned bound and would truncate for very large transfers. Unsigned
     * loop variables are valid in OpenMP worksharing loops since OpenMP 3.0. */
#pragma omp parallel for
    for (size_t i = 0; i < total; i++) {
        /* Explicit casts: passing a uintptr_t where the intrinsics expect a
         * pointer is a constraint violation without them. */
        __m512d v = _mm512_load_pd((const void *)(sbuf + BYTES_PER_BLOCK * i));
        _mm512_storenrngo_pd((void *)(rbuf + BYTES_PER_BLOCK * i), v);
    }
}
/*
 * Refresh the cached counter block for one CPU: read every configured perf
 * event from the kernel and publish the new values into counters[cpu] with
 * non-temporal stores.
 */
static void readcounters(int cpu)
{
    struct counter *dst = &counters[cpu];

    for (int ev = 0; ev < NEVENTS; ++ev) {
        struct evinfo *info = &events[cpu][ev];

        /* Read the raw counter words into a vector-sized buffer; the union
         * lets the same 64 bytes feed the streaming store below. */
        union { __m512d vec; uint64_t raw[8]; } sample;
        readctr((perf_event_mmap_page *)info->buf, info->fd, sample.raw);

        /* Only publish when the first word is nonzero — presumably a
         * validity check; the previous value is kept otherwise. */
        if (!sample.raw[0])
            continue;

        /* Non-temporal store: update the shared slot without a cache fill. */
        _mm512_storenrngo_pd(&dst->counts[ev][0], sample.vec);
    }
}