Esempio n. 1
0
/*
 * Pairwise get test for the calling thread.
 *
 * The partner thread is looked up in pairs[TH_ME]; for each of the `iters`
 * iterations one block of ASIZE_BYTES is fetched with ARMCI_Get from the
 * partner's ptrs1 area into this thread's ptrs2 area, and the received data
 * is then handed to check_result().
 *
 *  th_idx -- index of the calling thread (passed on to prndbg; presumably
 *            also what the TH_ME macro is derived from -- confirm against
 *            the macro definition)
 */
void test_pairs(int th_idx)
{
    int rem_th, rem_proc;
    int i, rc;
    void *src, *dst;

    rem_th = pairs[TH_ME];
    rem_proc = TH2PROC(rem_th);

    prndbg(th_idx, "test_pair: %d<->%d(%d)\n", TH_ME, rem_th, rem_proc);

    MT_BARRIER();
#if 0
    print_array(th_idx, "before", &AELEM(ptrs2[TH_ME],rem_th,0,0), ASIZExITERS);
#endif
    for (i = 0; i < iters; i++) {
        /* src - addr of my thread block on remote proc/thread */
        src = &AELEM(ptrs1[rem_th],TH_ME,i,0);
        /* dst - addr of remote thread block on my proc/thread */
        dst = &AELEM(ptrs2[TH_ME],rem_th,i,0);
        /* get from my pair; the call is kept outside assert() so that the
         * transfer still happens when assertions are compiled out (NDEBUG) */
        rc = ARMCI_Get(src, dst, ASIZE_BYTES, rem_proc);
        assert(rc == 0);
        (void)rc; /* silences "set but not used" in NDEBUG builds */
    }

    MT_BARRIER();
#if 0
    print_array(th_idx, "rcvd", &AELEM(ptrs2[TH_ME],rem_th,0,0), ASIZExITERS);
#endif
    /* check results */
    check_result(&AELEM(ptrs2[TH_ME],rem_th,0,0), rem_th);

}
Esempio n. 2
0
/* test Put/Get/Acc sequence regardless of communication pattern
 *  th_idx -- index of the calling thread (passed to the init/print/check helpers)
 *  tgt -- remote target for put/get/acc (none if -1)
 *  rmt -- list of remote threads that put/acc to here (correctness is checked here)
 *  rmt_cnt -- # of threads in rmt
 *
 * The function runs three phases (put, get, accumulate). Each phase
 * re-initialises ptrs1[TH_ME] and ptrs2[TH_ME], performs `iters` strided
 * transfers of ASIZE_BYTES each to/from tgt, and then verifies the data with
 * check_PutGetAcc(). Every ARMCI call must return 0; this is asserted.
 */
void test_PutGetAcc(int th_idx, int tgt, int *rmt, int rmt_cnt)
{
    /* a - local thread, b - remote thread */
    int a, b, b_proc, stride[2], count[2];
    int i, j, rc;
    void *src, *dst;
#ifdef DEBUG
    for (i = 0, cbufl = 0; i < rmt_cnt; i++)
        cbufl += sprintf(cbuf+cbufl, " %d", rmt[i]);
    prndbg(th_idx, "test_PutGetAcc: put/acc to %d, get from %d, check put/acc from %s\n",
           tgt, tgt, rmt_cnt ? cbuf : "none");
#endif
    a = TH_ME;
    /* one stride level carrying a single segment of ASIZE_BYTES:
     * only stride[0] is consulted by the 1-level strided calls below */
    stride[0] = ASIZE_BYTES;
    count[0] = ASIZE_BYTES; count[1] = 1;

    /* init arrays */
    init_array(th_idx, ptrs1[TH_ME]);
    init_array(th_idx, ptrs2[TH_ME]);
    MT_BARRIER();

    /* put - put a.ptrs1[b] into b.ptrs2[a] */
    if (tgt != -1) {
        b = tgt;
        b_proc = TH2PROC(b);
        for (i = 0; i < iters; i++) {
            src = &AELEM(ptrs1[a], b, i, 0); /* a.ptrs1[b] */
            dst = &AELEM(ptrs2[b], a, i, 0); /* b.ptrs2[a] */
            /* call kept outside assert() so it survives NDEBUG builds */
            rc = ARMCI_PutS(src, stride, dst, stride, count, 1, b_proc);
            assert(rc == 0);
            (void)rc;
        }
        ARMCI_Fence(b_proc);
    }
    MT_BARRIER();
    print_array(th_idx, "PUT:ptrs1[TH_ME]", ptrs1[TH_ME]);
    print_array(th_idx, "PUT:ptrs2[TH_ME]", ptrs2[TH_ME]);
    MT_BARRIER();

    /* chk put(s) from b(s): a.ptrs2[b] */
    for (j = 0; j < rmt_cnt; j++) {
        b = rmt[j];
        b_proc = TH2PROC(b);
        check_PutGetAcc(th_idx, b, PUT, &AELEM(ptrs2[a], b, 0, 0));
    }

    /* init arrays */
    init_array(th_idx, ptrs1[TH_ME]);
    init_array(th_idx, ptrs2[TH_ME]);
    MT_BARRIER();

    /* get - get b.ptrs1[a] into a.ptrs2[b] */
    if (tgt != -1) {
        b = tgt;
        b_proc = TH2PROC(b);
        for (i = 0; i < iters; i++) {
            src = &AELEM(ptrs1[b], a, i, 0); /* b.ptrs1[a] */
            dst = &AELEM(ptrs2[a], b, i, 0); /* a.ptrs2[b] */
            rc = ARMCI_GetS(src, stride, dst, stride, count, 1, b_proc);
            assert(rc == 0);
            (void)rc;
        }
    }
    print_array(th_idx, "GET:ptrs1[TH_ME]", ptrs1[TH_ME]);
    print_array(th_idx, "GET:ptrs2[TH_ME]", ptrs2[TH_ME]);
    MT_BARRIER();

    /* chk get from b: a.ptrs2[b] (b was reset to tgt by the get phase above) */
    if (tgt != -1) {
        check_PutGetAcc(th_idx, b, GET, &AELEM(ptrs2[a], b, 0, 0));
    }

#if 1
    /* init arrays */
    init_array(th_idx, ptrs1[TH_ME]);
    init_array(th_idx, ptrs2[TH_ME]);
    MT_BARRIER();

    /* acc - acc a.ptrs1[b] * scale + b.ptrs2[a] into b.ptrs2[a] */
    if (tgt != -1) {
        b = tgt;
        b_proc = TH2PROC(b);
        for (i = 0; i < iters; i++) {
            src = &AELEM(ptrs1[a], b, i, 0); /* a.ptrs1[b] */
            dst = &AELEM(ptrs2[b], a, i, 0); /* b.ptrs2[a] */
            rc = ARMCI_AccS(ARMCI_ACC_DBL,&scale,src,stride,dst,stride,count,1,b_proc);
            assert(rc == 0);
            (void)rc;
        }
        ARMCI_Fence(b_proc);
    }
    MT_BARRIER();
    print_array(th_idx, "ACC:ptrs1[TH_ME]", ptrs1[TH_ME]);
    print_array(th_idx, "ACC:ptrs2[TH_ME]", ptrs2[TH_ME]);
    MT_BARRIER();

    /* chk acc(s) from b(s): a.ptrs2[b] */
    for (j = 0; j < rmt_cnt; j++) {
        b = rmt[j];
        b_proc = TH2PROC(b);
        check_PutGetAcc(th_idx, b, ACC, &AELEM(ptrs2[a], b, 0, 0));
    }

#endif
    MT_BARRIER();
}
Esempio n. 3
0
/*
 * Number of rows/columns in the block that starts at index `start` when an
 * n-wide dimension is cut into blocks of `bs`: `bs` for interior blocks, the
 * remainder (n - start) for a trailing partial block.
 */
static int lu_block_extent(int start, int bs, int n)
{
    return (start + bs > n) ? (n - start) : bs;
}

/*
 * Per-thread worker for the blocked LU factorisation.
 *
 *  lu_arg -- pointer to an int array: [0] = matrix order n, [1] = block size
 *            bs, [2] = index of this thread (th_idx)
 *
 * Returns lu_arg unchanged. (The void *(void *) signature suggests a thread
 * start routine -- confirm at the call site.)
 *
 * For every diagonal step K the thread
 *   1. factors the diagonal block if it owns it,
 *   2. updates the blocks it owns in block column K and block row K using
 *      the diagonal block,
 *   3. updates the blocks it owns in the trailing sub-matrix.
 * Blocks owned by another thread are copied into local scratch buffers with
 * get_remote(). Blocks are addressed as a[row + col*nblocks].
 *
 * The process is aborted if the scratch buffers cannot be allocated;
 * returning early instead would leave the other threads waiting in
 * MT_BARRIER().
 */
void *lu(void *lu_arg)
{
    const int *args = (const int *)lu_arg;
    const int n = args[0];
    const int bs = args[1];
    const int th_idx = args[2];
    int i, j, k, kl;
    int I, J, K;
    int strI, strJ, strK;
    int diagowner;
    double *A, *B, *C, *D;
    double *buf1, *buf2;

#ifdef DEBUG
    printf("DBG: starting thread %d(idx=%d) on node %d\n", me_th[th_idx], th_idx, me); fflush(stdout);
#endif

    /* temporary memories: one block each; the product is widened to size_t
     * before multiplying so it cannot overflow a narrower integer type */
    buf1 = (double *)malloc((size_t)block_size * block_size * sizeof *buf1);
    buf2 = (double *)malloc((size_t)block_size * block_size * sizeof *buf2);
    if (buf1 == NULL || buf2 == NULL) {
        fprintf(stderr, "lu: cannot allocate block buffers (thread idx=%d)\n", th_idx);
        free(buf1);
        free(buf2);
        abort();
    }

    for (k = 0, K = 0; k < n; k += bs, K++) {
        strK = lu_block_extent(k, bs, n);
        kl = k + strK; /* first index past block K */

        /* factor diagonal block */
        diagowner = block_owner(K, K);
        if (diagowner == me_th[th_idx]) {
            A = a[K+K*nblocks];
            print_block_dbg(A, "th=%d, idx=%d: before lu0 a[%d]:\n", me_th[th_idx], th_idx, K+K*nblocks);
            lu0(A, strK, strK);
        }
        MT_BARRIER();

        /* divide column k by diagonal block */
        if (block_owner(K, K) == me_th[th_idx]) {
            D = a[K+K*nblocks];
        } else {
            D = buf1;
            get_remote(D, K, K);
        }
        for (i = kl, I = K+1; i < n; i += bs, I++) {
            if (block_owner(I, K) == me_th[th_idx]) {  /* parcel out blocks */
                strI = lu_block_extent(i, bs, n);
                A = a[I+K*nblocks];
                bdiv(A, D, strI, strK, strI, strK);
            }
        }

        /* modify row k by diagonal block */
        for (j = kl, J = K+1; j < n; j += bs, J++) {
            if (block_owner(K, J) == me_th[th_idx]) {  /* parcel out blocks */
                strJ = lu_block_extent(j, bs, n);
                A = a[K+J*nblocks];
                bmodd(D, A, strK, strJ, strK, strK);
            }
        }
        MT_BARRIER();

        /* modify subsequent block columns; D is not read past this point,
         * so buf1 is free to be reused for remote column-K blocks */
        for (i = kl, I = K+1; i < n; i += bs, I++) {
            strI = lu_block_extent(i, bs, n);

            if (block_owner(I, K) == me_th[th_idx]) {
                A = a[I+K*nblocks];
            } else {
                A = buf1;
                get_remote(A, I, K);
            }
            for (j = kl, J = K+1; j < n; j += bs, J++) {
                strJ = lu_block_extent(j, bs, n);
                if (block_owner(I, J) == me_th[th_idx]) {  /* parcel out blocks */
                    if (block_owner(K, J) == me_th[th_idx]) {
                        B = a[K+J*nblocks];
                    } else {
                        B = buf2;
                        get_remote(B, K, J);
                    }
                    C = a[I+J*nblocks];
                    bmod(A, B, C, strI, strJ, strK, strI, strK, strI);
                }
            }
        }
    }

    free(buf1);
    free(buf2);

    return lu_arg;
}