コード例 #1
0
ファイル: test_mt.c プロジェクト: arnolda/scafacos
/*
 * Entry point of the multi-threaded test driver.
 *
 * Steps, in order:
 *   1. initialize the message-passing layer and read size/rank;
 *   2. parse the command line:
 *        -t N  threads per process (1..MAX_TPP)
 *        -i N  iterations (>= 1)
 *        -s N  number of array elements (>= 1)
 *        -d N  sleep N seconds before starting (prints the pid first)
 *        -h    print usage
 *   3. initialize the thread layer and ARMCI;
 *   4. build the random tables shared by all threads (pairs, rnd_tgts,
 *      rnd_one) from a seed broadcast from process 0;
 *   5. run thread_main() in tpp threads (or inline when NOTHREADS is set);
 *   6. tear everything down.
 *
 * Returns 0 on completion; aborts if a table cannot be allocated.
 */
int main(int argc, char *argv[])
{
    int ch;
    extern char *optarg;
    int i, r;
    thread_t threads[MAX_TPP];

    /* init MP */
    MP_INIT(argc,argv);
    MP_PROCS(&size);
    MP_MYID(&rank);

    while ((ch = getopt(argc, argv, "t:s:i:d:h")) != -1) {
        switch(ch) {
            case 't': /* # of threads */
                tpp = atoi(optarg);
                if (tpp < 1 || tpp > MAX_TPP) {
                    PRINTF0("\"%s\" is improper value for -t, should be a "
                            "number between 1 and %d(MAX_TPP)\n",
                            optarg, MAX_TPP);
                    usage();
                }
                break;
            case 'i': /* # of iterations */
                iters = atoi(optarg);
                if (iters < 1) {
                    /* the message must name -i (it used to say -t) */
                    PRINTF0("\"%s\" is improper value for -i, should be a "
                            "number equal or larger than 1\n", optarg);
                    usage();
                }
                break;
            case 's': /* # of elements in the array */
                asize = atoi(optarg);
                /* validate the value just parsed (asize), not iters */
                if (asize < 1) {
                    PRINTF0("\"%s\" is improper value for -s, should be a "
                            "number equal or larger than 1\n", optarg);
                    usage();
                }
                break;
            case 'd': delay = atoi(optarg); break; /* delay before start */
            case 'h': usage(); break; /* print usage info */
        }
    }
#ifdef NOTHREADS
    tpp = 1;
    PRINTF0("Warning: NOTHREADS debug symbol is set -- running w/o threads\n");
#endif
    th_size = size * tpp;
    /* sizeof yields size_t; cast so the argument matches the %d conversion */
    PRINTF0("\nTest of multi-threaded capabilities:\n"
            "%d threads per process (%d threads total),\n"
            "%d array elements of size %d,\n"
            "%d iteration(s)\n\n", tpp, th_size, asize, (int)sizeof(atype_t), iters);
    if (delay) {
        /* print the pid and wait, e.g. so a debugger can be attached */
        printf("%d: %d\n", rank, getpid());
        fflush(stdout);
        sleep(delay);
        MP_BARRIER();
    }
    TH_INIT(size,tpp);
    /* global thread ranks: process rank * tpp + local thread index */
    for (i = 0; i < tpp; i++) th_rank[i] = rank * tpp + i;

#if defined(DEBUG) && defined(LOG2FILE)
    /* patch the three-digit thread rank into the log file name */
    for (i = 0; i < tpp; i++) {
        fname[10] = '0' + th_rank[i] / 100;
        fname[11] = '0' + th_rank[i] % 100 / 10;
        fname[12] = '0' + th_rank[i] % 10;
        dbg[i] = fopen(fname, "w");
    }
#endif
    for (i = 0; i < tpp; i++)
        prndbg(i, "proc %d, thread %d(%d):\n", rank, i, th_rank[i]);

    /* init ARMCI */
    ARMCI_Init();

    /* set global seed (to ensure same random sequence across procs) */
    time_seed = (unsigned)time(NULL);
    armci_msg_brdcst(&time_seed, sizeof(time_seed), 0);
    srand(time_seed); rand();
    prndbg(0, "seed = %u\n", time_seed);

    /* random pairs */
    pairs = calloc(th_size, sizeof(int));
    if (pairs == NULL) {
        fprintf(stderr, "%d: cannot allocate pairs table\n", rank);
        abort();
    }
    for (i = 0; i < th_size; i++) pairs[i] = -1;
    /*
     * NOTE(review): every slot needs a distinct free partner, so this loop
     * appears not to terminate when th_size is odd (or 1) -- confirm that
     * callers always run with an even number of threads in total.
     */
    for (i = 0; i < th_size; i++) {
        if (pairs[i] != -1) continue;
        r = RND(0, th_size);
        while (i == r || pairs[r] != -1 ) r = RND(0, th_size);
        pairs[i] = r; pairs[r] = i;
    }
    for (i = 0, cbufl = 0; i < th_size; i++)
        cbufl += sprintf(cbuf + cbufl, " %d->%d|%d->%d",
                         i, pairs[i], pairs[i], pairs[pairs[i]]);
    prndbg(0, "random pairs:%s\n", cbuf);

    /* random targets: each entry must differ from its own index */
    rnd_tgts = calloc(th_size, sizeof(int));
    if (rnd_tgts == NULL) {
        fprintf(stderr, "%d: cannot allocate rnd_tgts table\n", rank);
        abort();
    }
    for (i = 0, cbufl = 0; i < th_size; i++) {
        rnd_tgts[i] = RND(0, th_size);
        if (rnd_tgts[i] == i) { i--; continue; } /* retry this slot */
        cbufl += sprintf(cbuf + cbufl, " %d", rnd_tgts[i]);
    }
    prndbg(0, "random targets:%s\n", cbuf);

    /* random one */
    rnd_one = RND(0, th_size);
    prndbg(0, "random one = %d\n", rnd_one);

    /*
     * Allocate outside of assert(): with NDEBUG defined the assert argument
     * is not evaluated and the tables would never be allocated.
     */
    ptrs1 = calloc(th_size, sizeof(void *));
    ptrs2 = calloc(th_size, sizeof(void *));
    if (ptrs1 == NULL || ptrs2 == NULL) {
        fprintf(stderr, "%d: cannot allocate pointer tables\n", rank);
        abort();
    }
#ifdef NOTHREADS
    thread_main((void *)(long)0);
#else
    /* the local thread index is passed to thread_main through the pointer */
    for (i = 0; i < tpp; i++) THREAD_CREATE(threads + i, thread_main, (void *)(long)i);
    for (i = 0; i < tpp; i++) THREAD_JOIN(threads[i], NULL);
#endif

    MP_BARRIER();
    PRINTF0("Tests Completed\n");

    /* clean up */
#if defined(DEBUG) && defined(LOG2FILE)
    for (i = 0; i < tpp; i++) fclose(dbg[i]);
#endif
    ARMCI_Finalize();
    TH_FINALIZE();
    MP_FINALIZE();

    return 0;
}
コード例 #2
0
ファイル: lu-block-th.c プロジェクト: bcernohous/ga
/*
 * Entry point of the threaded, blocked dense LU factorization driver.
 *
 * Command line:
 *   -n N  matrix dimension
 *   -b N  block dimension
 *   -p N  overrides the process count reported by armci_msg_nproc()
 *   -t N  threads per process (clamped to MAX_THREADS)
 *   -d N  print the pid and sleep N seconds before starting
 *   -h    print usage and exit
 *
 * The function computes the thread grid and per-thread storage, allocates
 * the matrix through ARMCI_Malloc, fills the block pointer table `a`,
 * runs lu() in th_per_p threads and reports the elapsed time on process 0.
 *
 * Returns 0 on completion; exits with a non-zero status on invalid
 * parameters or allocation failure.
 */
int main(int argc, char *argv[])
{
    int i, j, l;
    int ch;
    extern char *optarg;
    int edge;
    int size;
    int lu_arg[MAX_THREADS][3];

    /* ARMCI */
    void **ptr;
    double **ptr_loc;

    THREAD_LOCK_INIT(mutex);

    armci_msg_init(&argc,&argv);
    nproc = armci_msg_nproc();
    me = armci_msg_me();

    while ((ch = getopt(argc, argv, "n:b:p:t:d:h")) != -1) {
        switch(ch) {
            case 'n': n = atoi(optarg); break;
            case 'b': block_size = atoi(optarg); break;
            case 'p': nproc = atoi(optarg); break;
            case 't': th_per_p = atoi(optarg); break;
            case 'd': d = atoi(optarg); break;
            case 'h': {
                printf("Usage: LU, or \n");
                printf("       LU -nMATRIXSIZE -bBLOCKSIZE -pNPROC -tTH_PER_P\n");
                armci_msg_barrier();
                armci_msg_finalize();
                exit(0);
            }
        }
    }

    if(th_per_p>MAX_THREADS) {
        th_per_p=MAX_THREADS;
        if(me==0)printf("Warning: cannot run more than %d threads, adjust MAX_THREADS",MAX_THREADS);
    }

    /*
     * Reject non-positive sizes up front: n/block_size, nthreads/num_rows
     * and the division by nthreads below would otherwise divide by zero.
     */
    if (n < 1 || block_size < 1 || nproc < 1 || th_per_p < 1) {
        if (me == 0)
            fprintf(stderr, "Matrix size, block size, process count and "
                            "threads per process must all be positive\n");
        armci_msg_finalize();
        exit(1);
    }

    if (d) {
        /* print the pid and wait, e.g. so a debugger can be attached */
        fprintf(stderr, "%d: %d\n", me, getpid());
        sleep(d);
    }

    nthreads = th_per_p * nproc;
    if(me == 0) {
        printf("\n Blocked Dense LU Factorization\n");
        printf("     %d by %d Matrix\n", n, n);
        printf("     %d Processors\n", nproc);
        printf("     %d thread(s) per processor, %d threads total\n", th_per_p, nthreads);
        printf("     %d by %d Element Blocks\n", block_size, block_size);
        printf("\n");
    }

    /* largest num_rows <= sqrt(nthreads) that divides nthreads evenly */
    num_rows = (int) sqrt((double) nthreads);
    for (;;) {
        num_cols = nthreads/num_rows;
        if (num_rows*num_cols == nthreads)
            break;
        num_rows--;
    }

    /* number of blocks per dimension, rounded up */
    nblocks = n/block_size;
    if (block_size * nblocks != n) {
        nblocks++;
    }

    /* blocks per thread, rounded up */
    num = (nblocks * nblocks)/nthreads;
    if((num * nthreads) != (nblocks * nblocks))
        num++;

    /* dimension of the last (possibly partial) block row/column */
    edge = n%block_size;
    if (edge == 0) {
        edge = block_size;
    }
#ifdef DEBUG
    if(me == 0)
        for (i=0;i<nblocks;i++) {
            for (j=0;j<nblocks;j++)
                printf("%d ", block_owner(i, j));
            printf("\n");
        }
    armci_msg_barrier();
#endif

    /* count the doubles owned by each local thread and the process total */
    for (l = 0; l < th_per_p; l++) {
        me_th[l] = me * th_per_p + l;
        for (i=0;i<nblocks;i++) {
            for (j=0;j<nblocks;j++) {
                if(block_owner(i,j) == me_th[l]) {
                    if ((i == nblocks-1) && (j == nblocks-1)) {
                        size = edge*edge;
                    }
                    else if ((i == nblocks-1) || (j == nblocks-1)) {
                        size = edge*block_size;
                    }
                    else {
                        size = block_size*block_size;
                    }
                    thread_doubles[l] += size;
                }
            }
        }
        proc_bytes += thread_doubles[l] * sizeof(double);
    }

    /* initialize ARMCI */
    ARMCI_Init();
    ptr = (void **)malloc(nproc * sizeof(void *));
    if (ptr == NULL) {
        fprintf(stderr, "Could not malloc memory for ptr\n");
        exit(-1);
    }
    ARMCI_Malloc(ptr, proc_bytes);

    a = (double **)malloc(nblocks*nblocks*sizeof(double *));
    if (a == NULL) {
        fprintf(stderr, "Could not malloc memory for a\n");
        exit(-1);
    }
    ptr_loc = (double **)malloc(nthreads*sizeof(double *));
    if (ptr_loc == NULL) {
        fprintf(stderr, "Could not malloc memory for ptr_loc\n");
        exit(-1);
    }
    /*
     * Starting address of each global thread's storage inside its
     * process's segment.
     * NOTE(review): the offsets use this process's thread_doubles[] for
     * every process -- presumably the distribution is identical on all
     * processes; confirm.
     */
    for (i = 0; i < nproc; i++) {
        ptr_loc[i * th_per_p] = (double *)ptr[i];
        for (j = 1; j < th_per_p; j++)
            ptr_loc[i * th_per_p + j] = ptr_loc[i * th_per_p + j - 1] + thread_doubles[j - 1];
    }
    /* hand out block addresses, advancing each owner's cursor per block */
    for(i=0; i<nblocks;i ++) {
        for(j=0; j<nblocks; j++) {
            a[i+j*nblocks] = ptr_loc[block_owner(i, j)];
            if ((i == nblocks-1) && (j == nblocks-1)) {
                size = edge*edge;
            } else if ((i == nblocks-1) || (j == nblocks-1)) {
                size = edge*block_size;
            } else {
                size = block_size*block_size;
            }
            ptr_loc[block_owner(i, j)] += size;
        }
    }
    /* the cursors are no longer needed once a[] holds every block address */
    free(ptr_loc);
    ptr_loc = NULL;
#if 0
    for(i=0; i<nblocks*nblocks;i ++) printf("%d: a[%d]=%p\n", me, i, a[i]);
    fflush(stdout);
#endif

    /* initialize the array */
    init_array();

    /* barrier to ensure all initialization is done */
    armci_msg_barrier();

    /* to remove cold-start misses, all processors touch their own data */
/*    for (l = 0; l < th_per_p; l++) touch_array(block_size, me_th[l]); */
    armci_msg_barrier();

    if(doprint) {
        if(me == 0) {
            printf("Matrix before LU decomposition\n");
            print_array(me);
        }
        armci_msg_barrier();
    }

#if 1
    for (i = 0; i < nblocks; i++)
        for (j = 0; j < nblocks; j++)
            print_block_dbg(a[i + j * nblocks], "proc %d, a[%d, %d]:\n", me, i, j);
#endif

    TH_INIT(nproc,th_per_p);

    /* Starting the timer */
    if(me == 0) start_timer();

    /* each thread receives {n, block_size, local thread index} */
    for (l = 0; l < th_per_p; l++) {
        lu_arg[l][0] = n;
        lu_arg[l][1] = block_size;
        lu_arg[l][2] = l;
        THREAD_CREATE(threads + l, lu, lu_arg[l]);
    }

    for (l = 0; l < th_per_p; l++) THREAD_JOIN(threads[l], NULL);
    armci_msg_barrier();

    /* Timer Stops here */
    if(me == 0)
        printf("\nRunning time = %lf milliseconds.\n\n",  elapsed_time());

    if(doprint) {
        if(me == 0) {
            printf("after LU\n");
            print_array(me);
        }
        armci_msg_barrier();
    }

    /* done */
    ARMCI_Free(ptr[me]);
    free(ptr); /* the address table itself came from malloc() */
    ARMCI_Finalize();
    armci_msg_finalize();

    THREAD_LOCK_DESTROY(mutex);

    return 0;
}