Example #1
/*Run Get_accumulate with flush */
void run_get_acc_with_flush(int rank, WINDOW type)
{
    int size, i;
    MPI_Aint disp = 0;
    MPI_Win     win;

    for (size = 0; size <= MAX_SIZE; size = (size ? size * 2 : size + 1)) {
        allocate_memory(rank, rbuf, size, type, &win);

        if (type == WIN_DYNAMIC) {
            disp = sdisp_remote;
        }

        if(size > LARGE_MESSAGE_SIZE) {
            loop = LOOP_LARGE;
            skip = SKIP_LARGE;
        }

        if(rank == 0) {
            MPI_CHECK(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 1, 0, win));
            for (i = 0; i < skip + loop; i++) {
                if (i == skip) {
                    t_start = MPI_Wtime ();
                }
                MPI_CHECK(MPI_Get_accumulate(sbuf, size, MPI_CHAR, cbuf, size, MPI_CHAR, 1, disp, size,
                    MPI_CHAR, MPI_SUM, win));
                MPI_CHECK(MPI_Win_flush(1, win));
            }
            t_end = MPI_Wtime ();
            MPI_CHECK(MPI_Win_unlock(1, win));
        }                

        MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));
        
        print_latency(rank, size);
        
        MPI_Win_free(&win);
    }
}
Example #2
/*Run ACC with Lock/unlock */
void run_acc_with_lock(int rank, WINDOW type)
{
    int size, i;
    MPI_Aint disp = 0;
    MPI_Win     win;

    for (size = 0; size <= MAX_SIZE; size = (size ? size * 2 : 1)) {
        allocate_memory(rank, sbuf_original, rbuf_original, &sbuf, &rbuf, &sbuf, size, type, &win);

#if MPI_VERSION >= 3
        if (type == WIN_DYNAMIC) {
            disp = disp_remote;
        }
#endif
        if(size > LARGE_MESSAGE_SIZE) {
            loop = LOOP_LARGE;
            skip = SKIP_LARGE;
        }

        if(rank == 0) {
            for (i = 0; i < skip + loop; i++) {
                if (i == skip) {
                    t_start = MPI_Wtime ();
                }
                MPI_CHECK(MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win));
                MPI_CHECK(MPI_Accumulate(sbuf, size, MPI_CHAR, 1, disp, size, MPI_CHAR, MPI_SUM, win));
                MPI_CHECK(MPI_Win_unlock(1, win));
            }
            t_end = MPI_Wtime ();
        }                

        MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));

        print_latency(rank, size);

        free_memory (sbuf, rbuf, win, rank);
    }
}
Example #3
void MPIMutex::unlock(int proc) {

  int rank, nproc;
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &nproc);

	byte *buff = (byte*)malloc(nproc*sizeof(byte));
	buff[rank] = 0;

  /* Get all data from the lock_buf, except the byte belonging to
   * me. Set the byte belonging to me to 0. */
  MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, win);
  MPI_Put(&(buff[rank]), 1, MPI_BYTE, proc, rank, 1, MPI_BYTE, win);
  /* Get data to the left of rank */
  if (rank > 0) {
      MPI_Get(buff, rank, MPI_BYTE, proc, 0, rank, MPI_BYTE, win);
  }
  /* Get data to the right of rank */
  if (rank < nproc - 1) {
      MPI_Get(&buff[rank+1], nproc-1-rank, MPI_BYTE, proc, rank+1, nproc-1-rank,
              MPI_BYTE, win);
  }
  MPI_Win_unlock(proc, win);

  /* Notify the next waiting process, starting to my right for fairness */
  for (int i = 1; i < nproc; i++) {
      int p = (rank + i) % nproc;
      if (buff[p] == 1) {
          //std::cout << "notifying "<<p<<"[proc = "<<proc<<"]" << std::endl;
          MPI_Send(NULL, 0, MPI_BYTE, p, MPI_MUTEX_TAG+id, comm);
          break;
      }
  }

 	//std::cout << "lock released [proc = "<<proc<<"]" << std::endl;
  free(buff);
};
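For context, here is a sketch of the complementary lock() acquisition that this release protocol implies. It assumes the same members used above (comm, win, id, byte, MPI_MUTEX_TAG) and is only an illustration of the byte-flag mutex idea, not the class's actual implementation: set my flag, read everyone else's flags in the same epoch, and block on the zero-byte notification from the current holder if any other flag is set.

void MPIMutex::lock(int proc) {

  int rank, nproc;
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &nproc);

  byte *buff = (byte*)malloc(nproc*sizeof(byte));
  byte one = 1;

  /* Mark my byte as waiting and read all other bytes in one epoch. */
  MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, win);
  MPI_Put(&one, 1, MPI_BYTE, proc, rank, 1, MPI_BYTE, win);
  if (rank > 0) {
      MPI_Get(buff, rank, MPI_BYTE, proc, 0, rank, MPI_BYTE, win);
  }
  if (rank < nproc - 1) {
      MPI_Get(&buff[rank+1], nproc-1-rank, MPI_BYTE, proc, rank+1, nproc-1-rank,
              MPI_BYTE, win);
  }
  MPI_Win_unlock(proc, win);

  /* If any other byte is set, someone holds or is queued for the mutex;
   * wait for the notification sent by unlock(). */
  for (int i = 0; i < nproc; i++) {
      if (i != rank && buff[i] == 1) {
          MPI_Recv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE, MPI_MUTEX_TAG+id, comm,
                   MPI_STATUS_IGNORE);
          break;
      }
  }

  free(buff);
}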
Example #4
int main(int argc, char *argv[])
{
    int rank = 0, nprocs = 0, dst = 0;
    int winbuf[BUFSIZE];
    MPI_Win win = MPI_WIN_NULL;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    memset(winbuf, 0, sizeof(int) * BUFSIZE);
    MPI_Win_create(winbuf, sizeof(int) * BUFSIZE, sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win);

    if (rank == 0) {
        /* lock each process */
        for (dst = 0; dst < nprocs; dst++) {
            MPI_Win_lock(MPI_LOCK_SHARED, dst, 0, win);
        }

        /* unlock each process */
        for (dst = nprocs - 1; dst >= 0; dst--) {
            MPI_Win_unlock(dst, win);
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Win_free(&win);

    if (rank == 0) {
        fprintf(stdout, " No Errors\n");
        fflush(stdout);
    }

    MPI_Finalize();
    return 0;
}
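Example #4 only opens and closes lock epochs without moving any data. For comparison, here is a minimal self-contained sketch (the buffer layout and values are illustrative, not taken from the test suite) in which rank 0 locks rank 1, writes one integer with MPI_Put, and lets MPI_Win_unlock complete the transfer.

#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[])
{
    int rank = 0, nprocs = 0, value = 0;
    MPI_Win win = MPI_WIN_NULL;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    /* every process exposes a single int through the window */
    MPI_Win_create(&value, sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win);

    if (rank == 0 && nprocs > 1) {
        int payload = 42;
        /* passive-target epoch: the unlock completes the put at the target */
        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 1, 0, win);
        MPI_Put(&payload, 1, MPI_INT, 1, 0, 1, MPI_INT, win);
        MPI_Win_unlock(1, win);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 1) {
        /* read the local window memory inside a lock epoch on our own rank,
         * which synchronizes the window before the load */
        MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win);
        printf("rank 1 received %d\n", value);
        MPI_Win_unlock(1, win);
    }

    MPI_Win_free(&win);
    MPI_Finalize();
    return 0;
}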
Example #5
/*Run FOP with flush local*/
void run_fop_with_flush_local (int rank, WINDOW type)
{
    int i;
    MPI_Win     win;

    MPI_Aint disp = 0;

    MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));

    allocate_atomic_memory(rank, sbuf_original, rbuf_original,
                tbuf_original, NULL, (char **)&sbuf, (char **)&rbuf,
                (char **)&tbuf, NULL, (char **)&rbuf,  MAX_MSG_SIZE, type, &win);

    if(rank == 0) {
        if (type == WIN_DYNAMIC) {
            disp = disp_remote;
        }

        MPI_CHECK(MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win));
        for (i = 0; i < skip + loop; i++) {
            if (i == skip) {
                t_start = MPI_Wtime ();
            }
            MPI_CHECK(MPI_Fetch_and_op(sbuf, tbuf, MPI_LONG_LONG, 1, disp, MPI_SUM, win));
            MPI_CHECK(MPI_Win_flush_local(1, win));
        }
        t_end = MPI_Wtime ();
        MPI_CHECK(MPI_Win_unlock(1, win));
    }                

    MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));

    print_latency(rank, 8);

    free_atomic_memory (sbuf, rbuf, tbuf, NULL, win, rank);
}
Example #6
int main( int argc, char *argv[] )
{
    int rank, nproc, i;
    int errors = 0, all_errors = 0;
    int *buf;
    MPI_Win window;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    if (nproc < 2) {
        if (rank == 0) printf("Error: must be run with two or more processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /** Create using MPI_Win_create() **/

    if (rank == 0) {
      MPI_Alloc_mem(4*sizeof(int), MPI_INFO_NULL, &buf);
      *buf = nproc-1;
    } else
      buf = NULL;

    MPI_Win_create(buf, 4*sizeof(int)*(rank == 0), 1, MPI_INFO_NULL, MPI_COMM_WORLD, &window);

    /* PROC_NULL Communication */
    {
        MPI_Request pn_req[4];
        int val[4], res;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, MPI_PROC_NULL, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, MPI_PROC_NULL, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, MPI_PROC_NULL, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, MPI_PROC_NULL, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Win_unlock_all(window);

        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, window);

    /* GET-ACC: Test third-party communication, through rank 0. */
    for (i = 0; i < ITER; i++) {
        MPI_Request gacc_req;
        int val = -1, exp = -1;

        /* Processes form a ring.  Process 0 starts first, then passes a token
         * to the right.  Each process, in turn, performs third-party
         * communication via process 0's window. */
        if (rank > 0) {
            MPI_Recv(NULL, 0, MPI_BYTE, rank-1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }

        MPI_Rget_accumulate(&rank, 1, MPI_INT, &val, 1, MPI_INT, 0, 0, 1, MPI_INT, MPI_REPLACE, window, &gacc_req);
        assert(gacc_req != MPI_REQUEST_NULL);
        MPI_Wait(&gacc_req, MPI_STATUS_IGNORE);

        MPI_Win_flush(0, window);

        exp = (rank + nproc-1) % nproc;

        if (val != exp) {
            printf("%d - Got %d, expected %d\n", rank, val, exp);
            errors++;
        }

        if (rank < nproc-1) {
            MPI_Send(NULL, 0, MPI_BYTE, rank+1, 0, MPI_COMM_WORLD);
        }

        MPI_Barrier(MPI_COMM_WORLD);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 0) *buf = nproc-1;
    MPI_Win_sync(window);

    /* GET+PUT: Test third-party communication, through rank 0. */
    for (i = 0; i < ITER; i++) {
        MPI_Request req;
        int val = -1, exp = -1;

        /* Processes form a ring.  Process 0 starts first, then passes a token
         * to the right.  Each process, in turn, performs third-party
         * communication via process 0's window. */
        if (rank > 0) {
            MPI_Recv(NULL, 0, MPI_BYTE, rank-1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }

        MPI_Rget(&val, 1, MPI_INT, 0, 0, 1, MPI_INT, window, &req);
        assert(req != MPI_REQUEST_NULL);
        MPI_Wait(&req, MPI_STATUS_IGNORE);

        MPI_Rput(&rank, 1, MPI_INT, 0, 0, 1, MPI_INT, window, &req);
        assert(req != MPI_REQUEST_NULL);
        MPI_Wait(&req, MPI_STATUS_IGNORE);

        MPI_Win_flush(0, window);

        exp = (rank + nproc-1) % nproc;

        if (val != exp) {
            printf("%d - Got %d, expected %d\n", rank, val, exp);
            errors++;
        }

        if (rank < nproc-1) {
            MPI_Send(NULL, 0, MPI_BYTE, rank+1, 0, MPI_COMM_WORLD);
        }

        MPI_Barrier(MPI_COMM_WORLD);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 0) *buf = nproc-1;
    MPI_Win_sync(window);

    /* GET+ACC: Test third-party communication, through rank 0. */
    for (i = 0; i < ITER; i++) {
        MPI_Request req;
        int val = -1, exp = -1;

        /* Processes form a ring.  Process 0 starts first, then passes a token
         * to the right.  Each process, in turn, performs third-party
         * communication via process 0's window. */
        if (rank > 0) {
            MPI_Recv(NULL, 0, MPI_BYTE, rank-1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }

        MPI_Rget(&val, 1, MPI_INT, 0, 0, 1, MPI_INT, window, &req);
        assert(req != MPI_REQUEST_NULL);
        MPI_Wait(&req, MPI_STATUS_IGNORE);

        MPI_Raccumulate(&rank, 1, MPI_INT, 0, 0, 1, MPI_INT, MPI_REPLACE, window, &req);
        assert(req != MPI_REQUEST_NULL);
        MPI_Wait(&req, MPI_STATUS_IGNORE);

        MPI_Win_flush(0, window);

        exp = (rank + nproc-1) % nproc;

        if (val != exp) {
            printf("%d - Got %d, expected %d\n", rank, val, exp);
            errors++;
        }

        if (rank < nproc-1) {
            MPI_Send(NULL, 0, MPI_BYTE, rank+1, 0, MPI_COMM_WORLD);
        }

        MPI_Barrier(MPI_COMM_WORLD);
    }
    MPI_Win_unlock(0, window);

    MPI_Barrier(MPI_COMM_WORLD);

    /* Wait inside of an epoch */
    {
        MPI_Request pn_req[4];
        int val[4], res;
        const int target = 0;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);

        MPI_Win_unlock_all(window);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Wait outside of an epoch */
    {
        MPI_Request pn_req[4];
        int val[4], res;
        const int target = 0;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Win_unlock_all(window);

        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);
    }

    /* Wait in a different epoch */
    {
        MPI_Request pn_req[4];
        int val[4], res;
        const int target = 0;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Win_unlock_all(window);

        MPI_Win_lock_all(0, window);
        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);
        MPI_Win_unlock_all(window);
    }

    /* Wait in a fence epoch */
    {
        MPI_Request pn_req[4];
        int val[4], res;
        const int target = 0;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Win_unlock_all(window);

        MPI_Win_fence(0, window);
        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);
        MPI_Win_fence(0, window);
    }

    MPI_Win_free(&window);
    if (buf) MPI_Free_mem(buf);

    MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

    if (rank == 0 && all_errors == 0)
        printf(" No Errors\n");

    MPI_Finalize();

    return 0;
}
Example #7
int main(int argc, char **argv)
{
    int i, j, rank, nranks, peer, bufsize, errs;
    double *win_buf, *src_buf, *dst_buf;
    MPI_Win buf_win;

    MTest_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    bufsize = XDIM * YDIM * sizeof(double);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &win_buf);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &src_buf);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &dst_buf);

    for (i = 0; i < XDIM * YDIM; i++) {
        *(win_buf + i) = 1.0 + rank;
        *(src_buf + i) = 1.0 + rank;
    }

    MPI_Win_create(win_buf, bufsize, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &buf_win);

    peer = (rank + 1) % nranks;

    /* Perform ITERATIONS strided put operations */

    for (i = 0; i < ITERATIONS; i++) {
        MPI_Aint idx_loc[SUB_YDIM];
        int idx_rem[SUB_YDIM];
        int blk_len[SUB_YDIM];
        MPI_Datatype src_type, dst_type;

        for (j = 0; j < SUB_YDIM; j++) {
            MPI_Get_address(&src_buf[j * XDIM], &idx_loc[j]);
            idx_rem[j] = j * XDIM * sizeof(double);
            blk_len[j] = SUB_XDIM * sizeof(double);
        }

        MPI_Type_create_hindexed(SUB_YDIM, blk_len, idx_loc, MPI_BYTE, &src_type);
        MPI_Type_create_indexed_block(SUB_YDIM, SUB_XDIM * sizeof(double), idx_rem, MPI_BYTE,
                                      &dst_type);

        MPI_Type_commit(&src_type);
        MPI_Type_commit(&dst_type);

        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, peer, 0, buf_win);
        MPI_Put(MPI_BOTTOM, 1, src_type, peer, 0, 1, dst_type, buf_win);
        MPI_Win_unlock(peer, buf_win);

        MPI_Type_free(&src_type);
        MPI_Type_free(&dst_type);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Verify that the results are correct */

    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, buf_win);
    errs = 0;
    for (i = 0; i < SUB_XDIM; i++) {
        for (j = 0; j < SUB_YDIM; j++) {
            const double actual = *(win_buf + i + j * XDIM);
            const double expected = (1.0 + ((rank + nranks - 1) % nranks));
            if (actual - expected > 1e-10) {
                SQUELCH(printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
                               rank, j, i, expected, actual););
                errs++;
                fflush(stdout);
            }
        }
Example #8
int main(int argc, char *argv[])
{
    int err, errs = 0;
    int rank, size, orig, target;
    int minsize = 2, count;
    int i, j;
    MPI_Aint origcount, targetcount;
    MPI_Comm comm;
    MPI_Win win;
    MPI_Aint lb, extent;
    MPI_Datatype origtype, targettype;
    DTP_t orig_dtp, target_dtp;
    void *origbuf, *targetbuf;

    MTest_Init(&argc, &argv);

#ifndef USE_DTP_POOL_TYPE__STRUCT       /* set in 'test/mpi/structtypetest.txt' to split tests */
    MPI_Datatype basic_type;
    int len;
    char type_name[MPI_MAX_OBJECT_NAME] = { 0 };

    err = MTestInitBasicSignature(argc, argv, &count, &basic_type);
    if (err)
        return MTestReturnValue(1);

    err = DTP_pool_create(basic_type, count, &orig_dtp);
    if (err != DTP_SUCCESS) {
        MPI_Type_get_name(basic_type, type_name, &len);
        fprintf(stdout, "Error while creating orig pool (%s,%d)\n", type_name, count);
        fflush(stdout);
    }

    err = DTP_pool_create(basic_type, count, &target_dtp);
    if (err != DTP_SUCCESS) {
        MPI_Type_get_name(basic_type, type_name, &len);
        fprintf(stdout, "Error while creating target pool (%s,%d)\n", type_name, count);
        fflush(stdout);
    }
#else
    MPI_Datatype *basic_types = NULL;
    int *basic_type_counts = NULL;
    int basic_type_num;

    err = MTestInitStructSignature(argc, argv, &basic_type_num, &basic_type_counts, &basic_types);
    if (err)
        return MTestReturnValue(1);

    err = DTP_pool_create_struct(basic_type_num, basic_types, basic_type_counts, &orig_dtp);
    if (err != DTP_SUCCESS) {
        fprintf(stdout, "Error while creating struct pool\n");
        fflush(stdout);
    }

    err = DTP_pool_create_struct(basic_type_num, basic_types, basic_type_counts, &target_dtp);
    if (err != DTP_SUCCESS) {
        fprintf(stdout, "Error while creating struct pool\n");
        fflush(stdout);
    }

    /* this is ignored */
    count = 0;
#endif

    while (MTestGetIntracommGeneral(&comm, minsize, 1)) {
        if (comm == MPI_COMM_NULL)
            continue;

        MPI_Comm_rank(comm, &rank);
        MPI_Comm_size(comm, &size);
        orig = 0;
        target = size - 1;

        for (i = 0; i < target_dtp->DTP_num_objs; i++) {
            err = DTP_obj_create(target_dtp, i, 0, 0, 0);
            if (err != DTP_SUCCESS) {
                errs++;
                break;
            }

            targetcount = target_dtp->DTP_obj_array[i].DTP_obj_count;
            targettype = target_dtp->DTP_obj_array[i].DTP_obj_type;
            targetbuf = target_dtp->DTP_obj_array[i].DTP_obj_buf;

            MPI_Type_get_extent(targettype, &lb, &extent);

            MPI_Win_create(targetbuf, lb + targetcount * extent,
                           (int) extent, MPI_INFO_NULL, comm, &win);

            for (j = 0; j < orig_dtp->DTP_num_objs; j++) {
                err = DTP_obj_create(orig_dtp, j, 0, 1, count);
                if (err != DTP_SUCCESS) {
                    errs++;
                    break;
                }

                origcount = orig_dtp->DTP_obj_array[j].DTP_obj_count;
                origtype = orig_dtp->DTP_obj_array[j].DTP_obj_type;
                origbuf = orig_dtp->DTP_obj_array[j].DTP_obj_buf;

                if (rank == orig) {
                    MPI_Win_lock(MPI_LOCK_SHARED, target, 0, win);
                    MPI_Accumulate(origbuf, origcount,
                                   origtype, target, 0, targetcount, targettype, MPI_REPLACE, win);
                    MPI_Win_unlock(target, win);
                    MPI_Barrier(comm);

                    char *resbuf = (char *) calloc(lb + extent * targetcount, sizeof(char));

                    /*wait for the destination to finish checking and reinitializing the buffer */
                    MPI_Barrier(comm);

                    MPI_Win_lock(MPI_LOCK_SHARED, target, 0, win);
                    MPI_Get_accumulate(origbuf, origcount,
                                       origtype, resbuf, targetcount, targettype,
                                       target, 0, targetcount, targettype, MPI_REPLACE, win);
                    MPI_Win_unlock(target, win);
                    MPI_Barrier(comm);
                    free(resbuf);
                } else if (rank == target) {
                    /* TODO: add a DTP_buf_set() function to replace this */
                    char *tmp = (char *) calloc(lb + extent * targetcount, sizeof(char));
                    memcpy(tmp, targetbuf, lb + extent * targetcount);

                    MPI_Barrier(comm);
                    MPI_Win_lock(MPI_LOCK_SHARED, target, 0, win);
                    err = DTP_obj_buf_check(target_dtp, i, 0, 1, count);
                    if (err != DTP_SUCCESS) {
                        errs++;
                    }
                    /* restore target buffer */
                    memcpy(targetbuf, tmp, lb + extent * targetcount);
                    free(tmp);

                    MPI_Win_unlock(target, win);

                    /*signal the source that checking and reinitialization is done */
                    MPI_Barrier(comm);

                    MPI_Barrier(comm);
                    MPI_Win_lock(MPI_LOCK_SHARED, target, 0, win);
                    err = DTP_obj_buf_check(target_dtp, i, 0, 1, count);
                    if (err != DTP_SUCCESS) {
                        errs++;
                    }
                    MPI_Win_unlock(target, win);
                }
                DTP_obj_free(orig_dtp, j);
            }
            MPI_Win_free(&win);
            DTP_obj_free(target_dtp, i);
        }
        MTestFreeComm(&comm);
    }
    DTP_pool_free(orig_dtp);
    DTP_pool_free(target_dtp);

#ifdef USE_DTP_POOL_TYPE__STRUCT
    /* cleanup array if any */
    if (basic_types) {
        free(basic_types);
    }
    if (basic_type_counts) {
        free(basic_type_counts);
    }
#endif

    MTest_Finalize(errs);
    return MTestReturnValue(errs);
}
Example #9
int main(int argc, char **argv) {
    int rank, nranks, rank_world, nranks_world;
    int i, j, peer, bufsize, errors;
    double *win_buf, *src_buf, *dst_buf;
    MPI_Win buf_win;
    MPI_Comm shr_comm;

    MTest_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank_world);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks_world);

    MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, rank_world, MPI_INFO_NULL, &shr_comm);

    MPI_Comm_rank(shr_comm, &rank);
    MPI_Comm_size(shr_comm, &nranks);

    bufsize = XDIM * YDIM * sizeof(double);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &src_buf);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &dst_buf);

    MPI_Win_allocate_shared(bufsize, 1, MPI_INFO_NULL, shr_comm, &win_buf, &buf_win);

    MPI_Win_fence(0, buf_win);

    for (i = 0; i < XDIM*YDIM; i++) {
        *(win_buf + i) = -1.0;
        *(src_buf + i) =  1.0 + rank;
    }

    MPI_Win_fence(0, buf_win);

    peer = (rank+1) % nranks;

    /* Perform ITERATIONS strided accumulate operations */

    for (i = 0; i < ITERATIONS; i++) {
        int idx_rem[SUB_YDIM];
        int blk_len[SUB_YDIM];
        MPI_Datatype src_type, dst_type;

        for (j = 0; j < SUB_YDIM; j++) {
            idx_rem[j] = j*XDIM;
            blk_len[j] = SUB_XDIM;
        }

        MPI_Type_indexed(SUB_YDIM, blk_len, idx_rem, MPI_DOUBLE, &src_type);
        MPI_Type_indexed(SUB_YDIM, blk_len, idx_rem, MPI_DOUBLE, &dst_type);

        MPI_Type_commit(&src_type);
        MPI_Type_commit(&dst_type);

        /* PUT */
        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, peer, 0, buf_win);
        MPI_Get_accumulate(src_buf, 1, src_type, dst_buf, 1, src_type, peer, 0,
                           1, dst_type, MPI_REPLACE, buf_win);
        MPI_Win_unlock(peer, buf_win);

        /* GET */
        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, peer, 0, buf_win);
        MPI_Get_accumulate(src_buf, 1, src_type, dst_buf, 1, src_type, peer, 0,
                           1, dst_type, MPI_NO_OP, buf_win);
        MPI_Win_unlock(peer, buf_win);

        MPI_Type_free(&src_type);
        MPI_Type_free(&dst_type);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Verify that the results are correct */

    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, buf_win);
    errors = 0;
    for (i = 0; i < SUB_XDIM; i++) {
        for (j = 0; j < SUB_YDIM; j++) {
            const double actual   = *(win_buf + i + j*XDIM);
            const double expected = (1.0 + ((rank+nranks-1)%nranks));
            if (fabs(actual - expected) > 1.0e-10) {
                SQUELCH( printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
                                rank, j, i, expected, actual); );
                errors++;
                fflush(stdout);
            }
        }
Example #10
void set_status( int new_status, int rank )
{
    MPI_Win_lock( MPI_LOCK_EXCLUSIVE, rank, 0, win_status );
    STATUS[0] = new_status;
    MPI_Win_unlock( rank, win_status );
}
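A plausible read-side counterpart to set_status(), shown only as a sketch: it assumes the same win_status window exposes each rank's STATUS[0] as an int, and uses a shared lock plus MPI_Get to fetch another rank's status.

int get_status( int rank )
{
    int status_val = 0;
    MPI_Win_lock( MPI_LOCK_SHARED, rank, 0, win_status );
    MPI_Get( &status_val, 1, MPI_INT, rank, 0, 1, MPI_INT, win_status );
    MPI_Win_unlock( rank, win_status );  /* the get is complete after the unlock */
    return status_val;
}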
Example #11
int main(int argc, char *argv[])
{
    int rank, nproc, i, x;
    int errors = 0, all_errors = 0;
    MPI_Win win = MPI_WIN_NULL;

    MPI_Comm shm_comm = MPI_COMM_NULL;
    int shm_nproc, shm_rank;
    double **shm_bases = NULL, *my_base;
    MPI_Win shm_win = MPI_WIN_NULL;
    MPI_Group shm_group = MPI_GROUP_NULL, world_group = MPI_GROUP_NULL;
    int *shm_ranks = NULL, *shm_ranks_in_world = NULL;
    MPI_Aint get_target_base_offsets = 0;

    int win_size = sizeof(double) * BUF_CNT;
    int new_win_size = win_size;
    int win_unit = sizeof(double);
    int shm_root_rank_in_world;
    int origin = -1, put_target, get_target;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    MPI_Comm_group(MPI_COMM_WORLD, &world_group);

    if (nproc != 4) {
        if (rank == 0)
            printf("Error: must be run with four processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, rank, MPI_INFO_NULL, &shm_comm);
    MPI_Comm_rank(shm_comm, &shm_rank);
    MPI_Comm_size(shm_comm, &shm_nproc);
    MPI_Comm_group(shm_comm, &shm_group);

    /* The platform does not support shared memory, or the host file is wrong; just return. */
    if (shm_nproc != 2) {
        goto exit;
    }

    shm_bases = (double **) calloc(shm_nproc, sizeof(double *));
    shm_ranks = (int *) calloc(shm_nproc, sizeof(int));
    shm_ranks_in_world = (int *) calloc(shm_nproc, sizeof(int));

    if (shm_rank == 0)
        shm_root_rank_in_world = rank;
    MPI_Bcast(&shm_root_rank_in_world, 1, MPI_INT, 0, shm_comm);

    /* Identify ranks of target processes which are located on node 0 */
    if (rank == 0) {
        for (i = 0; i < shm_nproc; i++) {
            shm_ranks[i] = i;
        }
        MPI_Group_translate_ranks(shm_group, shm_nproc, shm_ranks, world_group, shm_ranks_in_world);
    }
    MPI_Bcast(shm_ranks_in_world, shm_nproc, MPI_INT, 0, MPI_COMM_WORLD);

    put_target = shm_ranks_in_world[shm_nproc - 1];
    get_target = shm_ranks_in_world[0];

    /* Identify the rank of the origin process, which is located on node 1 */
    if (shm_root_rank_in_world == 1 && shm_rank == 0) {
        origin = rank;
        if (verbose) {
            printf("----   I am origin = %d, get_target = %d, put_target = %d\n",
                   origin, get_target, put_target);
        }
    }

    /* Allocate shared memory among local processes */
    MPI_Win_allocate_shared(win_size, win_unit, MPI_INFO_NULL, shm_comm, &my_base, &shm_win);

    if (shm_root_rank_in_world == 0 && verbose) {
        MPI_Aint size;
        int disp_unit;
        for (i = 0; i < shm_nproc; i++) {
            MPI_Win_shared_query(shm_win, i, &size, &disp_unit, &shm_bases[i]);
            printf("%d --    shared query: base[%d]=%p, size %zd, "
                   "unit %d\n", rank, i, shm_bases[i], size, disp_unit);
        }
    }

    /* Get offset of put target(1) on get target(0) */
    get_target_base_offsets = (shm_nproc - 1) * win_size / win_unit;

    if (origin == rank && verbose)
        printf("%d --    base_offset of put_target %d on get_target %d: %zd\n",
               rank, put_target, get_target, get_target_base_offsets);

    /* Create using MPI_Win_create(). Note that new window size of get_target(0)
     * is equal to the total size of shm segments on this node, thus get_target
     * process can read the byte located on put_target process.*/
    for (i = 0; i < BUF_CNT; i++) {
        local_buf[i] = (i + 1) * 1.0;
        my_base[i] = 0.0;
    }

    if (get_target == rank)
        new_win_size = win_size * shm_nproc;

    MPI_Win_create(my_base, new_win_size, win_unit, MPI_INFO_NULL, MPI_COMM_WORLD, &win);

    if (verbose)
        printf("%d --    new window my_base %p, size %d\n", rank, my_base, new_win_size);

    MPI_Barrier(MPI_COMM_WORLD);

    /* Check whether flush guarantees the completion of put operations on the target side.
     *
     * P exclusively locks 2 processes whose windows are shared with each other.
     * P first puts and flushes to one process, then gets the updated data from the other
     * process. If flush returns before the operations are done on the target side, the
     * data may be incorrect. */
    for (x = 0; x < ITER; x++) {
        for (i = 0; i < BUF_CNT; i++) {
            local_buf[i] += x;
            check_buf[i] = 0;
        }

        if (rank == origin) {
            MPI_Win_lock(MPI_LOCK_EXCLUSIVE, put_target, 0, win);
            MPI_Win_lock(MPI_LOCK_EXCLUSIVE, get_target, 0, win);

            for (i = 0; i < BUF_CNT; i++) {
                MPI_Put(&local_buf[i], 1, MPI_DOUBLE, put_target, i, 1, MPI_DOUBLE, win);
            }
            MPI_Win_flush(put_target, win);

            MPI_Get(check_buf, BUF_CNT, MPI_DOUBLE, get_target,
                    get_target_base_offsets, BUF_CNT, MPI_DOUBLE, win);
            MPI_Win_flush(get_target, win);

            for (i = 0; i < BUF_CNT; i++) {
                if (check_buf[i] != local_buf[i]) {
                    printf("%d(iter %d) - Got check_buf[%d] = %.1lf, expected %.1lf\n",
                           rank, x, i, check_buf[i], local_buf[i]);
                    errors++;
                }
            }

            MPI_Win_unlock(put_target, win);
            MPI_Win_unlock(get_target, win);
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

  exit:

    if (rank == 0 && all_errors == 0)
        printf(" No Errors\n");

    if (shm_bases)
        free(shm_bases);
    if (shm_ranks)
        free(shm_ranks);
    if (shm_ranks_in_world)
        free(shm_ranks_in_world);

    if (shm_win != MPI_WIN_NULL)
        MPI_Win_free(&shm_win);

    if (win != MPI_WIN_NULL)
        MPI_Win_free(&win);

    if (shm_comm != MPI_COMM_NULL)
        MPI_Comm_free(&shm_comm);

    if (shm_group != MPI_GROUP_NULL)
        MPI_Group_free(&shm_group);

    if (world_group != MPI_GROUP_NULL)
        MPI_Group_free(&world_group);

    MPI_Finalize();

    return 0;
}
Example #12
static int run_test(int nop)
{
    int i, x, errs = 0, errs_total = 0;
    MPI_Status stat;
    int dst;
    int winbuf_offset = 0;
    double t0, avg_total_time = 0.0, t_total = 0.0;
    double sum = 0.0;

    if (nprocs <= NPROCS_M) {
        ITER = ITER_S;
    }
    else {
        ITER = ITER_L;
    }

    target_computation_init();
    MPI_Win_lock_all(0, win);

    t0 = MPI_Wtime();
    for (x = 0; x < ITER; x++) {

        // accumulate to all the other processes, walking the ring starting at the next rank
        for (dst = (rank + 1) % nprocs; dst != rank; dst = (dst + 1) % nprocs) {
            MPI_Accumulate(&locbuf[0], 1, MPI_DOUBLE, dst, rank, 1, MPI_DOUBLE, MPI_SUM, win);
        }
        MPI_Win_flush_all(win);

        target_computation();

        for (dst = (rank + 1) % nprocs; dst != rank; dst = (dst + 1) % nprocs) {
            for (i = 1; i < nop; i++) {
                MPI_Accumulate(&locbuf[i], 1, MPI_DOUBLE, dst, rank, 1, MPI_DOUBLE, MPI_SUM, win);
            }
        }
        MPI_Win_flush_all(win);

        debug_printf("[%d]MPI_Win_flush all done\n", x);
    }
    t_total += MPI_Wtime() - t0;
    t_total /= ITER;

    MPI_Win_unlock_all(win);
    MPI_Barrier(MPI_COMM_WORLD);

    target_computation_exit();

#ifdef CHECK
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, win);
    sum = 0.0;
    for (i = 0; i < nop; i++) {
        sum += locbuf[i];
    }
    sum *= ITER;
    for (i = 0; i < nprocs; i++) {
        if (i == rank)
            continue;
        if (winbuf[i] != sum) {
            fprintf(stderr,
                    "[%d]computation error : winbuf[%d] %.2lf != %.2lf, nop %d\n",
                    rank, i, winbuf[i], sum, nop);
            errs += 1;
        }
    }
    MPI_Win_unlock(rank, win);
#endif

    MPI_Reduce(&t_total, &avg_total_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    MPI_Allreduce(&errs, &errs_total, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

    if (rank == 0) {
        avg_total_time /= nprocs;
#ifdef MTCORE
        fprintf(stdout,
                "mtcore: comp_size %d num_op %d nprocs %d total_time %lf\n",
                DGEMM_SIZE, nop, nprocs, avg_total_time);
#else
        fprintf(stdout,
                "orig: comp_size %d num_op %d nprocs %d total_time %lf\n",
                DGEMM_SIZE, nop, nprocs, avg_total_time);
#endif
    }

    return errs_total;
}
Example #13
int main(int argc, char **argv) {
    int           procid, nproc, i;
    MPI_Win       llist_win;
    llist_ptr_t   head_ptr, tail_ptr;

    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &procid);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &llist_win);

    /* Process 0 creates the head node */
    if (procid == 0)
        head_ptr.disp = alloc_elem(-1, llist_win);

    /* Broadcast the head pointer to everyone */
    head_ptr.rank = 0;
    MPI_Bcast(&head_ptr.disp, 1, MPI_AINT, 0, MPI_COMM_WORLD);
    tail_ptr = head_ptr;

    /* All processes concurrently append NUM_ELEMS elements to the list */
    for (i = 0; i < NUM_ELEMS; i++) {
        llist_ptr_t new_elem_ptr;
        int success;

        /* Create a new list element and register it with the window */
        new_elem_ptr.rank = procid;
        new_elem_ptr.disp = alloc_elem(procid, llist_win);

        /* Append the new node to the list.  This might take multiple attempts if
           others have already appended and our tail pointer is stale. */
        do {
            llist_ptr_t next_tail_ptr = nil;

            MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);

            MPI_Compare_and_swap((void*) &new_elem_ptr.rank, (void*) &nil.rank,
                                  (void*) &next_tail_ptr.rank, MPI_INT, tail_ptr.rank,
                                  (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.rank), llist_win);

            MPI_Win_unlock(tail_ptr.rank, llist_win);
            success = (next_tail_ptr.rank == nil.rank);

            if (success) {
                int i, flag;
                MPI_Aint result;

                MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);

                MPI_Fetch_and_op(&new_elem_ptr.disp, &result, MPI_AINT, tail_ptr.rank,
                                  (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.disp),
                                  MPI_REPLACE, llist_win);

                /* Note: accumulate is faster, since we don't need the result.  Replacing with
                   Fetch_and_op to create a more complete test case. */
                /*
                MPI_Accumulate(&new_elem_ptr.disp, 1, MPI_AINT, tail_ptr.rank,
                               (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.disp), 1,
                               MPI_AINT, MPI_REPLACE, llist_win);
                */

                MPI_Win_unlock(tail_ptr.rank, llist_win);
                tail_ptr = new_elem_ptr;

                /* For implementations that use pt-to-pt messaging, force progress for other threads'
                   RMA operations. */
                for (i = 0; i < NPROBE; i++)
                    MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE);

            } else {
                /* Tail pointer is stale, fetch the displacement.  May take multiple tries
                   if it is being updated. */
                do {
                    MPI_Aint junk = 0;

                    MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);

                    MPI_Fetch_and_op(NULL, &next_tail_ptr.disp, MPI_AINT, tail_ptr.rank,
                                      (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.disp),
                                      MPI_NO_OP, llist_win);

                    MPI_Win_unlock(tail_ptr.rank, llist_win);
                } while (next_tail_ptr.disp == nil.disp);
                tail_ptr = next_tail_ptr;
            }
        } while (!success);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Traverse the list and verify that all processes inserted exactly the correct
       number of elements. */
    if (procid == 0) {
        int  have_root = 0;
        int  errors    = 0;
        int *counts, count = 0;

        counts = (int*) malloc(sizeof(int) * nproc);
        assert(counts != NULL);

        for (i = 0; i < nproc; i++)
            counts[i] = 0;

        tail_ptr = head_ptr;

        /* Walk the list and tally up the number of elements inserted by each rank */
        while (tail_ptr.disp != nil.disp) {
            llist_elem_t elem;

            MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);

            MPI_Get(&elem, sizeof(llist_elem_t), MPI_BYTE,
                    tail_ptr.rank, tail_ptr.disp, sizeof(llist_elem_t), MPI_BYTE, llist_win);

            MPI_Win_unlock(tail_ptr.rank, llist_win);

            tail_ptr = elem.next;

            /* This is not the root */
            if (have_root) {
                assert(elem.value >= 0 && elem.value < nproc);
                counts[elem.value]++;
                count++;

                if (verbose) {
                    int last_elem = tail_ptr.disp == nil.disp;
                    printf("%2d%s", elem.value, last_elem ? "" : " -> ");
                    if (count % ELEM_PER_ROW == 0 && !last_elem)
                        printf("\n");
                }
            }

            /* This is the root */
            else {
                assert(elem.value == -1);
                have_root = 1;
            }
        }

        if (verbose)
          printf("\n\n");

        /* Verify the counts we collected */
        for (i = 0; i < nproc; i++) {
            int expected = NUM_ELEMS;

            if (counts[i] != expected) {
                printf("Error: Rank %d inserted %d elements, expected %d\n", i, counts[i], expected);
                errors++;
            }
        }

        printf("%s\n", errors == 0 ? " No Errors" : "FAIL");
        free(counts);
    }

    MPI_Win_free(&llist_win);

    /* Free all the elements in the list */
    for ( ; my_elems_count > 0; my_elems_count--)
        MPI_Free_mem(my_elems[my_elems_count-1]);

    MPI_Finalize();
    return 0;
}
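The listing above relies on an alloc_elem() helper that is not shown. The following is a hypothetical sketch of what it presumably does, assuming the llist_elem_t type, the nil sentinel, and the my_elems/my_elems_count globals used above, plus an assumed my_elems_size capacity counter: allocate the element, attach it to the dynamic window, and return its absolute address as the displacement.

/* Hypothetical sketch: allocate a list element, attach it to the dynamic
 * window, and return its absolute displacement. */
static MPI_Aint alloc_elem(int value, MPI_Win win) {
    MPI_Aint disp;
    llist_elem_t *elem_ptr;

    /* Allocate the new element and expose it through the dynamic window */
    MPI_Alloc_mem(sizeof(llist_elem_t), MPI_INFO_NULL, &elem_ptr);
    elem_ptr->value = value;
    elem_ptr->next  = nil;
    MPI_Win_attach(win, elem_ptr, sizeof(llist_elem_t));

    /* Remember the element locally so it can be freed at the end
       (my_elems_size is an assumed capacity counter for the my_elems array) */
    if (my_elems_size == my_elems_count) {
        my_elems_size += 100;
        my_elems = realloc(my_elems, my_elems_size * sizeof(void*));
    }
    my_elems[my_elems_count] = elem_ptr;
    my_elems_count++;

    /* With a dynamic window, the target displacement is the element's address */
    MPI_Get_address(elem_ptr, &disp);
    return disp;
}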
Example #14
// argc = cpu count, argv = file.cpp
int main(int argc, char *argv[])
{
	// create win object, this is used for locks
	MPI_Win win;
	// needed for MPI
	int namelen = 0;
	int myid, numprocs = 0;
	// processor name
	char processor_name[MPI_MAX_PROCESSOR_NAME];
	//initialize MPI execution environment
	MPI_Init(&argc, &argv);
	//each process get total # of processes
	//the total # of processes is specified in mpirun -np n
	MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
	//each process gets its own id
	MPI_Comm_rank(MPI_COMM_WORLD, &myid);
	// needed for times
	double program_start = 0;
	double program_end = 0;
	double process_start = 0;
	double process_end = 0;
	// take time
	if (myid == 0)
		// get start program time
		program_start = MPI_Wtime();
	// Gets the name of the processor
	MPI_Get_processor_name(processor_name, &namelen);
	// number of processes
	int n = 0;
	// display info
	fprintf(stderr, "process %d on %s\n", myid, processor_name);
	fflush(stderr);
	// create win object for locks
	MPI_Win_create(NULL, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);
	// declare array to hold char from words plus \0
	char* arr;
	// list to keep track of length of each word
	short* list;
	// size of entire array
	int arr_size = 0;
	// size of the list
	int list_size = 0;
	// new list of words that are not palindromes
	char* new_words;
	// size of the new array of words each process will
	// have in order to send back to root after finding
	// all non-palindrome words
	int new_size = 0;
	// this will be the total size of non-palindrome words
	// which will be received from each process
	int total_size = 0;
	// temp vector to hold arrays in file
	std::vector<std::string>* words;
	// root does
	if (myid == 0)
	{
		// stream to open file
		std::fstream in;
		// vector to dynamically grow as we add strings to it
		// this makes it so we don't need to open file twice since 
		// we would normally open file and count number of words
		// then reopen it to get the actual words to put in an array
		// we just declared based off the size we got the first time
		words = new std::vector<std::string>();
		// open file as instream
		in.open("Palindromes.txt", std::ios::in);
		// if error opening file
		if (in.fail())
		{
			// display message and close
			std::cout << "Error Opening File" << std::endl;
			MPI_Abort(MPI_COMM_WORLD, 1);
		}
		// no error while opening file
		else
		{
			// temp string to hold each word
			std::string temp;
			// grab each word from each line
			while (getline(in, temp))
			{
				// put word into vector
				words->push_back(temp);
				// loop each string (word) and get it's length
				for (int i = 0; i < temp.size(); i++)
					//increment size
					arr_size++;
				// increment one last time since we will be adding a 
				// \0 for each word
				arr_size++;
			}
			// done, close file
			in.close();
		}
		// set list size to the number of words
		list_size = words->size();
		// we added one since later on in the program
		// we use the next index to mark where the loop stops
		// without one at end, there is no way to mark the end
		// and last word never gets processed
		list_size++;
	}
	// take time
	if (myid == 0)
		// get start program time
		process_start = MPI_Wtime();
	// barrier
	MPI_Barrier(MPI_COMM_WORLD);
	// broadcast the size of char array and list to other processes
	// they will be used to allocate the needed space per process
	MPI_Bcast(&arr_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
	// barrier
	MPI_Barrier(MPI_COMM_WORLD);
	// broadcast the list size (list_size is an int, so use MPI_INT)
	MPI_Bcast(&list_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
	// barrier
	MPI_Barrier(MPI_COMM_WORLD);
	// allocate list, list should be number of \0
	// since there is one per word, it should be the number of words
	list = new short[list_size];
	// barrier
	MPI_Barrier(MPI_COMM_WORLD);
	// allocate array
	arr = new char[arr_size];
	// barrier
	MPI_Barrier(MPI_COMM_WORLD);
	// root does this
	if (myid == 0)
	{
		// put the values into array
		// using a counter
		int counter = 0;
		// loop entire array, while looping each word
		// and put them sequentially into array
		// with null terminator ending each word
		// we do list_size-1 since list_size is increased by 1
		// to fix an earlier problem where we need to mark
		// last element in list to be able to end it
		// without it, it crashes, not sure why
		for (int i = 0; i < list_size - 1; i++)
		{
			//mark start of word
			arr[counter] = '\0';
			// put null terminator index into list
			list[i] = counter;
			// increment counter
			counter++;
			// copy the characters of the next word
			for (int j = 0; j < words->at(i).size(); j++)
			{
				// get word from vector at i (string is returned)
				// get char at j from string
				arr[counter++] = words->at(i).at(j);
			}
		}
		// make last element to stop loops later in program
		list[list_size - 1] = counter;
		// free up memory, this object is no longer used
		delete words;
	}
	// broadcast array of char (basically all the words
	// in a char array where each word ends in \0)
	// also broadcast list of word indexes
	MPI_Barrier(MPI_COMM_WORLD);
	// send list of indexes to all processes
	MPI_Bcast(list, list_size, MPI_SHORT, 0, MPI_COMM_WORLD);
	// send array of words to processes
	MPI_Bcast(arr, arr_size, MPI_CHAR, 0, MPI_COMM_WORLD);
	// run function for each process to create a new list of non-palindromes
	// this is using cyclic partitioning
	new_words = markParalindromes(myid, arr_size, list_size, arr, list, numprocs, new_size);
	// take an exclusive lock on rank 1's window, intended to serialize the
	// file writes below (a shared lock would allow concurrent writers)
	MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 1, 0, win);
	// create out stream object
	std::fstream out;
	// open file
	out.open("Non-Palindromes.txt", std::ios::out | std::ios::app);
	// if there is an error creating/opening
	if (out.fail())
	{
		// display message and close
		std::cout << "Error Opening File" << std::endl;
		MPI_Abort(MPI_COMM_WORLD, 1);
	}
	else
	{
		// loop non-palindrome words
		for (int i = 0; i < new_size; i++)
		{
			// if char is null terminator
			// replace it with newline
			if (new_words[i] == '\0')
			{
				// if I write one newline, it does not work
				// but two does
				out << std::endl;
				out << std::endl;
			}
			// if not null terminator
			else
				// write char of current word
				out << new_words[i];
		}
		// needed for end of file
		out << std::endl;
		out << std::endl;
		// close file
		out.close();
	}
	// unlock 
	MPI_Win_unlock(1, win);
	// barrier
	MPI_Barrier(MPI_COMM_WORLD);
	if (myid == 0)
		// get start program time
		process_end = MPI_Wtime();
	// clean up and display results
	if (myid == 0)
	{
		// clean up
		if(arr != NULL)
			delete[] arr;
		if (list != NULL)
			delete[] list;
		if (new_words != NULL)
			delete[] new_words;
	}
	// barrier
	MPI_Barrier(MPI_COMM_WORLD);
	if (myid == 0)
		// get start program time
		program_end = MPI_Wtime();
	if(myid == 0)
	{ 
		// get total time
		std::cout << "Program Time: " << (program_end - program_start) << "s" << std::endl;
		// get process time
		std::cout << "Process Time: " << (process_end - process_start) << "s" << std::endl;
	}
	// needed to clean up 
	MPI_Win_free(&win);
	MPI_Finalize();
}
Example #15
numb hashlookup(obj o)
/* Never fill up the hash table! Returns the number of times a value
 * has been seen so far. If an obj is seen for the first time, it is stored
 * without copying it; do not free the object in this case, since ownership
 * passes to the hash table. */
{
  numb v;
  int destRank, destPos;
  numb localHash,localCount;

  v = f(o);
  localHash=0;
  localCount=0;

  while (1) {
    /* work out who and where the hash should go */
    destRank = v/nobj;
    destPos = v - nobj*destRank;
    /* Get the value in the hash table */
    if(destRank!=rank){
      remote++;
    }
    else{
      local++;
    }
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE,destRank,0,win);
    status = MPI_Get(&localHash,1,MPI_INT,destRank,destPos,1,MPI_INT,win);
    MPI_Win_unlock(destRank,win);
    if (localHash) {
      /*          if (memcmp(o,hashtab[v],m)) {*/
      if(!(*o==localHash)){
        v++;
        destPos++;
        if (v >= hashlen) v = 0;
        if (destPos >= nobj){
          /* don't fall off the end, go to the next one */
          destPos=0;
          destRank++;
          /* and for rank too */
          if(destRank>=size) destRank=0;

        }
        collisions++;
      } else {   /* Found! */
        /* get the count */
        MPI_Win_lock(MPI_LOCK_EXCLUSIVE,destRank,0,win2);
        MPI_Get(&localCount,1,MPI_INT,destRank,destPos,1,MPI_INT,win2);
        /* the get is not guaranteed to be complete until the epoch is
         * synchronized, so flush before using and overwriting the value */
        MPI_Win_flush(destRank,win2);
        localCount++;
        MPI_Put(&localCount,1,MPI_INT,destRank,destPos,1,MPI_INT,win2);
        MPI_Win_unlock(destRank,win2);
        /*        hashcount[v]++;
                  return hashcount[v];*/
        return localCount;
      }
    } else {   /* Definitely not found */
      /*        hashtab[yv] = o;*/
      MPI_Win_lock(MPI_LOCK_EXCLUSIVE,destRank,0,win);
      /* What am I copying here? */
      MPI_Put(o,1,MPI_INT,destRank,destPos,1,MPI_INT,win);
      MPI_Win_unlock(destRank,win);

      /*        hashcount[v] = 1;*/
      MPI_Win_lock(MPI_LOCK_EXCLUSIVE,destRank,0,win2);
      localCount=1;
      MPI_Put(&localCount,1,MPI_INT,destRank,destPos,1,MPI_INT,win2);
      MPI_Win_unlock(destRank,win2);

      return localCount;
    }

  }
}
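As a side note, the get/increment/put sequence used for the count above can be done in a single atomic call with the MPI-3 MPI_Fetch_and_op. A sketch against the same win2 layout (assuming numb is an int, as the MPI_INT transfers above imply):

      /* Found: atomically add 1 to the remote count and fetch the old value */
      numb one = 1, oldCount;
      MPI_Win_lock(MPI_LOCK_SHARED, destRank, 0, win2);
      MPI_Fetch_and_op(&one, &oldCount, MPI_INT, destRank, destPos, MPI_SUM, win2);
      MPI_Win_unlock(destRank, win2);
      return oldCount + 1;   /* same value the original code returns */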
Example #16
int main( int argc, char *argv[] )
{
    int      errs = 0;
    MPI_Win  win;
    int  *rmabuffer=0, *getbuf=0;
    MPI_Aint bufsize=0, getbufsize=0;
    int      master, partner, next, wrank, wsize, i;
    int      ntest = LAST_TEST;
    int *srcbuf;

    MTest_Init( &argc, &argv );

    /* Determine who is responsible for each part of the test */
    MPI_Comm_rank( MPI_COMM_WORLD, &wrank );
    MPI_Comm_size( MPI_COMM_WORLD, &wsize );
    if (wsize < 3) {
	fprintf( stderr, "This test requires at least 3 processes\n" );
	MPI_Abort( MPI_COMM_WORLD, 1 );
    }

    master  = 0;
    partner = 1;
    next = wrank + 1;
    if (next == partner) next++;
    if (next >= wsize) {
	next = 0;
	if (next == partner) next++;
    }

    /* Determine the last test to run (by default, run them all) */
    for (i=1; i<argc; i++) {
	if (strcmp( "-ntest", argv[i] ) == 0) { 
	    i++;
	    if (i < argc) {
		ntest = atoi( argv[i] );
	    }
	    else {
		fprintf( stderr, "Missing value for -ntest\n" );
		MPI_Abort( MPI_COMM_WORLD, 1 );
	    }
	}
    }

    MPI_Type_vector( veccount, 1, stride, MPI_INT, &vectype );
    MPI_Type_commit( &vectype );

    /* Create the RMA window */
    bufsize = 0;
    if (wrank == master) {
	bufsize = RMA_SIZE;
	MPI_Alloc_mem( bufsize*sizeof(int), MPI_INFO_NULL, &rmabuffer );
    }
    else if (wrank == partner) {
	getbufsize = RMA_SIZE;
	getbuf = (int *)malloc( getbufsize*sizeof(int) );
	if (!getbuf) {
	    fprintf( stderr, "Unable to allocated %d bytes for getbuf\n", 
		    (int)getbufsize );
	    MPI_Abort( MPI_COMM_WORLD, 1 );
	}
    }
    srcbuf = malloc(RMA_SIZE*sizeof(*srcbuf));
    assert(srcbuf);

    MPI_Win_create( rmabuffer, bufsize, sizeof(int), MPI_INFO_NULL,
		    MPI_COMM_WORLD, &win );
    
    /* Run a sequence of tests */
    for (i=0; i<=ntest; i++) {
	if (wrank == master) {
	    MTestPrintfMsg( 0, "Test %d\n", i );
	    /* Because this lock is local, it must return only when the
	     lock is acquired */
	    MPI_Win_lock( MPI_LOCK_EXCLUSIVE, master, 0, win );
	    RMATestInit( i, rmabuffer, bufsize );
	    MPI_Send( MPI_BOTTOM, 0, MPI_INT, partner, i, MPI_COMM_WORLD );
	    MPI_Send( MPI_BOTTOM, 0, MPI_INT, next, i, MPI_COMM_WORLD );
	    MPI_Recv( MPI_BOTTOM, 0, MPI_INT, MPI_ANY_SOURCE, i, 
		      MPI_COMM_WORLD, MPI_STATUS_IGNORE );
	    MPI_Win_unlock( master, win );
	    MPI_Recv( MPI_BOTTOM, 0, MPI_INT, partner, i, MPI_COMM_WORLD, 
		      MPI_STATUS_IGNORE );
	    errs += RMACheck( i, rmabuffer, bufsize );
	}
	else if (wrank == partner) {
	    MPI_Recv( MPI_BOTTOM, 0, MPI_INT, master, i, MPI_COMM_WORLD,
		      MPI_STATUS_IGNORE );
	    MPI_Win_lock( MPI_LOCK_EXCLUSIVE, master, 0, win );
	    RMATest( i, win, master, srcbuf, RMA_SIZE, getbuf, getbufsize );
	    MPI_Win_unlock( master, win );
	    errs += RMACheckGet( i, win, getbuf, getbufsize );
	    MPI_Send( MPI_BOTTOM, 0, MPI_INT, master, i, MPI_COMM_WORLD );
	}
	else {
	    MPI_Recv( MPI_BOTTOM, 0, MPI_INT, MPI_ANY_SOURCE, i, 
		      MPI_COMM_WORLD, MPI_STATUS_IGNORE );
	    MPI_Send( MPI_BOTTOM, 0, MPI_INT, next, i, MPI_COMM_WORLD );
	}
    }

    if (rmabuffer) {
	MPI_Free_mem( rmabuffer );
    }
    if (getbuf) {
	free( getbuf );
    }
    MPI_Win_free( &win );
    MPI_Type_free( &vectype );

    MTest_Finalize( errs );
    MPI_Finalize();
    return MTestReturnValue( errs );
}
Example #17
int main(int argc, char *argv[])
{
    int rank, size, i, j, k;
    int errors = 0;
    int origin_shm, origin_am, dest;
    int *orig_buf = NULL, *result_buf = NULL, *compare_buf = NULL,
        *target_buf = NULL, *check_buf = NULL;
    MPI_Win win;
    MPI_Status status;

    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size != 3) {
        /* run this test with three processes */
        goto exit_test;
    }

    /* this works when MPIR_PARAM_CH3_ODD_EVEN_CLIQUES is set */
    dest = 2;
    origin_shm = 0;
    origin_am = 1;

    if (rank != dest) {
        MPI_Alloc_mem(sizeof(int), MPI_INFO_NULL, &orig_buf);
        MPI_Alloc_mem(sizeof(int), MPI_INFO_NULL, &result_buf);
        MPI_Alloc_mem(sizeof(int), MPI_INFO_NULL, &compare_buf);
    }

    MPI_Win_allocate(sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &target_buf, &win);

    for (k = 0; k < LOOP_SIZE; k++) {

        /* init buffers */
        if (rank == origin_shm) {
            orig_buf[0] = 1;
            compare_buf[0] = 0;
            result_buf[0] = 0;
        }
        else if (rank == origin_am) {
            orig_buf[0] = 0;
            compare_buf[0] = 1;
            result_buf[0] = 0;
        }
        else {
            MPI_Win_lock(MPI_LOCK_SHARED, rank, 0, win);
            target_buf[0] = 0;
            MPI_Win_unlock(rank, win);
        }

        MPI_Barrier(MPI_COMM_WORLD);

        /* perform compare-and-swap */
        MPI_Win_lock_all(0, win);
        if (rank != dest) {
            MPI_Compare_and_swap(orig_buf, compare_buf, result_buf, MPI_INT, dest, 0, win);
            MPI_Win_flush(dest, win);
        }
        MPI_Win_unlock_all(win);

        MPI_Barrier(MPI_COMM_WORLD);

        /* check results */
        if (rank != dest) {
            MPI_Gather(result_buf, 1, MPI_INT, check_buf, 1, MPI_INT, dest, MPI_COMM_WORLD);
        }
        else {
            MPI_Alloc_mem(sizeof(int) * 3, MPI_INFO_NULL, &check_buf);
            MPI_Gather(target_buf, 1, MPI_INT, check_buf, 1, MPI_INT, dest, MPI_COMM_WORLD);

            if (!(check_buf[dest] == 0 && check_buf[origin_shm] == 0 && check_buf[origin_am] == 1)
                && !(check_buf[dest] == 1 && check_buf[origin_shm] == 0 &&
                     check_buf[origin_am] == 0)) {

                printf
                    ("Wrong results: target result = %d, origin_shm result = %d, origin_am result = %d\n",
                     check_buf[dest], check_buf[origin_shm], check_buf[origin_am]);

                printf
                    ("Expected results (1): target result = 1, origin_shm result = 0, origin_am result = 0\n");
                printf
                    ("Expected results (2): target result = 0, origin_shm result = 0, origin_am result = 1\n");

                errors++;
            }

            MPI_Free_mem(check_buf);
        }
    }

    MPI_Win_free(&win);

    if (rank == origin_am || rank == origin_shm) {
        MPI_Free_mem(orig_buf);
        MPI_Free_mem(result_buf);
        MPI_Free_mem(compare_buf);
    }

  exit_test:
    if (rank == dest && errors == 0)
        printf(" No Errors\n");

    MPI_Finalize();
    return 0;
}
Example #18
//functions for task stealing!!!
int read_last_task( task_type_unit * task0, int target_rank, int num_tries)
{
    // return codes:
    // 0 - element read
    // 1 - q is empty
    // 2 - element not read (more tries than <num_tries>)
    int ret = 0;
    int iamfree = 0;
    int my_offset;

    int tries_cntr=0;


    while(iamfree == 0 && (num_tries==0 || tries_cntr<num_tries)) //try to lock offs window putting -2 value
    {
        tries_cntr++;

        MPI_Win_lock( MPI_LOCK_EXCLUSIVE, target_rank, 0, win_offs );
        MPI_Get( &my_offset, 1, MPI_INT, target_rank, 0, 1, MPI_INT, win_offs );
        MPI_Put( &lock, 1, MPI_INT, target_rank, 0, 1, MPI_INT, win_offs ); //implicitly block OFFSET win on proc <rank> (block code -2)
        MPI_Win_unlock( target_rank, win_offs );
        if(my_offset >= -1) //if the window was not locked before
        {
            iamfree = 1;
        }
    }
    if(iamfree == 0) // q is still blocked - go further
    {
        ret = 2;
        return(ret);
    }

    // offs window is now locked by me! work!
    if(my_offset == -1) //q is empty
    {
        ret = 1;
    }
    else
    {
        //if(ts_logging==1)
        {
            sched_log_file = fopen(sched_log,"a");
            fprintf(sched_log_file, "[%f] Take task N %d\n", MPI_Wtime(), my_offset);
            fclose(sched_log_file);
        }

        //task_type_unit buf[task_type_length];
        MPI_Win_lock( MPI_LOCK_EXCLUSIVE, target_rank, 0, win_q ); //lock the q
        MPI_Get( task0, task_type_length, mpi_task_type_unit, target_rank, my_offset, task_type_length, mpi_task_type_unit, win_q );
        MPI_Win_unlock( target_rank, win_q );

        //task0[0] = buf[0];
        //task0[1] = buf[1];


        my_offset--;

    }
    MPI_Win_lock( MPI_LOCK_EXCLUSIVE, target_rank, 0, win_offs );
    MPI_Put( &my_offset, 1, MPI_INT, target_rank, 0, 1, MPI_INT, win_offs ); //UNBLOCK OFFSET win (put proper offs val - either changed or not)
    MPI_Win_unlock( target_rank, win_offs );

    return(ret);
}
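
/* Standalone sketch (not part of the example above): one way a worker might
 * drive read_last_task() and its return codes when stealing work.  It assumes
 * the task_type_unit / task_type_length declarations and read_last_task()
 * from the example; my_rank, nranks, execute_task() and MAX_LOCK_TRIES are
 * hypothetical placeholders. */
#define MAX_LOCK_TRIES 100

extern void execute_task(task_type_unit *task);   /* hypothetical */

static int steal_and_run_one(int my_rank, int nranks)
{
    task_type_unit task[task_type_length];
    int victim, rc;

    for (victim = (my_rank + 1) % nranks; victim != my_rank;
         victim = (victim + 1) % nranks) {
        rc = read_last_task(task, victim, MAX_LOCK_TRIES);
        if (rc == 0) {              /* 0: element read -- run it */
            execute_task(task);
            return 1;
        }
        /* 1: victim's queue is empty; 2: could not acquire the offset slot
         * within MAX_LOCK_TRIES tries -- in either case, try the next victim. */
    }
    return 0;                       /* nothing to steal anywhere */
}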
Example #19
int main(int argc, char *argv[])
{
	int rank, nprocs, A[SIZE2], B[SIZE2], i;
	MPI_Win win;

	MPI_Init(&argc,&argv);
	Test_Init_No_File();
	MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
	MPI_Comm_rank(MPI_COMM_WORLD,&rank);

	if (nprocs != 2) {
		printf("Run this program with 2 processes\n");
		MPI_Abort(MPI_COMM_WORLD,1);
	}

	if (rank == 0) {
		for (i = 0; i < SIZE2; i++)
			A[i] = B[i] = i;
		MPI_Win_create(NULL, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);

		for (i = 0; i < SIZE1; i++) {
			MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win);
			MPI_Put(A+i, 1, MPI_INT, 1, i, 1, MPI_INT, win);
			MPI_Win_unlock(1, win);
		}

		for (i = 0; i < SIZE1; i++) {
			MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win);
			MPI_Get(B+i, 1, MPI_INT, 1, SIZE1+i, 1, MPI_INT, win);
			MPI_Win_unlock(1, win);
		}

		MPI_Win_free(&win);

		for (i = 0; i < SIZE1; i++)
			if (B[i] != (-4) * (i + SIZE1)) {
				printf("Get Error: B[%d] is %d, should be %d\n", i, B[i], (-4) * (i + SIZE1));
				Test_Failed(NULL);
			}
	}

	else {  /* rank=1 */
		for (i = 0; i < SIZE2; i++)
			B[i] = (-4) * i;
		MPI_Win_create(B, SIZE2 * sizeof(int), sizeof(int), MPI_INFO_NULL,
				MPI_COMM_WORLD, &win);

		MPI_Win_free(&win);

		for (i = 0; i < SIZE1; i++) {
			if (B[i] != i) {
				printf("Put Error: B[%d] is %d, should be %d\n", i, B[i], i);
				Test_Failed(NULL);
			}
		}
	}

	Test_Waitforall();
	Test_Global_Summary();
	MPI_Finalize();
	return 0;
}
Example #20
/*
 * Class:     mpi_Win
 * Method:    unlock
 * Signature: (JI)V
 */
JNIEXPORT void JNICALL Java_mpi_Win_unlock(
        JNIEnv *env, jobject jthis, jlong win, jint rank)
{
    int rc = MPI_Win_unlock(rank, (MPI_Win)win);
    ompi_java_exceptionCheck(env, rc);
}
Example #21
int main(int argc, char **argv) {
    int        me, nproc;
    int        msg_length, round, i;
    double     t_start, t_stop;
    u_int8_t  *snd_buf;  // Send buffer (byte array)
    u_int8_t  *rcv_buf;  // Receive buffer (byte array)
    MPI_Win    window;

    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    if (nproc != 2) {
        if (me == 0) printf("This benchmark should be run on exactly two processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    if (me == 0)
        printf("MPI-2 passive ping-pong latency test, performing %d rounds at each xfer size.\n\n", NUM_ROUNDS);

    MPI_Alloc_mem(MAX_SIZE, MPI_INFO_NULL, &rcv_buf);
    MPI_Alloc_mem(MAX_SIZE, MPI_INFO_NULL, &snd_buf);

    MPI_Win_create(rcv_buf, MAX_SIZE, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &window);

    for (i = 0; i < MAX_SIZE; i++) {
        snd_buf[i] = 1;
    }

    for (msg_length = 1; msg_length <= MAX_SIZE; msg_length *= 2) {
        MPI_Barrier(MPI_COMM_WORLD);
        t_start = MPI_Wtime();

        // Perform NUM_ROUNDS ping-pongs
        for (round = 0; round < NUM_ROUNDS*2; round++) {

            // I am the sender
            if (round % 2 == me) {
                // Clear start and end markers for next round
                MPI_Win_lock(MPI_LOCK_EXCLUSIVE, me, 0, window);
                rcv_buf[0] = 0;
                rcv_buf[msg_length-1] = 0;
                MPI_Win_unlock(me, window);

                MPI_Win_lock(MPI_LOCK_EXCLUSIVE, (me+1)%2, 0, window);
                MPI_Put(snd_buf, msg_length, MPI_BYTE, (me+1)%2, 0, msg_length, MPI_BYTE, window);
                MPI_Win_unlock((me+1)%2, window);
            }

            // I am the receiver: Poll start and end markers
            else {
                u_int8_t val;

                do {
                    //MPI_Iprobe(0, 0, MPI_COMM_WORLD, &val, MPI_STATUS_IGNORE);
                    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, me, 0, window);
                    val = ((volatile u_int8_t*)rcv_buf)[0];
                    MPI_Win_unlock(me, window);
                } while (val == 0);

                do {
                    //MPI_Iprobe(0, 0, MPI_COMM_WORLD, &val, MPI_STATUS_IGNORE);
                    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, me, 0, window);
                    val = ((volatile u_int8_t*)rcv_buf)[msg_length-1];
                    MPI_Win_unlock(me, window);
                } while (val == 0);
            }
        }

        MPI_Barrier(MPI_COMM_WORLD);
        t_stop = MPI_Wtime();

        if (me == 0)
            printf("%8d bytes \t %12.8f us\n", msg_length, (t_stop-t_start)/NUM_ROUNDS*1.0e6);
    }

    MPI_Win_free(&window);
    MPI_Free_mem(snd_buf);
    MPI_Free_mem(rcv_buf);

    MPI_Finalize();

    return 0;
}
Example #22
void add_element_sorted( task_type task0, int rank, int sort_param_num )
{
    int iamfree = 0;
    int my_offset;
    int i;

    while(iamfree == 0) //try to lock offs window putting -2 value
    {
        MPI_Win_lock( MPI_LOCK_EXCLUSIVE, rank, 0, win_offs );
        my_offset = OFFSET[0];
        OFFSET[0] = lock;
        MPI_Win_unlock( rank, win_offs );
        if(my_offset >= -1) //if the window was not locked before
        {
            iamfree = 1;
        }
    }

    //if(ts_logging==1)
    {
        sched_log_file = fopen(sched_log,"a");
        fprintf(sched_log_file, "[%f] Adding task [%3.1f][%6.4f] to my queue, sorted, sort param num %d\n", MPI_Wtime(), task0[0], task0[1], sort_param_num);
        fclose(sched_log_file);
    }
    my_offset++;

    task_type_unit local_q[my_offset*task_type_length];
    MPI_Win_lock( MPI_LOCK_EXCLUSIVE, rank, 0, win_q ); //lock the q
    //locally copy the whole queue
    //MPI_Get( local_q, my_offset*task_type_length, mpi_task_type_unit, rank, 0, my_offset*task_type_length, mpi_task_type_unit, win_q ); //get the last value from queue window
    memcpy(local_q, QUEUE, sizeof(task_type_unit)*my_offset*task_type_length);

    MPI_Win_unlock( rank, win_q );

    // seek for the item before which we should place our new task
    int my_index=0;
    task_type_unit my_param = local_q[sort_param_num];
    for(i=sort_param_num; (i<my_offset*task_type_length) && (task0[sort_param_num]>my_param); i+=task_type_length)
    {
        my_param = local_q[i];
        my_index = i-sort_param_num;
    }
    if(task0[sort_param_num]>my_param) my_index+=task_type_length;

    // we have to shift the last part of the queue up by one element,
    // e.g. in the primitive way done here, or with memcpy:
    // starting from the last element and ending at the insert index, move each param of each element to the next position

    MPI_Win_lock( MPI_LOCK_EXCLUSIVE, rank, 0, win_q ); //lock the q
    // now put the new element to its place
    for(i=0; i<task_type_length; i++)
    {
        QUEUE[my_index + i] = task0[i]; // or use memcpy
    }
    for(i=my_offset*task_type_length-1; i>=my_index; i-- )
    {
        QUEUE[i+task_type_length] = local_q[i];
    }
    MPI_Win_unlock( rank, win_q );

    //if(ts_logging==1)
    {
        sched_log_file = fopen(sched_log,"a");
        fprintf(sched_log_file, "[%f] New number of tasks is N %d\n", MPI_Wtime(), my_offset);
        fclose(sched_log_file);
    }

    MPI_Win_lock( MPI_LOCK_EXCLUSIVE, rank, 0, win_offs );
    OFFSET[0] = my_offset;
    MPI_Win_unlock( rank, win_offs );
}
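
/* Standalone sketch (not part of the examples above): an MPI-3 alternative to
 * the Get-then-Put pair used to grab the offset slot in read_last_task() and
 * add_element_sorted().  A get and a put to the same target location inside a
 * single lock epoch are not ordered by the MPI standard; MPI_Fetch_and_op with
 * MPI_REPLACE performs the read-and-replace atomically.  win_offs and the -2
 * lock sentinel are taken from the examples; the helper names are hypothetical. */
#include <mpi.h>

#define OFFS_LOCK_SENTINEL (-2)

/* Atomically read the current offset on <target_rank> and replace it with the
 * sentinel.  Returns the previous value: >= -1 means we now own the slot,
 * OFFS_LOCK_SENTINEL means somebody else already holds it. */
static int acquire_offset(int target_rank, MPI_Win win_offs)
{
    int sentinel = OFFS_LOCK_SENTINEL, old;
    MPI_Win_lock(MPI_LOCK_SHARED, target_rank, 0, win_offs);
    MPI_Fetch_and_op(&sentinel, &old, MPI_INT, target_rank, 0, MPI_REPLACE, win_offs);
    MPI_Win_unlock(target_rank, win_offs);
    return old;
}

/* Write the (possibly updated) offset back, releasing the slot. */
static void release_offset(int target_rank, int new_offset, MPI_Win win_offs)
{
    int dummy;
    MPI_Win_lock(MPI_LOCK_SHARED, target_rank, 0, win_offs);
    MPI_Fetch_and_op(&new_offset, &dummy, MPI_INT, target_rank, 0, MPI_REPLACE, win_offs);
    MPI_Win_unlock(target_rank, win_offs);
}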
Example #23
int main(int argc, char **argv)
{
    int comm_size, comm_rank, i, by_rank, errs = 0;
    int rc;
    char *rma_win_addr, *local_buf;
    char check;
    MPI_Win win;
    MPI_Status status;

    MTest_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank);

    if ((comm_size > (MAX_BUF_SIZE / PUT_SIZE)) || (comm_size <= 2))
        MPI_Abort(MPI_COMM_WORLD, 1);

    /* If MPI_Alloc_mem returns an error (e.g. because too much memory is
       requested), have the error returned so we can print a message first */
    MPI_Comm_set_errhandler( MPI_COMM_WORLD, MPI_ERRORS_RETURN );

    rc = MPI_Alloc_mem(MAX_BUF_SIZE, MPI_INFO_NULL, (void *) &rma_win_addr);
    if (rc) {
	MTestPrintErrorMsg( "Unable to MPI_Alloc_mem space (not an error)", rc );
	MPI_Abort( MPI_COMM_WORLD, 0 );
    }

    memset(rma_win_addr, 0, MAX_BUF_SIZE);
    MPI_Win_create((void *) rma_win_addr, MAX_BUF_SIZE, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);

    rc = MPI_Alloc_mem(PUT_SIZE, MPI_INFO_NULL, (void *) &local_buf);
    if (rc) {
	MTestPrintErrorMsg( "Unable to MPI_Alloc_mem space (not an error)", rc );
	MPI_Abort( MPI_COMM_WORLD, 0 );
    }

    for (i = 0; i < PUT_SIZE; i++)
        local_buf[i] = 1;

    MPI_Barrier(MPI_COMM_WORLD);

    if (comm_rank == 0) { /* target */
        for (i = 0; i < (NUM_TIMES * (comm_size - 2)); i++) {
            /* Wait for a message from the server to notify me that
             * someone put some data in my window */
            MPI_Recv(&by_rank, 1, MPI_INT, 1, 0, MPI_COMM_WORLD, &status);

            /* Got a message from the server that 'by_rank' put some
             * data in my local window. Check the last byte to make
             * sure we got it correctly. */
            MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);
            MPI_Get((void *) &check, 1, MPI_CHAR, 0, ((by_rank + 1) * PUT_SIZE) - 1, 1,
                    MPI_CHAR, win);
            MPI_Win_unlock(0, win);

            /* If this is not the value I expect, count it as an error */
            if (check != 1)
                errs++;

            /* Reset the buffer to zero for the next round */
            memset((void *) (rma_win_addr + (by_rank * PUT_SIZE)), 0, PUT_SIZE);

            /* Tell the origin that I am ready for the next round */
            MPI_Send(NULL, 0, MPI_INT, by_rank, 0, MPI_COMM_WORLD);
        }
    }

    else if (comm_rank == 1) { /* server */
        for (i = 0; i < (NUM_TIMES * (comm_size - 2)); i++) {
            /* Wait for a message from any of the origin processes
             * informing me that it has put data to the target
             * process */
            MPI_Recv(NULL, 0, MPI_INT, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
            by_rank = status.MPI_SOURCE;

            /* Tell the target process that it should be seeing some
             * data in its local buffer */
            MPI_Send(&by_rank, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
        }
    }

    else { /* origin */
        for (i = 0; i < NUM_TIMES; i++) {
            /* Put some data in the target window */
            MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);
            MPI_Put(local_buf, PUT_SIZE, MPI_CHAR, 0, comm_rank * PUT_SIZE, PUT_SIZE,
                    MPI_CHAR, win);
            MPI_Win_unlock(0, win);

            /* Tell the server that the put has completed */
            MPI_Send(NULL, 0, MPI_INT, 1, 0, MPI_COMM_WORLD);

            /* Wait for a message from the target that it is ready for
             * the next round */
            MPI_Recv(NULL, 0, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
        }
    }

    MPI_Win_free(&win);

    MPI_Free_mem(rma_win_addr);
    MPI_Free_mem(local_buf);

    MTest_Finalize(errs);
    MPI_Finalize();

    return 0;
}
Example #24
int main(int argc, char **argv) {
    int i, j, rank, nranks, peer, bufsize, errors;
    double *win_buf, *src_buf;
    MPI_Win buf_win;

    MTest_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    bufsize = XDIM * YDIM * sizeof(double);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &win_buf);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &src_buf);

    if (rank == 0)
        if (verbose) printf("MPI RMA Strided Accumulate Test:\n");

    for (i = 0; i < XDIM*YDIM; i++) {
        *(win_buf + i) = -1.0;
        *(src_buf + i) = 1.0 + rank;
    }

    MPI_Win_create(win_buf, bufsize, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &buf_win);

    peer = (rank+1) % nranks;

    /* Perform ITERATIONS strided accumulate operations */

    for (i = 0; i < ITERATIONS; i++) {
      int ndims               = 2;
      int src_arr_sizes[2]    = { XDIM, YDIM };
      int src_arr_subsizes[2] = { SUB_XDIM, SUB_YDIM };
      int src_arr_starts[2]   = {    0,    0 };
      int dst_arr_sizes[2]    = { XDIM, YDIM };
      int dst_arr_subsizes[2] = { SUB_XDIM, SUB_YDIM };
      int dst_arr_starts[2]   = {    0,    0 };
      MPI_Datatype src_type, dst_type;

      MPI_Type_create_subarray(ndims, src_arr_sizes, src_arr_subsizes, src_arr_starts,
          MPI_ORDER_C, MPI_DOUBLE, &src_type);

      MPI_Type_create_subarray(ndims, dst_arr_sizes, dst_arr_subsizes, dst_arr_starts,
          MPI_ORDER_C, MPI_DOUBLE, &dst_type);

      MPI_Type_commit(&src_type);
      MPI_Type_commit(&dst_type);

      MPI_Win_lock(MPI_LOCK_EXCLUSIVE, peer, 0, buf_win);

      MPI_Accumulate(src_buf, 1, src_type, peer, 0, 1, dst_type, MPI_SUM, buf_win);

      MPI_Win_unlock(peer, buf_win);

      MPI_Type_free(&src_type);
      MPI_Type_free(&dst_type);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Verify that the results are correct */

    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, buf_win);
    errors = 0;
    for (i = 0; i < SUB_XDIM; i++) {
      for (j = 0; j < SUB_YDIM; j++) {
        const double actual   = *(win_buf + i + j*XDIM);
        const double expected = -1.0 + (1.0 + ((rank+nranks-1)%nranks)) * (ITERATIONS);
        if (fabs(actual - expected) > 1.0e-10) {
          SQUELCH( printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
              rank, j, i, expected, actual); );
          errors++;
          fflush(stdout);
        }
      }
    }
    MPI_Win_unlock(rank, buf_win);

    /* (reconstructed ending -- the tail of this example was truncated in the
       source) free the window and buffers and report the error count */
    MPI_Win_free(&buf_win);
    MPI_Free_mem(win_buf);
    MPI_Free_mem(src_buf);

    MTest_Finalize(errors);
    MPI_Finalize();
    return 0;
}
Example #25
/* Tests passive target RMA on 2 processes.  Tests the lock-single_op-unlock
   optimization for less common cases:

   origin datatype derived, target datatype predefined
*/
int main(int argc, char *argv[]) 
{ 
    int          wrank, nprocs, *srcbuf, *rmabuf, i;
    int          memsize;
    MPI_Datatype vectype;
    MPI_Win      win;
    int          errs = 0;

    MTest_Init(&argc,&argv); 
    MPI_Comm_size(MPI_COMM_WORLD,&nprocs); 
    MPI_Comm_rank(MPI_COMM_WORLD,&wrank); 

    if (nprocs < 2) {
        printf("Run this program with 2 or more processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    memsize = 10 * 4 * nprocs;
    /* Create and initialize data areas */
    srcbuf = (int *)malloc( sizeof(int) * memsize );
    MPI_Alloc_mem( sizeof(int) * memsize, MPI_INFO_NULL, &rmabuf );
    if (!srcbuf || !rmabuf) {
	printf( "Unable to allocate srcbuf and rmabuf of size %d\n", memsize );
	MPI_Abort( MPI_COMM_WORLD, 1 );
    }
    for (i=0; i<memsize; i++) {
      rmabuf[i] = -i;
      srcbuf[i] = i;
    }

    MPI_Win_create( rmabuf, memsize*sizeof(int), sizeof(int), MPI_INFO_NULL, 
		    MPI_COMM_WORLD, &win );

    /* Vector of 10 elements, separated by 4 */
    MPI_Type_vector( 10, 1, 4, MPI_INT, &vectype );
    MPI_Type_commit( &vectype );

    /* Accumulate with a derived origin type and target predefined type*/
    if (wrank == 0) {
	MPI_Barrier( MPI_COMM_WORLD );
	MPI_Win_lock( MPI_LOCK_EXCLUSIVE, 0, 0, win );
	for (i=0; i<10; i++) {
	    if (rmabuf[i] != -i + 4*i) {
		errs++;
		printf( "Acc: expected rmabuf[%d] = %d but saw %d\n", 
			i, -i + 4*i, rmabuf[i] );
	    }
	    rmabuf[i] = -i;
	}
	for (i=10; i<memsize; i++) {
	    if (rmabuf[i] != -i) {
		errs++;
		printf( "Acc: expected rmabuf[%d] = %d but saw %d\n", 
			i, -i, rmabuf[i] );
		rmabuf[i] = -i;
	    }
	}
	MPI_Win_unlock( 0, win );
    }
    else if (wrank == 1) {
	MPI_Win_lock( MPI_LOCK_SHARED, 0, 0, win );
	MPI_Accumulate( srcbuf, 1, vectype, 0, 0, 10, MPI_INT, MPI_SUM, win );
	MPI_Win_unlock( 0, win );
	MPI_Barrier( MPI_COMM_WORLD );
    }
    else {
	MPI_Barrier( MPI_COMM_WORLD );
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Put with a derived origin type and target predefined type*/
    if (wrank == 0) {
	MPI_Barrier( MPI_COMM_WORLD );
	MPI_Win_lock( MPI_LOCK_EXCLUSIVE, 0, 0, win );
	for (i=0; i<10; i++) {
	    if (rmabuf[i] != 4*i) {
		errs++;
		printf( "Put: expected rmabuf[%d] = %d but saw %d\n", 
			i, 4*i, rmabuf[i] );
	    }
	    rmabuf[i] = -i;
	}
	for (i=10; i<memsize; i++) {
	    if (rmabuf[i] != -i) {
		errs++;
		printf( "Put: expected rmabuf[%d] = %d but saw %d\n", 
			i, -i, rmabuf[i] );
		rmabuf[i] = -i;
	    }
	}
	MPI_Win_unlock( 0, win );
    }
    else if (wrank == 1) {
	MPI_Win_lock( MPI_LOCK_SHARED, 0, 0, win );
	MPI_Put( srcbuf, 1, vectype, 0, 0, 10, MPI_INT, win );
	MPI_Win_unlock( 0, win );
	MPI_Barrier( MPI_COMM_WORLD );
    }
    else {
	MPI_Barrier( MPI_COMM_WORLD );
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Put with a derived origin type and target predefined type, with 
       a get (see the move-to-end optimization) */
    if (wrank == 0) {
	MPI_Barrier( MPI_COMM_WORLD );
	MPI_Win_lock( MPI_LOCK_EXCLUSIVE, 0, 0, win );
	for (i=0; i<10; i++) {
	    if (rmabuf[i] != 4*i) {
		errs++;
		printf( "Put: expected rmabuf[%d] = %d but saw %d\n", 
			i, 4*i, rmabuf[i] );
	    }
	    rmabuf[i] = -i;
	}
	for (i=10; i<memsize; i++) {
	    if (rmabuf[i] != -i) {
		errs++;
		printf( "Put: expected rmabuf[%d] = %d but saw %d\n", 
			i, -i, rmabuf[i] );
		rmabuf[i] = -i;
	    }
	}
	MPI_Win_unlock( 0, win );
    }
    else if (wrank == 1) {
	int val;
	MPI_Win_lock( MPI_LOCK_SHARED, 0, 0, win );
	MPI_Get( &val, 1, MPI_INT, 0, 10, 1, MPI_INT, win );
	MPI_Put( srcbuf, 1, vectype, 0, 0, 10, MPI_INT, win );
	MPI_Win_unlock( 0, win );
	MPI_Barrier( MPI_COMM_WORLD );
	if (val != -10) {
	    errs++;
	    printf( "Get: Expected -10, got %d\n", val );
	}
    }
    else {
	MPI_Barrier( MPI_COMM_WORLD );
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Put with a derived origin type and target predefined type, with 
       a get already at the end (see the move-to-end optimization) */
    if (wrank == 0) {
	MPI_Barrier( MPI_COMM_WORLD );
	MPI_Win_lock( MPI_LOCK_EXCLUSIVE, 0, 0, win );
	for (i=0; i<10; i++) {
	    if (rmabuf[i] != 4*i) {
		errs++;
		printf( "Put: expected rmabuf[%d] = %d but saw %d\n", 
			i, 4*i, rmabuf[i] );
	    }
	    rmabuf[i] = -i;
	}
	for (i=10; i<memsize; i++) {
	    if (rmabuf[i] != -i) {
		errs++;
		printf( "Put: expected rmabuf[%d] = %d but saw %d\n", 
			i, -i, rmabuf[i] );
		rmabuf[i] = -i;
	    }
	}
	MPI_Win_unlock( 0, win );
    }
    else if (wrank == 1) {
	int val;
	MPI_Win_lock( MPI_LOCK_SHARED, 0, 0, win );
	MPI_Put( srcbuf, 1, vectype, 0, 0, 10, MPI_INT, win );
	MPI_Get( &val, 1, MPI_INT, 0, 10, 1, MPI_INT, win );
	MPI_Win_unlock( 0, win );
	MPI_Barrier( MPI_COMM_WORLD );
	if (val != -10) {
	    errs++;
	    printf( "Get: Expected -10, got %d\n", val );
	}
    }
    else {
	MPI_Barrier( MPI_COMM_WORLD );
    }

    MPI_Win_free( &win );
    MPI_Free_mem( rmabuf );
    free( srcbuf );
    MPI_Type_free( &vectype );

    MTest_Finalize(errs);
    MPI_Finalize(); 
    return 0; 
} 
Example #26
int main(int argc, char **argv)
{
    int nprocs, mpi_err, *array;
    int getval, disp, errs=0;
    MPI_Win win;
    MPI_Datatype type;
    
    MTest_Init(&argc,&argv); 

    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    if (nprocs != 1) {
        printf("Run this program with 1 process\n");
        MPI_Abort(MPI_COMM_WORLD,1);
    }

    /* To improve reporting of problems about operations, we
       change the error handler to errors return */
    MPI_Comm_set_errhandler( MPI_COMM_WORLD, MPI_ERRORS_RETURN );

    /* create an indexed datatype that points to the second integer 
       in an array (the first integer is skipped). */
    disp  =  1;
    mpi_err = MPI_Type_create_indexed_block(1, 1, &disp, MPI_INT, &type);
    if (mpi_err != MPI_SUCCESS) goto err_return;
    mpi_err = MPI_Type_commit(&type);
    if (mpi_err != MPI_SUCCESS) goto err_return;

    /* allocate window of size 2 integers*/
    mpi_err = MPI_Alloc_mem(2*sizeof(int), MPI_INFO_NULL, &array);
    if (mpi_err != MPI_SUCCESS) goto err_return;

    /* create window object */
    mpi_err = MPI_Win_create(array, 2*sizeof(int), sizeof(int), 
                             MPI_INFO_NULL, MPI_COMM_WORLD, &win);
    if (mpi_err != MPI_SUCCESS) goto err_return;
 
    /* initialize array */
    array[0] = 100;
    array[1] = 200;

    getval = 0;
    
    /* To improve reporting of problems about operations, we
       change the error handler to errors return */
    MPI_Win_set_errhandler( win, MPI_ERRORS_RETURN );

    mpi_err = MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, 0, win);
    if (mpi_err != MPI_SUCCESS) goto err_return;

    /* get the current value of element array[1] */
    mpi_err = MPI_Get(&getval, 1, MPI_INT, 0, 0, 1, type, win);
    if (mpi_err != MPI_SUCCESS) goto err_return;

    mpi_err = MPI_Win_unlock(0, win);
    if (mpi_err != MPI_SUCCESS) goto err_return;

    /* getval should contain the value of array[1] */
    if (getval != array[1]) {
        errs++;
        printf("getval=%d, should be %d\n", getval, array[1]);
    }

    MPI_Free_mem(array);
    MPI_Win_free(&win);
    MPI_Type_free(&type);

    MTest_Finalize(errs);
    MPI_Finalize();
    return 0;

 err_return:
    printf("MPI function error returned an error\n");
    MTestPrintError( mpi_err );
    errs++;
    MTest_Finalize(errs);
    MPI_Finalize();
    return 1;
}
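
/* Standalone sketch (not part of the test above): where the MPI_Get in the
 * test actually reads from.  The target address is window base plus
 * target_disp * disp_unit plus the displacement baked into the indexed-block
 * type (one MPI_INT at index 1), which is why getval ends up equal to
 * array[1]. */
#include <stdio.h>

int main(void)
{
    int disp_unit   = (int) sizeof(int);  /* from MPI_Win_create above */
    int target_disp = 0;                  /* from the MPI_Get call */
    int type_disp   = 1;                  /* indexed-block displacement, in MPI_INT units */

    printf("target byte offset = %d -> array[1]\n",
           target_disp * disp_unit + type_disp * (int) sizeof(int));
    return 0;
}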
Example #27
int main(int argc, char *argv[])
{
    MPI_Win win;
    int errors = 0;
    int rank, nproc, i;
    double *orig_buf;
    double *tar_buf;
    MPI_Datatype vector_dtp;

    MPI_Init(&argc, &argv);

    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    MPI_Alloc_mem(sizeof(double) * DATA_SIZE, MPI_INFO_NULL, &orig_buf);
    MPI_Alloc_mem(sizeof(double) * DATA_SIZE, MPI_INFO_NULL, &tar_buf);

    for (i = 0; i < DATA_SIZE; i++) {
        orig_buf[i] = 1.0;
        tar_buf[i]  = 0.5;
    }

    MPI_Type_vector(5 /* count */ , 3 /* blocklength */ , 5 /* stride */ , MPI_DOUBLE, &vector_dtp);
    MPI_Type_commit(&vector_dtp);

    MPI_Win_create(tar_buf, sizeof(double) * DATA_SIZE, sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win);

    if (rank == 0) {
        MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win);
        MPI_Accumulate(orig_buf, 1, vector_dtp, 1, 0, 1, vector_dtp, MPI_SUM, win);
        MPI_Win_unlock(1, win);
    }

    MPI_Win_fence(0, win);

    if (rank == 1) {
        for (i = 0; i < DATA_SIZE; i++) {
            if (i % 5 < 3) {
                if (tar_buf[i] != 1.5) {
                    printf("tar_buf[i] = %f (expected 1.5)\n", tar_buf[i]);
                    errors++;
                }
            }
            else {
                if (tar_buf[i] != 0.5) {
                    printf("tar_buf[i] = %f (expected 0.5)\n", tar_buf[i]);
                    errors++;
                }
            }
        }
    }

    MPI_Type_free(&vector_dtp);

    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 0) {
        MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win);
        MPI_Accumulate(orig_buf, DATA_SIZE, MPI_DOUBLE, 1, 0, DATA_SIZE, MPI_DOUBLE, MPI_SUM, win);
        MPI_Win_unlock(1, win);
    }

    MPI_Win_fence(0, win);

    if (rank == 1) {
        for (i = 0; i < DATA_SIZE; i++) {
            if (i % 5 < 3) {
                if (tar_buf[i] != 2.5) {
                    printf("tar_buf[i] = %f (expected 2.5)\n", tar_buf[i]);
                    errors++;
                }
            }
            else {
                if (tar_buf[i] != 1.5) {
                    printf("tar_buf[i] = %f (expected 1.5)\n", tar_buf[i]);
                    errors++;
                }
            }
        }
    }

    MPI_Win_free(&win);

    MPI_Free_mem(orig_buf);
    MPI_Free_mem(tar_buf);

    if (rank == 1) {
        if (errors == 0)
            printf(" No Errors\n");
    }

    MPI_Finalize();
    return 0;
}
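
/* Standalone sketch (not part of the test above): enumerate the element
 * offsets touched by the vector type (count=5, blocklength=3, stride=5) to
 * see why the checks above expect updated values exactly where i % 5 < 3
 * (this assumes DATA_SIZE is 25, i.e. the window matches the vector's
 * footprint). */
#include <stdio.h>

int main(void)
{
    int count = 5, blocklength = 3, stride = 5;
    int b, e;

    for (b = 0; b < count; b++)
        for (e = 0; e < blocklength; e++)
            printf("touches tar_buf[%d]\n", b * stride + e);  /* 0,1,2, 5,6,7, ..., 20,21,22 */
    return 0;
}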
Example #28
int main(int argc, char ** argv)
{
  long Block_order;        /* number of columns owned by rank       */
  long Block_size;         /* size of a single block                */
  long Colblock_size;      /* size of column block                  */
  int Tile_order=32;       /* default Tile order                    */
  int tiling;              /* boolean: true if tiling is used       */
  int Num_procs;           /* number of ranks                       */
  long order;              /* order of overall matrix               */
  int send_to, recv_from;  /* ranks with which to communicate       */
  long bytes;              /* combined size of matrices             */
  int my_ID;               /* rank                                  */
  int root=0;              /* rank of root                          */
  int iterations;          /* number of times to do the transpose   */
  int i, j, it, jt, istart;/* dummies                               */
  int iter;                /* index of iteration                    */
  int phase;               /* phase inside staged communication     */
  int colstart;            /* starting column for owning rank       */
  int error;               /* error flag                            */
  double RESTRICT *A_p;    /* original matrix column block          */
  double RESTRICT *B_p;    /* transposed matrix column block        */
  double RESTRICT *Work_in_p;/* workspace for transpose function    */
  double RESTRICT *Work_out_p;/* workspace for transpose function   */
  double abserr,           /* absolute error                        */
         abserr_tot;       /* aggregate absolute error              */
  double epsilon = 1.e-8;  /* error tolerance                       */
  double local_trans_time, /* timing parameters                     */
         trans_time,
         avgtime;
  MPI_Win  rma_win = MPI_WIN_NULL;
  MPI_Info rma_winfo = MPI_INFO_NULL;
  int passive_target = 0;  /* use passive target RMA sync           */
#if MPI_VERSION >= 3
  int  flush_local  = 1;   /* flush local (or remote) after put     */
  int  flush_bundle = 1;   /* flush every <bundle> put calls        */
#endif

/*********************************************************************
** Initialize the MPI environment
*********************************************************************/
  MPI_Init(&argc,&argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &my_ID);
  MPI_Comm_size(MPI_COMM_WORLD, &Num_procs);

/*********************************************************************
** process, test and broadcast input parameters
*********************************************************************/
  error = 0;
  if (my_ID == root) {
    printf("Parallel Research Kernels version %s\n", PRKVERSION);
    printf("MPIRMA matrix transpose: B = A^T\n");

    if (argc <= 3){
      printf("Usage: %s <# iterations> <matrix order> [Tile size]"
             "[sync (0=fence, 1=flush)] [flush local?] [flush bundle]\n",
             *argv);
      error = 1; goto ENDOFTESTS;
    }

    iterations  = atoi(*++argv);
    if(iterations < 1){
      printf("ERROR: iterations must be >= 1 : %d \n",iterations);
      error = 1; goto ENDOFTESTS;
    }

    order = atol(*++argv);
    if (order < Num_procs) {
      printf("ERROR: matrix order %ld should at least # procs %d\n",
             order, Num_procs);
      error = 1; goto ENDOFTESTS;
    }
    if (order%Num_procs) {
      printf("ERROR: matrix order %ld should be divisible by # procs %d\n",
             order, Num_procs);
      error = 1; goto ENDOFTESTS;
    }

    if (argc >= 4) Tile_order     = atoi(*++argv);
    if (argc >= 5) passive_target = atoi(*++argv);
#if MPI_VERSION >= 3
    if (argc >= 6) flush_local    = atoi(*++argv);
    if (argc >= 7) flush_bundle   = atoi(*++argv);
#endif

    ENDOFTESTS:;
  }
  bail_out(error);

  if (my_ID == root) {
    printf("Number of ranks      = %d\n", Num_procs);
    printf("Matrix order         = %ld\n", order);
    printf("Number of iterations = %d\n", iterations);
    if ((Tile_order > 0) && (Tile_order < order))
          printf("Tile size            = %d\n", Tile_order);
    else  printf("Untiled\n");
    if (passive_target) {
#if MPI_VERSION < 3
        printf("Synchronization      = MPI_Win_(un)lock\n");
#else
        printf("Synchronization      = MPI_Win_flush%s (bundle=%d)\n", flush_local ? "_local" : "", flush_bundle);
#endif
    } else {
        printf("Synchronization      = MPI_Win_fence\n");
    }
  }

  /*  Broadcast input data to all ranks */
  MPI_Bcast (&order,          1, MPI_LONG, root, MPI_COMM_WORLD);
  MPI_Bcast (&iterations,     1, MPI_INT,  root, MPI_COMM_WORLD);
  MPI_Bcast (&Tile_order,     1, MPI_INT,  root, MPI_COMM_WORLD);
  MPI_Bcast (&passive_target, 1, MPI_INT,  root, MPI_COMM_WORLD);
#if MPI_VERSION >= 3
  MPI_Bcast (&flush_local,    1, MPI_INT,  root, MPI_COMM_WORLD);
  MPI_Bcast (&flush_bundle,   1, MPI_INT,  root, MPI_COMM_WORLD);
#endif

  /* a non-positive tile size means no tiling of the local transpose */
  tiling = (Tile_order > 0) && (Tile_order < order);
  bytes = 2 * sizeof(double) * order * order;

/*********************************************************************
** The matrix is broken up into column blocks that are mapped one to a
** rank.  Each column block is made up of Num_procs smaller square
** blocks of order block_order.
*********************************************************************/

  Block_order    = order/Num_procs;
  colstart       = Block_order * my_ID;
  Colblock_size  = order * Block_order;
  Block_size     = Block_order * Block_order;

  /* debug message size effects */
  if (my_ID == root) {
    printf("Block_size           = %ld\n", Block_size);
  }

/*********************************************************************
** Create the column block of the test matrix, the row block of the
** transposed matrix, and workspace (workspace only if #procs>1)
*********************************************************************/
  A_p = (double *)prk_malloc(Colblock_size*sizeof(double));
  if (A_p == NULL){
    printf(" Error allocating space for original matrix on node %d\n",my_ID);
    error = 1;
  }
  bail_out(error);

  MPI_Info_create (&rma_winfo);
  MPI_Info_set (rma_winfo, "no locks", "true");
  B_p = (double *)prk_malloc(Colblock_size*sizeof(double));
  if (B_p == NULL){
    printf(" Error allocating space for transpose matrix on node %d\n",my_ID);
    error = 1;
  }
  bail_out(error);

  if (Num_procs>1) {
    Work_out_p = (double *) prk_malloc(Block_size*(Num_procs-1)*sizeof(double));
    if (Work_out_p == NULL){
      printf(" Error allocating space for work_out on node %d\n",my_ID);
      error = 1;
    }
    bail_out(error);

    PRK_Win_allocate(Block_size*(Num_procs-1)*sizeof(double), sizeof(double),
                     rma_winfo, MPI_COMM_WORLD, &Work_in_p, &rma_win);
    if (Work_in_p == NULL){
      printf(" Error allocating space for work on node %d\n",my_ID);
      error = 1;
    }
    bail_out(error);
  }

#if MPI_VERSION >= 3
  if (passive_target && Num_procs>1) {
    MPI_Win_lock_all(MPI_MODE_NOCHECK,rma_win);
  }
#endif

  /* Fill the original column matrix                                                */
  istart = 0;
  for (j=0;j<Block_order;j++) {
    for (i=0;i<order; i++) {
      A(i,j) = (double) (order*(j+colstart) + i);
      B(i,j) = 0.0;
    }
  }

  MPI_Barrier(MPI_COMM_WORLD);

  for (iter = 0; iter<=iterations; iter++) {

    /* start timer after a warmup iteration                                        */
    if (iter == 1) {
      MPI_Barrier(MPI_COMM_WORLD);
      local_trans_time = wtime();
    }

    /* do the local transpose                                                     */
    istart = colstart;
    if (!tiling) {
      for (i=0; i<Block_order; i++) {
        for (j=0; j<Block_order; j++) {
          B(j,i) += A(i,j);
          A(i,j) += 1.0;
        }
      }
    } else {
      for (i=0; i<Block_order; i+=Tile_order) {
        for (j=0; j<Block_order; j+=Tile_order) {
          for (it=i; it<MIN(Block_order,i+Tile_order); it++) {
            for (jt=j; jt<MIN(Block_order,j+Tile_order);jt++) {
              B(jt,it) += A(it,jt);
              A(it,jt) += 1.0;
            }
          }
        }
      }
    }

    if (!passive_target && Num_procs>1) {
      MPI_Win_fence(MPI_MODE_NOSTORE | MPI_MODE_NOPRECEDE, rma_win);
    }

    for (phase=1; phase<Num_procs; phase++){
      send_to = (my_ID - phase + Num_procs)%Num_procs;

      istart = send_to*Block_order;
      if (!tiling) {
        for (i=0; i<Block_order; i++) {
          for (j=0; j<Block_order; j++) {
            Work_out(phase-1,j,i) = A(i,j);
            A(i,j) += 1.0;
          }
        }
      } else {
        for (i=0; i<Block_order; i+=Tile_order) {
          for (j=0; j<Block_order; j+=Tile_order) {
            for (it=i; it<MIN(Block_order,i+Tile_order); it++) {
              for (jt=j; jt<MIN(Block_order,j+Tile_order);jt++) {
                Work_out(phase-1,jt,it) = A(it,jt);
                A(it,jt) += 1.0;
              }
            }
          }
        }
      }

#if MPI_VERSION < 3
      if (passive_target) {
          MPI_Win_lock(MPI_LOCK_SHARED, send_to, MPI_MODE_NOCHECK, rma_win);
      }
#endif
      MPI_Put(Work_out_p+Block_size*(phase-1), Block_size, MPI_DOUBLE, send_to,
              Block_size*(phase-1), Block_size, MPI_DOUBLE, rma_win);

      if (passive_target) {
#if MPI_VERSION < 3
        MPI_Win_unlock(send_to, rma_win);
#else
        if (flush_bundle==1) {
          if (flush_local==1) {
              MPI_Win_flush_local(send_to, rma_win);
          } else {
              MPI_Win_flush(send_to, rma_win);
          }
        } else if ( (phase%flush_bundle) == 0) {
          /* Too lazy to record all targets, so let MPI do it internally (hopefully) */
          if (flush_local==1) {
              MPI_Win_flush_local_all(rma_win);
          } else {
              MPI_Win_flush_all(rma_win);
          }
        }
#endif
      }
    }  /* end of phase loop for puts  */
    if (Num_procs>1) {
      if (passive_target) {
#if MPI_VERSION >= 3
          MPI_Win_flush_all(rma_win);
#endif
          MPI_Barrier(MPI_COMM_WORLD);
      } else {
          MPI_Win_fence(MPI_MODE_NOSTORE, rma_win);
      }
    }

    for (phase=1; phase<Num_procs; phase++) {
      recv_from = (my_ID + phase)%Num_procs;
      istart = recv_from*Block_order;
      /* scatter received block to transposed matrix; no need to tile */
      for (j=0; j<Block_order; j++) {
        for (i=0; i<Block_order; i++) {
          B(i,j) += Work_in(phase-1,i,j);
        }
      }
    } /* end of phase loop for scatters */

    /* for the flush case we need to make sure we have consumed Work_in
       before overwriting it in the next iteration                    */
    if (Num_procs>1 && passive_target) {
      MPI_Barrier(MPI_COMM_WORLD);
    }

  } /* end of iterations */

  local_trans_time = wtime() - local_trans_time;
  MPI_Reduce(&local_trans_time, &trans_time, 1, MPI_DOUBLE, MPI_MAX, root,
             MPI_COMM_WORLD);

  abserr = 0.0;
  istart = 0;
  double addit = ((double)(iterations+1) * (double) (iterations))/2.0;
  for (j=0;j<Block_order;j++) {
    for (i=0;i<order; i++) {
      abserr += ABS(B(i,j) - ((double)(order*i + j+colstart)*(iterations+1)+addit));
    }
  }

  MPI_Reduce(&abserr, &abserr_tot, 1, MPI_DOUBLE, MPI_SUM, root, MPI_COMM_WORLD);

  if (my_ID == root) {
    if (abserr_tot < epsilon) {
      printf("Solution validates\n");
      avgtime = trans_time/(double)iterations;
      printf("Rate (MB/s): %lf Avg time (s): %lf\n",1.0E-06*bytes/avgtime, avgtime);
    }
    else {
      printf("ERROR: Aggregate absolute error %lf exceeds threshold %e\n", abserr_tot, epsilon);
      error = 1;
    }
  }

  bail_out(error);

  if (rma_win!=MPI_WIN_NULL) {
#if MPI_VERSION >=3
    if (passive_target) {
      MPI_Win_unlock_all(rma_win);
    }
#endif
    PRK_Win_free(&rma_win);
  }

  MPI_Finalize();
  exit(EXIT_SUCCESS);

}  /* end of main */
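
/* Standalone sketch (not part of the kernel above): the "flush bundle" idea
 * used in the passive-target path -- issue several MPI_Put calls inside one
 * lock_all epoch and flush only every BUNDLE-th call, plus a final flush_all
 * before the unlock.  All names here are hypothetical. */
#include <mpi.h>
#include <stdio.h>

#define NPUTS  16
#define BUNDLE 4

int main(int argc, char **argv)
{
    int      rank, nproc, peer, i;
    double  *base, val = 1.0;
    MPI_Win  win;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    peer = (rank + 1) % nproc;

    MPI_Win_allocate(NPUTS * sizeof(double), sizeof(double), MPI_INFO_NULL,
                     MPI_COMM_WORLD, &base, &win);

    MPI_Win_lock_all(MPI_MODE_NOCHECK, win);
    for (i = 0; i < NPUTS; i++) {
        MPI_Put(&val, 1, MPI_DOUBLE, peer, i, 1, MPI_DOUBLE, win);
        if ((i + 1) % BUNDLE == 0)
            MPI_Win_flush(peer, win);     /* complete the bundle at the target */
    }
    MPI_Win_flush_all(win);               /* complete anything left over */
    MPI_Win_unlock_all(win);

    MPI_Barrier(MPI_COMM_WORLD);          /* peers may now read their windows */
    if (rank == 0)
        printf("flush-bundle sketch done\n");

    MPI_Win_free(&win);
    MPI_Finalize();
    return 0;
}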
Example #29
int main(int argc, char *argv[])
{
    int rank, nproc;
    int i;
    MPI_Win win;
    int *tar_buf = NULL;
    int *orig_buf = NULL;
    MPI_Datatype derived_dtp;
    int errors = 0;

    MPI_Init(&argc, &argv);

    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (nproc < 3) {
        fprintf(stderr, "Run this program with at least 3 processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Alloc_mem(sizeof(int) * DATA_SIZE, MPI_INFO_NULL, &orig_buf);
    MPI_Alloc_mem(sizeof(int) * DATA_SIZE, MPI_INFO_NULL, &tar_buf);

    for (i = 0; i < DATA_SIZE; i++) {
        orig_buf[i] = 1;
        tar_buf[i] = 0;
    }

    MPI_Type_vector(COUNT, BLOCKLENGTH - 1, STRIDE, MPI_INT, &derived_dtp);
    MPI_Type_commit(&derived_dtp);

    MPI_Win_create(tar_buf, sizeof(int) * DATA_SIZE, sizeof(int),
                   MPI_INFO_NULL, MPI_COMM_WORLD, &win);

    /***** test between rank 0 and rank 1 *****/

    if (rank == 1) {
        MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);

        for (i = 0; i < OPS_NUM; i++) {
            MPI_Accumulate(orig_buf, 1, derived_dtp,
                           0, 0, DATA_SIZE - COUNT, MPI_INT, MPI_SUM, win);
            MPI_Win_flush_local(0, win);
        }

        MPI_Win_unlock(0, win);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* check results */
    if (rank == 0) {
        for (i = 0; i < DATA_SIZE - COUNT; i++) {
            if (tar_buf[i] != OPS_NUM) {
                printf("tar_buf[%d] = %d, expected %d\n", i, tar_buf[i], OPS_NUM);
                errors++;
            }
        }
    }

    for (i = 0; i < DATA_SIZE; i++) {
        tar_buf[i] = 0;
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /***** test between rank 0 and rank 2 *****/

    if (rank == 2) {
        MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);

        for (i = 0; i < OPS_NUM; i++) {
            MPI_Accumulate(orig_buf, 1, derived_dtp,
                           0, 0, DATA_SIZE - COUNT, MPI_INT, MPI_SUM, win);
            MPI_Win_flush_local(0, win);
        }

        MPI_Win_unlock(0, win);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* check results */
    if (rank == 0) {
        for (i = 0; i < DATA_SIZE - COUNT; i++) {
            if (tar_buf[i] != OPS_NUM) {
                printf("tar_buf[%d] = %d, expected %d\n", i, tar_buf[i], OPS_NUM);
                errors++;
            }
        }

        if (errors == 0)
            printf(" No Errors\n");
    }

    MPI_Win_free(&win);

    MPI_Type_free(&derived_dtp);

    MPI_Free_mem(orig_buf);
    MPI_Free_mem(tar_buf);

    MPI_Finalize();

    return 0;
}
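
/* Standalone sketch (not part of the test above): the property the test
 * relies on.  MPI_Win_flush_local only completes the operation at the origin
 * (the origin buffer may be reused), while completion at the target is
 * guaranteed only after MPI_Win_flush or MPI_Win_unlock.  All names here are
 * hypothetical. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int      rank, nproc, *base, val;
    MPI_Win  win;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    MPI_Win_allocate(sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &base, &win);

    /* initialize the local window under a self-lock */
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, win);
    base[0] = 0;
    MPI_Win_unlock(rank, win);
    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 1 && nproc > 1) {
        MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);
        val = 42;
        MPI_Put(&val, 1, MPI_INT, 0, 0, 1, MPI_INT, win);
        MPI_Win_flush_local(0, win);   /* val may be reused now ...             */
        val = -1;                      /* ... but rank 0 need not see 42 yet    */
        MPI_Win_flush(0, win);         /* now the put is complete at the target */
        MPI_Win_unlock(0, win);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 0) {
        MPI_Win_lock(MPI_LOCK_SHARED, rank, 0, win);   /* sync before local read */
        printf("base[0] = %d (expected %d)\n", base[0], nproc > 1 ? 42 : 0);
        MPI_Win_unlock(rank, win);
    }

    MPI_Win_free(&win);
    MPI_Finalize();
    return 0;
}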
Example #30
int main(int argc, char ** argv)
{
  MPI_Aint win_size = WIN_SIZE;
  MPI_Win win;
  MPI_Group group;
  char* base;
  int disp_unit = 1;
  int rank, size, target_rank, target_disp = 1;
  int r, flag;

  /*************************************************************/
  /* Init and set values */
  /*************************************************************/
  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  target_rank = (rank + 1) % size;
  MPI_Alloc_mem(WIN_SIZE, MPI_INFO_NULL, &base);
  if ( NULL == base )
  {
    printf("failed to alloc %d\n", WIN_SIZE);
    exit(16);
  }


  /*************************************************************/
  /* Win_create */
  /*************************************************************/
  /* MPI_Win_create(void *base, MPI_Aint size, int disp_unit, MPI_Info info,
     MPI_Comm comm, MPI_Win *win); */
  r = MPI_Win_create(base, win_size, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win); 
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_create\n", rank);

  /*************************************************************/
  /* First epoch: Tests Put, Get, Get_group, Post, Start,      */
  /*              Complete, Wait, Lock, Unlock                 */
  /*************************************************************/
  r = MPI_Win_get_group(win, &group);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_get_group\n", rank);

  r = MPI_Win_post(group, 0, win);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_post\n", rank);

  r = MPI_Win_start(group, 0, win);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_start\n", rank);

  r = MPI_Win_lock(MPI_LOCK_SHARED, target_rank, 0, win);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_lock\n", rank);

  /* MPI_Put(void *origin_addr, int origin_count, MPI_Datatype
     origin_datatype, int target_rank, MPI_Aint target_disp,
     int target_count, MPI_Datatype target_datatype, MPI_Win win) */
  r = MPI_Put(base, WIN_SIZE, MPI_BYTE, target_rank, target_disp,
     WIN_SIZE, MPI_BYTE, win);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Put\n", rank);

  r = MPI_Win_unlock(target_rank, win);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_unlock\n", rank);

  /* MPI_Get(void *origin_addr, int origin_count, MPI_Datatype
     origin_datatype, int target_rank, MPI_Aint target_disp,
     int target_count, MPI_Datatype target_datatype, MPI_Win win); */
  r = MPI_Get(base, WIN_SIZE, MPI_BYTE, target_rank, target_disp,
      WIN_SIZE, MPI_BYTE, win);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Get\n", rank);

  r = MPI_Win_complete(win);
  if ( MPI_SUCCESS TEST_OP r ) 
    printf("Rank %d failed MPI_Win_complete\n", rank);

  r = MPI_Win_test(win, &flag);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_test\n", rank);

  r = MPI_Win_wait(win);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_wait\n", rank);

  /*************************************************************************/
  /* Second epoch: Tests Accumulate and Fence */
  /*************************************************************************/
  r = MPI_Win_fence(0, win);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_fence\n", rank);

  if ( rank == 0 )
  {
    /* MPI_Accumulate(void *origin_addr, int origin_count, MPI_Datatype
       origin_datatype, int target_rank, MPI_Aint target_disp, 
       int target_count, MPI_Datatype target_datatype, 
       MPI_Op op, MPI_Win win) */
    r = MPI_Accumulate(base, WIN_SIZE, MPI_BYTE, 0,
        target_disp, WIN_SIZE, MPI_BYTE, MPI_SUM, win);
    if ( MPI_SUCCESS TEST_OP r ) 
      printf("Rank %d failed MPI_Accumulate\n", rank);
  }
  r = MPI_Win_fence(0, win);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_fence\n", rank);


  /*************************************************************/
  /* Win_free and Finalize */
  /*************************************************************/
  r = MPI_Win_free(&win);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_free\n", rank);

  MPI_Free_mem(base);   /* memory from MPI_Alloc_mem must be freed with MPI_Free_mem */

  MPI_Finalize();
  return 0;
}