Example #1
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv){
    MPI_Init(&argc, &argv);
    int rank, nproc;

    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Win win;
    MPI_Aint remote;
    MPI_Aint local;
    int a = 4;   /* buffer exposed by rank 0; must stay attached until all RMA is done */

    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &win);

    if(rank == 0){
        /* Attach the buffer and send its absolute address to rank 1. */
        MPI_Win_attach(win, &a, sizeof(int));
        MPI_Get_address(&a, &local);
        MPI_Send(&local, 1, MPI_AINT, 1, 1, MPI_COMM_WORLD);
    }
    else if(rank == 1){
        int val;
        MPI_Status reqstat;
        MPI_Recv(&remote, 1, MPI_AINT, 0, 1, MPI_COMM_WORLD, &reqstat);
        /* RMA calls need an access epoch; use a passive-target lock on rank 0. */
        MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);
        MPI_Get(&val, 1, MPI_INT, 0, remote, 1, MPI_INT, win);
        MPI_Win_unlock(0, win);
        printf("rank 1 read %d\n", val);
    }

    /* Keep rank 0's buffer attached until rank 1 has finished its Get. */
    MPI_Barrier(MPI_COMM_WORLD);
    if(rank == 0)
        MPI_Win_detach(win, &a);
    MPI_Win_free(&win);
    MPI_Finalize();
    return 0;
}
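Example #1 exchanges the attached address with a point-to-point Send/Recv between exactly two ranks. With more ranks the same bookkeeping is usually done collectively; the sketch below is only an illustration under assumed names (exchange_addresses, base, addrs are mine, not from the example): every rank attaches a local int, takes its absolute address, and gathers all addresses so that addrs[r] can later serve as the target displacement for rank r.

/* Sketch (assumed names): collectively exchange attached-buffer addresses. */
#include <stdlib.h>
#include <mpi.h>

MPI_Aint *exchange_addresses(MPI_Win win, int nproc, int *base)
{
    MPI_Aint my_addr;
    MPI_Aint *addrs = (MPI_Aint*) malloc(nproc * sizeof(MPI_Aint));

    MPI_Win_attach(win, base, sizeof(int));   /* expose the local buffer       */
    MPI_Get_address(base, &my_addr);          /* its absolute address          */
    MPI_Allgather(&my_addr, 1, MPI_AINT,      /* everyone learns every address */
                  addrs, 1, MPI_AINT, MPI_COMM_WORLD);
    return addrs;                             /* addrs[r] = displacement on r  */
}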
Example #2
void allocate_memory(int rank, char *rbuf, int size, WINDOW type, MPI_Win *win)
{
    MPI_Status  reqstat;

    switch (type){
        case WIN_DYNAMIC:
            /* Create an initially empty dynamic window, attach rbuf to it, and
               record rbuf's absolute address (sdisp_local / sdisp_remote are
               defined elsewhere in the benchmark). */
            MPI_CHECK(MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, win));
            MPI_CHECK(MPI_Win_attach(*win, (void *)rbuf, size));
            MPI_CHECK(MPI_Get_address(rbuf, &sdisp_local));
            /* Ranks 0 and 1 exchange their attached base addresses pairwise. */
            if(rank == 0){
                MPI_CHECK(MPI_Send(&sdisp_local, 1, MPI_AINT, 1, 1, MPI_COMM_WORLD));
                MPI_CHECK(MPI_Recv(&sdisp_remote, 1, MPI_AINT, 1, 1, MPI_COMM_WORLD, &reqstat));
            }
            else{
                MPI_CHECK(MPI_Recv(&sdisp_remote, 1, MPI_AINT, 0, 1, MPI_COMM_WORLD, &reqstat));
                MPI_CHECK(MPI_Send(&sdisp_local, 1, MPI_AINT, 0, 1, MPI_COMM_WORLD));
            }
            break;
        case WIN_CREATE:
            MPI_CHECK(MPI_Win_create(rbuf, size, 1, MPI_INFO_NULL, MPI_COMM_WORLD, win));
            break;
        default:
            MPI_CHECK(MPI_Win_allocate(size, 1, MPI_INFO_NULL, MPI_COMM_WORLD, rbuf, win));
            break;
    }
}
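The allocate_memory routine above only sets the windows up; a matching teardown for the dynamic case has to detach the buffer before freeing the window. A minimal sketch, assuming the same WINDOW and MPI_CHECK helpers (the function name free_memory and this structure are mine, not taken from the benchmark source):

void free_memory(char *rbuf, WINDOW type, MPI_Win win)
{
    if (type == WIN_DYNAMIC)
        MPI_CHECK(MPI_Win_detach(win, rbuf));  /* detach what allocate_memory attached       */
    MPI_CHECK(MPI_Win_free(&win));             /* memory from MPI_Win_allocate is freed here */
}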
Example #3
JNIEXPORT jlong JNICALL Java_mpi_Win_createDynamicWin(
        JNIEnv *env, jobject jthis,
        jlong info, jlong comm)
{
    MPI_Win win;

    int rc = MPI_Win_create_dynamic(
                            (MPI_Info)info, (MPI_Comm)comm, &win);

    ompi_java_exceptionCheck(env, rc);
    return (jlong)win;
}
Example #4
int main(int argc, char **argv)
{
    int i, rank, nproc;
    int errors = 0, all_errors = 0;
    int val = 0, one = 1;
    int iter;
    MPI_Aint *val_ptrs;
    MPI_Win dyn_win;

    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    iter = ITER_PER_RANK * nproc;

    val_ptrs = malloc(nproc * sizeof(MPI_Aint));
    MPI_Get_address(&val, &val_ptrs[rank]);

    MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, val_ptrs, 1, MPI_AINT, MPI_COMM_WORLD);

    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &dyn_win);
    MPI_Win_attach(dyn_win, &val, sizeof(int));

    for (i = 0; i < iter; i++) {
        MPI_Win_fence(MPI_MODE_NOPRECEDE, dyn_win);
        MPI_Accumulate(&one, 1, MPI_INT, i % nproc, val_ptrs[i % nproc], 1, MPI_INT, MPI_SUM,
                       dyn_win);
        MPI_Win_fence(MPI_MODE_NOSUCCEED, dyn_win);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Read and verify my data */
    if (val != iter) {
        errors++;
        printf("%d -- Got %d, expected %d\n", rank, val, iter);
    }

    MPI_Win_detach(dyn_win, &val);
    MPI_Win_free(&dyn_win);

    MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

    if (rank == 0 && all_errors == 0)
        printf(" No Errors\n");

    free(val_ptrs);
    MPI_Finalize();

    return 0;
}
Example #5
void ompi_win_create_dynamic_f(MPI_Fint *info, MPI_Fint *comm, MPI_Fint *win,
		              MPI_Fint *ierr)
{
    int c_ierr;
    MPI_Win c_win;
    MPI_Info c_info;
    MPI_Comm c_comm;

    c_comm = MPI_Comm_f2c(*comm);
    c_info = MPI_Info_f2c(*info);

    c_ierr = MPI_Win_create_dynamic(c_info, c_comm, &c_win);
    if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr);

    if (MPI_SUCCESS == c_ierr) {
       *win = MPI_Win_c2f(c_win);
    }
}
Example #6
int main(int argc, char **argv) {
    int           procid, nproc, i;
    MPI_Win       llist_win;
    llist_ptr_t   head_ptr, tail_ptr;

    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &procid);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &llist_win);

    /* Process 0 creates the head node */
    if (procid == 0)
        head_ptr.disp = alloc_elem(-1, llist_win);

    /* Broadcast the head pointer to everyone */
    head_ptr.rank = 0;
    MPI_Bcast(&head_ptr.disp, 1, MPI_AINT, 0, MPI_COMM_WORLD);
    tail_ptr = head_ptr;

    /* All processes concurrently append NUM_ELEMS elements to the list */
    for (i = 0; i < NUM_ELEMS; i++) {
        llist_ptr_t new_elem_ptr;
        int success;

        /* Create a new list element and register it with the window */
        new_elem_ptr.rank = procid;
        new_elem_ptr.disp = alloc_elem(procid, llist_win);

        /* Append the new node to the list.  This might take multiple attempts if
           others have already appended and our tail pointer is stale. */
        do {
            llist_ptr_t next_tail_ptr = nil;

            MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);

            MPI_Compare_and_swap((void*) &new_elem_ptr.rank, (void*) &nil.rank,
                                  (void*) &next_tail_ptr.rank, MPI_INT, tail_ptr.rank,
                                  (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.rank), llist_win);

            MPI_Win_unlock(tail_ptr.rank, llist_win);
            success = (next_tail_ptr.rank == nil.rank);

            if (success) {
                int i, flag;
                MPI_Aint result;

                MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);

                MPI_Fetch_and_op(&new_elem_ptr.disp, &result, MPI_AINT, tail_ptr.rank,
                                  (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.disp),
                                  MPI_REPLACE, llist_win);

                /* Note: accumulate is faster, since we don't need the result.  Replacing with
                   Fetch_and_op to create a more complete test case. */
                /*
                MPI_Accumulate(&new_elem_ptr.disp, 1, MPI_AINT, tail_ptr.rank,
                               (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.disp), 1,
                               MPI_AINT, MPI_REPLACE, llist_win);
                */

                MPI_Win_unlock(tail_ptr.rank, llist_win);
                tail_ptr = new_elem_ptr;

                /* For implementations that use pt-to-pt messaging, force progress for other threads'
                   RMA operations. */
                for (i = 0; i < NPROBE; i++)
                    MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE);

            } else {
                /* Tail pointer is stale, fetch the displacement.  May take multiple tries
                   if it is being updated. */
                do {
                    MPI_Aint junk = 0;

                    MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);

                    MPI_Fetch_and_op(NULL, &next_tail_ptr.disp, MPI_AINT, tail_ptr.rank,
                                      (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.disp),
                                      MPI_NO_OP, llist_win);

                    MPI_Win_unlock(tail_ptr.rank, llist_win);
                } while (next_tail_ptr.disp == nil.disp);
                tail_ptr = next_tail_ptr;
            }
        } while (!success);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Traverse the list and verify that all processes inserted exactly the correct
       number of elements. */
    if (procid == 0) {
        int  have_root = 0;
        int  errors    = 0;
        int *counts, count = 0;

        counts = (int*) malloc(sizeof(int) * nproc);
        assert(counts != NULL);

        for (i = 0; i < nproc; i++)
            counts[i] = 0;

        tail_ptr = head_ptr;

        /* Walk the list and tally up the number of elements inserted by each rank */
        while (tail_ptr.disp != nil.disp) {
            llist_elem_t elem;

            MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);

            MPI_Get(&elem, sizeof(llist_elem_t), MPI_BYTE,
                    tail_ptr.rank, tail_ptr.disp, sizeof(llist_elem_t), MPI_BYTE, llist_win);

            MPI_Win_unlock(tail_ptr.rank, llist_win);

            tail_ptr = elem.next;

            /* This is not the root */
            if (have_root) {
                assert(elem.value >= 0 && elem.value < nproc);
                counts[elem.value]++;
                count++;

                if (verbose) {
                    int last_elem = tail_ptr.disp == nil.disp;
                    printf("%2d%s", elem.value, last_elem ? "" : " -> ");
                    if (count % ELEM_PER_ROW == 0 && !last_elem)
                        printf("\n");
                }
            }

            /* This is the root */
            else {
                assert(elem.value == -1);
                have_root = 1;
            }
        }

        if (verbose)
          printf("\n\n");

        /* Verify the counts we collected */
        for (i = 0; i < nproc; i++) {
            int expected = NUM_ELEMS;

            if (counts[i] != expected) {
                printf("Error: Rank %d inserted %d elements, expected %d\n", i, counts[i], expected);
                errors++;
            }
        }

        printf("%s\n", errors == 0 ? " No Errors" : "FAIL");
        free(counts);
    }

    MPI_Win_free(&llist_win);

    /* Free all the elements in the list */
    for ( ; my_elems_count > 0; my_elems_count--)
        MPI_Free_mem(my_elems[my_elems_count-1]);

    MPI_Finalize();
    return 0;
}
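Both linked-list examples (this one and Example #11) rely on an alloc_elem helper that is not shown. The sketch below is a plausible reconstruction, an assumption based on the snippet's use of llist_elem_t, nil, my_elems and the final MPI_Free_mem loop rather than the actual test code: it allocates an element with MPI_Alloc_mem, attaches it to the dynamic window, records it for later cleanup, and returns its absolute address, which doubles as the displacement in a dynamic window.

/* Sketch of the alloc_elem helper assumed by the list examples. */
MPI_Aint alloc_elem(int value, MPI_Win win)
{
    MPI_Aint disp;
    llist_elem_t *elem_ptr;

    /* Allocate and initialize the element, then attach it to the window. */
    MPI_Alloc_mem(sizeof(llist_elem_t), MPI_INFO_NULL, &elem_ptr);
    elem_ptr->value = value;
    elem_ptr->next  = nil;
    MPI_Win_attach(win, elem_ptr, sizeof(llist_elem_t));

    /* Remember it so the final loop can MPI_Free_mem it (the real helper
       would also have to grow my_elems as needed). */
    my_elems[my_elems_count] = elem_ptr;
    my_elems_count++;

    /* In a dynamic window the absolute address is the displacement. */
    MPI_Get_address(elem_ptr, &disp);
    return disp;
}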
Example #7
/**
 *  TODO: Differentiate units belonging to team_id from those not
 *  belonging to team_id. Within this function or outside of it?
 *  FIX: Outside of it.
 *
 *  teamid refers to the superteam from which the new team newteam
 *  is generated.
 */
dart_ret_t dart_team_create(
  dart_team_t teamid,
  const dart_group_t* group,
  dart_team_t *newteam)
{
  MPI_Comm comm;
  MPI_Comm subcomm;
  MPI_Win win;
  uint16_t index, unique_id;
  size_t size;
  dart_team_t max_teamid = -1;
  dart_unit_t sub_unit, unit;

  dart_myid (&unit);
  dart_size (&size);
  dart_team_myid (teamid, &sub_unit);

  int result = dart_adapt_teamlist_convert (teamid, &unique_id);
  if (result == -1) {
    return DART_ERR_INVAL;
  }
  comm = dart_teams[unique_id];
  subcomm = MPI_COMM_NULL;

  MPI_Comm_create (comm, group -> mpi_group, &subcomm);

  *newteam = DART_TEAM_NULL;

  /* Get the maximum next_availteamid among all the units belonging to
   * the parent team specified by 'teamid'. */
  MPI_Allreduce(
    &dart_next_availteamid,
    &max_teamid,
    1,
    MPI_INT32_T,
    MPI_MAX,
    comm);
  dart_next_availteamid = max_teamid + 1;

  if (subcomm != MPI_COMM_NULL) {
    int result = dart_adapt_teamlist_alloc(max_teamid, &index);
    if (result == -1) {
      return DART_ERR_OTHER;
    }
    /* max_teamid is thought to be the new created team ID. */
    *newteam = max_teamid;
    dart_teams[index] = subcomm;
    MPI_Win_create_dynamic(MPI_INFO_NULL, subcomm, &win);
    dart_win_lists[index] = win;
  }
#if 0
  /* Another way of generating the available teamID for the newly created team. */
  if (subcomm != MPI_COMM_NULL)
  {
    /* Get the maximum next_availteamid among all the units belonging to the
     * created sub-communicator. */
    MPI_Allreduce (&next_availteamid, &max_teamid, 1, MPI_INT, MPI_MAX, subcomm);
    int result = dart_adapt_teamlist_alloc (max_teamid, &index);

    if (result == -1)
    {
      return DART_ERR_OTHER;
    }

    *newteam = max_teamid;
    teams[index] = subcomm;
    MPI_Comm_rank (subcomm, &rank);

    if (rank == 0)
    {
      root = sub_unit;
      if (sub_unit != 0)
      {
        MPI_Send (&root, 1, MPI_INT, 0, 0, comm);
      }
    }

    next_availteamid = max_teamid + 1;
  }

  if (sub_unit == 0)
  {
    if (root == -1)
    {
      MPI_Recv (&root, 1, MPI_INT, MPI_ANY_SOURCE, 0, comm, MPI_STATUS_IGNORE);
    }
  }

  MPI_Bcast (&root, 1, MPI_INT, 0, comm);

  /* Broadcast the calculated max_teamid to all the units not belonging to the
   * sub-communicator. */
  MPI_Bcast (&max_teamid, 1, MPI_INT, root, comm);
  if (subcomm == MPI_COMM_NULL)
  {
    /* 'Next_availteamid' is changed iff it is smaller than 'max_teamid + 1' */
    if (max_teamid + 1 > next_availteamid)
    {
      next_availteamid = max_teamid + 1;
    }
  }
#endif

  if (subcomm != MPI_COMM_NULL) {
#if !defined(DART_MPI_DISABLE_SHARED_WINDOWS)
    int    i;
    size_t n;

    MPI_Comm sharedmem_comm;
    MPI_Group sharedmem_group, group_all;
    MPI_Comm_split_type(
      subcomm,
      MPI_COMM_TYPE_SHARED,
      1,
      MPI_INFO_NULL,
      &sharedmem_comm);
    dart_sharedmem_comm_list[index] = sharedmem_comm;
    if (sharedmem_comm != MPI_COMM_NULL) {
      MPI_Comm_size(
        sharedmem_comm,
        &(dart_sharedmemnode_size[index]));

  //    dart_unit_mapping[index] = (int*)malloc (
  //      dart_sharedmem_size[index] * sizeof (int));

      MPI_Comm_group(sharedmem_comm, &sharedmem_group);
      MPI_Comm_group(MPI_COMM_WORLD, &group_all);

      int* dart_unit_mapping = (int *)malloc (
        dart_sharedmemnode_size[index] * sizeof (int));
      int* sharedmem_ranks = (int*)malloc (
        dart_sharedmemnode_size[index] * sizeof (int));

      dart_sharedmem_table[index] = (int*)malloc(size * sizeof(int));

      for (i = 0; i < dart_sharedmemnode_size[index]; i++) {
        sharedmem_ranks[i] = i;
      }

  //    MPI_Group_translate_ranks (sharedmem_group, dart_sharedmem_size[index],
  //        sharedmem_ranks, group_all, dart_unit_mapping[index]);
      MPI_Group_translate_ranks(
        sharedmem_group,
        dart_sharedmemnode_size[index],
        sharedmem_ranks,
        group_all,
        dart_unit_mapping);

      for (n = 0; n < size; n++) {
        dart_sharedmem_table[index][n] = -1;
      }
      for (i = 0; i < dart_sharedmemnode_size[index]; i++) {
        dart_sharedmem_table[index][dart_unit_mapping[i]] = i;
      }
      free (sharedmem_ranks);
      free (dart_unit_mapping);
    }

#endif
    MPI_Win_lock_all(0, win);
    DART_LOG_DEBUG ("%2d: TEAMCREATE  - create team %d out of parent team %d",
           unit, *newteam, teamid);
  }
  return DART_OK;
}
Example #8
void
BoxLib::Initialize (int& argc, char**& argv, bool build_parm_parse, MPI_Comm mpi_comm)
{
    ParallelDescriptor::StartParallel(&argc, &argv, mpi_comm);

#ifndef WIN32
    //
    // Make sure to catch new failures.
    //
    std::set_new_handler(BoxLib::OutOfMemory);

    if (argv[0][0] != '/') {
	char temp[1024];
	getcwd(temp,1024);
	exename = temp;
	exename += "/";
    }
    exename += argv[0];
#endif

#ifdef BL_USE_UPCXX
    upcxx::init(&argc, &argv);
    if (upcxx::myrank() != ParallelDescriptor::MyProc())
	BoxLib::Abort("UPC++ rank != MPI rank");
#endif

#ifdef BL_USE_MPI3
    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &ParallelDescriptor::cp_win);
    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &ParallelDescriptor::fb_win);
    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &ParallelDescriptor::fpb_win);
#endif

    while (!The_Initialize_Function_Stack.empty())
    {
        //
        // Call the registered function.
        //
        (*The_Initialize_Function_Stack.top())();
        //
        // And then remove it from the stack.
        //
        The_Initialize_Function_Stack.pop();
    }

    if(ParallelDescriptor::NProcsSidecar() > 0) {
      if(ParallelDescriptor::InSidecarGroup()) {
        if (ParallelDescriptor::IOProcessor())
          std::cout << "===== SIDECARS INITIALIZED =====" << std::endl;
        ParallelDescriptor::SidecarProcess();
        BoxLib::Finalize();
        return;
      }
    }

    BL_PROFILE_INITIALIZE();

    //
    // Initialize random seed after we're running in parallel.
    //
    BoxLib::InitRandom(ParallelDescriptor::MyProc()+1, ParallelDescriptor::NProcs());

#ifdef BL_USE_MPI
    if (ParallelDescriptor::IOProcessor())
    {
        std::cout << "MPI initialized with "
                  << ParallelDescriptor::NProcs()
                  << " MPI processes\n";
    }
#endif

#ifdef _OPENMP
    if (ParallelDescriptor::IOProcessor())
    {
        std::cout << "OMP initialized with "
                  << omp_get_max_threads()
                  << " OMP threads\n";
    }
#endif

    signal(SIGSEGV, BLBackTrace::handler); // catch seg fault
    signal(SIGINT,  BLBackTrace::handler);

#ifndef BL_AMRPROF
    if (build_parm_parse)
    {
        if (argc == 1)
        {
            ParmParse::Initialize(0,0,0);
        }
        else
        {
            if (strchr(argv[1],'='))
            {
                ParmParse::Initialize(argc-1,argv+1,0);
            }
            else
            {
                ParmParse::Initialize(argc-2,argv+2,argv[1]);
            }
        }
    }

    {
	ParmParse pp("boxlib");
	pp.query("v", verbose);
	pp.query("verbose", verbose);

	int invalid = 0, divbyzero=0, overflow=0;
	pp.query("fpe_trap_invalid", invalid);
	pp.query("fpe_trap_zero", divbyzero);
	pp.query("fpe_trap_overflow", overflow);
	int flags = 0;
	if (invalid)   flags |= FE_INVALID;
	if (divbyzero) flags |= FE_DIVBYZERO;
	if (overflow)  flags |= FE_OVERFLOW;
#if defined(__linux__)
#if !defined(__PGI) || (__PGIC__ >= 16)
	if (flags != 0) {
	    feenableexcept(flags);  // trap floating point exceptions
	    signal(SIGFPE,  BLBackTrace::handler);
	}
#endif
#endif
    }

    ParallelDescriptor::StartTeams();

    ParallelDescriptor::StartSubCommunicator();

    mempool_init();

#endif

    std::cout << std::setprecision(10);

    if (double(std::numeric_limits<long>::max()) < 9.e18)
    {
	if (ParallelDescriptor::IOProcessor())
	{
	    std::cout << "!\n! WARNING: Maximum of long int, "
		      << std::numeric_limits<long>::max() 
		      << ", might be too small for big runs.\n!\n";
	}
    }

#if defined(BL_USE_FORTRAN_MPI) || defined(BL_USE_F_INTERFACES)
    int fcomm = MPI_Comm_c2f(ParallelDescriptor::Communicator());
    bl_fortran_mpi_comm_init (fcomm);
#endif

#if defined(BL_MEM_PROFILING) && defined(BL_USE_F_BASELIB)
    MemProfiler_f::initialize();
#endif
}
Example #9
void run_rma_test(int nprocs_per_node)
{
  int myrank, nprocs;
  int mem_rank;
  MPI_Win win;
  int *baseptr;
  MPI_Aint local_size;

  MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  if (nprocs < nprocs_per_node * 2)
  {
    if (!myrank) printf("should start program with at least %d processes\n", nprocs_per_node * 2);
    MPI_Finalize();
    exit(EXIT_FAILURE);
  }


  mem_rank = nprocs_per_node + nprocs_per_node / 2;

  local_size = (myrank == mem_rank) ? COUNT : 0;

  MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &win);

  MPI_Win_lock_all(0, win);



  int type_size;
  MPI_Type_size(MPI_INT, &type_size);

  size_t nbytes = COUNT * type_size;

  assert(MPI_Alloc_mem(nbytes, MPI_INFO_NULL, &baseptr) == MPI_SUCCESS);
  assert(MPI_Win_attach(win, baseptr, nbytes) == MPI_SUCCESS);

  MPI_Aint ldisp;
  MPI_Aint *disps = malloc(nprocs * sizeof(MPI_Aint));

  assert(MPI_Get_address(baseptr, &ldisp) == MPI_SUCCESS);

  assert(MPI_Allgather(&ldisp, 1, MPI_AINT, disps, 1, MPI_AINT, MPI_COMM_WORLD) == MPI_SUCCESS); /* recvcount is per rank */

  if (myrank == 0)
  {
    for (size_t idx = 0; idx < COUNT; ++idx) {
      baseptr[idx] = idx * COUNT + 1;
    }
  }

  MPI_Barrier(MPI_COMM_WORLD);

  if (myrank == mem_rank) {
    /* Fetch rank 0's whole buffer and check the pattern written above. */
    assert(MPI_Get(baseptr, COUNT, MPI_INT, 0, disps[0], COUNT, MPI_INT, win) == MPI_SUCCESS);
    assert(MPI_Win_flush(0, win) == MPI_SUCCESS);

    for (size_t idx = 0; idx < COUNT; ++idx) {
      assert(baseptr[idx] == idx * COUNT + 1);
    }
  }

  MPI_Barrier(MPI_COMM_WORLD);

  MPI_Win_unlock_all(win);

  MPI_Barrier(MPI_COMM_WORLD);

  MPI_Win_free(&win);

  MPI_Free_mem(baseptr);
  free(disps);

  printf("Test finished\n");
}
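run_rma_test is only the worker routine; its driver is not shown. A hypothetical main (the per-node process count of 2 is an arbitrary choice of mine) could look like:

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    run_rma_test(2);      /* assumed: 2 processes per node */
    MPI_Finalize();
    return 0;
}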
Example #10
int main(int argc, char **argv)
{
    int rank, nproc;
    int errs = 0;
    int array[1024];
    int val = 0;
    int target_rank;
    MPI_Aint bases[2];
    MPI_Aint disp, offset;
    MPI_Win  win;

    MTest_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    if (rank == 0 && nproc != 2) {
        MTestError("Must run with 2 ranks\n");
    }

    /* Get the base address in the middle of the array */
    if (rank == 0) {
        target_rank = 1;
        array[0] = 1234;
        MPI_Get_address(&array[512], &bases[0]);
    } else if (rank == 1) {
        target_rank = 0;
        array[1023] = 1234;
        MPI_Get_address(&array[512], &bases[1]);
    }

    /* Exchange bases */
    MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, bases, 1, MPI_AINT, MPI_COMM_WORLD);

    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &win);
    MPI_Win_attach(win, array, sizeof(int)*1024);

    /* Do MPI_Aint addressing arithmetic */
    if (rank == 0) {
        disp = sizeof(int)*511;
        offset = MPIX_Aint_add(bases[1], disp); /* offset points to array[1023]*/
    } else if (rank == 1) {
        disp = sizeof(int)*512;
        offset = MPIX_Aint_diff(bases[0], disp); /* offset points to array[0] */
    }

    /* Get val and verify it */
    MPI_Win_fence(MPI_MODE_NOPRECEDE, win);
    MPI_Get(&val, 1, MPI_INT, target_rank, offset, 1, MPI_INT, win);
    MPI_Win_fence(MPI_MODE_NOSUCCEED, win);

    if (val != 1234) {
        errs++;
        printf("%d -- Got %d, expected 1234\n", rank, val);
    }

    MPI_Win_detach(win, array);
    MPI_Win_free(&win);

    MTest_Finalize(errs);
    MPI_Finalize();
    return 0;
}
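Example #10 uses the MPIX_-prefixed names from before MPI-3.1 standardized this address arithmetic. With an MPI-3.1 library the same displacements can be computed with the standard routines; a sketch (the helper names are mine, not part of the test):

/* Same arithmetic as in the test, written with the MPI-3.1 names. */
MPI_Aint disp_to_last(MPI_Aint remote_base)   /* remote_base = address of array[512] */
{
    return MPI_Aint_add(remote_base, (MPI_Aint)(511 * sizeof(int)));   /* -> array[1023] */
}

MPI_Aint disp_to_first(MPI_Aint remote_base)
{
    return MPI_Aint_diff(remote_base, (MPI_Aint)(512 * sizeof(int)));  /* -> array[0] */
}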
Example #11
int main(int argc, char **argv) {
    int           procid, nproc, i, j, my_nelem;
    int           pollint = 0;
    double        time;
    MPI_Win       llist_win;
    llist_ptr_t   head_ptr, tail_ptr;

    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &procid);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &llist_win);

    /* Process 0 creates the head node */
    if (procid == 0)
        head_ptr.disp = alloc_elem(procid, llist_win);

    /* Broadcast the head pointer to everyone */
    head_ptr.rank = 0;
    MPI_Bcast(&head_ptr.disp, 1, MPI_AINT, 0, MPI_COMM_WORLD);
    tail_ptr = head_ptr;

    /* All processes append NUM_ELEMS elements to the list; rank 0 has already
     * appended an element. */
    if (procid == 0)
        i = 1;
    else
        i = 0;
    my_nelem = NUM_ELEMS/nproc;
    if (procid < NUM_ELEMS % nproc)
        my_nelem++;

    MPI_Barrier(MPI_COMM_WORLD);
    time = MPI_Wtime();

    for ( ; i < my_nelem; i++) {
        llist_ptr_t new_elem_ptr;
        int success = 0;

        /* Create a new list element and register it with the window */
        new_elem_ptr.rank = procid;
        new_elem_ptr.disp = alloc_elem(procid, llist_win);

        /* Append the new node to the list.  This might take multiple attempts if
           others have already appended and our tail pointer is stale. */
        do {
            int flag;

            /* The tail is at my left neighbor, append my element. */
            if (tail_ptr.rank == (procid + nproc-1) % nproc)
            {
                if (verbose)
                    printf("%d: Appending to <%d, %p>\n", procid, tail_ptr.rank, (void*) tail_ptr.disp);

                MPI_Win_lock(MPI_LOCK_EXCLUSIVE, tail_ptr.rank, 0, llist_win);
#if USE_ACC
                MPI_Accumulate(&new_elem_ptr, sizeof(llist_ptr_t), MPI_BYTE, tail_ptr.rank,
                               (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next), sizeof(llist_ptr_t),
                               MPI_BYTE, MPI_REPLACE, llist_win);
#else
                MPI_Put(&new_elem_ptr, sizeof(llist_ptr_t), MPI_BYTE, tail_ptr.rank,
                        (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next), sizeof(llist_ptr_t),
                        MPI_BYTE, llist_win);
#endif
                MPI_Win_unlock(tail_ptr.rank, llist_win);

                success = 1;
                tail_ptr = new_elem_ptr;
            }

            /* Otherwise, chase the tail. */
            else
            {
                llist_ptr_t next_tail_ptr;

                MPI_Win_lock(MPI_LOCK_EXCLUSIVE, tail_ptr.rank, 0, llist_win);
#if USE_ACC
                MPI_Get_accumulate( NULL, 0, MPI_DATATYPE_NULL, &next_tail_ptr,
                                    sizeof(llist_ptr_t), MPI_BYTE, tail_ptr.rank,
                                    (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next),
                                    sizeof(llist_ptr_t), MPI_BYTE, MPI_NO_OP, llist_win);
#else
                MPI_Get(&next_tail_ptr, sizeof(llist_ptr_t), MPI_BYTE, tail_ptr.rank,
                        (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next),
                        sizeof(llist_ptr_t), MPI_BYTE, llist_win);
#endif
                MPI_Win_unlock(tail_ptr.rank, llist_win);

                if (next_tail_ptr.rank != nil.rank) {
                    if (verbose)
                        printf("%d: Chasing to <%d, %p>\n", procid, next_tail_ptr.rank, (void*) next_tail_ptr.disp);
                    tail_ptr = next_tail_ptr;
                    pollint = MAX(MIN_NPROBE, pollint/2);
                }
                else {
                    for (j = 0; j < pollint; j++)
                        MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE);

                    pollint = MIN(MAX_NPROBE, pollint*2);
                }
            }
        } while (!success);
    }

    MPI_Barrier(MPI_COMM_WORLD);
    time = MPI_Wtime() - time;

    /* Traverse the list and verify that all processes inserted exactly the correct
       number of elements. */
    if (procid == 0) {
        int  errors    = 0;
        int *counts, count = 0;

        counts = (int*) malloc(sizeof(int) * nproc);
        assert(counts != NULL);

        for (i = 0; i < nproc; i++)
            counts[i] = 0;

        tail_ptr = head_ptr;

        MPI_Win_lock_all(0, llist_win);

        /* Walk the list and tally up the number of elements inserted by each rank */
        while (tail_ptr.disp != nil.disp) {
            llist_elem_t elem;

            MPI_Get(&elem, sizeof(llist_elem_t), MPI_BYTE,
                    tail_ptr.rank, tail_ptr.disp, sizeof(llist_elem_t), MPI_BYTE, llist_win);

            MPI_Win_flush(tail_ptr.rank, llist_win);

            tail_ptr = elem.next;

            assert(elem.value >= 0 && elem.value < nproc);
            counts[elem.value]++;
            count++;

            if (verbose) {
                int last_elem = tail_ptr.disp == nil.disp;
                printf("%2d%s", elem.value, last_elem ? "" : " -> ");
                if (count % ELEM_PER_ROW == 0 && !last_elem)
                    printf("\n");
            }
        }

        MPI_Win_unlock_all(llist_win);

        if (verbose)
          printf("\n\n");

        /* Verify the counts we collected */
        for (i = 0; i < nproc; i++) {
            int expected;

            expected = NUM_ELEMS/nproc;
            if (i < NUM_ELEMS % nproc)
                expected++;

            if (counts[i] != expected) {
                printf("Error: Rank %d inserted %d elements, expected %d\n", i, counts[i], expected);
                errors++;
            }
        }

        printf("%s\n", errors == 0 ? " No Errors" : "FAIL");
        free(counts);
    }

    if (print_perf) {
        double max_time;

        MPI_Reduce(&time, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

        if (procid == 0) {
            printf("Total time = %0.2f sec, elem/sec = %0.2f, sec/elem = %0.2f usec\n", max_time, NUM_ELEMS/max_time, max_time/NUM_ELEMS*1.0e6);
        }
    }

    MPI_Win_free(&llist_win);

    /* Free all the elements in the list */
    for ( ; my_elems_count > 0; my_elems_count--)
        MPI_Free_mem(my_elems[my_elems_count-1]);

    MPI_Finalize();
    return 0;
}