int oshmpi_trylock(long * lockp) { int is_locked = -1, nil = -1; oshmpi_lock_t *lock = (oshmpi_lock_t *) lockp; lock->prev = -1; /* Get the last tail, if -1 replace with me */ MPI_Compare_and_swap (&shmem_world_rank, &nil, &(lock->prev), MPI_INT, TAIL, TAIL_DISP, oshmpi_lock_win); MPI_Win_flush (TAIL, oshmpi_lock_win); /* Find if the last proc is holding lock */ if (lock->prev != -1) { MPI_Fetch_and_op (NULL, &is_locked, MPI_INT, lock->prev, LOCK_DISP, MPI_NO_OP, oshmpi_lock_win); MPI_Win_flush (lock->prev, oshmpi_lock_win); if (is_locked) return 0; } /* Add myself in tail */ MPI_Fetch_and_op (&shmem_world_rank, &(lock->prev), MPI_INT, TAIL, TAIL_DISP, MPI_REPLACE, oshmpi_lock_win); MPI_Win_flush (TAIL, oshmpi_lock_win); /* Hold lock */ oshmpi_lock_base[LOCK_DISP] = 1; MPI_Win_sync (oshmpi_lock_win); return 1; }
/*
 * Acquire a DART team lock using a distributed queue.
 *
 * The caller atomically swaps its unit id into the tail slot.  If a
 * predecessor was present, the caller writes its id into the predecessor's
 * list slot and blocks on a zero-byte receive from that predecessor.
 *
 * Returns DART_OK on success (also when the lock is already held by the
 * caller, after printing a warning) and DART_ERR_INVAL when the
 * displacement lookup for the predecessor fails.
 */
dart_ret_t dart_lock_acquire (dart_lock_t lock)
{
    dart_unit_t unitid;
    dart_team_myid (lock -> teamid, &unitid);

    if (lock -> is_acquired == 1) {
        printf ("Warning: LOCK - %2d has acquired the lock already\n", unitid);
        return DART_OK;
    }

    dart_gptr_t tail_gptr;
    dart_gptr_t list_gptr;
    DART_GPTR_COPY(tail_gptr, lock -> gptr_tail);
    DART_GPTR_COPY(list_gptr, lock -> gptr_list);

    uint64_t tail_offset = tail_gptr.addr_or_offs.offset;
    dart_unit_t tail_unit = tail_gptr.unitid;
    int16_t segment = list_gptr.segid;
    uint16_t team_index = list_gptr.flags;

    /* Atomically become the new tail and learn who was there before. */
    int32_t pred;
    MPI_Fetch_and_op (&unitid, &pred, MPI_INT32_T, tail_unit, tail_offset,
                      MPI_REPLACE, dart_win_local_alloc);
    MPI_Win_flush (tail_unit, dart_win_local_alloc);

    if (pred != -1) {
        MPI_Aint list_disp;
        if (dart_adapt_transtable_get_disp (segment, pred, &list_disp) == -1) {
            return DART_ERR_INVAL;
        }

        MPI_Win list_win = dart_win_lists[team_index];

        /* Publish ourselves in the predecessor's slot; the fetched old value
         * is not used. */
        int32_t old_next;
        MPI_Fetch_and_op (&unitid, &old_next, MPI_INT32_T, pred, list_disp,
                          MPI_REPLACE, list_win);
        MPI_Win_flush (pred, list_win);

        DART_LOG_DEBUG ("%2d: LOCK - waiting for notification from %d in team %d",
                        unitid, pred, (lock -> teamid));

        /* Block until the predecessor sends its zero-byte notification. */
        MPI_Status status;
        MPI_Recv (NULL, 0, MPI_INT, pred, 0, dart_teams[team_index], &status);
    }

    DART_LOG_DEBUG ("%2d: LOCK - lock required in team %d", unitid, (lock -> teamid));
    lock -> is_acquired = 1;
    return DART_OK;
}
/** Release a mutex and hand it to the next waiter, if any.
  *
  * @param[in] hdl Handle to the mutex
  * @return        MPI status (always MPI_SUCCESS here)
  */
int MCS_Mutex_unlock(MCS_Mutex hdl)
{
    int successor;

    /* Atomic read of our own next pointer: a waiter may be writing it
     * right now, so a plain load is not used. */
    MPI_Fetch_and_op(NULL, &successor, MPI_INT, shmem_world_rank,
                     MCS_MTX_ELEM_DISP, MPI_NO_OP, hdl->window);
    MPI_Win_flush(shmem_world_rank, hdl->window);

    if (successor == -1) {
        int empty = -1;
        int seen_tail;

        /* If the tail still names us, swap it back to -1 and we are done.
         * Otherwise somebody enqueued and will set our next pointer. */
        MPI_Compare_and_swap(&empty, &shmem_world_rank, &seen_tail, MPI_INT,
                             hdl->tail_rank, MCS_MTX_TAIL_DISP, hdl->window);
        MPI_Win_flush(hdl->tail_rank, hdl->window);

        if (seen_tail != shmem_world_rank) {
            int polling = 1;

            debug_print("%2d: UNLOCK - waiting for next pointer (tail = %d)\n", shmem_world_rank, tail);
            assert(seen_tail >= 0 && seen_tail < shmem_world_size);

            while (polling) {
                MPI_Fetch_and_op(NULL, &successor, MPI_INT, shmem_world_rank,
                                 MCS_MTX_ELEM_DISP, MPI_NO_OP, hdl->window);
                MPI_Win_flush(shmem_world_rank, hdl->window);

                if (successor != -1) {
                    polling = 0;
                } else {
                    int flag;
                    /* Issued between polls, presumably to drive MPI progress. */
                    MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD,
                               &flag, MPI_STATUS_IGNORE);
                }
            }
        }
    }

    /* Wake the next waiter with a zero-byte message. */
    if (successor != -1) {
        debug_print("%2d: UNLOCK - notifying %d\n", shmem_world_rank, successor);
        MPI_Send(NULL, 0, MPI_BYTE, successor, MCS_MUTEX_TAG, hdl->comm);
    }

    debug_print("%2d: UNLOCK - lock released\n", shmem_world_rank);

    return MPI_SUCCESS;
}
int main(int argc, char **argv) { int i, rank, nproc, mpi_type_size; int errors = 0, all_errors = 0; TYPE_C *val_ptr, *res_ptr; MPI_Win win; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nproc); MPI_Type_size(TYPE_MPI, &mpi_type_size); assert(mpi_type_size == sizeof(TYPE_C)); val_ptr = malloc(sizeof(TYPE_C)*nproc); res_ptr = malloc(sizeof(TYPE_C)*nproc); MPI_Win_create(val_ptr, sizeof(TYPE_C)*nproc, sizeof(TYPE_C), MPI_INFO_NULL, MPI_COMM_WORLD, &win); /* Test self communication */ reset_vars(val_ptr, res_ptr, win); for (i = 0; i < ITER; i++) { TYPE_C one = 1, result = -1; MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, win); MPI_Fetch_and_op(&one, &result, TYPE_MPI, rank, 0, MPI_SUM, win); MPI_Win_unlock(rank, win); } MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, win); if ( CMP(val_ptr[0], ITER) ) { SQUELCH( printf("%d->%d -- SELF: expected "TYPE_FMT", got "TYPE_FMT"\n", rank, rank, (TYPE_C) ITER, val_ptr[0]); ); errors++; }
/** Acquire a mutex, blocking until it is handed to this process.
  *
  * @param[in] hdl Handle to the mutex
  * @return        MPI status (always MPI_SUCCESS here)
  */
int MCS_Mutex_lock(MCS_Mutex hdl)
{
    int predecessor;
    MPI_Status status;

    /* Reset our next pointer with a plain store; no remote process can be
     * writing it at this point. */
    hdl->base[MCS_MTX_ELEM_DISP] = -1;
    MPI_Win_sync(hdl->window);

    /* Swap ourselves in as the queue tail and learn the previous tail. */
    MPI_Fetch_and_op(&shmem_world_rank, &predecessor, MPI_INT, hdl->tail_rank,
                     MCS_MTX_TAIL_DISP, MPI_REPLACE, hdl->window);
    MPI_Win_flush(hdl->tail_rank, hdl->window);

    if (predecessor != -1) {
        /* Someone is ahead of us: publish our rank in their next pointer,
         * then wait for their zero-byte hand-off message. */
        MPI_Accumulate(&shmem_world_rank, 1, MPI_INT, predecessor,
                       MCS_MTX_ELEM_DISP, 1, MPI_INT, MPI_REPLACE, hdl->window);
        MPI_Win_flush(predecessor, hdl->window);

        debug_print("%2d: LOCK - waiting for notification from %d\n", shmem_world_rank, predecessor);
        MPI_Recv(NULL, 0, MPI_BYTE, predecessor, MCS_MUTEX_TAG, hdl->comm, &status);
    }

    debug_print("%2d: LOCK - lock acquired\n", shmem_world_rank);

    return MPI_SUCCESS;
}
/*
 * Fetch-and-op latency using passive-target lock/unlock.
 *
 * Rank 0 issues skip + loop MPI_SUM fetch-and-ops against rank 1, each in
 * its own exclusive lock epoch; timing starts at iteration `skip`.
 */
void run_fop_with_lock(int rank, WINDOW type)
{
    MPI_Win win;
    MPI_Aint disp = 0;
    int i;

    allocate_atomic_memory(rank, sbuf_original, rbuf_original,
                           tbuf_original, NULL,
                           (char **)&sbuf, (char **)&rbuf,
                           (char **)&tbuf, NULL, (char **)&rbuf,
                           MAX_MSG_SIZE, type, &win);

    if (rank == 0) {
        if (type == WIN_DYNAMIC) {
            disp = disp_remote;
        }

        i = 0;
        while (i < skip + loop) {
            /* Warm-up iterations are excluded from the timed window. */
            if (i == skip) {
                t_start = MPI_Wtime ();
            }

            MPI_CHECK(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 1, 0, win));
            MPI_CHECK(MPI_Fetch_and_op(sbuf, tbuf, MPI_LONG_LONG, 1, disp, MPI_SUM, win));
            MPI_CHECK(MPI_Win_unlock(1, win));

            i++;
        }

        t_end = MPI_Wtime ();
    }

    MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));

    print_latency(rank, 8);

    free_atomic_memory (sbuf, rbuf, tbuf, NULL, win, rank);
}
/*
 * Fetch-and-op latency using fence synchronization.
 *
 * Rank 0 and the other rank take turns: each iteration contains three
 * fences on both sides, with rank 0 issuing its fetch-and-op between the
 * first two and the peer issuing its own between the last two.  Rank 0
 * prints the averaged one-way latency in microseconds.
 */
void run_fop_with_fence(int rank, WINDOW type)
{
    MPI_Win win;
    MPI_Aint disp = 0;
    int i;

    allocate_atomic_memory(rank, sbuf_original, rbuf_original,
                           tbuf_original, NULL,
                           (char **)&sbuf, (char **)&rbuf,
                           (char **)&tbuf, NULL, (char **)&rbuf,
                           MAX_MSG_SIZE, type, &win);

    if (type == WIN_DYNAMIC) {
        disp = disp_remote;
    }

    MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));

    if (rank != 0) {
        i = 0;
        while (i < skip + loop) {
            MPI_CHECK(MPI_Win_fence(0, win));
            MPI_CHECK(MPI_Win_fence(0, win));
            MPI_CHECK(MPI_Fetch_and_op(sbuf, tbuf, MPI_LONG_LONG, 0, disp, MPI_SUM, win));
            MPI_CHECK(MPI_Win_fence(0, win));
            i++;
        }
    } else {
        i = 0;
        while (i < skip + loop) {
            /* Warm-up iterations are excluded from the timed window. */
            if (i == skip) {
                t_start = MPI_Wtime ();
            }
            MPI_CHECK(MPI_Win_fence(0, win));
            MPI_CHECK(MPI_Fetch_and_op(sbuf, tbuf, MPI_LONG_LONG, 1, disp, MPI_SUM, win));
            MPI_CHECK(MPI_Win_fence(0, win));
            MPI_CHECK(MPI_Win_fence(0, win));
            i++;
        }
        t_end = MPI_Wtime ();
    }

    MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));

    if (rank == 0) {
        /* Halved because each iteration contains one operation per side. */
        fprintf(stdout, "%-*d%*.*f\n", 10, 8, FIELD_WIDTH,
                FLOAT_PRECISION, (t_end - t_start) * 1.0e6 / loop / 2);
        fflush(stdout);
    }

    free_atomic_memory (sbuf, rbuf, tbuf, NULL, win, rank);
}
/*
 * Atomically store `value` into the counter `ct`.
 *
 * With SMP optimizations enabled and active, a compiler atomic exchange is
 * used.  Otherwise the counter is addressed as an offset from the symmetric
 * heap base and replaced through the calling rank's own window target.
 */
void shmemx_ct_set(shmemx_ct_t ct, long value)
{
#ifdef ENABLE_SMP_OPTIMIZATIONS
    if (shmem_world_is_smp) {
        __sync_lock_test_and_set(ct,value);
    } else
#endif
    {
        shmem_offset_t win_offset = (ptrdiff_t)((intptr_t)ct - (intptr_t)shmem_sheap_base_ptr);

        /* MPI_Fetch_and_op always writes the fetched value to result_addr,
         * so it needs a real buffer even though the old value is discarded. */
        long previous;

        MPI_Fetch_and_op(&value, &previous, MPI_LONG, shmem_world_rank, win_offset, MPI_REPLACE, shmem_sheap_win);
        MPI_Win_flush(shmem_world_rank, shmem_sheap_win);
        (void)previous;
    }
    return;
}
/*
 * Atomically read the current value of the counter `ct`.
 *
 * With SMP optimizations enabled and active, the value is read with an
 * atomic add of zero.  Otherwise it is fetched with a no-op fetch-and-op
 * on the calling rank's own window target.
 */
long shmemx_ct_get(shmemx_ct_t ct)
{
    long current;

#ifdef ENABLE_SMP_OPTIMIZATIONS
    if (shmem_world_is_smp) {
        current = __sync_fetch_and_add(ct,0);
        return current;
    }
#endif

    shmem_offset_t heap_offset = (ptrdiff_t)((intptr_t)ct - (intptr_t)shmem_sheap_base_ptr);

    MPI_Fetch_and_op(NULL, &current, MPI_LONG, shmem_world_rank, heap_offset, MPI_NO_OP, shmem_sheap_win);
    /* Complete the operation at the origin before reading `current`. */
    MPI_Win_flush_local(shmem_world_rank, shmem_sheap_win);

    return current;
}
void oshmpi_unlock(long * lockp) { oshmpi_lock_t *lock = (oshmpi_lock_t *) lockp; /* Determine my next process */ MPI_Fetch_and_op (NULL, &(lock->next), MPI_INT, shmem_world_rank, NEXT_DISP, MPI_NO_OP, oshmpi_lock_win); MPI_Win_flush (shmem_world_rank, oshmpi_lock_win); if (lock->next != -1) { MPI_Send (&shmem_world_rank, 1, MPI_INT, lock->next, 999, MPI_COMM_WORLD); } /* Release lock */ oshmpi_lock_base[LOCK_DISP] = -1; MPI_Win_sync (oshmpi_lock_win); return; }
void oshmpi_lock(long * lockp) { MPI_Status status; oshmpi_lock_t *lock = (oshmpi_lock_t *) lockp; /* Replace myself with the last tail */ MPI_Fetch_and_op (&shmem_world_rank, &(lock->prev), MPI_INT, TAIL, TAIL_DISP, MPI_REPLACE, oshmpi_lock_win); MPI_Win_flush (TAIL, oshmpi_lock_win); /* Previous proc holding lock will eventually notify */ if (lock->prev != -1) { /* Send my shmem_world_rank to previous proc's next */ MPI_Accumulate (&shmem_world_rank, 1, MPI_INT, lock->prev, NEXT_DISP, 1, MPI_INT, MPI_REPLACE, oshmpi_lock_win); MPI_Win_flush (lock->prev, oshmpi_lock_win); MPI_Probe (lock->prev, MPI_ANY_TAG, MPI_COMM_WORLD, &status); } /* Hold lock */ oshmpi_lock_base[LOCK_DISP] = 1; MPI_Win_sync (oshmpi_lock_win); return; }
int main(int argc, char **argv) { int procid, nproc, i; MPI_Win llist_win; llist_ptr_t head_ptr, tail_ptr; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &procid); MPI_Comm_size(MPI_COMM_WORLD, &nproc); MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &llist_win); /* Process 0 creates the head node */ if (procid == 0) head_ptr.disp = alloc_elem(-1, llist_win); /* Broadcast the head pointer to everyone */ head_ptr.rank = 0; MPI_Bcast(&head_ptr.disp, 1, MPI_AINT, 0, MPI_COMM_WORLD); tail_ptr = head_ptr; /* All processes concurrently append NUM_ELEMS elements to the list */ for (i = 0; i < NUM_ELEMS; i++) { llist_ptr_t new_elem_ptr; int success; /* Create a new list element and register it with the window */ new_elem_ptr.rank = procid; new_elem_ptr.disp = alloc_elem(procid, llist_win); /* Append the new node to the list. This might take multiple attempts if others have already appended and our tail pointer is stale. */ do { llist_ptr_t next_tail_ptr = nil; MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win); MPI_Compare_and_swap((void*) &new_elem_ptr.rank, (void*) &nil.rank, (void*) &next_tail_ptr.rank, MPI_INT, tail_ptr.rank, (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.rank), llist_win); MPI_Win_unlock(tail_ptr.rank, llist_win); success = (next_tail_ptr.rank == nil.rank); if (success) { int i, flag; MPI_Aint result; MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win); MPI_Fetch_and_op(&new_elem_ptr.disp, &result, MPI_AINT, tail_ptr.rank, (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.disp), MPI_REPLACE, llist_win); /* Note: accumulate is faster, since we don't need the result. Replacing with Fetch_and_op to create a more complete test case. 
*/ /* MPI_Accumulate(&new_elem_ptr.disp, 1, MPI_AINT, tail_ptr.rank, (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.disp), 1, MPI_AINT, MPI_REPLACE, llist_win); */ MPI_Win_unlock(tail_ptr.rank, llist_win); tail_ptr = new_elem_ptr; /* For implementations that use pt-to-pt messaging, force progress for other threads' RMA operations. */ for (i = 0; i < NPROBE; i++) MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE); } else { /* Tail pointer is stale, fetch the displacement. May take multiple tries if it is being updated. */ do { MPI_Aint junk = 0; MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win); MPI_Fetch_and_op(NULL, &next_tail_ptr.disp, MPI_AINT, tail_ptr.rank, (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.disp), MPI_NO_OP, llist_win); MPI_Win_unlock(tail_ptr.rank, llist_win); } while (next_tail_ptr.disp == nil.disp); tail_ptr = next_tail_ptr; } } while (!success); } MPI_Barrier(MPI_COMM_WORLD); /* Traverse the list and verify that all processes inserted exactly the correct number of elements. */ if (procid == 0) { int have_root = 0; int errors = 0; int *counts, count = 0; counts = (int*) malloc(sizeof(int) * nproc); assert(counts != NULL); for (i = 0; i < nproc; i++) counts[i] = 0; tail_ptr = head_ptr; /* Walk the list and tally up the number of elements inserted by each rank */ while (tail_ptr.disp != nil.disp) { llist_elem_t elem; MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win); MPI_Get(&elem, sizeof(llist_elem_t), MPI_BYTE, tail_ptr.rank, tail_ptr.disp, sizeof(llist_elem_t), MPI_BYTE, llist_win); MPI_Win_unlock(tail_ptr.rank, llist_win); tail_ptr = elem.next; /* This is not the root */ if (have_root) { assert(elem.value >= 0 && elem.value < nproc); counts[elem.value]++; count++; if (verbose) { int last_elem = tail_ptr.disp == nil.disp; printf("%2d%s", elem.value, last_elem ? 
"" : " -> "); if (count % ELEM_PER_ROW == 0 && !last_elem) printf("\n"); } } /* This is the root */ else { assert(elem.value == -1); have_root = 1; } } if (verbose) printf("\n\n"); /* Verify the counts we collected */ for (i = 0; i < nproc; i++) { int expected = NUM_ELEMS; if (counts[i] != expected) { printf("Error: Rank %d inserted %d elements, expected %d\n", i, counts[i], expected); errors++; } } printf("%s\n", errors == 0 ? " No Errors" : "FAIL"); free(counts); } MPI_Win_free(&llist_win); /* Free all the elements in the list */ for ( ; my_elems_count > 0; my_elems_count--) MPI_Free_mem(my_elems[my_elems_count-1]); MPI_Finalize(); return 0; }
int main(int argc, char *argv[]) { MPI_Win counter, table; char commands[MAX_COMMANDS][MAX_COMMAND_LEN] = {{0}}; int rank, comm_size, i; // setenv ("MPICH_ASYNC_PROGRESS", "1", 0); MPI_Init (&argc, &argv); MPI_Comm_rank (MPI_COMM_WORLD, &rank); MPI_Comm_size (MPI_COMM_WORLD, &comm_size); MPI_Win_create (&commands, MAX_COMMANDS * MAX_COMMAND_LEN, MAX_COMMAND_LEN, MPI_INFO_NULL, MPI_COMM_WORLD, &table); MPI_Win_fence (0, table); // Distribute command lines to tasks, round-robin, start from task 1 i = 0; if (rank == 0) { char line[MAX_COMMAND_LEN + 2]; MPI_Win_lock_all (MPI_MODE_NOCHECK, table); while (fgets (line, MAX_COMMAND_LEN + 2, stdin) != NULL) { if (i >= MAX_COMMANDS * comm_size) { fprintf (stderr, "MAX_COMMANDS * comm_size (%d) exceeded.\n", i); MPI_Abort(MPI_COMM_WORLD, 1); } if (strlen (line) > MAX_COMMAND_LEN) { fprintf (stderr, "MAX_COMMAND_LEN exceeded, line %d: %s\n", i, line); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Aint disp = (i / comm_size); int target_rank = (i + 1) % comm_size; MPI_Put (line, MAX_COMMAND_LEN, MPI_CHAR, target_rank, disp, MAX_COMMAND_LEN, MPI_CHAR, table); MPI_Win_flush_local (target_rank, table); i++; } MPI_Win_unlock_all (table); } MPI_Win_fence (0, table); // Initialize next_command counter/pointer to the top of the command // line stack. 
int next_command; MPI_Win_create (&next_command, sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &counter); for (i = MAX_COMMANDS - 1; i >= 0; i--) { if (commands[i][0]) { next_command = i; break; } } MPI_Barrier (MPI_COMM_WORLD); // Execute command lines // // Process commands from own rank + steal_increment const int dec = -1; int steal_increment = 0; int current_command; while (steal_increment < comm_size) { int current_rank = (rank + steal_increment) % comm_size; MPI_Win_lock (MPI_LOCK_SHARED, current_rank, 0, counter); MPI_Fetch_and_op (&dec, ¤t_command, MPI_INT, current_rank, 0, MPI_SUM, counter); MPI_Win_unlock (current_rank, counter); if (current_command < 0) { steal_increment++; } else { char command[MAX_COMMAND_LEN] = {0}; MPI_Win_lock (MPI_LOCK_SHARED, current_rank, MPI_MODE_NOCHECK, table); MPI_Get (&command, MAX_COMMAND_LEN, MPI_CHAR, current_rank, current_command, MAX_COMMAND_LEN, MPI_CHAR, table); MPI_Win_unlock (current_rank, table); system (command); } } MPI_Win_free (&counter); MPI_Win_free (&table); MPI_Barrier (MPI_COMM_WORLD); MPI_Finalize (); exit (0); }
/*
 * Benchmark kernel for MPI_Fetch_and_op.
 *
 * The process with rank c_info->pair0 issues iterations->n_sample
 * fetch-and-ops against c_info->pair1 inside one shared lock epoch.  In
 * aggregate mode a single flush follows the whole batch; otherwise each
 * operation is flushed individually.
 *
 * c_info     communicator, window, buffers, datatype and operation to use
 * size       not used by this function
 * iterations sample count and buffer offset / cache-iteration settings
 * run_mode   selects aggregate vs. non-aggregate measurement
 * time       out: average time per operation in seconds, or -1. on
 *            processes that did not measure
 */
void IMB_rma_fetch_and_op (struct comm_info* c_info, int size,
                           struct iter_schedule* iterations,
                           MODES run_mode, double* time)
{
    double res_time = -1.;
    Type_Size r_size;
    int r_off;
    int i;
    int root = c_info->pair1;

    ierr = 0;

    if (c_info->rank < 0) {
        *time = res_time;
        return;
    }

    /* Target displacement step, in elements of the reduction datatype. */
    MPI_Type_size(c_info->red_data_type,&r_size);
    r_off=iterations->r_offs/r_size;

    for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);

    if (c_info->rank == c_info->pair0) {
        MPI_Win_lock(MPI_LOCK_SHARED, root, 0, c_info->WIN);

        if (run_mode->AGGREGATE) {
            res_time = MPI_Wtime();
            for (i = 0; i < iterations->n_sample; i++) {
                ierr = MPI_Fetch_and_op(
                        (char*)c_info->s_buffer+i%iterations->s_cache_iter*iterations->s_offs,
                        (char*)c_info->r_buffer+i%iterations->r_cache_iter*iterations->r_offs,
                        c_info->red_data_type, root,
                        i%iterations->r_cache_iter*r_off, c_info->op_type, c_info->WIN );
                MPI_ERRHAND(ierr);
            }
            ierr = MPI_Win_flush(root, c_info->WIN);
            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
            /* Check the flush result as the non-aggregate path does; done
             * after the timestamp so the measurement is not affected. */
            MPI_ERRHAND(ierr);
        } else {
            res_time = MPI_Wtime();
            for (i = 0; i < iterations->n_sample; i++) {
                ierr = MPI_Fetch_and_op(
                        (char*)c_info->s_buffer+i%iterations->s_cache_iter*iterations->s_offs,
                        (char*)c_info->r_buffer+i%iterations->r_cache_iter*iterations->r_offs,
                        c_info->red_data_type, root,
                        i%iterations->r_cache_iter*r_off, c_info->op_type, c_info->WIN );
                MPI_ERRHAND(ierr);

                ierr = MPI_Win_flush(root, c_info->WIN);
                MPI_ERRHAND(ierr);
            }
            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
        }

        MPI_Win_unlock(root, c_info->WIN);
    }

    MPI_Barrier(c_info->communicator);

    *time = res_time;
    return;
}
/*
 * Release a DART team lock held by the calling unit.
 *
 * The caller tries to swap the tail slot from its own id back to -1.  If
 * the tail names another unit, a successor is enqueuing: the caller polls
 * its own list slot until the successor's id appears, sends that unit a
 * zero-byte notification and resets the slot to -1.
 *
 * Returns DART_OK on success (also when the lock is not held, after
 * printing a warning) and DART_ERR_INVAL when the displacement lookup for
 * the caller's own slot fails.
 */
dart_ret_t dart_lock_release (dart_lock_t lock)
{
    dart_unit_t unitid;
    dart_team_myid (lock -> teamid, &unitid);

    if (lock -> is_acquired == 0) {
        printf ("Warning: RELEASE - %2d has not yet required the lock\n", unitid);
        return DART_OK;
    }

    dart_gptr_t gptr_tail;
    dart_gptr_t gptr_list;
    MPI_Win win;
    int32_t *addr2, next, result[1];
    MPI_Aint disp_list;
    int32_t origin[1] = {-1};

    DART_GPTR_COPY(gptr_tail, lock -> gptr_tail);
    DART_GPTR_COPY(gptr_list, lock -> gptr_list);

    uint64_t offset_tail = gptr_tail.addr_or_offs.offset;
    int16_t seg_id = gptr_list.segid;
    dart_unit_t tail = gptr_tail.unitid;
    uint16_t index = gptr_list.flags;

    /* Local address of this unit's list slot, used for the final reset. */
    dart_gptr_getaddr(gptr_list, (void*)&addr2);

    win = dart_win_lists[index];

    /* Atomicity: Check if we are at the tail of this lock queue, if so, we are done.
     * Otherwise, we still need to send notification. */
    MPI_Compare_and_swap (origin, &unitid, result, MPI_INT32_T, tail, offset_tail, dart_win_local_alloc);
    MPI_Win_flush (tail, dart_win_local_alloc);

    /* We are not at the tail of this lock queue. */
    if (*result != unitid) {
        DART_LOG_DEBUG ("%2d: UNLOCK - waiting for next pointer (tail = %d) in team %d",
                        unitid, *result, (lock -> teamid));

        if (dart_adapt_transtable_get_disp (seg_id, unitid, &disp_list) == -1) {
            return DART_ERR_INVAL;
        }

        /* Wait until the successor has written its id into our slot.  The
         * slot is an int32_t that the acquire path updates with
         * MPI_INT32_T, so it is read with the same datatype: accumulate
         * operations are only atomic with each other when they use the
         * same basic datatype. */
        while (1) {
            MPI_Fetch_and_op (NULL, &next, MPI_INT32_T, unitid, disp_list, MPI_NO_OP, win);
            MPI_Win_flush (unitid, win);

            if (next != -1) break;
        }

        DART_LOG_DEBUG ("%2d: UNLOCK - notifying %d in team %d", unitid, next, (lock -> teamid));

        /* Notifying the next unit waiting on the lock queue. */
        MPI_Send (NULL, 0, MPI_INT, next, 0, dart_teams[index]);

        /* Reset our slot for the next acquisition. */
        *addr2 = -1;
        MPI_Win_sync (win);
    }

    lock -> is_acquired = 0;
    DART_LOG_DEBUG ("%2d: UNLOCK - release lock in team %d", unitid, (lock -> teamid));
    return DART_OK;
}
/*
 * Fetch-and-op latency using post/start/complete/wait synchronization.
 *
 * Rank 0 and its peer alternate roles every iteration: one side opens an
 * access epoch (start ... complete) and issues a fetch-and-op while the
 * other exposes its window (post ... wait).  Rank 0 prints the averaged
 * one-way latency in microseconds.
 */
void run_fop_with_pscw(int rank, WINDOW type)
{
    int destrank, i;
    MPI_Aint disp = 0;
    MPI_Win win;
    MPI_Group comm_group, group;

    MPI_CHECK(MPI_Comm_group(MPI_COMM_WORLD, &comm_group));

    allocate_atomic_memory(rank, sbuf_original, rbuf_original,
                           tbuf_original, NULL,
                           (char **)&sbuf, (char **)&rbuf,
                           (char **)&tbuf, NULL, (char **)&rbuf,
                           MAX_MSG_SIZE, type, &win);

    if (type == WIN_DYNAMIC) {
        disp = disp_remote;
    }

    /* The peer is rank 1 for rank 0, and rank 0 for everyone else. */
    destrank = (rank == 0) ? 1 : 0;
    MPI_CHECK(MPI_Group_incl(comm_group, 1, &destrank, &group));
    MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));

    if (rank == 0) {
        i = 0;
        while (i < skip + loop) {
            MPI_CHECK(MPI_Win_start (group, 0, win));

            /* Warm-up iterations are excluded from the timed window. */
            if (i == skip) {
                t_start = MPI_Wtime ();
            }

            MPI_CHECK(MPI_Fetch_and_op(sbuf, tbuf, MPI_LONG_LONG, 1, disp, MPI_SUM, win));
            MPI_CHECK(MPI_Win_complete(win));
            MPI_CHECK(MPI_Win_post(group, 0, win));
            MPI_CHECK(MPI_Win_wait(win));
            i++;
        }

        t_end = MPI_Wtime ();
    } else {
        /* rank=1 */
        i = 0;
        while (i < skip + loop) {
            MPI_CHECK(MPI_Win_post(group, 0, win));
            MPI_CHECK(MPI_Win_wait(win));
            MPI_CHECK(MPI_Win_start(group, 0, win));
            MPI_CHECK(MPI_Fetch_and_op(sbuf, tbuf, MPI_LONG_LONG, 0, disp, MPI_SUM, win));
            MPI_CHECK(MPI_Win_complete(win));
            i++;
        }
    }

    MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));

    if (rank == 0) {
        /* Halved because each iteration contains one operation per side. */
        fprintf(stdout, "%-*d%*.*f\n", 10, 8, FIELD_WIDTH,
                FLOAT_PRECISION, (t_end - t_start) * 1.0e6 / loop / 2);
        fflush(stdout);
    }

    MPI_CHECK(MPI_Group_free(&group));
    MPI_CHECK(MPI_Group_free(&comm_group));

    free_atomic_memory (sbuf, rbuf, tbuf, NULL, win, rank);
}