/** Perform an I/O vector operation. Local buffers must be private. * * @param[in] op Operation to be performed (ARMCII_OP_PUT, ...) * @param[in] src Array of source pointers * @param[in] dst Array of destination pointers * @param[in] count Length of pointer arrays * @param[in] size Size of each transfer * @param[in] datatype Data type for accumulate op (ignored for all others) * @param[in] overlapping Do remote regions overlap? * @param[in] same_alloc Do remote regions correspond to the same allocation? * @param[in] proc Target process * @return Zero on success, error code otherwise */ int ARMCII_Iov_op_dispatch(enum ARMCII_Op_e op, void **src, void **dst, int count, int size, int datatype, int overlapping, int same_alloc, int proc) { MPI_Datatype type; int type_count, type_size; if (op == ARMCII_OP_ACC) { ARMCII_Acc_type_translate(datatype, &type, &type_size); type_count = size/type_size; ARMCII_Assert_msg(size % type_size == 0, "Transfer size is not a multiple of type size"); } else { type = MPI_BYTE; MPI_Type_size(type, &type_size); type_count = size/type_size; ARMCII_Assert_msg(size % type_size == 0, "Transfer size is not a multiple of type size"); } // CONSERVATIVE CASE: If remote pointers overlap or remote pointers correspond to // multiple allocations, use the safe implementation to avoid invalid MPI // use. if (overlapping || !same_alloc || ARMCII_GLOBAL_STATE.iov_method == ARMCII_IOV_CONSRV) { if (overlapping) ARMCII_Warning("IOV remote buffers overlap\n"); if (!same_alloc) ARMCII_Warning("IOV remote buffers are not within the same allocation\n"); return ARMCII_Iov_op_safe(op, src, dst, count, type_count, type, proc); } // OPTIMIZED CASE: It's safe for us to issue all the operations under a // single lock. 
else if ( ARMCII_GLOBAL_STATE.iov_method == ARMCII_IOV_DIRECT || ARMCII_GLOBAL_STATE.iov_method == ARMCII_IOV_AUTO ) { if (ARMCII_GLOBAL_STATE.no_mpi_bottom == 1) { return ARMCII_Iov_op_datatype_no_bottom(op, src, dst, count, type_count, type, proc); } else { return ARMCII_Iov_op_datatype(op, src, dst, count, type_count, type, proc); } } else if (ARMCII_GLOBAL_STATE.iov_method == ARMCII_IOV_BATCHED) { return ARMCII_Iov_op_batched(op, src, dst, count, type_count, type, proc); } else { ARMCII_Error("unknown iov method (%d)\n", ARMCII_GLOBAL_STATE.iov_method); return 1; } }
/** Destroy/free a shared memory region.  Collective over the given group.
  *
  * A process may pass NULL for the region (permitted when it allocated 0
  * bytes); the region is then identified from a <base address, proc> pair
  * supplied by another member of the group.  If every process passes NULL
  * there is nothing to free and the call returns.
  *
  * @param[in] mreg   Memory region to destroy, or NULL if this process has no
  *                   handle on it.
  * @param[in] group  Group on which to perform the free.
  */
void gmr_destroy(gmr_t *mreg, ARMCI_Group *group)
{
  int   grp_rank, grp_size;
  int   my_world_rank, world_size;
  int   owner_in, owner_world, owner_grp;
  void *base_addr = NULL;

  MPI_Comm_rank(group->comm, &grp_rank);
  MPI_Comm_size(group->comm, &grp_size);
  MPI_Comm_rank(ARMCI_GROUP_WORLD.comm, &my_world_rank);
  MPI_Comm_size(ARMCI_GROUP_WORLD.comm, &world_size);

  /* Each process that holds the region volunteers its world rank and its own
   * slice's base address; processes that were passed NULL contribute -1. */
  if (mreg != NULL) {
    owner_in  = my_world_rank;
    base_addr = mreg->slices[my_world_rank].base;
  } else {
    owner_in = -1;
  }

  /* Collectively agree on which process provides the base address: the
   * largest world rank among the volunteers. */
  MPI_Allreduce(&owner_in, &owner_world, 1, MPI_INT, MPI_MAX, group->comm);

  /* Everyone passed NULL.  Nothing to free. */
  if (owner_world < 0)
    return;

  /* The broadcast root must be expressed as a rank within the group. */
  owner_grp = ARMCII_Translate_absolute_to_group(group, owner_world);

  /* Ship the chosen base address (as raw pointer bytes) to the whole group. */
  MPI_Bcast(&base_addr, sizeof(void*), MPI_BYTE, owner_grp, group->comm);

  /* Processes that were passed NULL find the region via the <base, proc> pair. */
  if (mreg == NULL)
    mreg = gmr_lookup(base_addr, owner_world);

  /* If it's still not found, the user may have passed the wrong group. */
  ARMCII_Assert_msg(mreg != NULL, "Could not locate the desired allocation");

  /* A region under direct local access is released first; any other
   * non-unlocked state is reported as an error. */
  if (mreg->lock_state == GMR_LOCK_DLA) {
    ARMCII_Warning("Releasing direct local access before freeing shared allocation\n");
    gmr_dla_unlock(mreg);
  } else if (mreg->lock_state != GMR_LOCK_UNLOCKED) {
    ARMCII_Error("Unable to free locked memory region (%d)\n", mreg->lock_state);
  }

  /* Unlink from the doubly-linked list of mem regions. */
  if (mreg->prev != NULL) {
    mreg->prev->next = mreg->next;
  } else {
    /* No predecessor: this region must be the list head. */
    ARMCII_Assert(gmr_list == mreg);
    gmr_list = mreg->next;
  }

  /* The successor inherits our predecessor (NULL when we were the head). */
  if (mreg->next != NULL)
    mreg->next->prev = mreg->prev;

  /* Destroy the window, then release the local slice and bookkeeping. */
  MPI_Win_free(&mreg->window);

  if (mreg->slices[my_world_rank].base != NULL)
    MPI_Free_mem(mreg->slices[my_world_rank].base);

  free(mreg->slices);
  ARMCIX_Destroy_mutexes_hdl(mreg->rmw_mutex);

  free(mreg);
}