/*
 * Initial query function that is invoked during initialization, allowing
 * this module to indicate what level of thread support it provides.
 */
int mca_atomic_basic_init(bool enable_progress_threads, bool enable_threads)
{
    int rc = OSHMEM_SUCCESS;
    void *ptr = NULL;
    int num_pe = oshmem_num_procs();

    rc = MCA_MEMHEAP_CALL(private_alloc(num_pe * sizeof(char), &ptr));
    if (rc == OSHMEM_SUCCESS) {
        atomic_lock_sync = (char *) ptr;
        memset(atomic_lock_sync, ATOMIC_LOCK_IDLE, sizeof(char) * num_pe);

        rc = MCA_MEMHEAP_CALL(private_alloc(sizeof(int), &ptr));
        if (rc == OSHMEM_SUCCESS) {
            atomic_lock_turn = (int *) ptr;
            *atomic_lock_turn = 0;

            local_lock_sync = (char *) malloc(num_pe * sizeof(char));
            local_lock_turn = (int *) malloc(sizeof(int));
            if (!local_lock_sync || !local_lock_turn) {
                rc = OSHMEM_ERR_OUT_OF_RESOURCE;
            } else {
                memcpy((void *) local_lock_sync, (void *) atomic_lock_sync,
                       sizeof(char) * num_pe);
                *local_lock_turn = *atomic_lock_turn;
            }
        }
    }

    return rc;
}
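/*
 * Illustration only (not part of the OSHMEM tree): the symmetric lock arrays
 * allocated above back the "basic" atomic component, which serializes access
 * across PEs rather than using hardware atomics. The user-visible analogue in
 * the OpenSHMEM API is a symmetric lock; the sketch below is a minimal,
 * hypothetical example assuming a standard OpenSHMEM 1.x shmem.h.
 */
#include <shmem.h>

static long lock = 0;       /* symmetric lock variable */
static int counter = 0;     /* symmetric data protected by the lock */

int main(void)
{
    shmem_init();

    shmem_set_lock(&lock);                  /* global mutual exclusion */
    int old = shmem_int_g(&counter, 0);     /* read the counter on PE 0 */
    shmem_int_p(&counter, old + 1, 0);      /* write the incremented value back */
    shmem_clear_lock(&lock);

    shmem_barrier_all();
    shmem_finalize();
    return 0;
}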
int mca_scoll_enable(void)
{
    int ret = OSHMEM_SUCCESS;

    if (!mca_scoll_sync_array) {
        void *ptr = NULL;
        int i;

        MCA_MEMHEAP_CALL(private_alloc(_SHMEM_BARRIER_SYNC_SIZE * sizeof(*mca_scoll_sync_array),
                                       &ptr));
        mca_scoll_sync_array = ptr;

        for (i = 0; i < _SHMEM_BARRIER_SYNC_SIZE; i++) {
            mca_scoll_sync_array[i] = _SHMEM_SYNC_VALUE;
        }
    }

    /* Note: this is done only to support FCA; we should look for a way
     * to avoid this hack. */
    if (OSHMEM_SUCCESS != (ret = mca_scoll_base_select(oshmem_group_all))) {
        return ret;
    }
    if (OSHMEM_SUCCESS != (ret = mca_scoll_base_select(oshmem_group_self))) {
        return ret;
    }

    return OSHMEM_SUCCESS;
}
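/*
 * Illustration only: the sync array set up above mirrors the pSync pattern
 * that the OpenSHMEM API exposes to applications. A minimal sketch, assuming
 * an OpenSHMEM 1.2+ shmem.h that defines SHMEM_BARRIER_SYNC_SIZE and
 * SHMEM_SYNC_VALUE; the program and variable names are hypothetical.
 */
#include <shmem.h>

static long pSync[SHMEM_BARRIER_SYNC_SIZE];     /* symmetric work array */

int main(void)
{
    int i;

    shmem_init();

    for (i = 0; i < SHMEM_BARRIER_SYNC_SIZE; i++) {
        pSync[i] = SHMEM_SYNC_VALUE;
    }
    shmem_barrier_all();    /* make sure pSync is initialized on every PE */

    /* barrier over all PEs (start 0, stride 0) using the user-provided pSync */
    shmem_barrier(0, 0, shmem_n_pes(), pSync);

    shmem_finalize();
    return 0;
}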
int oshmem_shmem_preconnect_all(void)
{
    int rc = OSHMEM_SUCCESS;

    /* Force QP creation and rkey exchange for the memheap.
     * Does not force exchange of static variables. */
    if (oshmem_preconnect_all) {
        long val;
        int nproc;
        int my_pe;
        int i;

        val = 0xdeadbeaf;

        if (!preconnect_value) {
            rc = MCA_MEMHEAP_CALL(private_alloc(sizeof(long),
                                                (void **) &preconnect_value));
        }
        if (!preconnect_value || (rc != OSHMEM_SUCCESS)) {
            SHMEM_API_ERROR("shmem_preconnect_all failed");
            return OSHMEM_ERR_OUT_OF_RESOURCE;
        }

        nproc = oshmem_num_procs();
        my_pe = oshmem_my_proc_id();
        for (i = 0; i < nproc; i++) {
            /* put one long to every peer, starting from our own rank,
             * so that each connection is established eagerly */
            shmem_long_p(preconnect_value, val, (my_pe + i) % nproc);
        }
        shmem_barrier_all();
        SHMEM_API_VERBOSE(5, "Preconnected all PEs");
    }

    return OSHMEM_SUCCESS;
}
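/*
 * Illustration only: the same warm-up pattern written as an application-level
 * program, assuming the standard OpenSHMEM 1.2+ API. The symmetric variable
 * "target" is hypothetical. Each PE puts one long to every peer, offset by
 * its own rank, which forces every connection to be created up front instead
 * of lazily on first use.
 */
#include <shmem.h>

int main(void)
{
    shmem_init();

    int npes = shmem_n_pes();
    int me = shmem_my_pe();
    long *target = (long *) shmem_malloc(sizeof(long));

    for (int i = 0; i < npes; i++) {
        shmem_long_p(target, 1, (me + i) % npes);
    }
    shmem_barrier_all();    /* all puts are complete everywhere after this */

    shmem_free(target);
    shmem_finalize();
    return 0;
}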
int oshmem_shmem_preconnect_all_finalize(void)
{
    if (preconnect_value) {
        MCA_MEMHEAP_CALL(private_free(preconnect_value));
        preconnect_value = NULL;
    }

    return OSHMEM_SUCCESS;
}
static int mca_scoll_base_close(void)
{
    /* This call should be done before memheap close */
    if (mca_scoll_sync_array) {
        void *ptr = (void *) mca_scoll_sync_array;

        MCA_MEMHEAP_CALL(private_free(ptr));
        mca_scoll_sync_array = NULL;
    }

    return mca_base_framework_components_close(&oshmem_scoll_base_framework, NULL);
}
int oshmem_shmem_preconnect_all(void)
{
    int mca_value = 0;
    int rc = OSHMEM_SUCCESS;

    (void) mca_base_var_register("oshmem", "runtime", NULL, "preconnect_all",
                                 "Whether to force SHMEM processes to fully "
                                 "wire up the connections between SHMEM "
                                 "processes during initialization (vs. making "
                                 "connections lazily -- upon the first SHMEM "
                                 "traffic between each process peer pair)",
                                 MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                 MCA_BASE_VAR_FLAG_SETTABLE,
                                 OPAL_INFO_LVL_9,
                                 MCA_BASE_VAR_SCOPE_READONLY,
                                 &mca_value);

    /* Force QP creation and rkey exchange for the memheap.
     * Does not force exchange of static variables. */
    if (mca_value) {
        long val;
        int nproc = 0;
        int i;

        val = 0xdeadbeaf;

        if (!preconnect_value) {
            rc = MCA_MEMHEAP_CALL(private_alloc(sizeof(long),
                                                (void **) &preconnect_value));
        }
        if (!preconnect_value || (rc != OSHMEM_SUCCESS)) {
            SHMEM_API_ERROR("shmem_preconnect_all failed");
            return OSHMEM_ERR_OUT_OF_RESOURCE;
        }

        nproc = _num_pes();
        for (i = 0; i < nproc; i++) {
            shmem_long_p(preconnect_value, val, i);
        }
        shmem_fence();
        shmem_barrier_all();
        SHMEM_API_VERBOSE(5, "Preconnected all PEs");
    }

    return OSHMEM_SUCCESS;
}
static inline void _shfree(void* ptr) { int rc; RUNTIME_CHECK_INIT(); RUNTIME_CHECK_ADDR(ptr); #if OSHMEM_SPEC_COMPAT == 1 shmem_barrier_all(); #endif rc = MCA_MEMHEAP_CALL(free(ptr)); if (OSHMEM_SUCCESS != rc) { SHMEM_API_VERBOSE(10, "shfree failure."); } }
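/*
 * Illustration only: how the free path above looks from the application side,
 * assuming the standard OpenSHMEM 1.2+ API. Symmetric memory must not be
 * released while another PE may still be accessing it, which is why a helper
 * like _shfree above synchronizes (shmem_barrier_all) before handing the
 * buffer back to the memheap when OSHMEM_SPEC_COMPAT is enabled.
 */
#include <shmem.h>

int main(void)
{
    shmem_init();

    long *buf = (long *) shmem_malloc(sizeof(long));    /* collective */
    *buf = shmem_my_pe();

    shmem_barrier_all();    /* everyone has finished writing its own copy */
    /* remote puts/gets against buf could safely happen here */

    shmem_free(buf);        /* collective; synchronizes before reclaiming */

    shmem_finalize();
    return 0;
}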
int mca_atomic_basic_finalize(void)
{
    void *ptr = NULL;

    ptr = (void *) atomic_lock_sync;
    MCA_MEMHEAP_CALL(private_free(ptr));
    atomic_lock_sync = NULL;

    ptr = (void *) atomic_lock_turn;
    MCA_MEMHEAP_CALL(private_free(ptr));
    atomic_lock_turn = NULL;

    if (local_lock_sync) {
        free((void *) local_lock_sync);
        local_lock_sync = NULL;
    }

    if (local_lock_turn) {
        free((void *) local_lock_turn);
        local_lock_turn = NULL;
    }

    return OSHMEM_SUCCESS;
}
int oshmem_shmem_init(int argc, char **argv, int requested, int *provided)
{
    int ret = OSHMEM_SUCCESS;

    if (!oshmem_shmem_initialized) {
        if (!ompi_mpi_initialized && !ompi_mpi_finalized) {
            ret = ompi_mpi_init(argc, argv, requested, provided);
        }

        MPI_Comm_dup(MPI_COMM_WORLD, &oshmem_comm_world);

        if (OSHMEM_SUCCESS == ret) {
            ret = _shmem_init(argc, argv, requested, provided);
        }

        if (OSHMEM_SUCCESS == ret) {
            oshmem_shmem_initialized = true;

            if (OSHMEM_SUCCESS != shmem_lock_init()) {
                SHMEM_API_ERROR("shmem_lock_init() failed");
                return OSHMEM_ERROR;
            }

            /* this is a collective op, implies barrier */
            MCA_MEMHEAP_CALL(get_all_mkeys());

            oshmem_shmem_preconnect_all();

#if OSHMEM_OPAL_THREAD_ENABLE
            pthread_t thread_id;
            int perr;

            perr = pthread_create(&thread_id, NULL, &shmem_opal_thread, NULL);
            if (perr != 0) {
                SHMEM_API_ERROR("cannot create opal thread for SHMEM");
                return OSHMEM_ERROR;
            }
#endif
        }
    }

#ifdef SIGUSR1
    signal(SIGUSR1, sighandler__SIGUSR1);
    signal(SIGTERM, sighandler__SIGTERM);
#endif

    return ret;
}
/*
 * Invoked when there's a new communicator that has been created.
 * Look at the communicator and decide which set of functions and
 * priority we want to return.
 */
mca_scoll_base_module_t *mca_scoll_fca_comm_query(struct oshmem_group_t *comm,
                                                  int *priority)
{
    mca_scoll_base_module_t *module;
    int size = comm->proc_count;
    int local_peers = 0;
    mca_scoll_fca_module_t *fca_module;

    *priority = 0;
    module = NULL;

    if (!mca_scoll_fca_component.fca_enable) {
        FCA_VERBOSE(20, "FCA is disabled on user request => exiting");
        goto exit;
    }

    if (mca_memheap.memheap_component == NULL) {
        FCA_VERBOSE(20, "No memheap => exiting");
        goto exit;
    }

    if (NULL == mca_scoll_fca_component.ret) {
        MCA_MEMHEAP_CALL(private_alloc(sizeof(int),
                                       (void **) &mca_scoll_fca_component.ret));
        MCA_MEMHEAP_CALL(private_alloc(oshmem_group_all->proc_count * sizeof(*mca_scoll_fca_component.rcounts),
                                       (void **) &mca_scoll_fca_component.rcounts));
        MCA_MEMHEAP_CALL(private_alloc(/*info_size*/ 20,
                                       &mca_scoll_fca_component.my_info_exchangeable));
        MCA_MEMHEAP_CALL(private_alloc(sizeof(fca_comm_desc_t),
                                       &mca_scoll_fca_component.fca_comm_desc_exchangeable));
    }

    if (size < mca_scoll_fca_component.fca_np) {
        FCA_VERBOSE(20, "size(%d) < fca_np(%d)", size, mca_scoll_fca_component.fca_np);
        goto exit;
    }

    if (size < 2) {
        FCA_VERBOSE(20, "size(%d) < 2", size);
        goto exit;
    }

    if (!have_remote_peers(comm, size, &local_peers)
            /* || OMPI_COMM_IS_INTER(comm) */) {
        FCA_VERBOSE(1, "all peers in group are on the same node, fca disabled\n");
        goto exit;
    }

    fca_module = OBJ_NEW(mca_scoll_fca_module_t);
    if (!fca_module) {
        goto exit_fatal;
    }

    fca_module->super.scoll_module_enable = mca_scoll_fca_module_enable;
    fca_module->super.scoll_collect =
        mca_scoll_fca_component.fca_enable_allgather ? mca_scoll_fca_collect : NULL;
    fca_module->super.scoll_reduce =
        mca_scoll_fca_component.fca_enable_allreduce ? mca_scoll_fca_reduce : NULL;
    fca_module->super.scoll_barrier =
        mca_scoll_fca_component.fca_enable_barrier ? mca_scoll_fca_barrier : NULL;
    fca_module->super.scoll_broadcast =
        mca_scoll_fca_component.fca_enable_bcast ? mca_scoll_fca_broadcast : NULL;

    *priority = mca_scoll_fca_component.fca_priority;
    module = &fca_module->super;

exit:
    FCA_VERBOSE(4, "Query FCA module for comm %p size %d rank %d local_peers=%d: priority=%d %s",
                (void *) comm, size, comm->my_pe, local_peers, *priority,
                module ? "enabled" : "disabled");
    return module;

exit_fatal:
    /* It is possible that other PE(s) successfully initialized FCA,
     * so different frameworks would be used for collective ops. */
    FCA_ERROR("FCA module query failed - aborting");
    oshmem_shmem_abort(-1);
    return NULL;
}
int oshmem_shmem_init(int argc, char **argv, int requested, int *provided)
{
    int ret = OSHMEM_SUCCESS;

    OMPI_TIMING_INIT(32);

    if (!oshmem_shmem_initialized) {
        ret = ompi_mpi_init(argc, argv, requested, provided, true);
        OMPI_TIMING_NEXT("ompi_mpi_init");

        if (OSHMEM_SUCCESS != ret) {
            return ret;
        }

        PMPI_Comm_dup(MPI_COMM_WORLD, &oshmem_comm_world);
        OMPI_TIMING_NEXT("PMPI_Comm_dup");

        SHMEM_MUTEX_INIT(shmem_internal_mutex_alloc);

        ret = _shmem_init(argc, argv, requested, provided);
        OMPI_TIMING_NEXT("_shmem_init");
        OMPI_TIMING_IMPORT_OPAL("mca_scoll_mpi_comm_query");
        OMPI_TIMING_IMPORT_OPAL("mca_scoll_enable");
        OMPI_TIMING_IMPORT_OPAL("mca_scoll_base_select");

        if (OSHMEM_SUCCESS != ret) {
            return ret;
        }
        oshmem_shmem_initialized = true;

        if (OSHMEM_SUCCESS != shmem_lock_init()) {
            SHMEM_API_ERROR("shmem_lock_init() failed");
            return OSHMEM_ERROR;
        }
        OMPI_TIMING_NEXT("shmem_lock_init");

        /* this is a collective op, implies barrier */
        MCA_MEMHEAP_CALL(get_all_mkeys());
        OMPI_TIMING_NEXT("get_all_mkeys()");

        oshmem_shmem_preconnect_all();
        OMPI_TIMING_NEXT("shmem_preconnect_all");

#if OSHMEM_OPAL_THREAD_ENABLE
        pthread_t thread_id;
        int perr;

        perr = pthread_create(&thread_id, NULL, &shmem_opal_thread, NULL);
        if (0 != perr) {
            SHMEM_API_ERROR("cannot create opal thread for SHMEM");
            return OSHMEM_ERROR;
        }
#endif
        OMPI_TIMING_NEXT("THREAD_ENABLE");
    }

#ifdef SIGUSR1
    signal(SIGUSR1, sighandler__SIGUSR1);
    signal(SIGTERM, sighandler__SIGTERM);
#endif

    OMPI_TIMING_OUT;
    OMPI_TIMING_FINALIZE;

    return ret;
}
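/*
 * Illustration only: the user-visible entry point that drives the
 * initialization path above. Calling shmem_init() from an application
 * eventually reaches oshmem_shmem_init(), which brings up MPI, duplicates
 * MPI_COMM_WORLD, initializes SHMEM locks, exchanges memheap keys, and
 * optionally preconnects all PEs. A minimal sketch assuming the standard
 * OpenSHMEM 1.2+ API.
 */
#include <stdio.h>
#include <shmem.h>

int main(void)
{
    shmem_init();       /* -> oshmem_shmem_init() inside Open MPI's OSHMEM */
    printf("PE %d of %d is up\n", shmem_my_pe(), shmem_n_pes());
    shmem_finalize();
    return 0;
}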