static void pscom_extoll_init(void) { psex_debug = pscom.env.debug; psex_debug_stream = pscom_debug_stream(); pscom_env_get_uint(&psex_recvq_size, ENV_EXTOLL_RECVQ_SIZE); pscom_env_get_int(&psex_global_sendq, ENV_EXTOLL_GLOBAL_SENDQ); if (psex_global_sendq) { // One sendq for all connection. Allocate buffers for 1024 connections psex_sendq_size = 1024 * pscom_min(psex_sendq_size, psex_recvq_size); } else { // One sendq for each connection. limit sendq to recvq size. psex_sendq_size = pscom_min(psex_sendq_size, psex_recvq_size); } pscom_env_get_uint(&psex_sendq_size, ENV_EXTOLL_SENDQ_SIZE); psex_pending_tokens = psex_pending_tokens_suggestion(); pscom_env_get_uint(&psex_pending_tokens, ENV_EXTOLL_PENDING_TOKENS); // if (!psex_global_sendq && psex_sendq_size == psex_recvq_size) { // // Disable event counting: // psex_event_count = 0; // } pscom_env_get_int(&psex_event_count, ENV_EXTOLL_EVENT_CNT); }
/*
 * Set up the psm plugin: take over the pscom debug settings, determine
 * the unique id, prepare the poll entry and pre-initialize pspsm.
 */
static void pscom_psm_init(void)
{
	pspsm_debug = pscom.env.debug;
	pspsm_debug_stream = pscom_debug_stream();

	/*
	 * See the comment in pspsm_init().
	 * Fall back to ENV_PMI_ID while the unique id is still zero after
	 * reading ENV_PSM_UNIQ_ID.
	 */
	pscom_env_get_uint(&pscom.env.psm_uniq_id, ENV_PSM_UNIQ_ID);
	if (pscom.env.psm_uniq_id == 0) {
		pscom_env_get_uint(&pscom.env.psm_uniq_id, ENV_PMI_ID);
	}

	/* Prepare the poll entry; progress is made by pscom_psm_make_progress. */
	INIT_LIST_HEAD(&pspsm_poll.poll.next);
	pspsm_poll.poll.do_read = pscom_psm_make_progress;

	/*
	 * Preinitialize pspsm. The result is ignored on purpose:
	 * pscom_psm_connect will see the error again.
	 */
	pspsm_init();
}
static void pscom_openib_init(FILE *peer) { psoib_info_msg_t lmsg, rmsg; int rc; pscom.env.debug = arg_verbose; pscom_env_get_int(&pscom.env.debug, ENV_DEBUG); psoib_debug = pscom.env.debug; pscom_env_get_str(&psoib_hca, ENV_OPENIB_HCA); pscom_env_get_uint(&psoib_port, ENV_OPENIB_PORT); pscom_env_get_uint(&psoib_path_mtu, ENV_OPENIB_PATH_MTU); pscom_env_get_uint(&psoib_sendq_size, ENV_OPENIB_SENDQ_SIZE); pscom_env_get_uint(&psoib_recvq_size, ENV_OPENIB_RECVQ_SIZE); pscom_env_get_uint(&psoib_compq_size, ENV_OPENIB_COMPQ_SIZE); psoib_pending_tokens = psoib_pending_tokens_suggestion(); pscom_env_get_uint(&psoib_pending_tokens, ENV_OPENIB_PENDING_TOKENS); rc = psoib_init(); psoib_rc_check("psoib_init()", rc); mcon = psoib_con_create(); assert(mcon); rc = psoib_con_init(mcon, NULL, NULL); psoib_rc_check("psoib_con_init()", rc); psoib_con_get_info_msg(mcon, &lmsg); if (is_client) { info_write(peer, &lmsg); info_read(peer, &rmsg); } else { info_read(peer, &rmsg); info_write(peer, &lmsg); } rc = psoib_con_connect(mcon, &rmsg); psoib_rc_check("psoib_con_connect()", rc); }
static void pscom_openib_init(void) { psoib_debug = pscom.env.debug; psoib_debug_stream = pscom_debug_stream(); pscom_env_get_str(&psoib_hca, ENV_OPENIB_HCA); pscom_env_get_uint(&psoib_port, ENV_OPENIB_PORT); pscom_env_get_uint(&psoib_path_mtu, ENV_OPENIB_PATH_MTU); pscom_env_get_uint(&psoib_recvq_size, ENV_OPENIB_RECVQ_SIZE); pscom_env_get_int(&psoib_global_sendq, ENV_OPENIB_GLOBAL_SENDQ); pscom_env_get_uint(&psoib_compq_size, ENV_OPENIB_COMPQ_SIZE); if (psoib_global_sendq) { // One sendq for all connection. limit sendq to compq size. psoib_sendq_size = psoib_compq_size; } else { // One sendq for each connection. limit sendq to recvq size. psoib_sendq_size = pscom_min(psoib_sendq_size, psoib_recvq_size); } pscom_env_get_uint(&psoib_sendq_size, ENV_OPENIB_SENDQ_SIZE); psoib_pending_tokens = psoib_pending_tokens_suggestion(); pscom_env_get_uint(&psoib_pending_tokens, ENV_OPENIB_PENDING_TOKENS); // if (!psoib_global_sendq && psoib_sendq_size == psoib_recvq_size) { // // Disable event counting: // psoib_event_count = 0; // } pscom_env_get_int(&psoib_event_count, ENV_OPENIB_EVENT_CNT); pscom_env_get_int(&psoib_ignore_wrong_opcodes, ENV_OPENIB_IGNORE_WRONG_OPCODES); pscom_env_get_int(&psoib_lid_offset, ENV_OPENIB_LID_OFFSET); INIT_LIST_HEAD(&pscom_cq_poll.next); pscom_cq_poll.do_read = pscom_poll_cq; }
int MPID_Init(int *argc, char ***argv, int threadlevel_requested, int *threadlevel_provided, int *has_args, int *has_env) { int mpi_errno = MPI_SUCCESS; int pg_rank, pg_size, pg_id_sz; int appnum = -1; /* int universe_size; */ int has_parent; pscom_socket_t *socket; pscom_err_t rc; char *pg_id_name; char *parent_port; /* Call any and all MPID_Init type functions */ MPIR_Err_init(); MPIR_Datatype_init(); MPIR_Group_init(); mpid_debug_init(); assert(PSCOM_ANYPORT == -1); /* all codeplaces which depends on it are marked with: "assert(PSP_ANYPORT == -1);" */ MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_INIT); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_INIT); PMICALL(PMI_Init(&has_parent)); PMICALL(PMI_Get_rank(&pg_rank)); PMICALL(PMI_Get_size(&pg_size)); PMICALL(PMI_Get_appnum(&appnum)); *has_args = 1; *has_env = 1; /* without PMI_Get_universe_size() we see pmi error: '[unset]: write_line error; fd=-1' in PMI_KVS_Get()! */ /* PMICALL(PMI_Get_universe_size(&universe_size)); */ if (pg_rank < 0) pg_rank = 0; if (pg_size <= 0) pg_size = 1; if ( #ifndef MPICH_IS_THREADED 1 #else threadlevel_requested < MPI_THREAD_MULTIPLE #endif ) { rc = pscom_init(PSCOM_VERSION); if (rc != PSCOM_SUCCESS) { fprintf(stderr, "pscom_init(0x%04x) failed : %s\n", PSCOM_VERSION, pscom_err_str(rc)); exit(1); } } else { rc = pscom_init_thread(PSCOM_VERSION); if (rc != PSCOM_SUCCESS) { fprintf(stderr, "pscom_init_thread(0x%04x) failed : %s\n", PSCOM_VERSION, pscom_err_str(rc)); exit(1); } } /* Initialize the switches */ pscom_env_get_uint(&MPIDI_Process.env.enable_collectives, "PSP_COLLECTIVES"); #ifdef PSCOM_HAS_ON_DEMAND_CONNECTIONS /* if (pg_size > 32) MPIDI_Process.env.enable_ondemand = 1; */ pscom_env_get_uint(&MPIDI_Process.env.enable_ondemand, "PSP_ONDEMAND"); #else MPIDI_Process.env.enable_ondemand = 0; #endif /* enable_ondemand_spawn defaults to enable_ondemand */ MPIDI_Process.env.enable_ondemand_spawn = MPIDI_Process.env.enable_ondemand; 
pscom_env_get_uint(&MPIDI_Process.env.enable_ondemand_spawn, "PSP_ONDEMAND_SPAWN"); /* take SMP-related locality information into account (e.g., for MPI_Win_allocate_shared) */ pscom_env_get_uint(&MPIDI_Process.env.enable_smp_awareness, "PSP_SMP_AWARENESS"); /* take MSA-related topology information into account */ pscom_env_get_uint(&MPIDI_Process.env.enable_msa_awareness, "PSP_MSA_AWARENESS"); if(MPIDI_Process.env.enable_msa_awareness) { pscom_env_get_uint(&MPIDI_Process.msa_module_id, "PSP_MSA_MODULE_ID"); } #ifdef MPID_PSP_TOPOLOGY_AWARE_COLLOPS /* use hierarchy-aware collectives on SMP level */ pscom_env_get_uint(&MPIDI_Process.env.enable_smp_aware_collops, "PSP_SMP_AWARE_COLLOPS"); /* use hierarchy-aware collectives on MSA level (disables SMP-aware collops / FIX ME!) */ pscom_env_get_uint(&MPIDI_Process.env.enable_msa_aware_collops, "PSP_MSA_AWARE_COLLOPS"); if(MPIDI_Process.env.enable_msa_aware_collops) MPIDI_Process.env.enable_smp_aware_collops = 0; #endif #ifdef MPID_PSP_CREATE_HISTOGRAM /* collect statistics information and print them at the end of a run */ pscom_env_get_uint(&MPIDI_Process.env.enable_histogram, "PSP_HISTOGRAM"); pscom_env_get_uint(&MPIDI_Process.histo.max_size, "PSP_HISTOGRAM_MAX"); pscom_env_get_uint(&MPIDI_Process.histo.min_size, "PSP_HISTOGRAM_MIN"); pscom_env_get_uint(&MPIDI_Process.histo.step_width, "PSP_HISTOGRAM_SHIFT"); #endif /* pscom_env_get_uint(&mpir_allgather_short_msg, "PSP_ALLGATHER_SHORT_MSG"); pscom_env_get_uint(&mpir_allgather_long_msg, "PSP_ALLGATHER_LONG_MSG"); pscom_env_get_uint(&mpir_allreduce_short_msg, "PSP_ALLREDUCE_SHORT_MSG"); pscom_env_get_uint(&mpir_alltoall_short_msg, "PSP_ALLTOALL_SHORT_MSG"); pscom_env_get_uint(&mpir_alltoall_medium_msg, "PSP_ALLTOALL_MEDIUM_MSG"); pscom_env_get_uint(&mpir_alltoall_throttle, "PSP_ALLTOALL_THROTTLE"); pscom_env_get_uint(&mpir_bcast_short_msg, "PSP_BCAST_SHORT_MSG"); pscom_env_get_uint(&mpir_bcast_long_msg, "PSP_BCAST_LONG_MSG"); pscom_env_get_uint(&mpir_bcast_min_procs, 
"PSP_BCAST_MIN_PROCS"); pscom_env_get_uint(&mpir_gather_short_msg, "PSP_GATHER_SHORT_MSG"); pscom_env_get_uint(&mpir_gather_vsmall_msg, "PSP_GATHER_VSMALL_MSG"); pscom_env_get_uint(&mpir_redscat_commutative_long_msg, "PSP_REDSCAT_COMMUTATIVE_LONG_MSG"); pscom_env_get_uint(&mpir_redscat_noncommutative_short_msg, "PSP_REDSCAT_NONCOMMUTATIVE_SHORT_MSG"); pscom_env_get_uint(&mpir_reduce_short_msg, "PSP_REDUCE_SHORT_MSG"); pscom_env_get_uint(&mpir_scatter_short_msg, "PSP_SCATTER_SHORT_MSG"); */ socket = pscom_open_socket(0, 0); if (!MPIDI_Process.env.enable_ondemand) { socket->ops.con_accept = mpid_con_accept; } { char name[10]; snprintf(name, sizeof(name), "r%07u", (unsigned)pg_rank); pscom_socket_set_name(socket, name); } rc = pscom_listen(socket, PSCOM_ANYPORT); if (rc != PSCOM_SUCCESS) { PRINTERROR("pscom_listen(PSCOM_ANYPORT)"); goto fn_fail; } /* Note that if pmi is not availble, the value of MPI_APPNUM is not set */ /* if (appnum != -1) {*/ MPIR_Process.attrs.appnum = appnum; /* }*/ #if 0 // see mpiimpl.h: // typedef struct PreDefined_attrs { // int appnum; /* Application number provided by mpiexec (MPI-2) */ // int host; /* host */ // int io; /* standard io allowed */ // int lastusedcode; /* last used error code (MPI-2) */ // int tag_ub; /* Maximum message tag */ // int universe; /* Universe size from mpiexec (MPI-2) */ // int wtime_is_global; /* Wtime is global over processes in COMM_WORLD */ // } PreDefined_attrs; #endif MPIR_Process.attrs.tag_ub = MPIDI_TAG_UB; /* obtain the id of the process group */ PMICALL(PMI_KVS_Get_name_length_max(&pg_id_sz)); pg_id_name = MPL_malloc(pg_id_sz + 1, MPL_MEM_STRINGS); if (!pg_id_name) { PRINTERROR("MPL_malloc()"); goto fn_fail; } PMICALL(PMI_KVS_Get_my_name(pg_id_name, pg_id_sz)); /* safe */ /* MPIDI_Process.socket = socket; */ MPIDI_Process.my_pg_rank = pg_rank; MPIDI_Process.my_pg_size = pg_size; MPIDI_Process.pg_id_name = pg_id_name; if (!MPIDI_Process.env.enable_ondemand) { /* Create and establish all connections */ if 
(InitPortConnections(socket) != MPI_SUCCESS) goto fn_fail; } else { /* Create all connections as "on demand" connections. */ if (InitPscomConnections(socket) != MPI_SUCCESS) goto fn_fail; } #ifdef MPID_PSP_TOPOLOGY_AWARE_COLLOPS { int grank; int my_node_id = -1; int remote_node_id = -1; int* node_id_table; if(MPIDI_Process.env.enable_msa_awareness && MPIDI_Process.env.enable_msa_aware_collops) { my_node_id = MPIDI_Process.msa_module_id; assert(my_node_id > -1); } else if(MPIDI_Process.env.enable_smp_awareness && MPIDI_Process.env.enable_smp_aware_collops) { if (!MPIDI_Process.env.enable_ondemand) { /* In the PSP_ONDEMAND=0 case, we can just check the pscom connection types: */ for (grank = 0; grank < pg_size; grank++) { pscom_connection_t *con = grank2con_get(grank); if( (con->type == PSCOM_CON_TYPE_SHM) || (pg_rank == grank) ) { my_node_id = grank; break; } } } else { /* In the PSP_ONDEMAND=1 case, we have to use a hash of the host name: */ my_node_id = MPID_PSP_get_host_hash(); if(my_node_id < 0) my_node_id *= -1; } assert(my_node_id > -1); } else { /* No hierarchy-awareness requested */ assert(my_node_id == -1); } if(my_node_id > -1) { node_id_table = MPL_malloc(pg_size * sizeof(int), MPL_MEM_OBJECT); if(pg_rank != 0) { /* gather: */ pscom_connection_t *con = grank2con_get(0); assert(con); pscom_send(con, NULL, 0, &my_node_id, sizeof(int)); /* bcast: */ rc = pscom_recv_from(con, NULL, 0, node_id_table, pg_size*sizeof(int)); assert(rc == PSCOM_SUCCESS); } else { /* gather: */ node_id_table[0] = my_node_id; for(grank=1; grank < pg_size; grank++) { pscom_connection_t *con = grank2con_get(grank); assert(con); rc = pscom_recv_from(con, NULL, 0, &remote_node_id, sizeof(int)); assert(rc == PSCOM_SUCCESS); node_id_table[grank] = remote_node_id; } /* bcast: */ for(grank=1; grank < pg_size; grank++) { pscom_connection_t *con = grank2con_get(grank); pscom_send(con, NULL, 0, node_id_table, pg_size*sizeof(int)); } } MPIDI_Process.node_id_table = node_id_table; } else { /* No 
hierarchy-awareness requested */ assert(MPIDI_Process.node_id_table == NULL); } } #endif /* * Initialize the MPI_COMM_WORLD object */ { MPIR_Comm * comm; int grank; MPIDI_PG_t * pg_ptr; int pg_id_num; MPIDI_VCRT_t * vcrt; comm = MPIR_Process.comm_world; comm->rank = pg_rank; comm->remote_size = pg_size; comm->local_size = pg_size; comm->pscom_socket = socket; vcrt = MPIDI_VCRT_Create(comm->remote_size); assert(vcrt); MPID_PSP_comm_set_vcrt(comm, vcrt); MPIDI_PG_Convert_id(pg_id_name, &pg_id_num); MPIDI_PG_Create(pg_size, pg_id_num, &pg_ptr); assert(pg_ptr == MPIDI_Process.my_pg); for (grank = 0; grank < pg_size; grank++) { /* MPIR_CheckDisjointLpids() in mpi/comm/intercomm_create.c expect lpid to be smaller than 4096!!! Else you will see an "Fatal error in MPI_Intercomm_create" */ pscom_connection_t *con = grank2con_get(grank); pg_ptr->vcr[grank] = MPIDI_VC_Create(pg_ptr, grank, con, grank); comm->vcr[grank] = MPIDI_VC_Dup(pg_ptr->vcr[grank]); } mpi_errno = MPIR_Comm_commit(comm); assert(mpi_errno == MPI_SUCCESS); } /* * Initialize the MPI_COMM_SELF object */ { MPIR_Comm * comm; MPIDI_VCRT_t * vcrt; comm = MPIR_Process.comm_self; comm->rank = 0; comm->remote_size = 1; comm->local_size = 1; comm->pscom_socket = socket; vcrt = MPIDI_VCRT_Create(comm->remote_size); assert(vcrt); MPID_PSP_comm_set_vcrt(comm, vcrt); comm->vcr[0] = MPIDI_VC_Dup(MPIR_Process.comm_world->vcr[pg_rank]); mpi_errno = MPIR_Comm_commit(comm); assert(mpi_errno == MPI_SUCCESS); } /* ToDo: move MPID_enable_receive_dispach to bg thread */ MPID_enable_receive_dispach(socket); if (threadlevel_provided) { *threadlevel_provided = (MPICH_THREAD_LEVEL < threadlevel_requested) ? 
MPICH_THREAD_LEVEL : threadlevel_requested; } if (has_parent) { MPIR_Comm * comm; mpi_errno = MPID_PSP_GetParentPort(&parent_port); assert(mpi_errno == MPI_SUCCESS); /* printf("%s:%u:%s Child with Parent: %s\n", __FILE__, __LINE__, __func__, parent_port); */ mpi_errno = MPID_Comm_connect(parent_port, NULL, 0, MPIR_Process.comm_world, &comm); if (mpi_errno != MPI_SUCCESS) { fprintf(stderr, "MPI_Comm_connect(parent) failed!\n"); goto fn_fail; } assert(comm != NULL); MPL_strncpy(comm->name, "MPI_COMM_PARENT", MPI_MAX_OBJECT_NAME); MPIR_Process.comm_parent = comm; } MPID_PSP_shm_rma_init(); fn_exit: MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_INIT); return mpi_errno; /* --- */ fn_fail: /* A failing MPI_Init() did'nt call the MPI error handler, which mostly calls abort(). This cause MPI_Init() to return the mpi_errno, which nobody check, causing segfaultm double frees and so on. To prevent strange error messages, we now call _exit(1) here. */ _exit(1); }
void pscom_env_init(void) { pscom_pslib_read_config(ENV_CONFIG_FILES); pscom_debug_set_filename(pscom_env_get(ENV_DEBUG_OUT), 1); if (!pscom.env.debug) { // only set debug once! pscom_env_get_int(&pscom.env.debug, ENV_DEBUG); } if (pscom_pslib_available) { pscom_env_get_str(&pscom.env.info, ENV_INFO); if (pscom.env.info) pscom_info_connect(pscom.env.info); } DPRINT(1,"# Version(PSCOM): %s (%s)", __DATE__, VC_VERSION); pscom_env_get_uint(&pscom.env.so_sndbuf, ENV_SO_SNDBUF); pscom_env_get_uint(&pscom.env.so_rcvbuf, ENV_SO_RCVBUF); pscom_env_get_int(&pscom.env.tcp_nodelay, ENV_TCP_NODELAY); pscom_env_get_uint(&pscom.env.tcp_backlog, ENV_TCP_BACKLOG); // pscom_env_get_int(&env.nobgthread, ENV_NOBGTHREAD); pscom_env_get_int(&pscom.env.sched_yield, ENV_SCHED_YIELD); pscom_env_get_int(&pscom.env.unexpected_receives, ENV_UNEXPECTED_RECEIVES); pscom_env_get_uint(&pscom.env.rendezvous_size, ENV_RENDEZVOUS); if (pscom.env.rendezvous_size != (unsigned)~0) pscom.env.rendezvous_size_shm = pscom.env.rendezvous_size; pscom_env_get_uint(&pscom.env.rendezvous_size_shm, ENV_RENDEZVOUS_SHM); if (pscom.env.rendezvous_size != (unsigned)~0) pscom.env.rendezvous_size_dapl = pscom.env.rendezvous_size; pscom_env_get_uint(&pscom.env.rendezvous_size_dapl, ENV_RENDEZVOUS_DAPL); if (pscom.env.rendezvous_size != (unsigned)~0) pscom.env.rendezvous_size_elan = pscom.env.rendezvous_size; pscom_env_get_uint(&pscom.env.rendezvous_size_elan, ENV_RENDEZVOUS_ELAN); if (pscom.env.rendezvous_size != (unsigned)~0) pscom.env.rendezvous_size_extoll = pscom.env.rendezvous_size; pscom_env_get_uint(&pscom.env.rendezvous_size_extoll, ENV_RENDEZVOUS_EXTOLL); if (pscom.env.rendezvous_size != (unsigned)~0) pscom.env.rendezvous_size_velo = pscom.env.rendezvous_size; pscom_env_get_uint(&pscom.env.rendezvous_size_velo, ENV_RENDEZVOUS_VELO); if (pscom.env.rendezvous_size != (unsigned)~0) pscom.env.rendezvous_size_openib = pscom.env.rendezvous_size; pscom_env_get_uint(&pscom.env.rendezvous_size_openib, 
ENV_RENDEZVOUS_OPENIB); pscom_env_get_int(&pscom.env.sigquit, ENV_SIGQUIT); pscom_env_get_uint(&pscom.env.readahead, ENV_READAHEAD); pscom_env_get_uint(&pscom.env.retry, ENV_RETRY); pscom.env.readahead = pscom_max(pscom.env.readahead, sizeof(pscom_header_net_t)); pscom_env_get_str(&pscom.env.network, ENV_NETWORK); pscom_env_get_dir(&pscom.env.plugindir, ENV_PLUGINDIR); pscom_env_get_int(&pscom.env.debug_req, ENV_DEBUG_REQ); if (pscom.env.debug >= 2) pscom.env.debug_stats = 1; pscom_env_get_int(&pscom.env.debug_stats, ENV_DEBUG_STATS); pscom_env_get_uint(&pscom.env.iprobe_count, ENV_IPROBE_COUNT); }