int MPIDI_VCRT_Create(int size, struct MPIDI_VCRT **vcrt_ptr) { MPIDI_VCRT_t * vcrt; int mpi_errno = MPI_SUCCESS; MPIR_CHKPMEM_DECL(1); MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_VCRT_CREATE); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_VCRT_CREATE); MPIR_CHKPMEM_MALLOC(vcrt, MPIDI_VCRT_t *, sizeof(MPIDI_VCRT_t) + (size - 1) * sizeof(MPIDI_VC_t *), mpi_errno, "**nomem"); vcrt->handle = HANDLE_SET_KIND(0, HANDLE_KIND_INVALID); MPIR_Object_set_ref(vcrt, 1); vcrt->size = size; *vcrt_ptr = vcrt; fn_exit: MPIR_CHKPMEM_COMMIT(); MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_VCRT_CREATE); return mpi_errno; fn_fail: /* --BEGIN ERROR HANDLING-- */ MPIR_CHKPMEM_REAP(); goto fn_exit; /* --END ERROR HANDLING-- */ }
/* Fully initialize a VC. This invokes the channel-specific VC initialization routine MPIDI_CH3_VC_Init . */ int MPIDI_VC_Init( MPIDI_VC_t *vc, MPIDI_PG_t *pg, int rank ) { vc->state = MPIDI_VC_STATE_INACTIVE; vc->handle = HANDLE_SET_MPI_KIND(0, MPIR_VCONN); MPIR_Object_set_ref(vc, 0); vc->pg = pg; vc->pg_rank = rank; vc->lpid = lpid_counter++; vc->node_id = -1; MPIDI_VC_Init_seqnum_send(vc); MPIDI_VC_Init_seqnum_recv(vc); vc->rndvSend_fn = MPIDI_CH3_RndvSend; vc->rndvRecv_fn = MPIDI_CH3_RecvRndv; vc->ready_eager_max_msg_sz = -1; /* no limit */; vc->eager_max_msg_sz = MPIR_CVAR_CH3_EAGER_MAX_MSG_SIZE; vc->sendNoncontig_fn = MPIDI_CH3_SendNoncontig_iov; #ifdef ENABLE_COMM_OVERRIDES vc->comm_ops = NULL; #endif /* FIXME: We need a better abstraction for initializing the thread state for an object */ #if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ { int err; MPID_Thread_mutex_create(&vc->pobj_mutex,&err); MPIR_Assert(err == 0); } #endif /* MPICH_THREAD_GRANULARITY */ MPIDI_CH3_VC_Init(vc); MPIDI_DBG_PrintVCState(vc); return MPI_SUCCESS; }
int MPID_Rget_accumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr, int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPIR_Win * win_ptr, MPIR_Request ** request) { int mpi_errno = MPI_SUCCESS; int dt_contig ATTRIBUTE((unused)); MPIR_Datatype*dtp; MPI_Aint dt_true_lb ATTRIBUTE((unused)); intptr_t data_sz, trg_data_sz; MPIR_Request *ureq; MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_RGET_ACCUMULATE); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_RGET_ACCUMULATE); /* request-based RMA operations are only valid within a passive epoch */ MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_PER_TARGET && win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_CALLED && win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_ISSUED && win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_GRANTED, mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync"); /* Create user request, initially cc=1, ref=1 */ ureq = MPIR_Request_create(MPIR_REQUEST_KIND__RMA); MPIR_ERR_CHKANDJUMP(ureq == NULL, mpi_errno, MPI_ERR_OTHER, "**nomemreq"); /* This request is referenced by user and ch3 by default. */ MPIR_Object_set_ref(ureq, 2); /* Note that GACC is only a no-op if no data goes in both directions */ MPIDI_Datatype_get_info(origin_count, origin_datatype, dt_contig, data_sz, dtp, dt_true_lb); MPIDI_Datatype_get_info(origin_count, origin_datatype, dt_contig, trg_data_sz, dtp, dt_true_lb); /* Enqueue or perform the RMA operation */ if (target_rank != MPI_PROC_NULL && (data_sz != 0 || trg_data_sz != 0)) { mpi_errno = MPIDI_CH3I_Get_accumulate(origin_addr, origin_count, origin_datatype, result_addr, result_count, result_datatype, target_rank, target_disp, target_count, target_datatype, op, win_ptr, ureq); if (mpi_errno != MPI_SUCCESS) { MPIR_ERR_POP(mpi_errno); } } else { mpi_errno = MPID_Request_complete(ureq); if (mpi_errno != MPI_SUCCESS) { MPIR_ERR_POP(mpi_errno); } } *request = ureq; fn_exit: MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_RGET_ACCUMULATE); return mpi_errno; fn_fail: goto fn_exit; }
int MPIR_Group_create( int nproc, MPIR_Group **new_group_ptr ) { int mpi_errno = MPI_SUCCESS; *new_group_ptr = (MPIR_Group *)MPIR_Handle_obj_alloc( &MPIR_Group_mem ); /* --BEGIN ERROR HANDLING-- */ if (!*new_group_ptr) { mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, "MPIR_Group_create", __LINE__, MPI_ERR_OTHER, "**nomem", 0 ); return mpi_errno; } /* --END ERROR HANDLING-- */ MPIR_Object_set_ref( *new_group_ptr, 1 ); (*new_group_ptr)->lrank_to_lpid = (MPII_Group_pmap_t *)MPL_malloc( nproc * sizeof(MPII_Group_pmap_t) ); /* --BEGIN ERROR HANDLING-- */ if (!(*new_group_ptr)->lrank_to_lpid) { MPIR_Handle_obj_free( &MPIR_Group_mem, *new_group_ptr ); *new_group_ptr = NULL; MPIR_CHKMEM_SETERR(mpi_errno,nproc*sizeof(MPII_Group_pmap_t), "newgroup->lrank_to_lpid"); return mpi_errno; } /* --END ERROR HANDLING-- */ (*new_group_ptr)->size = nproc; /* Make sure that there is no question that the list of ranks sorted by pids is marked as uninitialized */ (*new_group_ptr)->idx_of_first_lpid = -1; (*new_group_ptr)->is_local_dense_monotonic = FALSE; return mpi_errno; }
static MPIR_Request *create_request(MPL_IOV * iov, int iov_count, int iov_offset, size_t nb) { MPIR_Request *sreq; int i; MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CREATE_REQUEST); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CREATE_REQUEST); sreq = MPIR_Request_create(MPIR_REQUEST_KIND__SEND); /* --BEGIN ERROR HANDLING-- */ if (sreq == NULL) return NULL; /* --END ERROR HANDLING-- */ MPIR_Object_set_ref(sreq, 2); for (i = 0; i < iov_count; i++) { sreq->dev.iov[i] = iov[i]; } if (iov_offset == 0) { MPIR_Assert(iov[0].MPL_IOV_LEN == sizeof(MPIDI_CH3_Pkt_t)); sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *) iov[0].MPL_IOV_BUF; sreq->dev.iov[0].MPL_IOV_BUF = (MPL_IOV_BUF_CAST) & sreq->dev.pending_pkt; } sreq->dev.iov[iov_offset].MPL_IOV_BUF = (MPL_IOV_BUF_CAST) ((char *) sreq->dev.iov[iov_offset].MPL_IOV_BUF + nb); sreq->dev.iov[iov_offset].MPL_IOV_LEN -= nb; sreq->dev.iov_count = iov_count; sreq->dev.OnDataAvail = 0; MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CREATE_REQUEST); return sreq; }
static MPIR_Request * create_request(void * hdr, intptr_t hdr_sz, size_t nb) { MPIR_Request * sreq; MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CREATE_REQUEST); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CREATE_REQUEST); sreq = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED); /* --BEGIN ERROR HANDLING-- */ if (sreq == NULL) return NULL; /* --END ERROR HANDLING-- */ MPIR_Object_set_ref(sreq, 2); sreq->kind = MPIR_REQUEST_KIND__SEND; MPIR_Assert(hdr_sz == sizeof(MPIDI_CH3_Pkt_t)); sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *) hdr; sreq->dev.iov[0].MPL_IOV_BUF = (MPL_IOV_BUF_CAST)((char *) &sreq->dev.pending_pkt + nb); sreq->dev.iov[0].MPL_IOV_LEN = hdr_sz - nb; sreq->dev.iov_count = 1; sreq->dev.OnDataAvail = 0; MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CREATE_REQUEST); return sreq; }
/* Provides a way to trap all attribute allocations when debugging leaks. */ MPIR_Attribute *MPID_Attr_alloc(void) { MPIR_Attribute *attr = (MPIR_Attribute *) MPIR_Handle_obj_alloc(&MPID_Attr_mem); /* attributes don't have refcount semantics, but let's keep valgrind and * the debug logging pacified */ MPIR_Assert(attr != NULL); MPIR_Object_set_ref(attr, 0); return attr; }
int MPIR_Group_init(void) { int mpi_errno = MPI_SUCCESS; MPIR_Assert(MPIR_GROUP_N_BUILTIN == 1); /* update this func if this ever triggers */ MPIR_Group_builtin[0].handle = MPI_GROUP_EMPTY; MPIR_Object_set_ref(&MPIR_Group_builtin[0], 1); MPIR_Group_builtin[0].size = 0; MPIR_Group_builtin[0].rank = MPI_UNDEFINED; MPIR_Group_builtin[0].idx_of_first_lpid = -1; MPIR_Group_builtin[0].lrank_to_lpid = NULL; /* TODO hook for device here? */ return mpi_errno; }
int MPIR_Comm_create_keyval_impl(MPI_Comm_copy_attr_function *comm_copy_attr_fn, MPI_Comm_delete_attr_function *comm_delete_attr_fn, int *comm_keyval, void *extra_state) { int mpi_errno = MPI_SUCCESS; MPII_Keyval *keyval_ptr; keyval_ptr = (MPII_Keyval *)MPIR_Handle_obj_alloc( &MPII_Keyval_mem ); MPIR_ERR_CHKANDJUMP(!keyval_ptr, mpi_errno, MPI_ERR_OTHER,"**nomem"); /* Initialize the attribute dup function */ if (!MPIR_Process.attr_dup) { MPIR_Process.attr_dup = MPIR_Attr_dup_list; MPIR_Process.attr_free = MPIR_Attr_delete_list; } /* The handle encodes the keyval kind. Modify it to have the correct field */ keyval_ptr->handle = (keyval_ptr->handle & ~(0x03c00000)) | (MPIR_COMM << 22); MPIR_Object_set_ref(keyval_ptr,1); keyval_ptr->was_freed = 0; keyval_ptr->kind = MPIR_COMM; keyval_ptr->extra_state = extra_state; keyval_ptr->copyfn.user_function = comm_copy_attr_fn; keyval_ptr->copyfn.proxy = MPII_Attr_copy_c_proxy; keyval_ptr->delfn.user_function = comm_delete_attr_fn; keyval_ptr->delfn.proxy = MPII_Attr_delete_c_proxy; MPIR_OBJ_PUBLISH_HANDLE(*comm_keyval, keyval_ptr->handle); fn_exit: return mpi_errno; fn_fail: goto fn_exit; }
/*@ MPIR_Type_dup - create a copy of a datatype Input Parameters: - oldtype - handle of original datatype Output Parameters: . newtype - handle of newly created copy of datatype Return Value: 0 on success, MPI error code on failure. @*/ int MPIR_Type_dup(MPI_Datatype oldtype, MPI_Datatype * newtype) { int mpi_errno = MPI_SUCCESS; MPIR_Datatype *new_dtp = 0, *old_dtp; if (HANDLE_GET_KIND(oldtype) == HANDLE_KIND_BUILTIN) { /* create a new type and commit it. */ mpi_errno = MPIR_Type_contiguous(1, oldtype, newtype); if (mpi_errno) { MPIR_ERR_POP(mpi_errno); } } else { /* allocate new datatype object and handle */ new_dtp = (MPIR_Datatype *) MPIR_Handle_obj_alloc(&MPIR_Datatype_mem); if (!new_dtp) { /* --BEGIN ERROR HANDLING-- */ mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, "MPIR_Type_dup", __LINE__, MPI_ERR_OTHER, "**nomem", 0); goto fn_fail; /* --END ERROR HANDLING-- */ } MPIR_Datatype_get_ptr(oldtype, old_dtp); /* fill in datatype */ MPIR_Object_set_ref(new_dtp, 1); /* new_dtp->handle is filled in by MPIR_Handle_obj_alloc() */ new_dtp->is_contig = old_dtp->is_contig; new_dtp->size = old_dtp->size; new_dtp->extent = old_dtp->extent; new_dtp->ub = old_dtp->ub; new_dtp->lb = old_dtp->lb; new_dtp->true_ub = old_dtp->true_ub; new_dtp->true_lb = old_dtp->true_lb; new_dtp->alignsize = old_dtp->alignsize; new_dtp->has_sticky_ub = old_dtp->has_sticky_ub; new_dtp->has_sticky_lb = old_dtp->has_sticky_lb; new_dtp->is_committed = old_dtp->is_committed; new_dtp->attributes = NULL; /* Attributes are copied in the * top-level MPI_Type_dup routine */ new_dtp->name[0] = 0; /* The Object name is not copied on * a dup */ new_dtp->n_builtin_elements = old_dtp->n_builtin_elements; new_dtp->builtin_element_size = old_dtp->builtin_element_size; new_dtp->basic_type = old_dtp->basic_type; new_dtp->max_contig_blocks = old_dtp->max_contig_blocks; new_dtp->dataloop = NULL; new_dtp->dataloop_size = old_dtp->dataloop_size; *newtype = new_dtp->handle; if (old_dtp->is_committed) { MPIR_Assert(old_dtp->dataloop != NULL); MPIR_Dataloop_dup(old_dtp->dataloop, old_dtp->dataloop_size, &new_dtp->dataloop); MPID_Type_commit_hook(new_dtp); } } MPL_DBG_MSG_D(MPIR_DBG_DATATYPE, VERBOSE, "dup type %x created.", *newtype); fn_fail: return mpi_errno; }
/*@ MPIDU_Type_vector - create a vector datatype Input Parameters: + count - number of blocks in vector . blocklength - number of elements in each block . stride - distance from beginning of one block to the next (see next parameter for units) . strideinbytes - if nonzero, then stride is in bytes, otherwise stride is in terms of extent of oldtype - oldtype - type (using handle) of datatype on which vector is based Output Parameters: . newtype - handle of new vector datatype Return Value: 0 on success, MPI error code on failure. @*/ int MPIDU_Type_vector(int count, int blocklength, MPI_Aint stride, int strideinbytes, MPI_Datatype oldtype, MPI_Datatype *newtype) { int mpi_errno = MPI_SUCCESS; int is_builtin, old_is_contig; MPI_Aint el_sz, old_sz; MPI_Datatype el_type; MPI_Aint old_lb, old_ub, old_extent, old_true_lb, old_true_ub, eff_stride; MPIDU_Datatype *new_dtp; if (count == 0) return MPIDU_Type_zerolen(newtype); /* allocate new datatype object and handle */ new_dtp = (MPIDU_Datatype *) MPIR_Handle_obj_alloc(&MPIDU_Datatype_mem); if (!new_dtp) { /* --BEGIN ERROR HANDLING-- */ mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, "MPIDU_Type_vector", __LINE__, MPI_ERR_OTHER, "**nomem", 0); return mpi_errno; /* --END ERROR HANDLING-- */ } /* handle is filled in by MPIR_Handle_obj_alloc() */ MPIR_Object_set_ref(new_dtp, 1); new_dtp->is_permanent = 0; new_dtp->is_committed = 0; new_dtp->attributes = NULL; new_dtp->cache_id = 0; new_dtp->name[0] = 0; new_dtp->contents = NULL; new_dtp->dataloop = NULL; new_dtp->dataloop_size = -1; new_dtp->dataloop_depth = -1; new_dtp->hetero_dloop = NULL; new_dtp->hetero_dloop_size = -1; new_dtp->hetero_dloop_depth = -1; is_builtin = (HANDLE_GET_KIND(oldtype) == HANDLE_KIND_BUILTIN); if (is_builtin) { el_sz = (MPI_Aint) MPIDU_Datatype_get_basic_size(oldtype); el_type = oldtype; old_lb = 0; old_true_lb = 0; old_ub = el_sz; old_true_ub = el_sz; old_sz = el_sz; old_extent = el_sz; old_is_contig = 1; new_dtp->size = (MPI_Aint) count * (MPI_Aint) blocklength * el_sz; new_dtp->has_sticky_lb = 0; new_dtp->has_sticky_ub = 0; new_dtp->alignsize = el_sz; /* ??? */ new_dtp->n_builtin_elements = count * blocklength; new_dtp->builtin_element_size = el_sz; new_dtp->basic_type = el_type; new_dtp->max_contig_blocks = count; eff_stride = (strideinbytes) ? stride : (stride * el_sz); } else /* user-defined base type (oldtype) */ { MPIDU_Datatype *old_dtp; MPIDU_Datatype_get_ptr(oldtype, old_dtp); el_sz = old_dtp->builtin_element_size; el_type = old_dtp->basic_type; old_lb = old_dtp->lb; old_true_lb = old_dtp->true_lb; old_ub = old_dtp->ub; old_true_ub = old_dtp->true_ub; old_sz = old_dtp->size; old_extent = old_dtp->extent; old_is_contig = old_dtp->is_contig; new_dtp->size = count * blocklength * old_dtp->size; new_dtp->has_sticky_lb = old_dtp->has_sticky_lb; new_dtp->has_sticky_ub = old_dtp->has_sticky_ub; new_dtp->alignsize = old_dtp->alignsize; new_dtp->n_builtin_elements = count * blocklength * old_dtp->n_builtin_elements; new_dtp->builtin_element_size = el_sz; new_dtp->basic_type = el_type; new_dtp->max_contig_blocks = old_dtp->max_contig_blocks * count * blocklength; eff_stride = (strideinbytes) ? stride : (stride * old_dtp->extent); } MPIDU_DATATYPE_VECTOR_LB_UB((MPI_Aint) count, eff_stride, (MPI_Aint) blocklength, old_lb, old_ub, old_extent, new_dtp->lb, new_dtp->ub); new_dtp->true_lb = new_dtp->lb + (old_true_lb - old_lb); new_dtp->true_ub = new_dtp->ub + (old_true_ub - old_ub); new_dtp->extent = new_dtp->ub - new_dtp->lb; /* new type is only contig for N types if old one was, and * size and extent of new type are equivalent, and stride is * equal to blocklength * size of old type. */ if ((MPI_Aint)(new_dtp->size) == new_dtp->extent && eff_stride == (MPI_Aint) blocklength * old_sz && old_is_contig) { new_dtp->is_contig = 1; new_dtp->max_contig_blocks = 1; } else { new_dtp->is_contig = 0; } *newtype = new_dtp->handle; MPL_DBG_MSG_P(MPIR_DBG_DATATYPE,VERBOSE,"vector type %x created.", new_dtp->handle); return mpi_errno; }
/* comm shrink impl; assumes that standard error checking has already taken * place in the calling function */ int MPIR_Comm_shrink(MPIR_Comm * comm_ptr, MPIR_Comm ** newcomm_ptr) { int mpi_errno = MPI_SUCCESS; MPIR_Group *global_failed = NULL, *comm_grp = NULL, *new_group_ptr = NULL; int attempts = 0; MPIR_Errflag_t errflag = MPIR_ERR_NONE; MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_SHRINK); MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_SHRINK); /* TODO - Implement this function for intercommunicators */ MPIR_Comm_group_impl(comm_ptr, &comm_grp); do { errflag = MPIR_ERR_NONE; MPID_Comm_get_all_failed_procs(comm_ptr, &global_failed, MPIR_SHRINK_TAG); /* Ignore the mpi_errno value here as it will definitely communicate * with failed procs */ mpi_errno = MPIR_Group_difference_impl(comm_grp, global_failed, &new_group_ptr); if (mpi_errno) MPIR_ERR_POP(mpi_errno); if (MPIR_Group_empty != global_failed) MPIR_Group_release(global_failed); mpi_errno = MPIR_Comm_create_group(comm_ptr, new_group_ptr, MPIR_SHRINK_TAG, newcomm_ptr); if (*newcomm_ptr == NULL) { errflag = MPIR_ERR_PROC_FAILED; } else if (mpi_errno) { errflag = MPIX_ERR_PROC_FAILED == MPIR_ERR_GET_CLASS(mpi_errno) ? MPIR_ERR_PROC_FAILED : MPIR_ERR_OTHER; MPIR_Comm_release(*newcomm_ptr); } mpi_errno = MPII_Allreduce_group(MPI_IN_PLACE, &errflag, 1, MPI_INT, MPI_MAX, comm_ptr, new_group_ptr, MPIR_SHRINK_TAG, &errflag); MPIR_Group_release(new_group_ptr); if (errflag) { if (*newcomm_ptr != NULL && MPIR_Object_get_ref(*newcomm_ptr) > 0) { MPIR_Object_set_ref(*newcomm_ptr, 1); MPIR_Comm_release(*newcomm_ptr); } if (MPIR_Object_get_ref(new_group_ptr) > 0) { MPIR_Object_set_ref(new_group_ptr, 1); MPIR_Group_release(new_group_ptr); } } } while (errflag && ++attempts < 5); if (errflag && attempts >= 5) goto fn_fail; else mpi_errno = MPI_SUCCESS; fn_exit: MPIR_Group_release(comm_grp); MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_SHRINK); return mpi_errno; fn_fail: if (*newcomm_ptr) MPIR_Object_set_ref(*newcomm_ptr, 0); MPIR_Object_set_ref(global_failed, 0); MPIR_Object_set_ref(new_group_ptr, 0); goto fn_exit; }
static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model, MPIR_Info * info, MPIR_Comm * comm_ptr, MPIR_Win ** win_ptr) { int mpi_errno = MPI_SUCCESS; int i; MPIR_Comm *win_comm_ptr; int win_target_pool_size; MPIR_CHKPMEM_DECL(5); MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_WIN_INIT); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_WIN_INIT); MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX); if (initRMAoptions) { MPIDI_CH3_RMA_Init_sync_pvars(); MPIDI_CH3_RMA_Init_pkthandler_pvars(); initRMAoptions = 0; } MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX); *win_ptr = (MPIR_Win *) MPIR_Handle_obj_alloc(&MPIR_Win_mem); MPIR_ERR_CHKANDJUMP1(!(*win_ptr), mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPIR_Win_mem"); mpi_errno = MPIR_Comm_dup_impl(comm_ptr, &win_comm_ptr); if (mpi_errno) MPIR_ERR_POP(mpi_errno); MPIR_Object_set_ref(*win_ptr, 1); /* (*win_ptr)->errhandler is set by upper level; */ /* (*win_ptr)->base is set by caller; */ (*win_ptr)->size = size; (*win_ptr)->disp_unit = disp_unit; (*win_ptr)->create_flavor = create_flavor; (*win_ptr)->model = model; (*win_ptr)->attributes = NULL; (*win_ptr)->comm_ptr = win_comm_ptr; (*win_ptr)->at_completion_counter = 0; (*win_ptr)->shm_base_addrs = NULL; /* (*win_ptr)->basic_info_table[] is set by caller; */ (*win_ptr)->current_lock_type = MPID_LOCK_NONE; (*win_ptr)->shared_lock_ref_cnt = 0; (*win_ptr)->target_lock_queue_head = NULL; (*win_ptr)->shm_allocated = FALSE; (*win_ptr)->states.access_state = MPIDI_RMA_NONE; (*win_ptr)->states.exposure_state = MPIDI_RMA_NONE; (*win_ptr)->num_targets_with_pending_net_ops = 0; (*win_ptr)->start_ranks_in_win_grp = NULL; (*win_ptr)->start_grp_size = 0; (*win_ptr)->lock_all_assert = 0; (*win_ptr)->lock_epoch_count = 0; (*win_ptr)->outstanding_locks = 0; (*win_ptr)->current_target_lock_data_bytes = 0; (*win_ptr)->sync_request_cnt = 0; (*win_ptr)->active = FALSE; (*win_ptr)->next = NULL; (*win_ptr)->prev = NULL; (*win_ptr)->outstanding_acks = 0; /* Initialize the info flags */ (*win_ptr)->info_args.no_locks = 0; (*win_ptr)->info_args.accumulate_ordering = MPIDI_ACC_ORDER_RAR | MPIDI_ACC_ORDER_RAW | MPIDI_ACC_ORDER_WAR | MPIDI_ACC_ORDER_WAW; (*win_ptr)->info_args.accumulate_ops = MPIDI_ACC_OPS_SAME_OP_NO_OP; (*win_ptr)->info_args.same_size = 0; (*win_ptr)->info_args.same_disp_unit = FALSE; (*win_ptr)->info_args.alloc_shared_noncontig = 0; (*win_ptr)->info_args.alloc_shm = FALSE; if ((*win_ptr)->create_flavor == MPI_WIN_FLAVOR_ALLOCATE || (*win_ptr)->create_flavor == MPI_WIN_FLAVOR_SHARED) { (*win_ptr)->info_args.alloc_shm = TRUE; } /* Set info_args on window based on info provided by user */ mpi_errno = MPID_Win_set_info((*win_ptr), info); if (mpi_errno != MPI_SUCCESS) MPIR_ERR_POP(mpi_errno); MPIR_CHKPMEM_MALLOC((*win_ptr)->op_pool_start, MPIDI_RMA_Op_t *, sizeof(MPIDI_RMA_Op_t) * MPIR_CVAR_CH3_RMA_OP_WIN_POOL_SIZE, mpi_errno, "RMA op pool", MPL_MEM_RMA); (*win_ptr)->op_pool_head = NULL; for (i = 0; i < MPIR_CVAR_CH3_RMA_OP_WIN_POOL_SIZE; i++) { (*win_ptr)->op_pool_start[i].pool_type = MPIDI_RMA_POOL_WIN; DL_APPEND((*win_ptr)->op_pool_head, &((*win_ptr)->op_pool_start[i])); } win_target_pool_size = MPL_MIN(MPIR_CVAR_CH3_RMA_TARGET_WIN_POOL_SIZE, MPIR_Comm_size(win_comm_ptr)); MPIR_CHKPMEM_MALLOC((*win_ptr)->target_pool_start, MPIDI_RMA_Target_t *, sizeof(MPIDI_RMA_Target_t) * win_target_pool_size, mpi_errno, "RMA target pool", MPL_MEM_RMA); (*win_ptr)->target_pool_head = NULL; for (i = 0; i < win_target_pool_size; i++) { (*win_ptr)->target_pool_start[i].pool_type = MPIDI_RMA_POOL_WIN; DL_APPEND((*win_ptr)->target_pool_head, &((*win_ptr)->target_pool_start[i])); } (*win_ptr)->num_slots = MPL_MIN(MPIR_CVAR_CH3_RMA_SLOTS_SIZE, MPIR_Comm_size(win_comm_ptr)); MPIR_CHKPMEM_MALLOC((*win_ptr)->slots, MPIDI_RMA_Slot_t *, sizeof(MPIDI_RMA_Slot_t) * (*win_ptr)->num_slots, mpi_errno, "RMA slots", MPL_MEM_RMA); for (i = 0; i < (*win_ptr)->num_slots; i++) { (*win_ptr)->slots[i].target_list_head = NULL; } MPIR_CHKPMEM_MALLOC((*win_ptr)->target_lock_entry_pool_start, MPIDI_RMA_Target_lock_entry_t *, sizeof(MPIDI_RMA_Target_lock_entry_t) * MPIR_CVAR_CH3_RMA_TARGET_LOCK_ENTRY_WIN_POOL_SIZE, mpi_errno, "RMA lock entry pool", MPL_MEM_RMA); (*win_ptr)->target_lock_entry_pool_head = NULL; for (i = 0; i < MPIR_CVAR_CH3_RMA_TARGET_LOCK_ENTRY_WIN_POOL_SIZE; i++) { DL_APPEND((*win_ptr)->target_lock_entry_pool_head, &((*win_ptr)->target_lock_entry_pool_start[i])); } if (MPIDI_RMA_Win_inactive_list_head == NULL && MPIDI_RMA_Win_active_list_head == NULL) { /* this is the first window, register RMA progress hook */ mpi_errno = MPID_Progress_register_hook(MPIDI_CH3I_RMA_Make_progress_global, &MPIDI_CH3I_RMA_Progress_hook_id); if (mpi_errno) { MPIR_ERR_POP(mpi_errno); } } DL_APPEND(MPIDI_RMA_Win_inactive_list_head, (*win_ptr)); if (MPIDI_CH3U_Win_hooks.win_init != NULL) { mpi_errno = MPIDI_CH3U_Win_hooks.win_init(size, disp_unit, create_flavor, model, info, comm_ptr, win_ptr); if (mpi_errno != MPI_SUCCESS) MPIR_ERR_POP(mpi_errno); } fn_exit: MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_WIN_INIT); return mpi_errno; fn_fail: MPIR_CHKPMEM_REAP(); goto fn_exit; }
int MPID_Issend(const void * buf, int count, MPI_Datatype datatype, int rank, int tag, MPIR_Comm * comm, int context_offset, MPIR_Request ** request) { intptr_t data_sz; int dt_contig; MPI_Aint dt_true_lb; MPIR_Datatype* dt_ptr; MPIR_Request * sreq; MPIDI_VC_t * vc=0; #if defined(MPID_USE_SEQUENCE_NUMBERS) MPID_Seqnum_t seqnum; #endif int eager_threshold = -1; int mpi_errno = MPI_SUCCESS; MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_ISSEND); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_ISSEND); MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST, "rank=%d, tag=%d, context=%d", rank, tag, comm->context_id + context_offset)); /* Check to make sure the communicator hasn't already been revoked */ if (comm->revoked && MPIR_AGREE_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_TAG_COLL_BIT) && MPIR_SHRINK_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_TAG_COLL_BIT)) { MPIR_ERR_SETANDJUMP(mpi_errno,MPIX_ERR_REVOKED,"**revoked"); } if (rank == comm->rank && comm->comm_kind != MPIR_COMM_KIND__INTERCOMM) { mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, context_offset, MPIDI_REQUEST_TYPE_SSEND, &sreq); goto fn_exit; } if (rank != MPI_PROC_NULL) { MPIDI_Comm_get_vc_set_active(comm, rank, &vc); /* this needs to come before the sreq is created, since the override */ /* function is responsible for creating its own request */ #ifdef ENABLE_COMM_OVERRIDES if (vc->comm_ops && vc->comm_ops->issend) { mpi_errno = vc->comm_ops->issend( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq); goto fn_exit; } #endif } MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit); MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SSEND); if (rank == MPI_PROC_NULL) { MPIR_Object_set_ref(sreq, 1); MPIR_cc_set(&sreq->cc, 0); goto fn_exit; } MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb); if (data_sz == 0) { mpi_errno = MPIDI_CH3_EagerSyncZero( &sreq, rank, tag, comm, context_offset ); goto fn_exit; } MPIDI_CH3_GET_EAGER_THRESHOLD(&eager_threshold, comm, vc); if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_sync_send_t) <= eager_threshold) { mpi_errno = MPIDI_CH3_EagerSyncNoncontigSend( &sreq, buf, count, datatype, data_sz, dt_contig, dt_true_lb, rank, tag, comm, context_offset ); /* If we're not complete and this is a derived datatype * communication, then add a reference to the datatype */ if (sreq && (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN)) { sreq->dev.datatype_ptr = dt_ptr; MPIR_Datatype_ptr_add_ref(dt_ptr); } } else { /* Note that the sreq was created above */ MPIDI_Request_set_msg_type(sreq, MPIDI_REQUEST_RNDV_MSG); mpi_errno = vc->rndvSend_fn( &sreq, buf, count, datatype, dt_contig, data_sz, dt_true_lb, rank, tag, comm, context_offset ); /* FIXME: fill temporary IOV or pack temporary buffer after send to hide some latency. This requires synchronization because the CTS packet could arrive and be processed before the above iStartmsg completes (depending on the progress engine, threads, etc.). */ if (sreq && dt_ptr != NULL) { sreq->dev.datatype_ptr = dt_ptr; MPIR_Datatype_ptr_add_ref(dt_ptr); } } fn_exit: *request = sreq; MPL_DBG_STMT(MPIDI_CH3_DBG_OTHER,VERBOSE, { if (sreq != NULL) { MPL_DBG_MSG_P(MPIDI_CH3_DBG_OTHER,VERBOSE, "request allocated, handle=0x%08x", sreq->handle); } } )
int MPIDU_Type_indexed(int count, const int *blocklength_array, const void *displacement_array, int dispinbytes, MPI_Datatype oldtype, MPI_Datatype *newtype) { int mpi_errno = MPI_SUCCESS; int is_builtin, old_is_contig; int i; MPI_Aint contig_count; MPI_Aint el_sz, el_ct, old_ct, old_sz; MPI_Aint old_lb, old_ub, old_extent, old_true_lb, old_true_ub; MPI_Aint min_lb = 0, max_ub = 0, eff_disp; MPI_Datatype el_type; MPIDU_Datatype *new_dtp; if (count == 0) return MPIDU_Type_zerolen(newtype); /* sanity check that blocklens are all non-negative */ for (i = 0; i < count; ++i) { DLOOP_Assert(blocklength_array[i] >= 0); } /* allocate new datatype object and handle */ new_dtp = (MPIDU_Datatype *) MPIR_Handle_obj_alloc(&MPIDU_Datatype_mem); /* --BEGIN ERROR HANDLING-- */ if (!new_dtp) { mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, "MPIDU_Type_indexed", __LINE__, MPI_ERR_OTHER, "**nomem", 0); return mpi_errno; } /* --END ERROR HANDLING-- */ /* handle is filled in by MPIR_Handle_obj_alloc() */ MPIR_Object_set_ref(new_dtp, 1); new_dtp->is_permanent = 0; new_dtp->is_committed = 0; new_dtp->attributes = NULL; new_dtp->cache_id = 0; new_dtp->name[0] = 0; new_dtp->contents = NULL; new_dtp->dataloop = NULL; new_dtp->dataloop_size = -1; new_dtp->dataloop_depth = -1; new_dtp->hetero_dloop = NULL; new_dtp->hetero_dloop_size = -1; new_dtp->hetero_dloop_depth = -1; is_builtin = (HANDLE_GET_KIND(oldtype) == HANDLE_KIND_BUILTIN); if (is_builtin) { /* builtins are handled differently than user-defined types because * they have no associated dataloop or datatype structure. */ el_sz = MPIDU_Datatype_get_basic_size(oldtype); old_sz = el_sz; el_ct = 1; el_type = oldtype; old_lb = 0; old_true_lb = 0; old_ub = (MPI_Aint) el_sz; old_true_ub = (MPI_Aint) el_sz; old_extent = (MPI_Aint) el_sz; old_is_contig = 1; new_dtp->has_sticky_ub = 0; new_dtp->has_sticky_lb = 0; MPIR_Assign_trunc(new_dtp->alignsize, el_sz, MPI_Aint); new_dtp->builtin_element_size = el_sz; new_dtp->basic_type = el_type; new_dtp->max_contig_blocks = count; } else { /* user-defined base type (oldtype) */ MPIDU_Datatype *old_dtp; MPIDU_Datatype_get_ptr(oldtype, old_dtp); /* Ensure that "builtin_element_size" fits into an int datatype. */ MPIR_Ensure_Aint_fits_in_int(old_dtp->builtin_element_size); el_sz = old_dtp->builtin_element_size; old_sz = old_dtp->size; el_ct = old_dtp->n_builtin_elements; el_type = old_dtp->basic_type; old_lb = old_dtp->lb; old_true_lb = old_dtp->true_lb; old_ub = old_dtp->ub; old_true_ub = old_dtp->true_ub; old_extent = old_dtp->extent; old_is_contig = old_dtp->is_contig; new_dtp->has_sticky_lb = old_dtp->has_sticky_lb; new_dtp->has_sticky_ub = old_dtp->has_sticky_ub; new_dtp->builtin_element_size = (MPI_Aint) el_sz; new_dtp->basic_type = el_type; new_dtp->max_contig_blocks = 0; for(i=0; i<count; i++) new_dtp->max_contig_blocks += old_dtp->max_contig_blocks * ((MPI_Aint ) blocklength_array[i]); } /* find the first nonzero blocklength element */ i = 0; while (i < count && blocklength_array[i] == 0) i++; if (i == count) { MPIR_Handle_obj_free(&MPIDU_Datatype_mem, new_dtp); return MPIDU_Type_zerolen(newtype); } /* priming for loop */ old_ct = blocklength_array[i]; eff_disp = (dispinbytes) ? ((MPI_Aint *) displacement_array)[i] : (((MPI_Aint) ((int *) displacement_array)[i]) * old_extent); MPIDU_DATATYPE_BLOCK_LB_UB((MPI_Aint) blocklength_array[i], eff_disp, old_lb, old_ub, old_extent, min_lb, max_ub); /* determine min lb, max ub, and count of old types in remaining * nonzero size blocks */ for (i++; i < count; i++) { MPI_Aint tmp_lb, tmp_ub; if (blocklength_array[i] > 0) { old_ct += blocklength_array[i]; /* add more oldtypes */ eff_disp = (dispinbytes) ? ((MPI_Aint *) displacement_array)[i] : (((MPI_Aint) ((int *) displacement_array)[i]) * old_extent); /* calculate ub and lb for this block */ MPIDU_DATATYPE_BLOCK_LB_UB((MPI_Aint)(blocklength_array[i]), eff_disp, old_lb, old_ub, old_extent, tmp_lb, tmp_ub); if (tmp_lb < min_lb) min_lb = tmp_lb; if (tmp_ub > max_ub) max_ub = tmp_ub; } } new_dtp->size = old_ct * old_sz; new_dtp->lb = min_lb; new_dtp->ub = max_ub; new_dtp->true_lb = min_lb + (old_true_lb - old_lb); new_dtp->true_ub = max_ub + (old_true_ub - old_ub); new_dtp->extent = max_ub - min_lb; new_dtp->n_builtin_elements = old_ct * el_ct; /* new type is only contig for N types if it's all one big * block, its size and extent are the same, and the old type * was also contiguous. */ new_dtp->is_contig = 0; if(old_is_contig) { MPI_Aint *blklens = MPL_malloc(count *sizeof(MPI_Aint)); for (i=0; i<count; i++) blklens[i] = blocklength_array[i]; contig_count = MPIDU_Type_indexed_count_contig(count, blklens, displacement_array, dispinbytes, old_extent); new_dtp->max_contig_blocks = contig_count; if( (contig_count == 1) && ((MPI_Aint) new_dtp->size == new_dtp->extent)) { new_dtp->is_contig = 1; } MPL_free(blklens); } *newtype = new_dtp->handle; return mpi_errno; }
int MPID_nem_ptl_improbe(MPIDI_VC_t *vc, int source, int tag, MPIR_Comm *comm, int context_offset, int *flag, MPIR_Request **message, MPI_Status *status) { int mpi_errno = MPI_SUCCESS; MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc); int ret; ptl_process_t id_any; ptl_me_t me; MPIR_Request *req; MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_IMPROBE); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_IMPROBE); id_any.phys.nid = PTL_NID_ANY; id_any.phys.pid = PTL_PID_ANY; /* create a request */ req = MPIR_Request_create(MPIR_REQUEST_KIND__MPROBE); MPID_nem_ptl_init_req(req); MPIR_ERR_CHKANDJUMP1(!req, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPIR_Request_create"); MPIR_Object_set_ref(req, 2); /* 1 ref for progress engine and 1 ref for us */ REQ_PTL(req)->event_handler = handle_mprobe; /* create a dummy ME to use for searching the list */ me.start = NULL; me.length = 0; me.ct_handle = PTL_CT_NONE; me.uid = PTL_UID_ANY; me.options = ( PTL_ME_OP_PUT | PTL_ME_USE_ONCE ); me.min_free = 0; me.match_bits = NPTL_MATCH(tag, comm->context_id + context_offset, source); if (source == MPI_ANY_SOURCE) me.match_id = id_any; else { if (!vc_ptl->id_initialized) { mpi_errno = MPID_nem_ptl_init_id(vc); if (mpi_errno) MPIR_ERR_POP(mpi_errno); } me.match_id = vc_ptl->id; } if (tag == MPI_ANY_TAG) me.ignore_bits = NPTL_MATCH_IGNORE_ANY_TAG; else me.ignore_bits = NPTL_MATCH_IGNORE; /* submit a search request */ ret = PtlMESearch(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_SEARCH_DELETE, req); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmesearch", "**ptlmesearch %s", MPID_nem_ptl_strerror(ret)); DBG_MSG_MESearch("REG", vc ? vc->pg_rank : 0, me, req); /* wait for search request to complete */ do { mpi_errno = MPID_nem_ptl_poll(FALSE); if (mpi_errno) MPIR_ERR_POP(mpi_errno); } while (!MPIR_Request_is_complete(req)); *flag = REQ_PTL(req)->found; if (*flag) { req->comm = comm; MPIR_Comm_add_ref(comm); MPIR_Request_extract_status(req, status); *message = req; } else { MPIR_Request_free(req); } fn_exit: MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_IMPROBE); return mpi_errno; fn_fail: goto fn_exit; }
int MPIDI_PG_Create(int vct_sz, void * pg_id, MPIDI_PG_t ** pg_ptr) { MPIDI_PG_t * pg = NULL, *pgnext; int p; int mpi_errno = MPI_SUCCESS; MPIR_CHKPMEM_DECL(2); MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_PG_CREATE); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_PG_CREATE); MPIR_CHKPMEM_MALLOC(pg,MPIDI_PG_t*,sizeof(MPIDI_PG_t),mpi_errno,"pg"); MPIR_CHKPMEM_MALLOC(pg->vct,MPIDI_VC_t *,sizeof(MPIDI_VC_t)*vct_sz, mpi_errno,"pg->vct"); if (verbose) { fprintf( stdout, "Creating a process group of size %d\n", vct_sz ); fflush(stdout); } pg->handle = 0; /* The reference count indicates the number of vc's that are or have been in use and not disconnected. It starts at zero, except for MPI_COMM_WORLD. */ MPIR_Object_set_ref(pg, 0); pg->size = vct_sz; pg->id = pg_id; pg->finalize = 0; /* Initialize the connection information to null. Use the appropriate MPIDI_PG_InitConnXXX routine to set up these fields */ pg->connData = 0; pg->getConnInfo = 0; pg->connInfoToString = 0; pg->connInfoFromString = 0; pg->freeConnInfo = 0; for (p = 0; p < vct_sz; p++) { /* Initialize device fields in the VC object */ MPIDI_VC_Init(&pg->vct[p], pg, p); } /* We may first need to initialize the channel before calling the channel VC init functions. This routine may be a no-op; look in the ch3_init.c file in each channel */ MPIDI_CH3_PG_Init(pg); /* These are now done in MPIDI_VC_Init */ #if 0 for (p = 0; p < vct_sz; p++) { /* Initialize the channel fields in the VC object */ MPIDI_CH3_VC_Init( &pg->vct[p] ); } #endif /* The first process group is always the world group */ if (!pg_world) { pg_world = pg; } /* Add pg's at the tail so that comm world is always the first pg */ pg->next = 0; if (!MPIDI_PG_list) { MPIDI_PG_list = pg; } else { pgnext = MPIDI_PG_list; while (pgnext->next) { pgnext = pgnext->next; } pgnext->next = pg; } *pg_ptr = pg; fn_exit: MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_PG_CREATE); return mpi_errno; fn_fail: MPIR_CHKPMEM_REAP(); goto fn_exit; }
int MPID_Cancel_send(MPIR_Request * sreq) { MPIDI_VC_t * vc; int proto; int flag; int mpi_errno = MPI_SUCCESS; MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_CANCEL_SEND); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_CANCEL_SEND); MPIR_Assert(sreq->kind == MPIR_REQUEST_KIND__SEND); MPIDI_Request_cancel_pending(sreq, &flag); if (flag) { goto fn_exit; } /* * FIXME: user requests returned by MPI_Ibsend() have a NULL comm pointer * and no pointer to the underlying communication * request. For now, we simply fail to cancel the request. In the future, * we should add a new request kind to indicate that * the request is a BSEND. Then we can properly cancel the request, much * in the way we do persistent requests. */ if (sreq->comm == NULL) { goto fn_exit; } MPIDI_Comm_get_vc_set_active(sreq->comm, sreq->dev.match.parts.rank, &vc); proto = MPIDI_Request_get_msg_type(sreq); if (proto == MPIDI_REQUEST_SELF_MSG) { MPIR_Request * rreq; MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE, "attempting to cancel message sent to self"); MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX); rreq = MPIDI_CH3U_Recvq_FDU(sreq->handle, &sreq->dev.match); MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX); if (rreq) { MPIR_Assert(rreq->dev.partner_request == sreq); MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST, "send-to-self cancellation successful, sreq=0x%08x, rreq=0x%08x", sreq->handle, rreq->handle)); /* Pull the message out of the unexpected queue since it's * being cancelled. The below request release drops one * reference. We explicitly drop a second reference, * because the receive request will never be visible to * the user. */ MPIR_Request_free(rreq); MPIR_Request_free(rreq); MPIR_STATUS_SET_CANCEL_BIT(sreq->status, TRUE); mpi_errno = MPID_Request_complete(sreq); if (mpi_errno != MPI_SUCCESS) { MPIR_ERR_POP(mpi_errno); } } else { MPIR_STATUS_SET_CANCEL_BIT(sreq->status, FALSE); MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST, "send-to-self cancellation failed, sreq=0x%08x, rreq=0x%08x", sreq->handle, rreq->handle)); } goto fn_exit; } /* If the message went over a netmod and it provides a cancel_send function, call it here. */ #ifdef ENABLE_COMM_OVERRIDES if (vc->comm_ops && vc->comm_ops->cancel_send) { mpi_errno = vc->comm_ops->cancel_send(vc, sreq); goto fn_exit; } #endif /* Check to see if the send is still in the send queue. If so, remove it, mark the request and cancelled and complete, and release the device's reference to the request object. */ { int cancelled; if (proto == MPIDI_REQUEST_RNDV_MSG) { MPIR_Request * rts_sreq; /* The cancellation of the RTS request needs to be atomic through the destruction of the RTS request to avoid conflict with release of the RTS request if the CTS is received (see handling of a rendezvous CTS packet in MPIDI_CH3U_Handle_recv_pkt()). MPID_Request_fetch_and_clear_rts_sreq() is used to gurantee that atomicity. */ MPIDI_Request_fetch_and_clear_rts_sreq(sreq, &rts_sreq); if (rts_sreq != NULL) { cancelled = FALSE; /* since we attempted to cancel a RTS request, then we are responsible for releasing that request */ MPIR_Request_free(rts_sreq); /* --BEGIN ERROR HANDLING-- */ if (mpi_errno != MPI_SUCCESS) { mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, __func__, __LINE__, MPI_ERR_OTHER, "**ch3|cancelrndv", 0); goto fn_exit; } /* --END ERROR HANDLING-- */ if (cancelled) { MPIR_STATUS_SET_CANCEL_BIT(sreq->status, TRUE); /* no other thread should be waiting on sreq, so it is safe to reset ref_count and cc */ MPIR_cc_set(&sreq->cc, 0); /* FIXME should be a decr and assert, not a set */ MPIR_Object_set_ref(sreq, 1); goto fn_exit; } } } else { cancelled = FALSE; if (cancelled) { MPIR_STATUS_SET_CANCEL_BIT(sreq->status, TRUE); /* no other thread should be waiting on sreq, so it is safe to reset ref_count and cc */ MPIR_cc_set(&sreq->cc, 0); /* FIXME should be a decr and assert, not a set */ MPIR_Object_set_ref(sreq, 1); goto fn_exit; } } } /* Part or all of the message has already been sent, so we need to send a cancellation request to the receiver in an attempt to catch the message before it is matched. */ { int was_incomplete; MPIDI_CH3_Pkt_t upkt; MPIDI_CH3_Pkt_cancel_send_req_t * const csr_pkt = &upkt.cancel_send_req; MPIR_Request * csr_sreq; MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST, "sending cancel request to %d for 0x%08x", sreq->dev.match.parts.rank, sreq->handle)); /* The completion counter and reference count are incremented to keep the request around long enough to receive a response regardless of what the user does (free the request before waiting, etc.). */ MPIDI_CH3U_Request_increment_cc(sreq, &was_incomplete); if (!was_incomplete) { /* The reference count is incremented only if the request was complete before the increment. */ MPIR_Request_add_ref( sreq ); } MPIDI_Pkt_init(csr_pkt, MPIDI_CH3_PKT_CANCEL_SEND_REQ); csr_pkt->match.parts.rank = sreq->comm->rank; csr_pkt->match.parts.tag = sreq->dev.match.parts.tag; csr_pkt->match.parts.context_id = sreq->dev.match.parts.context_id; csr_pkt->sender_req_id = sreq->handle; MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex); mpi_errno = MPIDI_CH3_iStartMsg(vc, csr_pkt, sizeof(*csr_pkt), &csr_sreq); MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex); if (mpi_errno != MPI_SUCCESS) { MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|cancelreq"); } if (csr_sreq != NULL) { MPIR_Request_free(csr_sreq); } } /* FIXME: if send cancellation packets are allowed to arrive out-of-order with respect to send packets, then we need to timestamp send and cancel packets to insure that a cancellation request does not bypass the send packet to be cancelled and erroneously cancel a previously sent message with the same request handle. */ /* FIXME: A timestamp is more than is necessary; a message sequence number should be adequate. */ fn_fail: fn_exit: MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_CANCEL_SEND); return mpi_errno; }
/*@ MPIDU_Type_create_pairtype - create necessary data structures for certain pair types (all but MPI_2INT etc., which never have the size != extent issue). This function is different from the other MPIDU_Type_create functions in that it fills in an already- allocated MPIDU_Datatype. This is important for allowing configure-time determination of the MPI type values (these types are stored in the "direct" space, for those familiar with how MPICH deals with type allocation). Input Parameters: + type - name of pair type (e.g. MPI_FLOAT_INT) - new_dtp - pointer to previously allocated datatype structure, which is filled in by this function Return Value: MPI_SUCCESS on success, MPI errno on failure. Note: Section 4.9.3 (MINLOC and MAXLOC) of the MPI-1 standard specifies that these types should be built as if by the following (e.g. MPI_FLOAT_INT): type[0] = MPI_FLOAT type[1] = MPI_INT disp[0] = 0 disp[1] = sizeof(float) <---- questionable displacement! block[0] = 1 block[1] = 1 MPI_TYPE_STRUCT(2, block, disp, type, MPI_FLOAT_INT) However, this is a relatively naive approach that does not take struct padding into account when setting the displacement of the second element. Thus in our implementation we have chosen to instead use the actual difference in starting locations of the two types in an actual struct. @*/ int MPIDU_Type_create_pairtype(MPI_Datatype type, MPIDU_Datatype *new_dtp) { int err, mpi_errno = MPI_SUCCESS; int type_size, alignsize; MPI_Aint type_extent, true_ub, el_size; /* handle is filled in by MPIR_Handle_obj_alloc() */ MPIR_Object_set_ref(new_dtp, 1); new_dtp->is_permanent = 1; new_dtp->is_committed = 1; /* predefined types are pre-committed */ new_dtp->attributes = NULL; new_dtp->cache_id = 0; new_dtp->name[0] = 0; new_dtp->contents = NULL; new_dtp->dataloop = NULL; new_dtp->dataloop_size = -1; new_dtp->dataloop_depth = -1; new_dtp->hetero_dloop = NULL; new_dtp->hetero_dloop_size = -1; new_dtp->hetero_dloop_depth = -1; switch(type) { case MPI_FLOAT_INT: PAIRTYPE_SIZE_EXTENT(MPI_FLOAT, float, MPI_INT, int, type_size, type_extent, el_size, true_ub, alignsize); break; case MPI_DOUBLE_INT: PAIRTYPE_SIZE_EXTENT(MPI_DOUBLE, double, MPI_INT, int, type_size, type_extent, el_size, true_ub, alignsize); break; case MPI_LONG_INT: PAIRTYPE_SIZE_EXTENT(MPI_LONG, long, MPI_INT, int, type_size, type_extent, el_size, true_ub, alignsize); break; case MPI_SHORT_INT: PAIRTYPE_SIZE_EXTENT(MPI_SHORT, short, MPI_INT, int, type_size, type_extent, el_size, true_ub, alignsize); break; case MPI_LONG_DOUBLE_INT: PAIRTYPE_SIZE_EXTENT(MPI_LONG_DOUBLE, long double, MPI_INT, int, type_size, type_extent, el_size, true_ub, alignsize); break; default: /* --BEGIN ERROR HANDLING-- */ mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, "MPIDU_Type_create_pairtype", __LINE__, MPI_ERR_OTHER, "**dtype", 0); return mpi_errno; /* --END ERROR HANDLING-- */ } new_dtp->n_builtin_elements = 2; new_dtp->builtin_element_size = el_size; new_dtp->basic_type = type; new_dtp->has_sticky_lb = 0; new_dtp->true_lb = 0; new_dtp->lb = 0; new_dtp->has_sticky_ub = 0; new_dtp->true_ub = true_ub; new_dtp->size = type_size; new_dtp->ub = type_extent; /* possible padding */ new_dtp->extent = type_extent; new_dtp->alignsize = alignsize; /* place maximum on alignment based on padding rules */ /* There are some really wierd rules for structure alignment; these capture the ones of which we are aware. */ switch(type) { case MPI_SHORT_INT: case MPI_LONG_INT: #ifdef HAVE_MAX_INTEGER_ALIGNMENT new_dtp->alignsize = MPL_MIN(new_dtp->alignsize, HAVE_MAX_INTEGER_ALIGNMENT); #endif break; case MPI_FLOAT_INT: #ifdef HAVE_MAX_FP_ALIGNMENT new_dtp->alignsize = MPL_MIN(new_dtp->alignsize, HAVE_MAX_FP_ALIGNMENT); #endif break; case MPI_DOUBLE_INT: #ifdef HAVE_MAX_DOUBLE_FP_ALIGNMENT new_dtp->alignsize = MPL_MIN(new_dtp->alignsize, HAVE_MAX_DOUBLE_FP_ALIGNMENT); #elif defined(HAVE_MAX_FP_ALIGNMENT) new_dtp->alignsize = MPL_MIN(new_dtp->alignsize, HAVE_MAX_FP_ALIGNMENT); #endif break; case MPI_LONG_DOUBLE_INT: #ifdef HAVE_MAX_LONG_DOUBLE_FP_ALIGNMENT new_dtp->alignsize = MPL_MIN(new_dtp->alignsize, HAVE_MAX_LONG_DOUBLE_FP_ALIGNMENT); #elif defined(HAVE_MAX_FP_ALIGNMENT) new_dtp->alignsize = MPL_MIN(new_dtp->alignsize, HAVE_MAX_FP_ALIGNMENT); #endif break; } new_dtp->is_contig = (((MPI_Aint) type_size) == type_extent) ? 1 : 0; new_dtp->max_contig_blocks = (((MPI_Aint) type_size) == type_extent) ? 1 : 2; /* fill in dataloops -- only case where we precreate dataloops * * this is necessary because these types aren't committed by the * user, which is the other place where we create dataloops. so * if the user uses one of these w/out building some more complex * type and then committing it, then the dataloop will be missing. */ #ifdef MPID_NEEDS_DLOOP_ALL_BYTES /* If MPID implementation needs use to reduce everything to a byte stream, do that. */ err = MPIDU_Dataloop_create_pairtype(type, &(new_dtp->dataloop), &(new_dtp->dataloop_size), &(new_dtp->dataloop_depth), MPIDU_DATALOOP_ALL_BYTES); #else err = MPIDU_Dataloop_create_pairtype(type, &(new_dtp->dataloop), &(new_dtp->dataloop_size), &(new_dtp->dataloop_depth), MPIDU_DATALOOP_HOMOGENEOUS); #endif if (!err) { err = MPIDU_Dataloop_create_pairtype(type, &(new_dtp->hetero_dloop), &(new_dtp->hetero_dloop_size), &(new_dtp->hetero_dloop_depth), MPIDU_DATALOOP_HETEROGENEOUS); } #ifdef MPID_Type_commit_hook if (!err) { err = MPID_Type_commit_hook(new_dtp); } #endif /* MPID_Type_commit_hook */ /* --BEGIN ERROR HANDLING-- */ if (err) { mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, "MPIDU_Dataloop_create_pairtype", __LINE__, MPI_ERR_OTHER, "**nomem", 0); return mpi_errno; } /* --END ERROR HANDLING-- */ return mpi_errno; }
int MPID_nem_send_iov(MPIDI_VC_t *vc, MPIR_Request **sreq_ptr, MPL_IOV *iov, int n_iov) { int mpi_errno = MPI_SUCCESS; intptr_t data_sz; int i; int iov_data_copied; MPIR_Request *sreq = *sreq_ptr; MPL_IOV *data_iov = &iov[1]; /* iov of just the data, not the header */ int data_n_iov = n_iov - 1; MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_SEND_IOV); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_SEND_IOV); if (*sreq_ptr == NULL) { /* create a request */ sreq = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED); MPIR_Assert(sreq != NULL); MPIR_Object_set_ref(sreq, 2); sreq->kind = MPIR_REQUEST_KIND__SEND; sreq->dev.OnDataAvail = 0; } data_sz = 0; for (i = 0; i < data_n_iov; ++i) data_sz += data_iov[i].MPL_IOV_LEN; if (!MPIDI_Request_get_srbuf_flag(sreq)) { MPIDI_CH3U_SRBuf_alloc(sreq, data_sz); /* --BEGIN ERROR HANDLING-- */ if (sreq->dev.tmpbuf_sz == 0) { MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,TYPICAL,"SRBuf allocation failure"); mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0); sreq->status.MPI_ERROR = mpi_errno; goto fn_exit; } /* --END ERROR HANDLING-- */ } MPIR_Assert(sreq->dev.tmpbuf_sz >= data_sz); iov_data_copied = 0; for (i = 0; i < data_n_iov; ++i) { MPIR_Memcpy((char*) sreq->dev.tmpbuf + iov_data_copied, data_iov[i].MPL_IOV_BUF, data_iov[i].MPL_IOV_LEN); iov_data_copied += data_iov[i].MPL_IOV_LEN; } mpi_errno = vc->ch.iSendContig(vc, sreq, iov[0].MPL_IOV_BUF, iov[0].MPL_IOV_LEN, sreq->dev.tmpbuf, data_sz); if (mpi_errno) MPIR_ERR_POP(mpi_errno); *sreq_ptr = sreq; fn_exit: MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_SEND_IOV); return mpi_errno; fn_fail: goto fn_exit; }
/*@ MPIDI_PG_Finalize - Finalize the process groups, including freeing all process group structures @*/ int MPIDI_PG_Finalize(void) { int mpi_errno = MPI_SUCCESS; MPIDI_PG_t *pg, *pgNext; MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_PG_FINALIZE); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_PG_FINALIZE); /* Print the state of the process groups */ if (verbose) { MPIU_PG_Printall( stdout ); } /* FIXME - straighten out the use of PMI_Finalize - no use after PG_Finalize */ if (pg_world->connData) { #ifdef USE_PMI2_API mpi_errno = PMI2_Finalize(); if (mpi_errno) MPIR_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**ch3|pmi_finalize"); #else int rc; rc = PMI_Finalize(); if (rc) { MPIR_ERR_SET1(mpi_errno,MPI_ERR_OTHER, "**ch3|pmi_finalize", "**ch3|pmi_finalize %d", rc); } #endif } /* Free the storage associated with the process groups */ pg = MPIDI_PG_list; while (pg) { pgNext = pg->next; /* In finalize, we free all process group information, even if the ref count is not zero. This can happen if the user fails to use MPI_Comm_disconnect on communicators that were created with the dynamic process routines.*/ /* XXX DJG FIXME-MT should we be checking this? */ if (MPIR_Object_get_ref(pg) == 0 || 1) { if (pg == MPIDI_Process.my_pg) MPIDI_Process.my_pg = NULL; MPIR_Object_set_ref(pg, 0); /* satisfy assertions in PG_Destroy */ MPIDI_PG_Destroy( pg ); } pg = pgNext; } /* If COMM_WORLD is still around (it normally should be), try to free it here. The reason that we need to free it at this point is that comm_world (and comm_self) still exist, and hence the usual process to free the related VC structures will not be invoked. */ if (MPIDI_Process.my_pg) { MPIDI_PG_Destroy(MPIDI_Process.my_pg); } MPIDI_Process.my_pg = NULL; /* ifdefing out this check because the list will not be NULL in Ch3_finalize because one additional reference is retained in MPIDI_Process.my_pg. That reference is released only after ch3_finalize returns. If I release it before ch3_finalize, the ssm channel crashes. */ #if 0 if (MPIDI_PG_list != NULL) { /* --BEGIN ERROR HANDLING-- */ mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_INTERN, "**dev|pg_finalize|list_not_empty", NULL); /* --END ERROR HANDLING-- */ } #endif MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_PG_FINALIZE); return mpi_errno; }
/*@ MPIR_Type_contiguous - create a contiguous datatype Input Parameters: + count - number of elements in the contiguous block - oldtype - type (using handle) of datatype on which vector is based Output Parameters: . newtype - handle of new contiguous datatype Return Value: MPI_SUCCESS on success, MPI error code on failure. @*/ int MPIR_Type_contiguous(int count, MPI_Datatype oldtype, MPI_Datatype *newtype) { int mpi_errno = MPI_SUCCESS; int is_builtin; MPI_Aint el_sz; MPI_Datatype el_type; MPIR_Datatype *new_dtp; if (count == 0) return MPII_Type_zerolen(newtype); /* allocate new datatype object and handle */ new_dtp = (MPIR_Datatype *) MPIR_Handle_obj_alloc(&MPIR_Datatype_mem); /* --BEGIN ERROR HANDLING-- */ if (!new_dtp) { mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, "MPIR_Type_contiguous", __LINE__, MPI_ERR_OTHER, "**nomem", 0); return mpi_errno; } /* --END ERROR HANDLING-- */ /* handle is filled in by MPIR_Handle_obj_alloc() */ MPIR_Object_set_ref(new_dtp, 1); new_dtp->is_permanent = 0; new_dtp->is_committed = 0; new_dtp->attributes = NULL; new_dtp->cache_id = 0; new_dtp->name[0] = 0; new_dtp->contents = NULL; new_dtp->dataloop = NULL; new_dtp->dataloop_size = -1; new_dtp->dataloop_depth = -1; new_dtp->hetero_dloop = NULL; new_dtp->hetero_dloop_size = -1; new_dtp->hetero_dloop_depth = -1; is_builtin = (HANDLE_GET_KIND(oldtype) == HANDLE_KIND_BUILTIN); if (is_builtin) { el_sz = MPIR_Datatype_get_basic_size(oldtype); el_type = oldtype; new_dtp->size = count * el_sz; new_dtp->has_sticky_ub = 0; new_dtp->has_sticky_lb = 0; new_dtp->true_lb = 0; new_dtp->lb = 0; new_dtp->true_ub = count * el_sz; new_dtp->ub = new_dtp->true_ub; new_dtp->extent = new_dtp->ub - new_dtp->lb; new_dtp->alignsize = el_sz; new_dtp->n_builtin_elements = count; new_dtp->builtin_element_size = el_sz; new_dtp->basic_type = el_type; new_dtp->is_contig = 1; new_dtp->max_contig_blocks = 1; } else { /* user-defined base type (oldtype) */ MPIR_Datatype *old_dtp; MPIR_Datatype_get_ptr(oldtype, old_dtp); el_sz = old_dtp->builtin_element_size; el_type = old_dtp->basic_type; new_dtp->size = count * old_dtp->size; new_dtp->has_sticky_ub = old_dtp->has_sticky_ub; new_dtp->has_sticky_lb = old_dtp->has_sticky_lb; MPII_DATATYPE_CONTIG_LB_UB((MPI_Aint) count, old_dtp->lb, old_dtp->ub, old_dtp->extent, new_dtp->lb, new_dtp->ub); /* easiest to calc true lb/ub relative to lb/ub; doesn't matter * if there are sticky lb/ubs or not when doing this. */ new_dtp->true_lb = new_dtp->lb + (old_dtp->true_lb - old_dtp->lb); new_dtp->true_ub = new_dtp->ub + (old_dtp->true_ub - old_dtp->ub); new_dtp->extent = new_dtp->ub - new_dtp->lb; new_dtp->alignsize = old_dtp->alignsize; new_dtp->n_builtin_elements = count * old_dtp->n_builtin_elements; new_dtp->builtin_element_size = old_dtp->builtin_element_size; new_dtp->basic_type = el_type; MPIR_Datatype_is_contig(oldtype, &new_dtp->is_contig); if(new_dtp->is_contig) new_dtp->max_contig_blocks = 1; else new_dtp->max_contig_blocks = count * old_dtp->max_contig_blocks; } *newtype = new_dtp->handle; MPL_DBG_MSG_P(MPIR_DBG_DATATYPE,VERBOSE,"contig type %x created.", new_dtp->handle); return mpi_errno; }
int MPIR_Init_thread(int *argc, char ***argv, int required, int *provided) { int mpi_errno = MPI_SUCCESS; int has_args; int has_env; int thread_provided = 0; int exit_init_cs_on_failure = 0; MPIR_Info *info_ptr; #if defined(MPICH_IS_THREADED) bool cs_initialized = false; #endif /* The threading library must be initialized at the very beginning because * it manages all synchronization objects (e.g., mutexes) that will be * initialized later */ { int thread_err; MPL_thread_init(&thread_err); if (thread_err) goto fn_fail; } #ifdef HAVE_HWLOC MPIR_Process.bindset = hwloc_bitmap_alloc(); hwloc_topology_init(&MPIR_Process.hwloc_topology); MPIR_Process.bindset_is_valid = 0; hwloc_topology_set_io_types_filter(MPIR_Process.hwloc_topology, HWLOC_TYPE_FILTER_KEEP_ALL); if (!hwloc_topology_load(MPIR_Process.hwloc_topology)) { MPIR_Process.bindset_is_valid = !hwloc_get_proc_cpubind(MPIR_Process.hwloc_topology, getpid(), MPIR_Process.bindset, HWLOC_CPUBIND_PROCESS); } #endif #ifdef HAVE_NETLOC MPIR_Process.network_attr.u.tree.node_levels = NULL; MPIR_Process.network_attr.network_endpoint = NULL; MPIR_Process.netloc_topology = NULL; MPIR_Process.network_attr.type = MPIR_NETLOC_NETWORK_TYPE__INVALID; if (strlen(MPIR_CVAR_NETLOC_NODE_FILE)) { mpi_errno = netloc_parse_topology(&MPIR_Process.netloc_topology, MPIR_CVAR_NETLOC_NODE_FILE); if (mpi_errno == NETLOC_SUCCESS) { MPIR_Netloc_parse_topology(MPIR_Process.netloc_topology, &MPIR_Process.network_attr); } } #endif /* For any code in the device that wants to check for runtime * decisions on the value of isThreaded, set a provisional * value here. We could let the MPID_Init routine override this */ #if defined MPICH_IS_THREADED MPIR_ThreadInfo.isThreaded = required == MPI_THREAD_MULTIPLE; #endif /* MPICH_IS_THREADED */ #if defined(MPICH_IS_THREADED) mpi_errno = thread_cs_init(); cs_initialized = true; if (mpi_errno) MPIR_ERR_POP(mpi_errno); #endif /* FIXME: Move to os-dependent interface? */ #ifdef HAVE_WINDOWS_H /* prevent the process from bringing up an error message window if mpich * asserts */ _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE); _CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR); _CrtSetReportHook2(_CRT_RPTHOOK_INSTALL, assert_hook); #ifdef _WIN64 { /* FIXME: (Windows) This severly degrades performance but fixes alignment * issues with the datatype code. */ /* Prevent misaligned faults on Win64 machines */ UINT mode, old_mode; old_mode = SetErrorMode(SEM_NOALIGNMENTFAULTEXCEPT); mode = old_mode | SEM_NOALIGNMENTFAULTEXCEPT; SetErrorMode(mode); } #endif #endif /* We need this inorder to implement IS_THREAD_MAIN */ #if (MPICH_THREAD_LEVEL >= MPI_THREAD_SERIALIZED) && defined(MPICH_IS_THREADED) { MPID_Thread_self(&MPIR_ThreadInfo.master_thread); } #endif #ifdef HAVE_ERROR_CHECKING /* Because the PARAM system has not been initialized, temporarily * uncondtionally enable error checks. Once the PARAM system is * initialized, this may be reset */ MPIR_Process.do_error_checks = 1; #else MPIR_Process.do_error_checks = 0; #endif /* Initialize necessary subsystems and setup the predefined attribute * values. Subsystems may change these values. */ MPIR_Process.attrs.appnum = -1; MPIR_Process.attrs.host = MPI_PROC_NULL; MPIR_Process.attrs.io = MPI_PROC_NULL; MPIR_Process.attrs.lastusedcode = MPI_ERR_LASTCODE; MPIR_Process.attrs.universe = MPIR_UNIVERSE_SIZE_NOT_SET; MPIR_Process.attrs.wtime_is_global = 0; /* Set the functions used to duplicate attributes. These are * when the first corresponding keyval is created */ MPIR_Process.attr_dup = 0; MPIR_Process.attr_free = 0; #ifdef HAVE_CXX_BINDING /* Set the functions used to call functions in the C++ binding * for reductions and attribute operations. These are null * until a C++ operation is defined. This allows the C code * that implements these operations to not invoke a C++ code * directly, which may force the inclusion of symbols known only * to the C++ compiler (e.g., under more non-GNU compilers, including * Solaris and IRIX). */ MPIR_Process.cxx_call_op_fn = 0; #endif #ifdef HAVE_F08_BINDING MPIR_C_MPI_UNWEIGHTED = MPI_UNWEIGHTED; MPIR_C_MPI_WEIGHTS_EMPTY = MPI_WEIGHTS_EMPTY; #endif /* This allows the device to select an alternative function for * dimsCreate */ MPIR_Process.dimsCreate = 0; /* "Allocate" from the reserved space for builtin communicators and * (partially) initialize predefined communicators. comm_parent is * intially NULL and will be allocated by the device if the process group * was started using one of the MPI_Comm_spawn functions. */ MPIR_Process.comm_world = MPIR_Comm_builtin + 0; MPII_Comm_init(MPIR_Process.comm_world); MPIR_Process.comm_world->handle = MPI_COMM_WORLD; MPIR_Process.comm_world->context_id = 0 << MPIR_CONTEXT_PREFIX_SHIFT; MPIR_Process.comm_world->recvcontext_id = 0 << MPIR_CONTEXT_PREFIX_SHIFT; MPIR_Process.comm_world->comm_kind = MPIR_COMM_KIND__INTRACOMM; /* This initialization of the comm name could be done only when * comm_get_name is called */ MPL_strncpy(MPIR_Process.comm_world->name, "MPI_COMM_WORLD", MPI_MAX_OBJECT_NAME); MPIR_Process.comm_self = MPIR_Comm_builtin + 1; MPII_Comm_init(MPIR_Process.comm_self); MPIR_Process.comm_self->handle = MPI_COMM_SELF; MPIR_Process.comm_self->context_id = 1 << MPIR_CONTEXT_PREFIX_SHIFT; MPIR_Process.comm_self->recvcontext_id = 1 << MPIR_CONTEXT_PREFIX_SHIFT; MPIR_Process.comm_self->comm_kind = MPIR_COMM_KIND__INTRACOMM; MPL_strncpy(MPIR_Process.comm_self->name, "MPI_COMM_SELF", MPI_MAX_OBJECT_NAME); #ifdef MPID_NEEDS_ICOMM_WORLD MPIR_Process.icomm_world = MPIR_Comm_builtin + 2; MPII_Comm_init(MPIR_Process.icomm_world); MPIR_Process.icomm_world->handle = MPIR_ICOMM_WORLD; MPIR_Process.icomm_world->context_id = 2 << MPIR_CONTEXT_PREFIX_SHIFT; MPIR_Process.icomm_world->recvcontext_id = 2 << MPIR_CONTEXT_PREFIX_SHIFT; MPIR_Process.icomm_world->comm_kind = MPIR_COMM_KIND__INTRACOMM; MPL_strncpy(MPIR_Process.icomm_world->name, "MPI_ICOMM_WORLD", MPI_MAX_OBJECT_NAME); /* Note that these communicators are not ready for use - MPID_Init * will setup self and world, and icomm_world if it desires it. */ #endif MPIR_Process.comm_parent = NULL; /* Setup the initial communicator list in case we have * enabled the debugger message-queue interface */ MPII_COMML_REMEMBER(MPIR_Process.comm_world); MPII_COMML_REMEMBER(MPIR_Process.comm_self); /* MPIU_Timer_pre_init(); */ /* Wait for debugger to attach if requested. */ if (MPIR_CVAR_DEBUG_HOLD) { volatile int hold = 1; while (hold) #ifdef HAVE_USLEEP usleep(100); #endif ; } #if defined(HAVE_ERROR_CHECKING) && (HAVE_ERROR_CHECKING == MPID_ERROR_LEVEL_RUNTIME) MPIR_Process.do_error_checks = MPIR_CVAR_ERROR_CHECKING; #endif /* define MPI as initialized so that we can use MPI functions within * MPID_Init if necessary */ OPA_store_int(&MPIR_Process.mpich_state, MPICH_MPI_STATE__IN_INIT); /* We can't acquire any critical sections until this point. Any * earlier the basic data structures haven't been initialized */ MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX); exit_init_cs_on_failure = 1; /* create MPI_INFO_NULL object */ /* FIXME: Currently this info object is empty, we need to add data to this * as defined by the standard. */ info_ptr = MPIR_Info_builtin + 1; info_ptr->handle = MPI_INFO_ENV; MPIR_Object_set_ref(info_ptr, 1); info_ptr->next = NULL; info_ptr->key = NULL; info_ptr->value = NULL; #ifdef USE_MEMORY_TRACING MPL_trinit(); #endif /* Set the number of tag bits. The device may override this value. */ MPIR_Process.tag_bits = MPIR_TAG_BITS_DEFAULT; /* Create complete request to return in the event of immediately complete * operations. Use a SEND request to cover all possible use-cases. */ MPIR_Process.lw_req = MPIR_Request_create(MPIR_REQUEST_KIND__SEND); MPIR_ERR_CHKANDSTMT(MPIR_Process.lw_req == NULL, mpi_errno, MPIX_ERR_NOREQ, goto fn_fail, "**nomemreq"); MPIR_cc_set(&MPIR_Process.lw_req->cc, 0); mpi_errno = MPID_Init(argc, argv, required, &thread_provided, &has_args, &has_env); if (mpi_errno) MPIR_ERR_POP(mpi_errno); /* Initialize collectives infrastructure */ mpi_errno = MPII_Coll_init(); if (mpi_errno) MPIR_ERR_POP(mpi_errno); /* Set tag_ub as function of tag_bits set by the device */ MPIR_Process.attrs.tag_ub = MPIR_TAG_USABLE_BITS; /* Assert: tag_ub should be a power of 2 minus 1 */ MPIR_Assert(((unsigned) MPIR_Process. attrs.tag_ub & ((unsigned) MPIR_Process.attrs.tag_ub + 1)) == 0); /* Assert: tag_ub is at least the minimum asked for in the MPI spec */ MPIR_Assert(MPIR_Process.attrs.tag_ub >= 32767); /* Capture the level of thread support provided */ MPIR_ThreadInfo.thread_provided = thread_provided; if (provided) *provided = thread_provided; #if defined MPICH_IS_THREADED MPIR_ThreadInfo.isThreaded = (thread_provided == MPI_THREAD_MULTIPLE); #endif /* MPICH_IS_THREADED */ /* FIXME: Define these in the interface. Does Timer init belong here? */ MPII_Timer_init(MPIR_Process.comm_world->rank, MPIR_Process.comm_world->local_size); #ifdef USE_MEMORY_TRACING #ifdef MPICH_IS_THREADED MPL_trconfig(MPIR_Process.comm_world->rank, MPIR_ThreadInfo.isThreaded); #else MPL_trconfig(MPIR_Process.comm_world->rank, 0); #endif /* Indicate that we are near the end of the init step; memory * allocated already will have an id of zero; this helps * separate memory leaks in the initialization code from * leaks in the "active" code */ #endif #ifdef MPL_USE_DBG_LOGGING /* FIXME: This is a hack to handle the common case of two worlds. * If the parent comm is not NULL, we always give the world number * as "1" (false). */ #ifdef MPICH_IS_THREADED MPL_dbg_init(argc, argv, has_args, has_env, MPIR_Process.comm_parent != NULL, MPIR_Process.comm_world->rank, MPIR_ThreadInfo.isThreaded); #else MPL_dbg_init(argc, argv, has_args, has_env, MPIR_Process.comm_parent != NULL, MPIR_Process.comm_world->rank, 0); #endif MPIR_DBG_INIT = MPL_dbg_class_alloc("INIT", "init"); MPIR_DBG_PT2PT = MPL_dbg_class_alloc("PT2PT", "pt2pt"); MPIR_DBG_THREAD = MPL_dbg_class_alloc("THREAD", "thread"); MPIR_DBG_DATATYPE = MPL_dbg_class_alloc("DATATYPE", "datatype"); MPIR_DBG_HANDLE = MPL_dbg_class_alloc("HANDLE", "handle"); MPIR_DBG_COMM = MPL_dbg_class_alloc("COMM", "comm"); MPIR_DBG_BSEND = MPL_dbg_class_alloc("BSEND", "bsend"); MPIR_DBG_ERRHAND = MPL_dbg_class_alloc("ERRHAND", "errhand"); MPIR_DBG_OTHER = MPL_dbg_class_alloc("OTHER", "other"); MPIR_DBG_REQUEST = MPL_dbg_class_alloc("REQUEST", "request"); MPIR_DBG_COLL = MPL_dbg_class_alloc("COLL", "coll"); MPIR_DBG_ASSERT = MPL_dbg_class_alloc("ASSERT", "assert"); MPIR_DBG_STRING = MPL_dbg_class_alloc("STRING", "string"); #endif /* Initialize the C versions of the Fortran link-time constants. * * We now initialize the Fortran symbols from within the Fortran * interface in the routine that first needs the symbols. * This fixes a problem with symbols added by a Fortran compiler that * are not part of the C runtime environment (the Portland group * compilers would do this) */ #if defined(HAVE_FORTRAN_BINDING) && defined(HAVE_MPI_F_INIT_WORKS_WITH_C) mpirinitf_(); #endif /* FIXME: Does this need to come before the call to MPID_InitComplete? * For some debugger support, MPII_Wait_for_debugger may want to use * MPI communication routines to collect information for the debugger */ #ifdef HAVE_DEBUGGER_SUPPORT MPII_Wait_for_debugger(); #endif /* Let the device know that the rest of the init process is completed */ if (mpi_errno == MPI_SUCCESS) mpi_errno = MPID_InitCompleted(); MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX); /* Make fields of MPIR_Process global visible and set mpich_state * atomically so that MPI_Initialized() etc. are thread safe */ OPA_write_barrier(); OPA_store_int(&MPIR_Process.mpich_state, MPICH_MPI_STATE__POST_INIT); return mpi_errno; fn_fail: /* --BEGIN ERROR HANDLING-- */ /* signal to error handling routines that core services are unavailable */ OPA_store_int(&MPIR_Process.mpich_state, MPICH_MPI_STATE__PRE_INIT); if (exit_init_cs_on_failure) { MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX); } #if defined(MPICH_IS_THREADED) if (cs_initialized) { MPIR_Thread_CS_Finalize(); } #endif return mpi_errno; /* --END ERROR HANDLING-- */ }
/*@ MPIR_Type_blockindexed - create a block indexed datatype Input Parameters: + count - number of blocks in type . blocklength - number of elements in each block . displacement_array - offsets of blocks from start of type (see next parameter for units) . dispinbytes - if nonzero, then displacements are in bytes, otherwise they in terms of extent of oldtype - oldtype - type (using handle) of datatype on which new type is based Output Parameters: . newtype - handle of new block indexed datatype Return Value: MPI_SUCCESS on success, MPI error on failure. @*/ int MPIR_Type_blockindexed(int count, int blocklength, const void *displacement_array, int dispinbytes, MPI_Datatype oldtype, MPI_Datatype * newtype) { int mpi_errno = MPI_SUCCESS, i; int is_builtin, old_is_contig; MPI_Aint contig_count; MPI_Aint el_sz; MPI_Datatype el_type; MPI_Aint old_lb, old_ub, old_extent, old_true_lb, old_true_ub; MPI_Aint min_lb = 0, max_ub = 0, eff_disp; MPIR_Datatype *new_dtp; if (count == 0) return MPII_Type_zerolen(newtype); /* allocate new datatype object and handle */ new_dtp = (MPIR_Datatype *) MPIR_Handle_obj_alloc(&MPIR_Datatype_mem); /* --BEGIN ERROR HANDLING-- */ if (!new_dtp) { mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, "MPIR_Type_vector", __LINE__, MPI_ERR_OTHER, "**nomem", 0); return mpi_errno; } /* --END ERROR HANDLING-- */ /* handle is filled in by MPIR_Handle_obj_alloc() */ MPIR_Object_set_ref(new_dtp, 1); new_dtp->is_committed = 0; new_dtp->attributes = NULL; new_dtp->name[0] = 0; new_dtp->contents = NULL; new_dtp->dataloop = NULL; new_dtp->dataloop_size = -1; is_builtin = (HANDLE_GET_KIND(oldtype) == HANDLE_KIND_BUILTIN); if (is_builtin) { el_sz = (MPI_Aint) MPIR_Datatype_get_basic_size(oldtype); el_type = oldtype; old_lb = 0; old_true_lb = 0; old_ub = el_sz; old_true_ub = el_sz; old_extent = el_sz; old_is_contig = 1; new_dtp->size = (MPI_Aint) count *(MPI_Aint) blocklength *el_sz; new_dtp->has_sticky_lb = 0; new_dtp->has_sticky_ub = 0; new_dtp->alignsize = el_sz; /* ??? */ new_dtp->n_builtin_elements = count * blocklength; new_dtp->builtin_element_size = el_sz; new_dtp->basic_type = el_type; new_dtp->max_contig_blocks = count; } else { /* user-defined base type (oldtype) */ MPIR_Datatype *old_dtp; MPIR_Datatype_get_ptr(oldtype, old_dtp); el_sz = old_dtp->builtin_element_size; el_type = old_dtp->basic_type; old_lb = old_dtp->lb; old_true_lb = old_dtp->true_lb; old_ub = old_dtp->ub; old_true_ub = old_dtp->true_ub; old_extent = old_dtp->extent; MPIR_Datatype_is_contig(oldtype, &old_is_contig); new_dtp->size = (MPI_Aint) count *(MPI_Aint) blocklength *(MPI_Aint) old_dtp->size; new_dtp->has_sticky_lb = old_dtp->has_sticky_lb; new_dtp->has_sticky_ub = old_dtp->has_sticky_ub; new_dtp->alignsize = old_dtp->alignsize; new_dtp->n_builtin_elements = count * blocklength * old_dtp->n_builtin_elements; new_dtp->builtin_element_size = el_sz; new_dtp->basic_type = el_type; new_dtp->max_contig_blocks = old_dtp->max_contig_blocks * count * blocklength; } /* priming for loop */ eff_disp = (dispinbytes) ? ((MPI_Aint *) displacement_array)[0] : (((MPI_Aint) ((int *) displacement_array)[0]) * old_extent); MPII_DATATYPE_BLOCK_LB_UB((MPI_Aint) blocklength, eff_disp, old_lb, old_ub, old_extent, min_lb, max_ub); /* determine new min lb and max ub */ for (i = 1; i < count; i++) { MPI_Aint tmp_lb, tmp_ub; eff_disp = (dispinbytes) ? ((MPI_Aint *) displacement_array)[i] : (((MPI_Aint) ((int *) displacement_array)[i]) * old_extent); MPII_DATATYPE_BLOCK_LB_UB((MPI_Aint) blocklength, eff_disp, old_lb, old_ub, old_extent, tmp_lb, tmp_ub); if (tmp_lb < min_lb) min_lb = tmp_lb; if (tmp_ub > max_ub) max_ub = tmp_ub; } new_dtp->lb = min_lb; new_dtp->ub = max_ub; new_dtp->true_lb = min_lb + (old_true_lb - old_lb); new_dtp->true_ub = max_ub + (old_true_ub - old_ub); new_dtp->extent = max_ub - min_lb; /* new type is contig for N types if it is all one big block, * its size and extent are the same, and the old type was also * contiguous. */ new_dtp->is_contig = 0; if (old_is_contig) { contig_count = MPII_Datatype_blockindexed_count_contig(count, blocklength, displacement_array, dispinbytes, old_extent); new_dtp->max_contig_blocks = contig_count; if ((contig_count == 1) && ((MPI_Aint) new_dtp->size == new_dtp->extent)) { new_dtp->is_contig = 1; } } *newtype = new_dtp->handle; return mpi_errno; }