/* MPIR_Create_unnamed_predefined - return (creating and caching if necessary)
 * the datatype for an MPI_Type_create_f90_xxx request.
 *
 * old      - basic MPI datatype that the F90 parametrized type maps onto
 * combiner - MPI_COMBINER_F90_{INTEGER,REAL,COMPLEX}
 * r        - range argument (decimal exponent range)
 * p        - precision argument (unused for F90_INTEGER)
 * new_ptr  - [out] handle of the committed unnamed predefined type
 *
 * Returns MPI_SUCCESS or an MPI error code.  Created types are cached in
 * the f90Types table so repeated requests for the same (combiner, r, p)
 * return the identical handle, as the MPI standard requires.
 */
int MPIR_Create_unnamed_predefined(MPI_Datatype old, int combiner, int r, int p,
                                   MPI_Datatype * new_ptr)
{
    int i;
    int mpi_errno = MPI_SUCCESS;
    F90Predefined *type;
    MPI_Datatype d = MPI_DATATYPE_NULL;

    *new_ptr = MPI_DATATYPE_NULL;

    /* Has this type been defined already? */
    for (i = 0; i < nAlloc; i++) {
        type = &f90Types[i];
        if (type->combiner == combiner && type->r == r && type->p == p) {
            *new_ptr = type->d;
            return mpi_errno;
        }
    }

    /* No room left in the cache for a new type */
    if (nAlloc >= MAX_F90_TYPES) {
        return MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                    "MPIF_Create_unnamed_predefined", __LINE__,
                                    MPI_ERR_INTERN, "**f90typetoomany", 0);
    }

    /* Create a contiguous type from one instance of the named type */
    mpi_errno = MPIR_Type_contiguous(1, old, &d);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    /* Initialize the contents data */
    {
        MPIR_Datatype *new_dtp = NULL;
        int vals[2];
        int nvals = 0;

        switch (combiner) {
            case MPI_COMBINER_F90_INTEGER:
                nvals = 1;
                vals[0] = r;
                break;
            case MPI_COMBINER_F90_REAL:
            case MPI_COMBINER_F90_COMPLEX:
                /* contents for F90_REAL/COMPLEX carry (p, r) in that order */
                nvals = 2;
                vals[0] = p;
                vals[1] = r;
                break;
        }

        MPIR_Datatype_get_ptr(d, new_dtp);
        mpi_errno = MPIR_Datatype_set_contents(new_dtp, combiner, nvals, 0, 0, vals, NULL, NULL);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        /* FIXME should we be setting type->is_permanent=TRUE here too?  If so,
         * will the cleanup code handle it correctly and not freak out? */

#ifndef NDEBUG
        {
            MPI_Datatype old_basic = MPI_DATATYPE_NULL;
            MPI_Datatype new_basic = MPI_DATATYPE_NULL;
            /* we used MPIR_Type_contiguous and then stomped its contents
             * information, so make sure that the basic_type is usable by
             * MPIR_Type_commit */
            MPIR_Datatype_get_basic_type(old, old_basic);
            MPIR_Datatype_get_basic_type(new_dtp->handle, new_basic);
            MPIR_Assert(new_basic == old_basic);
        }
#endif

        /* the MPI Standard requires that these types are pre-committed
         * (MPI-2.2, sec 16.2.5, pg 492) */
        mpi_errno = MPIR_Type_commit(&d);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    }

    /* BUGFIX: record the type in the cache only after it has been fully
     * created and committed.  Previously the table slot was claimed (and
     * combiner/r/p stored) before MPIR_Type_contiguous ran, so a failure
     * in creation/set_contents/commit left a half-initialized entry whose
     * invalid handle would be returned by later cache lookups. */
    if (nAlloc == 0) {
        /* Install the finalize callback that frees these datatypes.
         * Set the priority high enough that this will be executed
         * before the handle allocation check */
        MPIR_Add_finalize(MPIR_FreeF90Datatypes, 0, 2);
    }
    type = &f90Types[nAlloc++];
    type->combiner = combiner;
    type->r = r;
    type->p = p;
    type->d = d;

    *new_ptr = type->d;

  fn_fail:
    /* NOTE(review): on the error path a partially-created 'd' is not freed;
     * the original code also performed no cleanup on these internal-error
     * paths, so behavior is unchanged in that respect. */
    return mpi_errno;
}
/*@
   Dataloop_create_blockindexed - create blockindexed dataloop

   Arguments:
+  MPI_Aint icount - number of blocks
.  MPI_Aint iblklen - elements per block
.  void *disp_array - displacements (array of MPI_Aints if dispinbytes,
                      array of ints otherwise)
.  int dispinbytes - boolean; displacements are bytes vs. element counts
.  MPI_Datatype oldtype - base type
.  MPIR_Dataloop **dlp_p - [out] new dataloop
-  MPI_Aint *dlsz_p - [out] size of new dataloop

.N Errors
.N Returns 0 on success, -1 on failure.
@*/
int MPII_Dataloop_create_blockindexed(MPI_Aint icount, MPI_Aint iblklen, const void *disp_array,
                                      int dispinbytes, MPI_Datatype oldtype,
                                      MPIR_Dataloop ** dlp_p, MPI_Aint * dlsz_p)
{
    int err, is_builtin, is_vectorizable = 1;
    MPI_Aint i;                 /* widened from int: compared against MPI_Aint 'count' below */
    MPI_Aint new_loop_sz;
    MPI_Aint contig_count, count, blklen;
    MPI_Aint old_extent, eff_disp0, eff_disp1, last_stride;
    MPIR_Dataloop *new_dlp;

    count = (MPI_Aint) icount;  /* avoid subsequent casting */
    blklen = (MPI_Aint) iblklen;

    /* if count or blklen are zero, handle with contig code, call it a int */
    if (count == 0 || blklen == 0) {
        err = MPII_Dataloop_create_contiguous(0, MPI_INT, dlp_p, dlsz_p);
        return err;
    }

    is_builtin = (MPII_DATALOOP_HANDLE_HASLOOP(oldtype)) ? 0 : 1;

    if (is_builtin) {
        /* for builtins the size is used as the extent here -- presumably
         * size == extent for all builtin types; confirm against the
         * datatype module's invariants */
        MPIR_Datatype_get_size_macro(oldtype, old_extent);
    } else {
        MPIR_Datatype_get_extent_macro(oldtype, old_extent);
    }

    contig_count = MPII_Datatype_blockindexed_count_contig(count, blklen, disp_array,
                                                           dispinbytes, old_extent);

    /* optimization:
     *
     * if contig_count == 1 and block starts at displacement 0,
     * store it as a contiguous rather than a blockindexed dataloop.
     */
    if ((contig_count == 1) &&
        ((!dispinbytes && ((int *) disp_array)[0] == 0) ||
         (dispinbytes && ((MPI_Aint *) disp_array)[0] == 0))) {
        err = MPII_Dataloop_create_contiguous(icount * iblklen, oldtype, dlp_p, dlsz_p);
        return err;
    }

    /* optimization:
     *
     * if contig_count == 1 store it as a blockindexed with one
     * element rather than as a lot of individual blocks.
     */
    if (contig_count == 1) {
        /* adjust count and blklen and drop through.
         * (the original code also updated icount/iblklen here, but those
         * values are never read again below -- dead stores removed.) */
        blklen *= count;
        count = 1;
    }

    /* optimization:
     *
     * if displacements start at zero and result in a fixed stride,
     * store it as a vector rather than a blockindexed dataloop.
     */
    eff_disp0 = (dispinbytes) ? ((MPI_Aint) ((MPI_Aint *) disp_array)[0]) :
        (((MPI_Aint) ((int *) disp_array)[0]) * old_extent);

    if (count > 1 && eff_disp0 == (MPI_Aint) 0) {
        eff_disp1 = (dispinbytes) ? ((MPI_Aint) ((MPI_Aint *) disp_array)[1]) :
            (((MPI_Aint) ((int *) disp_array)[1]) * old_extent);
        last_stride = eff_disp1 - eff_disp0;

        /* check that every adjacent pair of displacements keeps the stride */
        for (i = 2; i < count; i++) {
            eff_disp0 = eff_disp1;
            eff_disp1 = (dispinbytes) ? ((MPI_Aint) ((MPI_Aint *) disp_array)[i]) :
                (((MPI_Aint) ((int *) disp_array)[i]) * old_extent);
            if (eff_disp1 - eff_disp0 != last_stride) {
                is_vectorizable = 0;
                break;
            }
        }
        if (is_vectorizable) {
            err = MPII_Dataloop_create_vector(count, blklen, last_stride, 1,    /* strideinbytes */
                                              oldtype, dlp_p, dlsz_p);
            return err;
        }
    }

    /* TODO: optimization:
     *
     * if displacements result in a fixed stride, but first displacement
     * is not zero, store it as a blockindexed (blklen == 1) of a vector.
     */

    /* TODO: optimization:
     *
     * if a blockindexed of a contig, absorb the contig into the blocklen
     * parameter and keep the same overall depth
     */

    /* otherwise storing as a blockindexed dataloop */

    /* Q: HOW CAN WE TELL IF IT IS WORTH IT TO STORE AS AN
     * INDEXED WITH FEWER CONTIG BLOCKS (IF CONTIG_COUNT IS SMALL)?
     */
    if (is_builtin) {
        MPII_Dataloop_alloc(MPII_DATALOOP_KIND_BLOCKINDEXED, count, &new_dlp, &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp)
            return -1;
        /* --END ERROR HANDLING-- */
        new_dlp->kind = MPII_DATALOOP_KIND_BLOCKINDEXED | MPII_DATALOOP_FINAL_MASK;

        new_dlp->el_size = old_extent;
        new_dlp->el_extent = old_extent;
        new_dlp->el_type = oldtype;
    } else {
        MPIR_Dataloop *old_loop_ptr = NULL;
        MPI_Aint old_loop_sz = 0;

        MPII_DATALOOP_GET_LOOPPTR(oldtype, old_loop_ptr);
        MPII_DATALOOP_GET_LOOPSIZE(oldtype, old_loop_sz);

        MPII_Dataloop_alloc_and_copy(MPII_DATALOOP_KIND_BLOCKINDEXED, count, old_loop_ptr,
                                     old_loop_sz, &new_dlp, &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp)
            return -1;
        /* --END ERROR HANDLING-- */
        new_dlp->kind = MPII_DATALOOP_KIND_BLOCKINDEXED;

        MPIR_Datatype_get_size_macro(oldtype, new_dlp->el_size);
        MPIR_Datatype_get_extent_macro(oldtype, new_dlp->el_extent);
        MPIR_Datatype_get_basic_type(oldtype, new_dlp->el_type);
    }

    new_dlp->loop_params.bi_t.count = count;
    new_dlp->loop_params.bi_t.blocksize = blklen;

    /* copy in displacement parameters
     *
     * regardless of dispinbytes, we store displacements in bytes in loop.
     */
    blockindexed_array_copy(count, disp_array, new_dlp->loop_params.bi_t.offset_array,
                            dispinbytes, old_extent);

    *dlp_p = new_dlp;
    *dlsz_p = new_loop_sz;

    return 0;
}
/* MPIR_Type_get_elements
 *
 * Arguments:
 * - bytes_p - input/output byte count
 * - count - maximum number of this type to subtract from the bytes; a count
 *           of < 0 indicates use as many as we like
 * - datatype - input datatype
 *
 * Returns number of elements available given the two constraints of number of
 * bytes and count of types.  Also reduces the byte count by the amount taken
 * up by the types.
 *
 * This is called from MPI_Get_elements() when it sees a type with multiple
 * element types (builtin_element_size == -1 on the datatype object).  This
 * function also calls itself recursively, walking the datatype's contents
 * (combiner + constructor arguments) one level at a time.
 */
PMPI_LOCAL MPI_Count MPIR_Type_get_elements(MPI_Count *bytes_p, MPI_Count count, MPI_Datatype datatype)
{
    MPIR_Datatype *datatype_ptr = NULL;

    MPIR_Datatype_get_ptr(datatype, datatype_ptr);      /* invalid if builtin */

    /* if we have gotten down to a type with only one element type,
     * call MPIR_Type_get_basic_type_elements() and return.
     */
    if (HANDLE_GET_KIND(datatype) == HANDLE_KIND_BUILTIN ||
        datatype == MPI_FLOAT_INT ||
        datatype == MPI_DOUBLE_INT ||
        datatype == MPI_LONG_INT ||
        datatype == MPI_SHORT_INT ||
        datatype == MPI_LONG_DOUBLE_INT) {
        /* builtins and the pair types resolve directly */
        return MPIR_Type_get_basic_type_elements(bytes_p, count, datatype);
    } else if (datatype_ptr->builtin_element_size >= 0) {
        /* derived type whose leaves are all one basic type: scale the count
         * by the number of builtin elements per instance and resolve */
        MPI_Datatype basic_type = MPI_DATATYPE_NULL;
        MPIR_Datatype_get_basic_type(datatype_ptr->basic_type, basic_type);
        return MPIR_Type_get_basic_type_elements(bytes_p,
                                                 count * datatype_ptr->n_builtin_elements,
                                                 basic_type);
    } else {
        /* we have bytes left and still don't have a single element size; must
         * recurse.
         */
        int i, j, *ints;
        MPI_Count typecount = 0, nr_elements = 0, last_nr_elements;
        MPI_Aint *aints;
        MPI_Datatype *types;

        /* Establish locations of arrays */
        MPIR_Type_access_contents(datatype_ptr->handle, &ints, &aints, &types);

        /* NOTE(review): MPI_ERR_TYPE (an error *code*) is returned where the
         * caller expects an element count -- confirm callers treat this as a
         * failure and not as a count. */
        if (!ints || !aints || !types)
            return MPI_ERR_TYPE;

        switch (datatype_ptr->contents->combiner) {
            case MPI_COMBINER_NAMED:
            case MPI_COMBINER_DUP:
            case MPI_COMBINER_RESIZED:
                /* single inner type, same multiplicity */
                return MPIR_Type_get_elements(bytes_p, count, *types);
                break;
            case MPI_COMBINER_CONTIGUOUS:
            case MPI_COMBINER_VECTOR:
            case MPI_COMBINER_HVECTOR_INTEGER:
            case MPI_COMBINER_HVECTOR:
                /* count is first in ints array */
                return MPIR_Type_get_elements(bytes_p, count * (*ints), *types);
                break;
            case MPI_COMBINER_INDEXED_BLOCK:
            case MPI_COMBINER_HINDEXED_BLOCK:
                /* count is first in ints array, blocklength is second */
                return MPIR_Type_get_elements(bytes_p, count * ints[0] * ints[1], *types);
                break;
            case MPI_COMBINER_INDEXED:
            case MPI_COMBINER_HINDEXED_INTEGER:
            case MPI_COMBINER_HINDEXED:
                for (i=0; i < (*ints); i++) {
                    /* add up the blocklengths to get a max. # of the next type */
                    typecount += ints[i+1];
                }
                return MPIR_Type_get_elements(bytes_p, count * typecount, *types);
                break;
            case MPI_COMBINER_STRUCT_INTEGER:
            case MPI_COMBINER_STRUCT:
                /* In this case we can't simply multiply the count of the next
                 * type by the count of the current type, because we need to
                 * cycle through the types just as the struct would.  thus the
                 * nested loops.
                 *
                 * We need to keep going until we get less elements than expected
                 * or we run out of bytes.
                 */
                last_nr_elements = 1; /* seed value */
                for (j=0;
                     (count < 0 || j < count) &&
                     *bytes_p > 0 && last_nr_elements > 0;
                     j++) {
                    /* recurse on each type; bytes are reduced in calls */
                    for (i=0; i < (*ints); i++) {
                        /* skip zero-count elements of the struct */
                        if (ints[i+1] == 0) continue;

                        last_nr_elements = MPIR_Type_get_elements(bytes_p, ints[i+1], types[i]);
                        nr_elements += last_nr_elements;

                        MPIR_Assert(last_nr_elements >= 0);

                        /* fewer elements than the blocklength means the bytes
                         * ran out mid-member; stop cycling */
                        if (last_nr_elements < ints[i+1]) break;
                    }
                }
                return nr_elements;
                break;
            case MPI_COMBINER_SUBARRAY:
            case MPI_COMBINER_DARRAY:
            case MPI_COMBINER_F90_REAL:
            case MPI_COMBINER_F90_COMPLEX:
            case MPI_COMBINER_F90_INTEGER:
            default:
                /* --BEGIN ERROR HANDLING-- */
                /* these combiners are not handled here */
                MPIR_Assert(0);
                return -1;
                break;
                /* --END ERROR HANDLING-- */
        }
    }
}
/* MPIR_Get_elements_x_impl
 *
 * Arguments:
 * - byte_count - input/output byte count
 * - datatype - input datatype
 * - elements - [out] number of basic elements this byte_count would contain
 *
 * Returns number of elements available given the two constraints of number of
 * bytes and count of types.  Also reduces the byte count by the amount taken
 * up by the types.
 *
 * Four cases are distinguished:
 *   1. builtin datatype
 *   2. derived datatype with a single basic element type and nonzero size
 *   3. derived datatype with zero size
 *   4. derived datatype mixing element types (handled by recursion)
 */
int MPIR_Get_elements_x_impl(MPI_Count *byte_count, MPI_Datatype datatype, MPI_Count *elements)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Datatype *dtp = NULL;
    int is_builtin = (HANDLE_GET_KIND(datatype) == HANDLE_KIND_BUILTIN);

    if (!is_builtin) {
        MPIR_Datatype_get_ptr(datatype, dtp);
    }

    if (is_builtin) {
        /* Behaves just like MPI_Get_count in the predefined case: the byte
         * count must be an exact multiple of the type size. */
        MPI_Count size;
        MPIR_Datatype_get_size_macro(datatype, size);

        if ((*byte_count % size) != 0)
            *elements = MPI_UNDEFINED;
        else
            *elements = MPIR_Type_get_basic_type_elements(byte_count, -1, datatype);
        MPIR_Assert(*byte_count >= 0);
    } else if (dtp->builtin_element_size != -1 && dtp->size > 0) {
        /* single element type: do not limit how many types fit in the bytes */
        /* QUESTION: WHAT IF SOMEONE GAVE US AN MPI_UB OR MPI_LB??? */
        MPI_Datatype basic_type = MPI_DATATYPE_NULL;
        MPIR_Datatype_get_basic_type(dtp->basic_type, basic_type);
        *elements = MPIR_Type_get_basic_type_elements(byte_count, -1, basic_type);
        MPIR_Assert(*byte_count >= 0);
    } else if (dtp->size == 0) {
        if (*byte_count > 0) {
            /* --BEGIN ERROR HANDLING-- */
            /* datatype size of zero and count > 0 should never happen. */
            (*elements) = MPI_UNDEFINED;
            /* --END ERROR HANDLING-- */
        } else {
            /* This is ambiguous.  However, discussions on MPI Forum
             * reached a consensus that this is the correct return
             * value */
            (*elements) = 0;
        }
    } else {
        /* derived type with weird element type or weird size */
        MPIR_Assert(dtp->builtin_element_size == -1);
        *elements = MPIR_Type_get_elements(byte_count, -1, datatype);
    }

    return mpi_errno;
}
/* segment_init
 *
 * buf    - datatype buffer location
 * count  - number of instances of the datatype in the buffer
 * handle - handle for datatype (could be derived or not)
 * segp   - pointer to previously allocated segment structure
 *
 * Sets up segp so that segp->stackelm[0] describes 'count' instances of
 * 'handle' starting at 'buf'.  A builtin loop (segp->builtin_loop) is
 * synthesized when the type has no dataloop of its own, when count == 0,
 * or to absorb count > 1 on top of an existing dataloop.
 *
 * Notes:
 * - Assumes that the segment has been allocated.
 *
 * NOTE(review): 'depth' is only ever assigned 0 or 1 (or incremented once
 * in the general case) in this view -- it is never loaded from the handle,
 * so in the count == 1 path it stays 0 and segp->valid_sp becomes -1.
 * Possibly a MPII_DATALOOP_GET_LOOPDEPTH(handle, depth) call was lost; the
 * full file/context should be checked before trusting the depth handling.
 */
static inline void segment_init(const void *buf, MPI_Aint count, MPI_Datatype handle, struct MPIR_Segment *segp)
{
    MPI_Aint elmsize = 0;
    int i, depth = 0;
    int branch_detected = 0;

    struct MPII_Dataloop_stackelm *elmp;
    struct MPIR_Dataloop *dlp = 0, *sblp = &segp->builtin_loop;

#ifdef MPII_DATALOOP_DEBUG_MANIPULATE
    MPL_DBG_MSG_FMT(MPIR_DBG_DATATYPE, VERBOSE,
                    (MPL_DBG_FDEST, "segment_init: count = %d, buf = %x\n", count, buf));
#endif

    if (!MPII_DATALOOP_HANDLE_HASLOOP(handle)) {
        /* simplest case; datatype has no loop (basic) */
        MPIR_Datatype_get_size_macro(handle, elmsize);

        /* synthesize a final contig loop of 'count' basic elements */
        sblp->kind = MPII_DATALOOP_KIND_CONTIG | MPII_DATALOOP_FINAL_MASK;
        sblp->loop_params.c_t.count = count;
        sblp->loop_params.c_t.dataloop = 0;
        sblp->el_size = elmsize;
        MPIR_Datatype_get_basic_type(handle, sblp->el_type);
        MPIR_Datatype_get_extent_macro(handle, sblp->el_extent);

        dlp = sblp;
        depth = 1;
    } else if (count == 0) {
        /* only use the builtin: an empty final contig loop */
        sblp->kind = MPII_DATALOOP_KIND_CONTIG | MPII_DATALOOP_FINAL_MASK;
        sblp->loop_params.c_t.count = 0;
        sblp->loop_params.c_t.dataloop = 0;
        sblp->el_size = 0;
        sblp->el_extent = 0;

        dlp = sblp;
        depth = 1;
    } else if (count == 1) {
        /* don't use the builtin */
        MPII_DATALOOP_GET_LOOPPTR(handle, dlp);
    } else {
        /* default: need to use builtin to handle contig; must check
         * loop depth first
         */
        MPIR_Dataloop *oldloop; /* loop from original type, before new count */
        MPI_Aint type_size, type_extent;
        MPI_Datatype el_type;

        MPII_DATALOOP_GET_LOOPPTR(handle, oldloop);
        MPIR_Assert(oldloop != NULL);
        MPIR_Datatype_get_size_macro(handle, type_size);
        MPIR_Datatype_get_extent_macro(handle, type_extent);
        MPIR_Datatype_get_basic_type(handle, el_type);

        /* NOTE(review): depth is 0 at this point as written, so this branch
         * appears unreachable -- see header note about a possibly-missing
         * depth load from the handle. */
        if (depth == 1 && ((oldloop->kind & MPII_DATALOOP_KIND_MASK) == MPII_DATALOOP_KIND_CONTIG)) {
            if (type_size == type_extent) {
                /* use a contig */
                sblp->kind = MPII_DATALOOP_KIND_CONTIG | MPII_DATALOOP_FINAL_MASK;
                sblp->loop_params.c_t.count = count * oldloop->loop_params.c_t.count;
                sblp->loop_params.c_t.dataloop = NULL;
                sblp->el_size = oldloop->el_size;
                sblp->el_extent = oldloop->el_extent;
                sblp->el_type = oldloop->el_type;
            } else {
                /* use a vector, with extent of original type becoming the stride */
                sblp->kind = MPII_DATALOOP_KIND_VECTOR | MPII_DATALOOP_FINAL_MASK;
                sblp->loop_params.v_t.count = count;
                sblp->loop_params.v_t.blocksize = oldloop->loop_params.c_t.count;
                sblp->loop_params.v_t.stride = type_extent;
                sblp->loop_params.v_t.dataloop = NULL;
                sblp->el_size = oldloop->el_size;
                sblp->el_extent = oldloop->el_extent;
                sblp->el_type = oldloop->el_type;
            }
        } else {
            /* general case: wrap the old loop in a non-final contig loop */
            sblp->kind = MPII_DATALOOP_KIND_CONTIG;
            sblp->loop_params.c_t.count = count;
            sblp->loop_params.c_t.dataloop = oldloop;
            sblp->el_size = type_size;
            sblp->el_extent = type_extent;
            sblp->el_type = el_type;

            depth++;    /* we're adding to the depth with the builtin */
            MPIR_Assert(depth < (MPII_DATALOOP_MAX_DATATYPE_DEPTH));
        }

        dlp = sblp;
    }

    /* assert instead of return b/c dtype/dloop errorhandling code is inconsistent */
    MPIR_Assert(depth < (MPII_DATALOOP_MAX_DATATYPE_DEPTH));

    /* initialize the rest of the segment values */
    segp->handle = handle;
    segp->ptr = (void *) buf;
    segp->stream_off = 0;
    segp->cur_sp = 0;
    segp->valid_sp = 0;

    /* initialize the first stackelm in its entirety */
    elmp = &(segp->stackelm[0]);
    MPII_Dataloop_stackelm_load(elmp, dlp, 0);
    branch_detected = elmp->may_require_reloading;

    /* Fill in parameters not set by MPII_Dataloop_stackelm_load */
    elmp->orig_offset = 0;
    elmp->curblock = elmp->orig_block;
    /* MPII_Dataloop_stackelm_offset assumes correct orig_count, curcount, loop_p */
    elmp->curoffset = /* elmp->orig_offset + */ MPII_Dataloop_stackelm_offset(elmp);

    /* walk down the dataloop tree, loading one stackelm per level until the
     * final (leaf) loop is reached */
    i = 1;
    while (!(dlp->kind & MPII_DATALOOP_FINAL_MASK)) {
        /* get pointer to next dataloop */
        switch (dlp->kind & MPII_DATALOOP_KIND_MASK) {
            case MPII_DATALOOP_KIND_CONTIG:
            case MPII_DATALOOP_KIND_VECTOR:
            case MPII_DATALOOP_KIND_BLOCKINDEXED:
            case MPII_DATALOOP_KIND_INDEXED:
                dlp = dlp->loop_params.cm_t.dataloop;
                break;
            case MPII_DATALOOP_KIND_STRUCT:
                /* struct: descend into the first member's loop */
                dlp = dlp->loop_params.s_t.dataloop_array[0];
                break;
            default:
                /* --BEGIN ERROR HANDLING-- */
                MPIR_Assert(0);
                break;
                /* --END ERROR HANDLING-- */
        }

        MPIR_Assert(i < MPII_DATALOOP_MAX_DATATYPE_DEPTH);

        /* loop_p, orig_count, orig_block, and curcount are all filled by us now.
         * the rest are filled in at processing time. */
        elmp = &(segp->stackelm[i]);
        MPII_Dataloop_stackelm_load(elmp, dlp, branch_detected);
        branch_detected = elmp->may_require_reloading;
        i++;
    }

    segp->valid_sp = depth - 1;
}