/*!
  \brief Client-side context constructor: records communicators and computes,
         for this client rank, which server ranks it is "leader" for.

  \param [in] parent     Pointer to context on client side
  \param [in] intraComm_ communicator of group client
  \param [in] interComm_ communicator of group server
  \param [in] cxtSer     Pointer to context of server side (only used in attached mode)
*/
CContextClient::CContextClient(CContext* parent, MPI_Comm intraComm_, MPI_Comm interComm_, CContext* cxtSer)
  : mapBufferSize_(), parentServer(cxtSer), maxBufferedEvents(4)
{
  context = parent;
  intraComm = intraComm_;
  interComm = interComm_;
  MPI_Comm_rank(intraComm, &clientRank);
  MPI_Comm_size(intraComm, &clientSize);

  // In server mode interComm is a genuine intercommunicator, so the server
  // count is the remote group size; otherwise (attached mode) it is an
  // intracommunicator and its own size is used.
  int flag;
  MPI_Comm_test_inter(interComm, &flag);
  if (flag) MPI_Comm_remote_size(interComm, &serverSize);
  else MPI_Comm_size(interComm, &serverSize);

  if (clientSize < serverSize)
  {
    // Fewer clients than servers: each client leads a contiguous range of
    // servers.  The first `remain` clients take one extra server each so the
    // whole [0, serverSize) range is covered.
    int serverByClient = serverSize / clientSize;
    int remain = serverSize % clientSize;
    int rankStart = serverByClient * clientRank;

    if (clientRank < remain)
    {
      serverByClient++;
      rankStart += clientRank;
    }
    else
      rankStart += remain;

    for (int i = 0; i < serverByClient; i++)
      ranksServerLeader.push_back(rankStart + i);

    // Every client is a leader in this regime.
    ranksServerNotLeader.resize(0);
  }
  else
  {
    // At least as many clients as servers: clients are partitioned into
    // groups, one group per server; only the first client of each group is
    // that server's leader.
    int clientByServer = clientSize / serverSize;
    int remain = clientSize % serverSize;

    if (clientRank < (clientByServer + 1) * remain)
    {
      // This client falls in one of the first `remain` (larger) groups of
      // size clientByServer + 1.
      if (clientRank % (clientByServer + 1) == 0)
        ranksServerLeader.push_back(clientRank / (clientByServer + 1));
      else
        ranksServerNotLeader.push_back(clientRank / (clientByServer + 1));
    }
    else
    {
      // Remaining groups have exactly clientByServer clients; re-base the
      // rank before dividing.
      int rank = clientRank - (clientByServer + 1) * remain;
      if (rank % clientByServer == 0)
        ranksServerLeader.push_back(remain + rank / clientByServer);
      else
        ranksServerNotLeader.push_back(remain + rank / clientByServer);
    }
  }

  timeLine = 0;
}
/*
 * MPIX_Neighbor_alltoallw_x: "big count" wrapper around MPI_Neighbor_alltoallw.
 *
 * Converts the MPI_Count count arrays and MPI_Aint displacement arrays into
 * per-rank derived datatypes with plain int counts (via BigMPI_Convert_vectors)
 * so the underlying int-based MPI call can move more than INT_MAX elements.
 *
 * Returns the MPI error code from MPI_Neighbor_alltoallw.
 * Limitations: intercommunicators and MPI_IN_PLACE are rejected via
 * BigMPI_Error (which presumably aborts -- TODO confirm its semantics).
 *
 * NOTE(review): the malloc results are only checked with assert(), which is
 * compiled out under NDEBUG -- confirm this matches BigMPI's OOM policy.
 */
int MPIX_Neighbor_alltoallw_x(const void *sendbuf, const MPI_Count sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[],
                              void *recvbuf, const MPI_Count recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm)
{
    int rc = MPI_SUCCESS;

    int is_intercomm;
    MPI_Comm_test_inter(comm, &is_intercomm);
    if (is_intercomm)
        BigMPI_Error("BigMPI does not support intercommunicators yet.\n");

    if (sendbuf==MPI_IN_PLACE)
        BigMPI_Error("BigMPI does not support in-place in the v-collectives. Sorry. \n");

    int size, rank;
    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

    /* One converted (count, type, displacement) triple per communicator rank,
     * for both the send and the receive sides. */
    int * newsendcounts = malloc(size*sizeof(int));
    assert(newsendcounts!=NULL);
    MPI_Datatype * newsendtypes = malloc(size*sizeof(MPI_Datatype));
    assert(newsendtypes!=NULL);
    MPI_Aint * newsdispls = malloc(size*sizeof(MPI_Aint));
    assert(newsdispls!=NULL);

    int * newrecvcounts = malloc(size*sizeof(int));
    assert(newrecvcounts!=NULL);
    MPI_Datatype * newrecvtypes = malloc(size*sizeof(MPI_Datatype));
    assert(newrecvtypes!=NULL);
    MPI_Aint * newrdispls = malloc(size*sizeof(MPI_Aint));
    assert(newrdispls!=NULL);

    /* Fold each big count into a derived datatype; displacements are carried
     * through, not zeroed (last flag 0 = "use the given displs"). */
    BigMPI_Convert_vectors(size, 0 /* splat count */, 0, sendcounts, 0 /* splat type */, 0, sendtypes, 0 /* zero displs */, sdispls, newsendcounts, newsendtypes, newsdispls);
    BigMPI_Convert_vectors(size, 0 /* splat count */, 0, recvcounts, 0 /* splat type */, 0, recvtypes, 0 /* zero displs */, rdispls, newrecvcounts, newrecvtypes, newrdispls);

    rc = MPI_Neighbor_alltoallw(sendbuf, newsendcounts, newsdispls, newsendtypes, recvbuf, newrecvcounts, newrdispls, newrecvtypes, comm);

    /* The converted datatypes were committed by BigMPI_Convert_vectors; free
     * them all before releasing the scratch arrays. */
    for (int i=0; i<size; i++) {
        MPI_Type_free(&newsendtypes[i]);
        MPI_Type_free(&newrecvtypes[i]);
    }
    free(newsendcounts);
    free(newsdispls);
    free(newsendtypes);
    free(newrecvcounts);
    free(newrecvtypes);
    free(newrdispls);
    return rc;
}
/*
 * Open MPI Fortran binding for MPI_Comm_test_inter.
 *
 * Converts the Fortran communicator handle to a C handle, invokes the C
 * routine, and (on success) converts the C int result into a Fortran
 * LOGICAL via the OMPI_* conversion macros.  On failure *flag is left
 * untouched, matching the usual Fortran-binding convention.
 */
void mpi_comm_test_inter_f(MPI_Fint *comm, ompi_fortran_logical_t *flag, MPI_Fint *ierr)
{
    MPI_Comm c_comm = MPI_Comm_f2c (*comm);
    /* Declares a C int shadow for `flag` when Fortran LOGICAL is not
     * interchangeable with int (macro expands to nothing otherwise). */
    OMPI_LOGICAL_NAME_DECL(flag);

    *ierr = OMPI_INT_2_FINT(MPI_Comm_test_inter(c_comm, OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)));

    if (MPI_SUCCESS == OMPI_FINT_2_INT(*ierr)) {
        /* Copy the C int result back into the Fortran LOGICAL. */
        OMPI_SINGLE_INT_2_LOGICAL(flag);
    }
}
/* Validate a communicator by dispatching to the inter- or intra-communicator
 * checker.  A null communicator trivially passes (returns 0 errors). */
int MTestTestComm(MPI_Comm comm)
{
    int isIntercomm = 0;

    if (comm == MPI_COMM_NULL)
        return 0;

    MPI_Comm_test_inter(comm, &isIntercomm);
    return isIntercomm ? MTestTestIntercomm(comm) : MTestTestIntracomm(comm);
}
/*@
MPI_Comm_remote_group - Accesses the remote group associated with the
                        given inter-communicator

Input Parameter:
. comm - Communicator (must be intercommunicator)

Output Parameter:
. group - remote group of communicator

.N fortran

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_COMM
@*/
int MPI_Comm_remote_group ( MPI_Comm comm, MPI_Group *group )
{
  struct MPIR_COMMUNICATOR *comm_ptr;
  struct MPIR_GROUP *group_ptr;
  int flag;
  int mpi_errno = MPI_SUCCESS;  /* declared for macro use; not otherwise read here */
  static char myname[] = "MPI_COMM_REMOTE_GROUP";

  TR_PUSH(myname);
  comm_ptr = MPIR_GET_COMM_PTR(comm);
  /* Validates the handle; on failure the macro returns from this function. */
  MPIR_TEST_MPI_COMM(comm,comm_ptr,comm_ptr,myname );

  /* Check for intra-communicator: remote groups exist only on intercomms. */
  MPI_Comm_test_inter ( comm, &flag );
  if (!flag)
    return MPIR_ERROR(comm_ptr, MPIR_ERRCLASS_TO_CODE(MPI_ERR_COMM,MPIR_ERR_COMM_INTRA),myname);

  /* Duplicate the stored group so the caller gets an independent reference.
   * NOTE(review): for an intercommunicator, comm_ptr->group here appears to
   * hold the remote group -- confirm against MPIR_COMMUNICATOR's layout. */
  MPIR_Group_dup( comm_ptr->group, &group_ptr );
  *group = group_ptr->self;
  TR_POP;
  return (MPI_SUCCESS);
}
/*
 * getworldrank: map `rank` within communicator `comm` to the corresponding
 * rank in MPI_COMM_WORLD.  The world group handle is resolved once on the
 * first call and cached for the lifetime of the process.
 */
int getworldrank(MPI_Comm comm, int rank)
{
    static MPI_Group cachedworldgroup;
    static int worldgroupcached = 0;
    MPI_Group localgroup;
    int translated = 0;
    int intercomm = 0;

    /* Resolve and cache the MPI_COMM_WORLD group exactly once. */
    if (!worldgroupcached) {
        MPI_Comm_group(MPI_COMM_WORLD, &cachedworldgroup);
        worldgroupcached = 1;
    }

    /* An intercommunicator's ranks live in its remote group; an
     * intracommunicator's ranks live in its own group. */
    MPI_Comm_test_inter(comm, &intercomm);
    if (intercomm)
        MPI_Comm_remote_group(comm, &localgroup);
    else
        MPI_Comm_group(comm, &localgroup);

    MPI_Group_translate_ranks(localgroup, 1, &rank, cachedworldgroup, &translated);
    MPI_Group_free(&localgroup);
    return translated;
}
/* Fortran wrapper for MPI_Comm_test_inter: converts the Fortran handle,
 * calls the C routine, then stores the int result as a Fortran LOGICAL.
 * Note *flag is written regardless of the error code, matching the
 * original wrapper's behavior. */
EXPORT_MPI_API void FORTRAN_API mpi_comm_test_inter_( MPI_Fint *comm, MPI_Fint *flag, MPI_Fint *__ierr )
{
    int c_flag = 0;
    MPI_Comm c_comm = MPI_Comm_f2c(*comm);

    *__ierr = MPI_Comm_test_inter(c_comm, &c_flag);
    *flag = MPIR_TO_FLOG(c_flag);
}
/*@
    MPI_File_open - Opens a file

Input Parameters:
. comm - communicator (handle)
. filename - name of file to open (string)
. amode - file access mode (integer)
. info - info object (handle)

Output Parameters:
. fh - file handle (handle)

.N fortran
@*/
int MPI_File_open(MPI_Comm comm, ROMIO_CONST char *filename, int amode, MPI_Info info, MPI_File *fh)
{
    int error_code = MPI_SUCCESS, file_system, flag, tmp_amode=0, rank;
    char *tmp;
    MPI_Comm dupcomm = MPI_COMM_NULL;
    ADIOI_Fns *fsops;
    static char myname[] = "MPI_FILE_OPEN";
#ifdef MPI_hpux
    int fl_xmpi;

    HPMP_IO_OPEN_START(fl_xmpi, comm);
#endif /* MPI_hpux */

    MPIU_THREAD_CS_ENTER(ALLFUNC,);

    /* --BEGIN ERROR HANDLING-- */
    MPIO_CHECK_COMM(comm, myname, error_code);
    MPIO_CHECK_INFO_ALL(info, error_code, comm);
    /* --END ERROR HANDLING-- */

    /* MPI_File_open requires an intracommunicator. */
    error_code = MPI_Comm_test_inter(comm, &flag);
    /* --BEGIN ERROR HANDLING-- */
    if (error_code || flag)
    {
        error_code = MPIO_Err_create_code(error_code, MPIR_ERR_RECOVERABLE,
                                          myname, __LINE__, MPI_ERR_COMM,
                                          "**commnotintra", 0);
        goto fn_fail;
    }

    /* Exactly one of RDONLY / RDWR / WRONLY must be given. */
    if ( ((amode&MPI_MODE_RDONLY)?1:0) + ((amode&MPI_MODE_RDWR)?1:0) +
         ((amode&MPI_MODE_WRONLY)?1:0) != 1 )
    {
        error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                          myname, __LINE__, MPI_ERR_AMODE,
                                          "**fileamodeone", 0);
        goto fn_fail;
    }

    /* CREATE/EXCL make no sense with a read-only open. */
    if ((amode & MPI_MODE_RDONLY) &&
        ((amode & MPI_MODE_CREATE) || (amode & MPI_MODE_EXCL)))
    {
        error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                          myname, __LINE__, MPI_ERR_AMODE,
                                          "**fileamoderead", 0);
        goto fn_fail;
    }

    /* RDWR conflicts with SEQUENTIAL access. */
    if ((amode & MPI_MODE_RDWR) && (amode & MPI_MODE_SEQUENTIAL))
    {
        error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                          myname, __LINE__, MPI_ERR_AMODE,
                                          "**fileamodeseq", 0);
        goto fn_fail;
    }

    /* Private communicator so internal collectives don't interfere with
     * the caller's use of `comm`. */
    MPI_Comm_dup(comm, &dupcomm);

    /* check if ADIO has been initialized. If not, initialize it */
    MPIR_MPIOInit(&error_code);
    if (error_code != MPI_SUCCESS) goto fn_fail;

    /* check if amode is the same on all processes: at first glance, one might try
     * to use a built-in operator like MPI_BAND, but we need every mpi process to
     * agree the amode was not the same.  Consider process A with
     * MPI_MODE_CREATE|MPI_MODE_RDWR, and B with MPI_MODE_RDWR:  MPI_BAND yields
     * MPI_MODE_RDWR.  A determines amodes are different, but B proceeds having not
     * detected an error */
    MPI_Allreduce(&amode, &tmp_amode, 1, MPI_INT, ADIO_same_amode, dupcomm);

    if (tmp_amode == ADIO_AMODE_NOMATCH) {
        error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                          myname, __LINE__, MPI_ERR_NOT_SAME,
                                          "**fileamodediff", 0);
        goto fn_fail;
    }
    /* --END ERROR HANDLING-- */

    file_system = -1;

    /* resolve file system type from file name; this is a collective call */
    ADIO_ResolveFileType(dupcomm, filename, &file_system, &fsops, &error_code);
    /* --BEGIN ERROR HANDLING-- */
    if (error_code != MPI_SUCCESS)
    {
        /* ADIO_ResolveFileType() will print as informative a message as it
         * possibly can or call MPIO_Err_setmsg.  We just need to propagate
         * the error up. */
        goto fn_fail;
    }
    /* --END ERROR HANDLING-- */

    /* strip off prefix if there is one, but only skip prefixes
     * if they are greater than length one to allow for windows
     * drive specifications (e.g. c:\...) */
    tmp = strchr(filename, ':');
    if (tmp > filename + 1) {
        filename = tmp + 1;
    }

    /* use default values for disp, etype, filetype */
    *fh = ADIO_Open(comm, dupcomm, filename, file_system, fsops, amode, 0,
                    MPI_BYTE, MPI_BYTE, info, ADIO_PERM_NULL, &error_code);

    /* --BEGIN ERROR HANDLING-- */
    if (error_code != MPI_SUCCESS) {
        goto fn_fail;
    }
    /* --END ERROR HANDLING-- */

    /* if MPI_MODE_SEQUENTIAL requested, file systems cannot do explicit offset
     * or independent file pointer accesses, leaving not much else aside from
     * shared file pointer accesses. */
    if ( !ADIO_Feature((*fh), ADIO_SHARED_FP) && (amode & MPI_MODE_SEQUENTIAL))
    {
        error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                          myname, __LINE__,
                                          MPI_ERR_UNSUPPORTED_OPERATION,
                                          "**iosequnsupported", 0);
        ADIO_Close(*fh, &error_code);
        goto fn_fail;
    }

    /* determine name of file that will hold the shared file pointer */
    /* can't support shared file pointers on a file system that doesn't
       support file locking. */
    if ((error_code == MPI_SUCCESS) && ADIO_Feature((*fh), ADIO_SHARED_FP))
    {
        MPI_Comm_rank(dupcomm, &rank);
        ADIOI_Shfp_fname(*fh, rank, &error_code);
        if (error_code != MPI_SUCCESS)
            goto fn_fail;

        /* if MPI_MODE_APPEND, set the shared file pointer to end of file.
           indiv. file pointer already set to end of file in ADIO_Open.
           Here file view is just bytes. */
        if ((*fh)->access_mode & MPI_MODE_APPEND)
        {
            if (rank == (*fh)->hints->ranklist[0])  /* only one person need set the sharedfp */
                ADIO_Set_shared_fp(*fh, (*fh)->fp_ind, &error_code);
            MPI_Barrier(dupcomm);
        }
    }

#ifdef MPI_hpux
    HPMP_IO_OPEN_END(fl_xmpi, *fh, comm);
#endif /* MPI_hpux */

fn_exit:
    MPIU_THREAD_CS_EXIT(ALLFUNC,);
    return error_code;
fn_fail:
    /* --BEGIN ERROR HANDLING-- */
    if (dupcomm != MPI_COMM_NULL) MPI_Comm_free(&dupcomm);
    error_code = MPIO_Err_return_file(MPI_FILE_NULL, error_code);
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
/*
 * mpi_lsa_create_intercoms: wire the four component groups (GMRES, MAIN,
 * ARNOLDI, LS -- color_group 0..3) into a ring by creating, for each group,
 * an intercommunicator to its "next" and "previous" neighbors, then record
 * them as com->out_com / com->in_com.
 *
 * Returns 0.  The many MPI_Barrier calls only serialize the diagnostic
 * printf output.
 *
 * NOTE(review): several printfs pass MPI_Comm values to "%d"; that assumes
 * communicator handles are plain ints (true for MPICH-style handles, not
 * portable) -- confirm the target MPI implementation.
 */
int mpi_lsa_create_intercoms(com_lsa * com){
  int prev, next,flag;
  int prev_size,next_size,size;

  /* create first connection between intracommunicators thanks to an
     intercommunicator */
  /* one way: peer is the master of group 4-(color+1) */
  MPI_Barrier(MPI_COMM_WORLD);
  if(com->rank_world==0)printf("]> Creating intercommunicators\n-One Way :\n");
  MPI_Barrier(MPI_COMM_WORLD);
  printf("\t *> %d -> %d ",com->rank_world,com->master.com[4-((com->color_group)+1)]);
  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Intercomm_create(com->com_group,0,
                       MPI_COMM_WORLD,com->master.com[4-((com->color_group)+1)],
                       com->rank_group,
                       &(com->inter.com[4-((com->color_group)+1)]));
  MPI_Barrier(MPI_COMM_WORLD);
  if(com->rank_world==0)printf("\n]> The Other : \n");
  MPI_Barrier(MPI_COMM_WORLD);
  printf("\t *> %d -> %d ",(com->color_group),(4-((com->color_group)-1)%4)%4);
  MPI_Barrier(MPI_COMM_WORLD);
  if(com->rank_world==0)printf("\n");
  MPI_Barrier(MPI_COMM_WORLD);
  /* the other direction: peer index (4-(color-1)%4)%4; note this one uses
     com->com_world as the peer communicator rather than MPI_COMM_WORLD */
  MPI_Intercomm_create(com->com_group,0,
                       com->com_world,com->master.com[(4-((com->color_group)-1)%4)%4],
                       com->rank_group,
                       &(com->inter.com[(4-((com->color_group)-1)%4)%4]));

  /// WHY THIS ????????
  /* Decide which of the two intercomms is "next" (outgoing) and which is
     "previous" (incoming) for this group's position in the ring. */
  if((4-((com->color_group)-1)%4)%4>com->color_group){
    next=(4-(com->color_group-1)%4)%4;
    prev=4-((com->color_group)+1);
  } else {
    prev=(4-(com->color_group-1)%4)%4;
    next=4-((com->color_group)+1);
  }

  /* set the in and out communicators, sanity-checking that each really is
     an intercommunicator */
  com->out_com=com->inter.com[next];
  MPI_Comm_test_inter(com->inter.com[next],&flag);
  if(!flag){
    mpi_lsa_print("\n\n\n\nproblem with inter.[next]\n\n\n\n\n", com);
  }
  com->in_com=com->inter.com[prev];
  MPI_Comm_test_inter(com->inter.com[prev],&flag);
  if(!flag){
    mpi_lsa_print("\n\n\n\n\nproblem with inter.[prev]\n\n\n\n", com);
  }

  /* remote group sizes of the two neighbors, plus our own group size,
     for the diagnostic summary below */
  MPI_Comm_remote_size(com->out_com,&next_size);
  MPI_Comm_remote_size(com->in_com,&prev_size);
  MPI_Comm_size(com->com_group,&size);

  if(com->color_group==0) printf("GMRES 1: my intercomm with LS %d \n",com->in_com);
  if(com->rank_world==0) printf("]> In and Out communicators : \n");
  MPI_Barrier(MPI_COMM_WORLD);
  if(com->color_group==0) printf("GMRES : ");
  else if(com->color_group==1) printf("MAIN : ");
  else if(com->color_group==2) printf("ARNOLDI : ");
  else if(com->color_group==3) printf("LS : ");
  printf("%d: %d (%d) -> %d (%d) -> %d (%d) in_com: %d, out_com: %d\n",com->rank_world,com->master.com[prev],prev_size,com->color_group,size,com->master.com[next],next_size, com->in_com, com->out_com);
  return 0;
}
/* Fortran interface stub for MPI_Comm_test_inter.  Only on success is the
 * C int result converted to a Fortran LOGICAL and stored in *v2. */
FORT_DLL_SPEC void FORT_CALL mpi_comm_test_inter_ ( MPI_Fint *v1, MPI_Fint *v2, MPI_Fint *ierr ){
    int c_result;

    *ierr = MPI_Comm_test_inter( (MPI_Comm)(*v1), &c_result );
    if (MPI_SUCCESS == *ierr) {
        *v2 = MPIR_TO_FLOG(c_result);
    }
}
/*
 * Test driver: for each intercommunicator produced by MTestGetIntercomm,
 * split it by rank parity with MPI_Comm_split and check that the resulting
 * intercommunicator has the predicted local and remote sizes.
 */
int main( int argc, char *argv[] )
{
    int errs = 0;
    int size, isLeft;
    MPI_Comm intercomm, newcomm;

    MTest_Init( &argc, &argv );

    MPI_Comm_size( MPI_COMM_WORLD, &size );
    if (size < 4) {
        printf( "This test requires at least 4 processes\n" );
        MPI_Abort( MPI_COMM_WORLD, 1 );
    }

    while (MTestGetIntercomm( &intercomm, &isLeft, 2 )) {
        int key, color;

        if (intercomm == MPI_COMM_NULL) continue;

        /* Split this intercomm.  The new intercomms contain the
           processes that had odd (resp even) rank in their local group
           in the original intercomm */
        MTestPrintfMsg( 1, "Created intercomm %s\n", MTestGetIntercommName() );
        MPI_Comm_rank( intercomm, &key );
        color = (key % 2);
        MPI_Comm_split( intercomm, color, key, &newcomm );

        /* Make sure that the new communicator has the appropriate pieces */
        if (newcomm != MPI_COMM_NULL) {
            int orig_rsize, orig_size, new_rsize, new_size;
            int predicted_size, flag, commok=1;

            /* NOTE(review): this checks the *input* intercomm, but the
               message says "Output communicator" -- possibly intended to
               test newcomm instead; confirm. */
            MPI_Comm_test_inter( intercomm, &flag );
            if (!flag) {
                errs++;
                printf( "Output communicator is not an intercomm\n" );
                commok = 0;
            }

            MPI_Comm_remote_size( intercomm, &orig_rsize );
            MPI_Comm_remote_size( newcomm, &new_rsize );
            MPI_Comm_size( intercomm, &orig_size );
            MPI_Comm_size( newcomm, &new_size );

            /* The local size is 1/2 the original size, +1 if the
               size was odd and the color was even.  More precisely,
               let n be the orig_size.  Then
                                 color 0     color 1
               orig size even    n/2         n/2
               orig size odd     (n+1)/2     n/2

               However, since these are integer valued, if n is even,
               then (n+1)/2 = n/2, so this table is much simpler:
                                 color 0     color 1
               orig size even    (n+1)/2     n/2
               orig size odd     (n+1)/2     n/2
            */
            predicted_size = (orig_size + !color) / 2;
            if (predicted_size != new_size) {
                errs++;
                printf( "Predicted size = %d but found %d for %s (%d,%d)\n",
                        predicted_size, new_size, MTestGetIntercommName(),
                        orig_size, orig_rsize );
                commok = 0;
            }
            predicted_size = (orig_rsize + !color) / 2;
            if (predicted_size != new_rsize) {
                errs++;
                printf( "Predicted remote size = %d but found %d for %s (%d,%d)\n",
                        predicted_size, new_rsize, MTestGetIntercommName(),
                        orig_size, orig_rsize );
                commok = 0;
            }
            /* ... more to do */
            if (commok) {
                errs += TestIntercomm( newcomm );
            }
        }
        else {
            int orig_rsize;
            /* If the newcomm is null, then this means that remote group
               for this color is of size zero (since all processes in this
               test have been given colors other than MPI_UNDEFINED).
               Confirm that here */
            /* FIXME: ToDo */
            MPI_Comm_remote_size( intercomm, &orig_rsize );
            if (orig_rsize == 1) {
                if (color == 0) {
                    errs++;
                    printf( "Returned null intercomm when non-null expected\n" );
                }
            }
        }
        if (newcomm != MPI_COMM_NULL)
            MPI_Comm_free( &newcomm );
        MPI_Comm_free( &intercomm );
    }

    MTest_Finalize(errs);
    MPI_Finalize();
    return 0;
}
/*@
MPI_Cart_create - Makes a new communicator to which topology information
                  has been attached

Input Parameters:
+ comm_old - input communicator (handle)
. ndims - number of dimensions of cartesian grid (integer)
. dims - integer array of size ndims specifying the number of processes in
  each dimension
. periods - logical array of size ndims specifying whether the grid is
  periodic (true) or not (false) in each dimension
- reorder - ranking may be reordered (true) or not (false) (logical)

Output Parameter:
. comm_cart - communicator with new cartesian topology (handle)

Algorithm:
We ignore 'reorder' info currently.

.N fortran
.N Errors
.N MPI_SUCCESS
.N MPI_ERR_TOPOLOGY
.N MPI_ERR_DIMS
.N MPI_ERR_ARG
@*/
int MPI_Cart_create ( MPI_Comm comm_old, int ndims, int *dims, int *periods, int reorder, MPI_Comm *comm_cart )
{
  int range[1][3];
  MPI_Group group_old, group;
  int i, rank, num_ranks = 1;
  int mpi_errno = MPI_SUCCESS;
  int flag, size;
  MPIR_TOPOLOGY *topo;
  struct MPIR_COMMUNICATOR *comm_old_ptr;
  static char myname[] = "MPI_CART_CREATE";

  TR_PUSH(myname);
  comm_old_ptr = MPIR_GET_COMM_PTR(comm_old);

  /* Check validity of arguments */
#ifndef MPIR_NO_ERROR_CHECKING
  MPIR_TEST_MPI_COMM(comm_old,comm_old_ptr,comm_old_ptr,myname);
  MPIR_TEST_ARG(comm_cart);
  MPIR_TEST_ARG(periods);
  if (ndims < 1 || dims == (int *)0) mpi_errno = MPI_ERR_DIMS;
  if (mpi_errno)
      return MPIR_ERROR(comm_old_ptr, mpi_errno, myname );

  /* Check for Intra-communicator: topologies are not defined on intercomms */
  MPI_Comm_test_inter ( comm_old, &flag );
  if (flag)
    return MPIR_ERROR(comm_old_ptr,
                      MPIR_ERRCLASS_TO_CODE(MPI_ERR_COMM,MPIR_ERR_COMM_INTER),
                      myname );
#endif

  /* Determine number of ranks in topology: product of |dims[i]| */
  for ( i=0; i<ndims; i++ )
    num_ranks *= (dims[i]>0)?dims[i]:-dims[i];
  if ( num_ranks < 1 ) {
    (*comm_cart) = MPI_COMM_NULL;
    return MPIR_ERROR( comm_old_ptr, MPI_ERR_TOPOLOGY, myname );
  }

  /* Is the old communicator big enough? */
  MPIR_Comm_size (comm_old_ptr, &size);
  if (num_ranks > size) {
      mpi_errno = MPIR_Err_setmsg( MPI_ERR_TOPOLOGY, MPIR_ERR_TOPO_TOO_LARGE,
                                   myname,
                                   "Topology size is larger than size of communicator",
                                   "Topology size %d is greater than communicator size %d",
                                   num_ranks, size );
      return MPIR_ERROR(comm_old_ptr, mpi_errno, myname );
  }

  /* Make new comm: take the first num_ranks ranks of comm_old (reorder is
     ignored, so ranks keep their relative order). */
  range[0][0] = 0;
  range[0][1] = num_ranks - 1;
  range[0][2] = 1;
  MPI_Comm_group ( comm_old, &group_old );
  MPI_Group_range_incl ( group_old, 1, range, &group );
  MPI_Comm_create ( comm_old, group, comm_cart );
  MPI_Group_free( &group );
  MPI_Group_free( &group_old );

  /* Store topology information in new communicator (only on members) */
  if ( (*comm_cart) != MPI_COMM_NULL ) {
      MPIR_ALLOC(topo,(MPIR_TOPOLOGY *) MPIR_SBalloc ( MPIR_topo_els ),
                 comm_old_ptr,MPI_ERR_EXHAUSTED,myname);
      MPIR_SET_COOKIE(&topo->cart,MPIR_CART_TOPOL_COOKIE)
      topo->cart.type = MPI_CART;
      topo->cart.nnodes = num_ranks;
      topo->cart.ndims = ndims;
      /* One allocation holds dims, periods and position back-to-back. */
      MPIR_ALLOC(topo->cart.dims,(int *)MALLOC( sizeof(int) * 3 * ndims ),
                 comm_old_ptr,MPI_ERR_EXHAUSTED,myname);
      topo->cart.periods = topo->cart.dims + ndims;
      topo->cart.position = topo->cart.periods + ndims;
      for ( i=0; i<ndims; i++ ) {
          topo->cart.dims[i] = dims[i];
          topo->cart.periods[i] = periods[i];
      }

      /* Compute my position: peel off one dimension at a time
         (row-major decomposition of the rank). */
      MPI_Comm_rank ( (*comm_cart), &rank );
      for ( i=0; i < ndims; i++ ) {
          num_ranks = num_ranks / dims[i];
          topo->cart.position[i] = rank / num_ranks;
          rank = rank % num_ranks;
      }

      /* cache topology information */
      MPI_Attr_put ( (*comm_cart), MPIR_TOPOLOGY_KEYVAL, (void *)topo );
  }
  TR_POP;
  return (mpi_errno);
}
/*
 * declareBindings: exercises one call site per MPI-1 binding so the compiler
 * checks every prototype.  This function is a compile-time check only -- the
 * arguments are uninitialized and it must never actually be executed.
 * The "// Lnn" comments track line numbers in the reference document.
 */
void declareBindings (void)
{
  /* === Point-to-point === */
  void* buf;
  int count;
  MPI_Datatype datatype;
  int dest;
  int tag;
  MPI_Comm comm;
  MPI_Send (buf, count, datatype, dest, tag, comm); // L12
  int source;
  MPI_Status status;
  MPI_Recv (buf, count, datatype, source, tag, comm, &status); // L15
  MPI_Get_count (&status, datatype, &count);
  MPI_Bsend (buf, count, datatype, dest, tag, comm);
  MPI_Ssend (buf, count, datatype, dest, tag, comm);
  MPI_Rsend (buf, count, datatype, dest, tag, comm);
  void* buffer;
  int size;
  MPI_Buffer_attach (buffer, size); // L22
  MPI_Buffer_detach (buffer, &size);
  MPI_Request request;
  MPI_Isend (buf, count, datatype, dest, tag, comm, &request); // L25
  MPI_Ibsend (buf, count, datatype, dest, tag, comm, &request);
  MPI_Issend (buf, count, datatype, dest, tag, comm, &request);
  MPI_Irsend (buf, count, datatype, dest, tag, comm, &request);
  MPI_Irecv (buf, count, datatype, source, tag, comm, &request);
  MPI_Wait (&request, &status);
  int flag;
  MPI_Test (&request, &flag, &status); // L32
  MPI_Request_free (&request);
  MPI_Request* array_of_requests;
  int index;
  MPI_Waitany (count, array_of_requests, &index, &status); // L36
  MPI_Testany (count, array_of_requests, &index, &flag, &status);
  MPI_Status* array_of_statuses;
  MPI_Waitall (count, array_of_requests, array_of_statuses); // L39
  MPI_Testall (count, array_of_requests, &flag, array_of_statuses);
  int incount;
  int outcount;
  int* array_of_indices;
  MPI_Waitsome (incount, array_of_requests, &outcount, array_of_indices,
                array_of_statuses); // L44--45
  MPI_Testsome (incount, array_of_requests, &outcount, array_of_indices,
                array_of_statuses); // L46--47
  MPI_Iprobe (source, tag, comm, &flag, &status); // L48
  MPI_Probe (source, tag, comm, &status);
  MPI_Cancel (&request);
  MPI_Test_cancelled (&status, &flag);
  MPI_Send_init (buf, count, datatype, dest, tag, comm, &request);
  MPI_Bsend_init (buf, count, datatype, dest, tag, comm, &request);
  MPI_Ssend_init (buf, count, datatype, dest, tag, comm, &request);
  MPI_Rsend_init (buf, count, datatype, dest, tag, comm, &request);
  MPI_Recv_init (buf, count, datatype, source, tag, comm, &request);
  MPI_Start (&request);
  MPI_Startall (count, array_of_requests);
  void* sendbuf;
  int sendcount;
  MPI_Datatype sendtype;
  int sendtag;
  void* recvbuf;
  int recvcount;
  MPI_Datatype recvtype;
  MPI_Datatype recvtag;  /* NOTE(review): a tag declared as MPI_Datatype looks odd -- confirm intent */
  MPI_Sendrecv (sendbuf, sendcount, sendtype, dest, sendtag,
                recvbuf, recvcount, recvtype, source, recvtag,
                comm, &status); // L67--69
  MPI_Sendrecv_replace (buf, count, datatype, dest, sendtag, source, recvtag,
                        comm, &status); // L70--71
  MPI_Datatype oldtype;
  MPI_Datatype newtype;
  MPI_Type_contiguous (count, oldtype, &newtype); // L74
  int blocklength;
  {
    int stride;
    MPI_Type_vector (count, blocklength, stride, oldtype, &newtype); // L78
  }
  {
    MPI_Aint stride;
    MPI_Type_hvector (count, blocklength, stride, oldtype, &newtype); // L82
  }
  int* array_of_blocklengths;
  {
    int* array_of_displacements;
    MPI_Type_indexed (count, array_of_blocklengths, array_of_displacements,
                      oldtype, &newtype); // L87--88
  }
  {
    MPI_Aint* array_of_displacements;
    MPI_Type_hindexed (count, array_of_blocklengths, array_of_displacements,
                       oldtype, &newtype); // L92--93
    MPI_Datatype* array_of_types;
    MPI_Type_struct (count, array_of_blocklengths, array_of_displacements,
                     array_of_types, &newtype); // L95--96
  }
  void* location;
  MPI_Aint address;
  MPI_Address (location, &address); // L100
  MPI_Aint extent;
  MPI_Type_extent (datatype, &extent); // L102
  MPI_Type_size (datatype, &size);
  MPI_Aint displacement;
  MPI_Type_lb (datatype, &displacement); // L105
  MPI_Type_ub (datatype, &displacement);
  MPI_Type_commit (&datatype);
  MPI_Type_free (&datatype);
  MPI_Get_elements (&status, datatype, &count);
  void* inbuf;
  void* outbuf;
  int outsize;
  int position;
  MPI_Pack (inbuf, incount, datatype, outbuf, outsize, &position, comm); // L114
  int insize;
  MPI_Unpack (inbuf, insize, &position, outbuf, outcount, datatype,
              comm); // L116--117
  MPI_Pack_size (incount, datatype, comm, &size);

  /* === Collectives === */
  MPI_Barrier (comm); // L121
  int root;
  MPI_Bcast (buffer, count, datatype, root, comm); // L123
  MPI_Gather (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,
              root, comm); // L124--125
  int* recvcounts;
  int* displs;
  MPI_Gatherv (sendbuf, sendcount, sendtype,
               recvbuf, recvcounts, displs, recvtype,
               root, comm); // L128--130
  MPI_Scatter (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,
               root, comm); // L131--132
  int* sendcounts;
  MPI_Scatterv (sendbuf, sendcounts, displs, sendtype,
                recvbuf, recvcount, recvtype, root, comm); // L134--135
  MPI_Allgather (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,
                 comm); // L136--137
  MPI_Allgatherv (sendbuf, sendcount, sendtype,
                  recvbuf, recvcounts, displs, recvtype,
                  comm); // L138--140
  MPI_Alltoall (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,
                comm); // L141--142
  int* sdispls;
  int* rdispls;
  MPI_Alltoallv (sendbuf, sendcounts, sdispls, sendtype,
                 recvbuf, recvcounts, rdispls, recvtype,
                 comm); // L145--147
  MPI_Op op;
  MPI_Reduce (sendbuf, recvbuf, count, datatype, op, root, comm); // L149
#if 0
  MPI_User_function function;
  int commute;
  MPI_Op_create (function, commute, &op); // L153
#endif
  MPI_Op_free (&op); // L155
  MPI_Allreduce (sendbuf, recvbuf, count, datatype, op, comm);
  MPI_Reduce_scatter (sendbuf, recvbuf, recvcounts, datatype, op, comm);
  MPI_Scan (sendbuf, recvbuf, count, datatype, op, comm);

  /* === Groups, contexts, and communicators === */
  MPI_Group group;
  MPI_Group_size (group, &size); // L162
  int rank;
  MPI_Group_rank (group, &rank); // L164
  MPI_Group group1;
  int n;
  int* ranks1;
  MPI_Group group2;
  int* ranks2;
  MPI_Group_translate_ranks (group1, n, ranks1, group2, ranks2); // L170
  int result;
  MPI_Group_compare (group1, group2, &result); // L172
  MPI_Group newgroup;
  MPI_Group_union (group1, group2, &newgroup); // L174
  MPI_Group_intersection (group1, group2, &newgroup);
  MPI_Group_difference (group1, group2, &newgroup);
  int* ranks;
  MPI_Group_incl (group, n, ranks, &newgroup); // L178
  MPI_Group_excl (group, n, ranks, &newgroup);
  extern int ranges[][3];
  MPI_Group_range_incl (group, n, ranges, &newgroup); // L181
  MPI_Group_range_excl (group, n, ranges, &newgroup);
  MPI_Group_free (&group);
  MPI_Comm_size (comm, &size);
  MPI_Comm_rank (comm, &rank);
  MPI_Comm comm1;
  MPI_Comm comm2;
  MPI_Comm_compare (comm1, comm2, &result);
  MPI_Comm newcomm;
  MPI_Comm_dup (comm, &newcomm);
  MPI_Comm_create (comm, group, &newcomm);
  int color;
  int key;
  MPI_Comm_split (comm, color, key, &newcomm); // L194
  MPI_Comm_free (&comm);
  MPI_Comm_test_inter (comm, &flag);
  MPI_Comm_remote_size (comm, &size);
  MPI_Comm_remote_group (comm, &group);
  MPI_Comm local_comm;
  int local_leader;
  MPI_Comm peer_comm;
  int remote_leader;
  MPI_Comm newintercomm;
  MPI_Intercomm_create (local_comm, local_leader, peer_comm, remote_leader,
                        tag, &newintercomm); // L204--205
  MPI_Comm intercomm;
  MPI_Comm newintracomm;
  int high;
  MPI_Intercomm_merge (intercomm, high, &newintracomm); // L209
  int keyval;
#if 0
  MPI_Copy_function copy_fn;
  MPI_Delete_function delete_fn;
  void* extra_state;
  MPI_Keyval_create (copy_fn, delete_fn, &keyval, extra_state); // L215
#endif
  MPI_Keyval_free (&keyval); // L217
  void* attribute_val;
  MPI_Attr_put (comm, keyval, attribute_val); // L219
  MPI_Attr_get (comm, keyval, attribute_val, &flag);
  MPI_Attr_delete (comm, keyval);

  /* === Environmental inquiry === */
  char* name;
  int resultlen;
  MPI_Get_processor_name (name, &resultlen); // L226
  MPI_Errhandler errhandler;
#if 0
  MPI_Handler_function function;
  MPI_Errhandler_create (function, &errhandler); // L230
#endif
  MPI_Errhandler_set (comm, errhandler); // L232
  MPI_Errhandler_get (comm, &errhandler);
  MPI_Errhandler_free (&errhandler);
  int errorcode;
  char* string;
  MPI_Error_string (errorcode, string, &resultlen); // L237
  int errorclass;
  MPI_Error_class (errorcode, &errorclass); // L239
  MPI_Wtime ();
  MPI_Wtick ();
  int argc;
  char** argv;
  MPI_Init (&argc, &argv); // L244
  MPI_Finalize ();
  MPI_Initialized (&flag);
  MPI_Abort (comm, errorcode);
}
/*
 * Compute the aggregator-related parameters that are required in 2-phase
 * collective IO of ADIO.
 * The parameters are
 * . the number of aggregators (proxies) : fd->hints->cb_nodes
 * . the ranks of the aggregators :        fd->hints->ranklist
 * If MP_IONODEFILE is defined, POE determines all tasks on every node listed
 * in the node file and defines MP_IOTASKLIST with them, making them all
 * aggregators.  Alternatively, the user can explictly set MP_IOTASKLIST
 * themselves.  The format of the MP_IOTASKLIST is a colon-delimited list of
 * task ids, the first entry being the total number of aggregators, for example
 * to specify 4 aggregators on task ids 0,8,16,24 the value would be:
 * 4:0:8:16:24.  If there is no MP_IONODEFILE, or MP_IOTASKLIST, then the
 * default aggregator selection is 1 task per node for every node of the job -
 * additionally, an environment variable MP_IOAGGR_CNT can be specified, which
 * defines the total number of aggregators, spread evenly across all the nodes.
 * The romio_cb_nodes and romio_cb_config_list hint user settings are ignored.
 */
int ADIOI_PE_gen_agg_ranklist(ADIO_File fd)
{
    int numAggs = 0;
    char *ioTaskList = getenv( "MP_IOTASKLIST" );
    char *ioAggrCount = getenv("MP_IOAGGR_CNT");
    int i,j;  /* NOTE(review): j is declared but unused in this function */
    int inTERcommFlag = 0;

    int myRank,commSize;
    MPI_Comm_rank(fd->comm, &myRank);
    MPI_Comm_size(fd->comm, &commSize);

    /* MPI-IO here only supports intracommunicators; abort otherwise. */
    MPI_Comm_test_inter(fd->comm, &inTERcommFlag);
    if (inTERcommFlag) {
        FPRINTF(stderr,"ERROR: ATTENTION: inTERcomms are not supported in MPI-IO - aborting....\n");
        perror("ADIOI_PE_gen_agg_ranklist:");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    if (ioTaskList) {
        /* Parse "N:rank:rank:..." -- first field is the aggregator count. */
        int ioTaskListLen = strlen(ioTaskList);
        int ioTaskListPos = 0;
        char tmpBuf[8];   /* Big enough for 1M tasks (7 digits task ID). */
        tmpBuf[7] = '\0';
        for (i=0; i<7; i++) {
            tmpBuf[i] = *ioTaskList++;      /* Maximum is 7 digits for 1 million. */
            ioTaskListPos++;
            if (*ioTaskList == ':') {       /* If the next char is a ':' ends it. */
                tmpBuf[i+1] = '\0';
                break;
            }
        }
        numAggs = atoi(tmpBuf);
        if (numAggs == 0)
            FPRINTF(stderr,"ERROR: ATTENTION: Number of aggregators specified in MP_IOTASKLIST set at 0 - default aggregator selection will be used.\n");
        else if (!((numAggs > 0 ) && (numAggs <= commSize))) {
            /* Out-of-range count: clamp to the communicator size. */
            FPRINTF(stderr,"ERROR: ATTENTION: The number of aggregators (%s) specified in MP_IOTASKLIST is outside the communicator task range of %d.\n",tmpBuf,commSize);
            numAggs = commSize;
        }
        fd->hints->ranklist = (int *) ADIOI_Malloc (numAggs * sizeof(int));

        /* Parse each subsequent rank field, skipping invalid, out-of-range
           and duplicate entries (each with a diagnostic). */
        int aggIndex = 0;
        while (aggIndex < numAggs) {
            ioTaskList++;                /* Advance past the ':' */
            ioTaskListPos++;
            int allDigits=1;
            for (i=0; i<7; i++) {
                if (*ioTaskList < '0' || *ioTaskList > '9')
                    allDigits=0;
                tmpBuf[i] = *ioTaskList++;
                ioTaskListPos++;
                if ( (*ioTaskList == ':') || (*ioTaskList == '\0') ) {
                    tmpBuf[i+1] = '\0';
                    break;
                }
            }
            if (allDigits) {
                int newAggRank = atoi(tmpBuf);
                if (!((newAggRank >= 0 ) && (newAggRank < commSize))) {
                    FPRINTF(stderr,"ERROR: ATTENTION: The aggregator '%s' specified in MP_IOTASKLIST is not within the communicator task range of 0 to %d - it will be ignored.\n",tmpBuf,commSize-1);
                }
                else {
                    int aggAlreadyAdded = 0;
                    for (i=0;i<aggIndex;i++)
                        if (fd->hints->ranklist[i] == newAggRank) {
                            aggAlreadyAdded = 1;
                            break;
                        }
                    if (!aggAlreadyAdded)
                        fd->hints->ranklist[aggIndex++] = newAggRank;
                    else
                        FPRINTF(stderr,"ERROR: ATTENTION: The aggregator '%d' is specified multiple times in MP_IOTASKLIST - duplicates are ignored.\n",newAggRank);
                }
            }
            else {
                FPRINTF(stderr,"ERROR: ATTENTION: The aggregator '%s' specified in MP_IOTASKLIST is not a valid integer task id - it will be ignored.\n",tmpBuf);
            }

            /* At the end check whether the list is shorter than specified. */
            if (ioTaskListPos == ioTaskListLen) {
                if (aggIndex == 0) {
                    FPRINTF(stderr,"ERROR: ATTENTION: No aggregators were correctly specified in MP_IOTASKLIST - default aggregator selection will be used.\n");
                    ADIOI_Free(fd->hints->ranklist);
                }
                else if (aggIndex < numAggs)
                    FPRINTF(stderr,"ERROR: ATTENTION: %d aggregators were specified in MP_IOTASKLIST but only %d were correctly specified - setting the number of aggregators to %d.\n",numAggs, aggIndex,aggIndex);
                numAggs = aggIndex;
            }
        }
    }
    if (numAggs == 0) {
        /* No (usable) MP_IOTASKLIST: derive aggregators from the node layout
           stored in the MPICH internal communicator structure. */
        MPID_Comm *mpidCommData;

        MPID_Comm_get_ptr(fd->comm,mpidCommData);
        int localSize = mpidCommData->local_size;

        // get my node rank
        int myNodeRank = mpidCommData->intranode_table[mpidCommData->rank];

        /* Gather every task's node rank so each process sees the full map. */
        int *allNodeRanks = (int *) ADIOI_Malloc (localSize * sizeof(int));
        allNodeRanks[myRank] = myNodeRank;
        MPI_Allgather(MPI_IN_PLACE, 1, MPI_INT, allNodeRanks, 1, MPI_INT, fd->comm);

#ifdef AGG_DEBUG
        printf("MPID_Comm data: local_size is %d\nintranode_table entries:\n",mpidCommData->local_size);
        for (i=0;i<localSize;i++) {
            printf("%d ",mpidCommData->intranode_table[i]);
        }
        printf("\ninternode_table entries:\n");
        for (i=0;i<localSize;i++) {
            printf("%d ",mpidCommData->internode_table[i]);
        }
        printf("\n");
        printf("\nallNodeRanks entries:\n");
        for (i=0;i<localSize;i++) {
            printf("%d ",allNodeRanks[i]);
        }
        printf("\n");
#endif

        if (ioAggrCount) {
            /* cntType: -1 = one aggregator per node, -2 = every task,
               >0 = exactly that many aggregators, round-robin over nodes. */
            int cntType = -1;

            if ( strcasecmp(ioAggrCount, "ALL") ) {
                if ( (cntType = atoi(ioAggrCount)) <= 0 ) {
                    /* Input is other non-digit or less than 1 the assume */
                    /* 1 aggregator per node.  Note: atoi(-1) reutns -1.  */
                    /* No warning message given here -- done earlier.     */
                    cntType = -1;
                }
            }
            else {
                /* ALL is specified set aggr count to localSize */
                cntType = -2;
            }
            switch(cntType) {
            case -1:
                /* 1 aggr/node case: pick every task whose node rank is 0. */
                {
                int rankListIndex = 0;
                fd->hints->ranklist = (int *) ADIOI_Malloc (localSize * sizeof(int));
                for (i=0;i<localSize;i++) {
                    if (allNodeRanks[i] == 0) {
                        fd->hints->ranklist[rankListIndex++] = i;
                        numAggs++;
                    }
                }
                }
                break;
            case -2:
                /* ALL tasks case */
                fd->hints->ranklist = (int *) ADIOI_Malloc (localSize * sizeof(int));
                for (i=0;i<localSize;i++) {
                    fd->hints->ranklist[i] = i;
                    numAggs++;
                }
                break;
            default:
                /* Specific aggr count case -- MUST be less than localSize,
                   otherwise set to localSize */
                if (cntType > localSize)
                    cntType = localSize;
                numAggs = cntType;
                // Round-robin thru allNodeRanks - pick the 0's, then the 1's, etc
                int currentNodeRank = 0;  // node rank currently being selected as aggregator
                int rankListIndex = 0;
                int currentAllNodeIndex = 0;
                fd->hints->ranklist = (int *) ADIOI_Malloc (numAggs * sizeof(int));
                while (rankListIndex < numAggs) {
                    int foundEntry = 0;
                    while (!foundEntry && (currentAllNodeIndex < localSize)) {
                        if (allNodeRanks[currentAllNodeIndex] == currentNodeRank) {
                            fd->hints->ranklist[rankListIndex++] = currentAllNodeIndex;
                            foundEntry = 1;
                        }
                        currentAllNodeIndex++;
                    }
                    if (!foundEntry) {
                        /* Exhausted this node-rank level; wrap to the next. */
                        currentNodeRank++;
                        currentAllNodeIndex = 0;
                    }
                } // while
                break;
            } // switch(cntType)
        } // if (ioAggrCount)
        else { // default is 1 aggregator per node
            // take the 0 entries from allNodeRanks
            int rankListIndex = 0;
            fd->hints->ranklist = (int *) ADIOI_Malloc (localSize * sizeof(int));
            for (i=0;i<localSize;i++) {
                if (allNodeRanks[i] == 0) {
                    fd->hints->ranklist[rankListIndex++] = i;
                    numAggs++;
                }
            }
        }
        ADIOI_Free(allNodeRanks);
    }

    /* Optional debug dump of the final ranklist on rank 0. */
    if ( getenv("MP_I_SHOW_AGGRS") ) {
        if (myRank == 0) {
            printf("Agg rank list of %d generated:\n", numAggs);
            for (i=0;i<numAggs;i++) {
                printf("%d ",fd->hints->ranklist[i]);
            }
            printf("\n");
        }
    }
    fd->hints->cb_nodes = numAggs;

    return 0;
}
/*@
    MPI_File_open - Opens a file

Input Parameters:
. comm - communicator (handle)
. filename - name of file to open (string)
. amode - file access mode (integer)
. info - info object (handle)

Output Parameters:
. fh - file handle (handle)

.N fortran
@*/
int MPI_File_open(MPI_Comm comm, char *filename, int amode,
                  MPI_Info info, MPI_File *fh)
{
    int error_code, file_system, flag, tmp_amode, rank, orig_amode;
#ifndef PRINT_ERR_MSG
    static char myname[] = "MPI_FILE_OPEN";
#endif
    int err, min_code;
    char *tmp;
    MPI_Comm dupcomm, dupcommself;

#ifdef MPI_hpux
    int fl_xmpi;
    HPMP_IO_OPEN_START(fl_xmpi, comm);
#endif /* MPI_hpux */

    error_code = MPI_SUCCESS;

    /* ---- argument validation (done before any communicator dup) ---- */

    if (comm == MPI_COMM_NULL) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: Invalid communicator\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_COMM, MPIR_ERR_COMM_NULL,
				     myname, (char *) 0, (char *) 0);
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }

    MPI_Comm_test_inter(comm, &flag);
    if (flag) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: Intercommunicator cannot be passed to MPI_File_open\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_COMM, MPIR_ERR_COMM_INTER,
				     myname, (char *) 0, (char *) 0);
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }

    /* exactly one of RDONLY / RDWR / WRONLY must be set */
    if ( ((amode&MPI_MODE_RDONLY)?1:0) + ((amode&MPI_MODE_RDWR)?1:0) +
	 ((amode&MPI_MODE_WRONLY)?1:0) != 1 ) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: Exactly one of MPI_MODE_RDONLY, MPI_MODE_WRONLY, or MPI_MODE_RDWR must be specified\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_AMODE, 3, myname,
				     (char *) 0, (char *) 0);
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }

    if ((amode & MPI_MODE_RDONLY) &&
	((amode & MPI_MODE_CREATE) || (amode & MPI_MODE_EXCL))) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: It is erroneous to specify MPI_MODE_CREATE or MPI_MODE_EXCL with MPI_MODE_RDONLY\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_AMODE, 5, myname,
				     (char *) 0, (char *) 0);
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }

    if ((amode & MPI_MODE_RDWR) && (amode & MPI_MODE_SEQUENTIAL)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: It is erroneous to specify MPI_MODE_SEQUENTIAL with MPI_MODE_RDWR\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_AMODE, 7, myname,
				     (char *) 0, (char *) 0);
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }

    /* check if amode is the same on all processes */
    /* NOTE(review): dupcomm is not freed on the error-return paths below
       (FS-type errors); leak predates this change -- TODO confirm/fix
       separately so collective matching is not disturbed. */
    MPI_Comm_dup(comm, &dupcomm);
    tmp_amode = amode;

/*  
    Removed this check because broadcast is too expensive. 
    MPI_Bcast(&tmp_amode, 1, MPI_INT, 0, dupcomm);
    if (amode != tmp_amode) {
	FPRINTF(stderr, "MPI_File_open: amode must be the same on all processes\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
    }
*/

    /* check if ADIO has been initialized. If not, initialize it */
    if (ADIO_Init_keyval == MPI_KEYVAL_INVALID) {

	/* check if MPI itself has been initialized. If not, flag an error.
	   Can't initialize it here, because don't know argc, argv */
	MPI_Initialized(&flag);
	if (!flag) {
	    FPRINTF(stderr, "Error: MPI_Init() must be called before using MPI-IO\n");
	    MPI_Abort(MPI_COMM_WORLD, 1);
	}

	MPI_Keyval_create(MPI_NULL_COPY_FN, ADIOI_End_call, &ADIO_Init_keyval,
			  (void *) 0);

	/* put a dummy attribute on MPI_COMM_WORLD, because we want the
	   delete function to be called when MPI_COMM_WORLD is freed.
	   Hopefully the MPI library frees MPI_COMM_WORLD when MPI_Finalize
	   is called, though the standard does not mandate this. */
	MPI_Attr_put(MPI_COMM_WORLD, ADIO_Init_keyval, (void *) 0);

	/* initialize ADIO */
	ADIO_Init( (int *)0, (char ***)0, &error_code);
    }

    /* ---- determine the file-system type: either from an explicit
       "fstype:" prefix on the filename, or by probing the path ---- */

    file_system = -1;
    tmp = strchr(filename, ':');
#ifdef WIN32
    /* Unfortunately Windows uses ':' behind the drive letter.
       So we check if there is only one letter before the ':'
       Please do not use a single letter filesystem name! */
    if (tmp && ((tmp-filename) == 1)) tmp = 0;
#endif
    if (!tmp) {
	ADIO_FileSysType(filename, &file_system, &err);
	if (err != MPI_SUCCESS) {
#ifdef PRINT_ERR_MSG
	    FPRINTF(stderr, "MPI_File_open: Can't determine the file-system type. Check the filename/path you provided and try again. Otherwise, prefix the filename with a string to indicate the type of file sytem (piofs:, pfs:, nfs:, ufs:, hfs:, xfs:, sfs:, pvfs:, svm:).\n");
	    MPI_Abort(MPI_COMM_WORLD, 1);
#else
	    error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_FSTYPE,
					 myname, (char *) 0, (char *) 0);
	    return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
	}
	/* if any process detected NFS, all must use NFS (locking semantics) */
	MPI_Allreduce(&file_system, &min_code, 1, MPI_INT, MPI_MIN, dupcomm);
	if (min_code == ADIO_NFS) file_system = ADIO_NFS;
    }

    /* ---- reject file systems ROMIO was not configured for ---- */

#ifndef PFS
    if (!strncmp(filename, "pfs:", 4) || !strncmp(filename, "PFS:", 4) ||
	(file_system == ADIO_PFS)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: ROMIO has not been configured to use the PFS file system\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_PFS,
				     myname, (char *) 0, (char *) 0,"PFS");
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }
#endif
#ifndef PIOFS
    if (!strncmp(filename, "piofs:", 6) || !strncmp(filename, "PIOFS:", 6) ||
	(file_system == ADIO_PIOFS)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: ROMIO has not been configured to use the PIOFS file system\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_PIOFS,
				     myname, (char *) 0, (char *) 0,"PIOFS");
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }
#endif
#ifndef UFS
    if (!strncmp(filename, "ufs:", 4) || !strncmp(filename, "UFS:", 4) ||
	(file_system == ADIO_UFS)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: ROMIO has not been configured to use the UFS file system\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_UFS,
				     myname, (char *) 0, (char *) 0,"UFS");
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }
#endif
#ifndef NFS
    if (!strncmp(filename, "nfs:", 4) || !strncmp(filename, "NFS:", 4) ||
	(file_system == ADIO_NFS)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: ROMIO has not been configured to use the NFS file system\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_NFS,
				     myname, (char *) 0, (char *) 0,"NFS");
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }
#endif
#ifndef HFS
    if (!strncmp(filename, "hfs:", 4) || !strncmp(filename, "HFS:", 4) ||
	(file_system == ADIO_HFS)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: ROMIO has not been configured to use the HFS file system\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_HFS,
				     myname, (char *) 0, (char *) 0,"HFS");
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }
#endif
#ifndef XFS
    if (!strncmp(filename, "xfs:", 4) || !strncmp(filename, "XFS:", 4) ||
	(file_system == ADIO_XFS)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: ROMIO has not been configured to use the XFS file system\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_XFS,
				     myname, (char *) 0, (char *) 0,"XFS");
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }
#endif
#ifndef SFS
    if (!strncmp(filename, "sfs:", 4) || !strncmp(filename, "SFS:", 4) ||
	(file_system == ADIO_SFS)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: ROMIO has not been configured to use the SFS file system\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_SFS,
				     myname, (char *) 0, (char *) 0,"SFS");
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }
#endif
#ifndef PVFS
    if (!strncmp(filename, "pvfs:", 5) || !strncmp(filename, "PVFS:", 5) ||
	(file_system == ADIO_PVFS)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: ROMIO has not been configured to use the PVFS file system\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_PVFS,
				     myname, (char *) 0, (char *) 0,"PVFS");
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }
#endif
#ifndef SVM
    if (!strncmp(filename, "svm:", 4) || !strncmp(filename, "SVM:", 4) ||
	(file_system == ADIO_SVM)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: ROMIO has not been configured to use the SVM file system\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_SVM,
				     myname, (char *) 0, (char *) 0,"SVM");
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }
#endif
#ifndef NTFS
    /* FIX: this block previously re-tested the "svm:" prefix and ADIO_SVM
       (copy-paste error), so an NTFS file was never rejected and the error
       message named the wrong file system. Test NTFS with the full 5-char
       prefix, consistent with the stripping code below. */
    if (!strncmp(filename, "ntfs:", 5) || !strncmp(filename, "NTFS:", 5) ||
	(file_system == ADIO_NTFS)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: ROMIO has not been configured to use the NTFS file system\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_NTFS,
				     myname, (char *) 0, (char *) 0,"NTFS");
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }
#endif

    /* ---- strip the recognized "fstype:" prefix off the filename ---- */

    if (!strncmp(filename, "pfs:", 4) || !strncmp(filename, "PFS:", 4)) {
	file_system = ADIO_PFS;
	filename += 4;
    }
    else if (!strncmp(filename, "piofs:", 6) || !strncmp(filename, "PIOFS:", 6)) {
	file_system = ADIO_PIOFS;
	filename += 6;
    }
    else if (!strncmp(filename, "ufs:", 4) || !strncmp(filename, "UFS:", 4)) {
	file_system = ADIO_UFS;
	filename += 4;
    }
    else if (!strncmp(filename, "nfs:", 4) || !strncmp(filename, "NFS:", 4)) {
	file_system = ADIO_NFS;
	filename += 4;
    }
    else if (!strncmp(filename, "hfs:", 4) || !strncmp(filename, "HFS:", 4)) {
	file_system = ADIO_HFS;
	filename += 4;
    }
    else if (!strncmp(filename, "xfs:", 4) || !strncmp(filename, "XFS:", 4)) {
	file_system = ADIO_XFS;
	filename += 4;
    }
    else if (!strncmp(filename, "sfs:", 4) || !strncmp(filename, "SFS:", 4)) {
	file_system = ADIO_SFS;
	filename += 4;
    }
    else if (!strncmp(filename, "pvfs:", 5) || !strncmp(filename, "PVFS:", 5)) {
	file_system = ADIO_PVFS;
	filename += 5;
    }
    else if (!strncmp(filename, "svm:", 4) || !strncmp(filename, "SVM:", 4)) {
	file_system = ADIO_SVM;
	filename += 4;
    }
    /* FIX: previously compared only 4 chars ("ntfs") but advanced by 5, so
       any filename beginning "ntfs" (no colon) matched and lost a char. */
    else if (!strncmp(filename, "ntfs:", 5) || !strncmp(filename, "NTFS:", 5)) {
	file_system = ADIO_NTFS;
	filename += 5;
    }

    if (((file_system == ADIO_PIOFS) || (file_system == ADIO_PVFS)) &&
        (amode & MPI_MODE_SEQUENTIAL)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: MPI_MODE_SEQUENTIAL not supported on PIOFS and PVFS\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_UNSUPPORTED_OPERATION,
			MPIR_ERR_NO_MODE_SEQ, myname, (char *) 0, (char *) 0);
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }

    orig_amode = amode;
    MPI_Comm_rank(dupcomm, &rank);

    if ((amode & MPI_MODE_CREATE) && (amode & MPI_MODE_EXCL)) {
	/* the open should fail if the file exists. Only process 0 should
	   check this. Otherwise, if all processes try to check and the file
	   does not exist, one process will create the file and others who
	   reach later will return error. */
	if (!rank) {
	    MPI_Comm_dup(MPI_COMM_SELF, &dupcommself);
	    /* this dup is freed either in ADIO_Open if the open fails,
	       or in ADIO_Close */
	    *fh = ADIO_Open(dupcommself, filename, file_system, amode, 0,
			    MPI_BYTE, MPI_BYTE, M_ASYNC, info,
			    ADIO_PERM_NULL, &error_code);
	    /* broadcast the error code to other processes */
	    MPI_Bcast(&error_code, 1, MPI_INT, 0, dupcomm);
	    /* if no error, close the file. It will be reopened normally
	       below. */
	    if (error_code == MPI_SUCCESS) ADIO_Close(*fh, &error_code);
	}
	else MPI_Bcast(&error_code, 1, MPI_INT, 0, dupcomm);

	if (error_code != MPI_SUCCESS) {
	    MPI_Comm_free(&dupcomm);
	    *fh = MPI_FILE_NULL;
#ifdef MPI_hpux
	    HPMP_IO_OPEN_END(fl_xmpi, *fh, comm);
#endif /* MPI_hpux */
	    return error_code;
	}
	else amode = amode ^ MPI_MODE_EXCL;  /* turn off MPI_MODE_EXCL */
    }

    /* use default values for disp, etype, filetype */
    /* set iomode=M_ASYNC. It is used to implement the Intel PFS interface
       on top of ADIO. Not relevant for MPI-IO implementation */

    *fh = ADIO_Open(dupcomm, filename, file_system, amode, 0, MPI_BYTE,
		    MPI_BYTE, M_ASYNC, info, ADIO_PERM_NULL, &error_code);

    /* if MPI_MODE_EXCL was removed, add it back */
    if ((error_code == MPI_SUCCESS) && (amode != orig_amode))
	(*fh)->access_mode = orig_amode;

    /* determine name of file that will hold the shared file pointer */
    /* can't support shared file pointers on a file system that doesn't
       support file locking, e.g., PIOFS, PVFS */
    if ((error_code == MPI_SUCCESS) && ((*fh)->file_system != ADIO_PIOFS)
	 && ((*fh)->file_system != ADIO_PVFS)) {
	ADIOI_Shfp_fname(*fh, rank);

	/* if MPI_MODE_APPEND, set the shared file pointer to end of file.
	   indiv. file pointer already set to end of file in ADIO_Open.
	   Here file view is just bytes. */
	if ((*fh)->access_mode & MPI_MODE_APPEND) {
	    if (!rank) ADIO_Set_shared_fp(*fh, (*fh)->fp_ind, &error_code);
	    MPI_Barrier(dupcomm);
	}
    }

#ifdef MPI_hpux
    HPMP_IO_OPEN_END(fl_xmpi, *fh, comm);
#endif /* MPI_hpux */
    return error_code;
}