Exemplo n.º 1
0
    /*!
    \param [in] parent Pointer to context on client side
    \param [in] intraComm_ communicator of group client
    \param [in] interComm_ communicator of group server
    \cxtSer [in] cxtSer Pointer to context of server side. (It is only used on case of attached mode)
    */
    CContextClient::CContextClient(CContext* parent, MPI_Comm intraComm_, MPI_Comm interComm_, CContext* cxtSer)
     : mapBufferSize_(), parentServer(cxtSer), maxBufferedEvents(4)
    {
      context = parent;
      intraComm = intraComm_;
      interComm = interComm_;
      MPI_Comm_rank(intraComm, &clientRank);
      MPI_Comm_size(intraComm, &clientSize);

      int flag;
      MPI_Comm_test_inter(interComm, &flag);
      if (flag) MPI_Comm_remote_size(interComm, &serverSize);
      else  MPI_Comm_size(interComm, &serverSize);

      if (clientSize < serverSize)
      {
        int serverByClient = serverSize / clientSize;
        int remain = serverSize % clientSize;
        int rankStart = serverByClient * clientRank;

        if (clientRank < remain)
        {
          serverByClient++;
          rankStart += clientRank;
        }
        else
          rankStart += remain;

        for (int i = 0; i < serverByClient; i++)
          ranksServerLeader.push_back(rankStart + i);

        ranksServerNotLeader.resize(0);
      }
      else
      {
        int clientByServer = clientSize / serverSize;
        int remain = clientSize % serverSize;

        if (clientRank < (clientByServer + 1) * remain)
        {
          if (clientRank % (clientByServer + 1) == 0)
            ranksServerLeader.push_back(clientRank / (clientByServer + 1));
          else
            ranksServerNotLeader.push_back(clientRank / (clientByServer + 1));
        }
        else
        {
          int rank = clientRank - (clientByServer + 1) * remain;
          if (rank % clientByServer == 0)
            ranksServerLeader.push_back(remain + rank / clientByServer);
          else
            ranksServerNotLeader.push_back(remain + rank / clientByServer);
        }        
      }

      timeLine = 0;
    }
Exemplo n.º 2
0
int MPIX_Neighbor_alltoallw_x(const void *sendbuf, const MPI_Count sendcounts[],
                              const MPI_Aint sdispls[], const MPI_Datatype sendtypes[],
                              void *recvbuf, const MPI_Count recvcounts[],
                              const MPI_Aint rdispls[], const MPI_Datatype recvtypes[],
                              MPI_Comm comm)
{
    int rc = MPI_SUCCESS;

    int is_intercomm;
    MPI_Comm_test_inter(comm, &is_intercomm);
    if (is_intercomm)
        BigMPI_Error("BigMPI does not support intercommunicators yet.\n");

    if (sendbuf==MPI_IN_PLACE)
        BigMPI_Error("BigMPI does not support in-place in the v-collectives.  Sorry. \n");

    int size, rank;
    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

    int          * newsendcounts = malloc(size*sizeof(int));          assert(newsendcounts!=NULL);
    MPI_Datatype * newsendtypes  = malloc(size*sizeof(MPI_Datatype)); assert(newsendtypes!=NULL);
    MPI_Aint     * newsdispls    = malloc(size*sizeof(MPI_Aint));     assert(newsdispls!=NULL);

    int          * newrecvcounts = malloc(size*sizeof(int));          assert(newrecvcounts!=NULL);
    MPI_Datatype * newrecvtypes  = malloc(size*sizeof(MPI_Datatype)); assert(newrecvtypes!=NULL);
    MPI_Aint     * newrdispls    = malloc(size*sizeof(MPI_Aint));     assert(newrdispls!=NULL);

    BigMPI_Convert_vectors(size,
                           0 /* splat count */, 0, sendcounts,
                           0 /* splat type */, 0, sendtypes,
                           0 /* zero displs */, sdispls,
                           newsendcounts, newsendtypes, newsdispls);

    BigMPI_Convert_vectors(size,
                           0 /* splat count */, 0, recvcounts,
                           0 /* splat type */, 0, recvtypes,
                           0 /* zero displs */, rdispls,
                           newrecvcounts, newrecvtypes, newrdispls);

    rc = MPI_Neighbor_alltoallw(sendbuf, newsendcounts, newsdispls, newsendtypes,
                                recvbuf, newrecvcounts, newrdispls, newrecvtypes, comm);

    for (int i=0; i<size; i++) {
        MPI_Type_free(&newsendtypes[i]);
        MPI_Type_free(&newrecvtypes[i]);
    }
    free(newsendcounts);
    free(newsdispls);
    free(newsendtypes);

    free(newrecvcounts);
    free(newrecvtypes);
    free(newrdispls);

    return rc;
}
Exemplo n.º 3
0
void mpi_comm_test_inter_f(MPI_Fint *comm, ompi_fortran_logical_t *flag, MPI_Fint *ierr)
{
    MPI_Comm c_comm = MPI_Comm_f2c (*comm);
    OMPI_LOGICAL_NAME_DECL(flag);

    *ierr = OMPI_INT_2_FINT(MPI_Comm_test_inter(c_comm, OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)));
    if (MPI_SUCCESS == OMPI_FINT_2_INT(*ierr)) {
        OMPI_SINGLE_INT_2_LOGICAL(flag);
    }
}
Exemplo n.º 4
0
int MTestTestComm(MPI_Comm comm)
{
    int is_inter;

    if (comm == MPI_COMM_NULL)
        return 0;

    MPI_Comm_test_inter(comm, &is_inter);

    if (is_inter)
        return MTestTestIntercomm(comm);
    else
        return MTestTestIntracomm(comm);
}
Exemplo n.º 5
0
/*@

MPI_Comm_remote_group - Accesses the remote group associated with 
                        the given inter-communicator

Input Parameter:
. comm - Communicator (must be intercommunicator)

Output Parameter:
. group - remote group of communicator

.N fortran

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_COMM
@*/
int MPI_Comm_remote_group ( MPI_Comm comm, MPI_Group *group )
{
    struct MPIR_COMMUNICATOR *comm_ptr;
    struct MPIR_GROUP *group_ptr;
    int flag;
    int mpi_errno = MPI_SUCCESS;
    static char myname[] = "MPI_COMM_REMOTE_GROUP";

    TR_PUSH(myname);
    comm_ptr = MPIR_GET_COMM_PTR(comm);
    MPIR_TEST_MPI_COMM(comm,comm_ptr,comm_ptr,myname );

    /* Check for intra-communicator */
    MPI_Comm_test_inter ( comm, &flag );
    if (!flag) return MPIR_ERROR(comm_ptr,
	 MPIR_ERRCLASS_TO_CODE(MPI_ERR_COMM,MPIR_ERR_COMM_INTRA),myname);

    MPIR_Group_dup( comm_ptr->group, &group_ptr );
    *group = group_ptr->self;
    TR_POP;
    return (MPI_SUCCESS);
}
Exemplo n.º 6
0
/*
 * getworldrank: Translate rank of process from communicator comm to
 *              communicator MPI_COMM_WORLD.
 */
int getworldrank(MPI_Comm comm, int rank)
{
    static MPI_Group worldgroup;
    static int isfirstcall = 1;
    int worldrank, isintercomm = 0;
    MPI_Group group;

    MPI_Comm_test_inter(comm, &isintercomm);
    if (isintercomm) {
        MPI_Comm_remote_group(comm, &group);
    } else {
        MPI_Comm_group(comm, &group);
    }

    if (isfirstcall) {
        MPI_Comm_group(MPI_COMM_WORLD, &worldgroup);
        isfirstcall = 0;
    }
    MPI_Group_translate_ranks(group, 1, &rank, worldgroup, &worldrank);
    MPI_Group_free(&group);
    return worldrank;
}
Exemplo n.º 7
0
EXPORT_MPI_API void FORTRAN_API mpi_comm_test_inter_ ( MPI_Fint *comm, MPI_Fint *flag, MPI_Fint *__ierr )
{
    int l_flag;
    *__ierr = MPI_Comm_test_inter( MPI_Comm_f2c(*comm), &l_flag);
    *flag = MPIR_TO_FLOG(l_flag);
}
Exemplo n.º 8
0
Arquivo: open.c Projeto: 00datman/ompi
/*@
    MPI_File_open - Opens a file

Input Parameters:
. comm - communicator (handle)
. filename - name of file to open (string)
. amode - file access mode (integer)
. info - info object (handle)

Output Parameters:
. fh - file handle (handle)

.N fortran
@*/
int MPI_File_open(MPI_Comm comm, ROMIO_CONST char *filename, int amode,
                  MPI_Info info, MPI_File *fh)
{
    int error_code = MPI_SUCCESS, file_system, flag, tmp_amode=0, rank;
    char *tmp;
    MPI_Comm dupcomm = MPI_COMM_NULL;
    ADIOI_Fns *fsops;
    static char myname[] = "MPI_FILE_OPEN";
#ifdef MPI_hpux
    int fl_xmpi;

    HPMP_IO_OPEN_START(fl_xmpi, comm);
#endif /* MPI_hpux */

    MPIU_THREAD_CS_ENTER(ALLFUNC,);

    /* --BEGIN ERROR HANDLING-- */
    MPIO_CHECK_COMM(comm, myname, error_code);
    MPIO_CHECK_INFO_ALL(info, error_code, comm);
    /* --END ERROR HANDLING-- */

    error_code = MPI_Comm_test_inter(comm, &flag);
    /* --BEGIN ERROR HANDLING-- */
    if (error_code || flag)
    {
	error_code = MPIO_Err_create_code(error_code, MPIR_ERR_RECOVERABLE,
					  myname, __LINE__, MPI_ERR_COMM,
					  "**commnotintra", 0);
	goto fn_fail;
    }

    if ( ((amode&MPI_MODE_RDONLY)?1:0) + ((amode&MPI_MODE_RDWR)?1:0) +
	 ((amode&MPI_MODE_WRONLY)?1:0) != 1 )
    {
	error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
					  myname, __LINE__, MPI_ERR_AMODE,
					  "**fileamodeone", 0);
	goto fn_fail;
    }

    if ((amode & MPI_MODE_RDONLY) &&
            ((amode & MPI_MODE_CREATE) || (amode & MPI_MODE_EXCL)))
    {
	error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
					  myname, __LINE__, MPI_ERR_AMODE,
					  "**fileamoderead", 0);
	goto fn_fail;
    }

    if ((amode & MPI_MODE_RDWR) && (amode & MPI_MODE_SEQUENTIAL))
    {
	error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
					  myname, __LINE__, MPI_ERR_AMODE,
					  "**fileamodeseq", 0);
	goto fn_fail;
    }

    MPI_Comm_dup(comm, &dupcomm);

/* check if ADIO has been initialized. If not, initialize it */
    MPIR_MPIOInit(&error_code);
    if (error_code != MPI_SUCCESS) goto fn_fail;

/* check if amode is the same on all processes: at first glance, one might try
 * to use a built-in operator like MPI_BAND, but we need every mpi process to
 * agree the amode was not the same.  Consider process A with
 * MPI_MODE_CREATE|MPI_MODE_RDWR, and B with MPI_MODE_RDWR:  MPI_BAND yields
 * MPI_MODE_RDWR.  A determines amodes are different, but B proceeds having not
 * detected an error */
    MPI_Allreduce(&amode, &tmp_amode, 1, MPI_INT, ADIO_same_amode, dupcomm);

    if (tmp_amode == ADIO_AMODE_NOMATCH) {
	error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
			myname, __LINE__, MPI_ERR_NOT_SAME,
			"**fileamodediff", 0);
	goto fn_fail;
    }
    /* --END ERROR HANDLING-- */

    file_system = -1;

    /* resolve file system type from file name; this is a collective call */
    ADIO_ResolveFileType(dupcomm, filename, &file_system, &fsops, &error_code);
    /* --BEGIN ERROR HANDLING-- */
    if (error_code != MPI_SUCCESS)
    {
	/* ADIO_ResolveFileType() will print as informative a message as it
	 * possibly can or call MPIO_Err_setmsg.  We just need to propagate
	 * the error up.
	 */
	goto fn_fail;
    }

    /* --END ERROR HANDLING-- */

    /* strip off prefix if there is one, but only skip prefixes
     * if they are greater than length one to allow for windows
     * drive specifications (e.g. c:\...) */

    tmp = strchr(filename, ':');
    if (tmp > filename + 1) {
	filename = tmp + 1;
    }

/* use default values for disp, etype, filetype */

    *fh = ADIO_Open(comm, dupcomm, filename, file_system, fsops, amode, 0,
		    MPI_BYTE, MPI_BYTE, info, ADIO_PERM_NULL, &error_code);

    /* --BEGIN ERROR HANDLING-- */
    if (error_code != MPI_SUCCESS) {
	goto fn_fail;
    }
    /* --END ERROR HANDLING-- */

    /* if MPI_MODE_SEQUENTIAL requested, file systems cannot do explicit offset
     * or independent file pointer accesses, leaving not much else aside from
     * shared file pointer accesses. */
    if ( !ADIO_Feature((*fh), ADIO_SHARED_FP) && (amode & MPI_MODE_SEQUENTIAL))
    {
        error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
			                  myname, __LINE__,
					  MPI_ERR_UNSUPPORTED_OPERATION,
					  "**iosequnsupported", 0);
	ADIO_Close(*fh, &error_code);
	goto fn_fail;
    }

    /* determine name of file that will hold the shared file pointer */
    /* can't support shared file pointers on a file system that doesn't
       support file locking. */
    if ((error_code == MPI_SUCCESS) &&
		    ADIO_Feature((*fh), ADIO_SHARED_FP)) {
	MPI_Comm_rank(dupcomm, &rank);
	ADIOI_Shfp_fname(*fh, rank, &error_code);
	if (error_code != MPI_SUCCESS)
	    goto fn_fail;

        /* if MPI_MODE_APPEND, set the shared file pointer to end of file.
           indiv. file pointer already set to end of file in ADIO_Open.
           Here file view is just bytes. */
	if ((*fh)->access_mode & MPI_MODE_APPEND) {
	    if (rank == (*fh)->hints->ranklist[0])  /* only one person need set the sharedfp */
		    ADIO_Set_shared_fp(*fh, (*fh)->fp_ind, &error_code);
	    MPI_Barrier(dupcomm);
	}
    }

#ifdef MPI_hpux
    HPMP_IO_OPEN_END(fl_xmpi, *fh, comm);
#endif /* MPI_hpux */

fn_exit:
    MPIU_THREAD_CS_EXIT(ALLFUNC,);
    return error_code;
fn_fail:
    /* --BEGIN ERROR HANDLING-- */
    if (dupcomm != MPI_COMM_NULL) MPI_Comm_free(&dupcomm);
    error_code = MPIO_Err_return_file(MPI_FILE_NULL, error_code);
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
Exemplo n.º 9
0
/* create a loop between all the elements types */
int mpi_lsa_create_intercoms(com_lsa * com){
	int prev, next,flag;
	int prev_size,next_size,size;
	/* create first connection between intracommunicators thanks to an intercommunicator */
	/* one way */
	MPI_Barrier(MPI_COMM_WORLD);
	if(com->rank_world==0)printf("]> Creating intercommunicators\n-One Way :\n");
	MPI_Barrier(MPI_COMM_WORLD);
	printf("\t *> %d -> %d ",com->rank_world,com->master.com[4-((com->color_group)+1)]);
	MPI_Barrier(MPI_COMM_WORLD);



	MPI_Intercomm_create(com->com_group,0,
					MPI_COMM_WORLD,com->master.com[4-((com->color_group)+1)], 
					com->rank_group,
					&(com->inter.com[4-((com->color_group)+1)]));


	
	MPI_Barrier(MPI_COMM_WORLD);
	if(com->rank_world==0)printf("\n]> The Other : \n");
	MPI_Barrier(MPI_COMM_WORLD);
	printf("\t *> %d -> %d ",(com->color_group),(4-((com->color_group)-1)%4)%4);
	MPI_Barrier(MPI_COMM_WORLD);
	if(com->rank_world==0)printf("\n");
	MPI_Barrier(MPI_COMM_WORLD);



 	/* the other */
	MPI_Intercomm_create(com->com_group,0,
					 com->com_world,com->master.com[(4-((com->color_group)-1)%4)%4],
	 				com->rank_group,
	 				&(com->inter.com[(4-((com->color_group)-1)%4)%4]));


	/// WHY THIS ????????
	if((4-((com->color_group)-1)%4)%4>com->color_group){
		next=(4-(com->color_group-1)%4)%4;
		prev=4-((com->color_group)+1);
	} else {
		prev=(4-(com->color_group-1)%4)%4;
		next=4-((com->color_group)+1);
	}

	/* set the in and out communicators */
	com->out_com=com->inter.com[next];
	MPI_Comm_test_inter(com->inter.com[next],&flag);
		if(!flag){
			mpi_lsa_print("\n\n\n\nproblem with inter.[next]\n\n\n\n\n", com);
		}
	com->in_com=com->inter.com[prev];
		MPI_Comm_test_inter(com->inter.com[prev],&flag);
		if(!flag){
			mpi_lsa_print("\n\n\n\n\nproblem with inter.[prev]\n\n\n\n", com);
		}

	MPI_Comm_remote_size(com->out_com,&next_size);
	MPI_Comm_remote_size(com->in_com,&prev_size);
	MPI_Comm_size(com->com_group,&size);
	if(com->color_group==0) 		 printf("GMRES 1: my intercomm with LS %d \n",com->in_com);
	if(com->rank_world==0) printf("]> In and Out communicators : \n");
		MPI_Barrier(MPI_COMM_WORLD);

	if(com->color_group==0) 		 printf("GMRES :   ");
	else if(com->color_group==1) printf("MAIN :    ");
	else if(com->color_group==2) printf("ARNOLDI : ");
	else if(com->color_group==3) printf("LS :      ");

	printf("%d: %d (%d) -> %d (%d) -> %d (%d)   in_com: %d,  out_com: %d\n",com->rank_world,com->master.com[prev],prev_size,com->color_group,size,com->master.com[next],next_size, com->in_com, com->out_com);
	

	return 0;
}
Exemplo n.º 10
0
FORT_DLL_SPEC void FORT_CALL mpi_comm_test_inter_ ( MPI_Fint *v1, MPI_Fint *v2, MPI_Fint *ierr ){
    int l2;
    *ierr = MPI_Comm_test_inter( (MPI_Comm)(*v1), &l2 );
    if (*ierr == MPI_SUCCESS) *v2 = MPIR_TO_FLOG(l2);
}
Exemplo n.º 11
0
int main( int argc, char *argv[] )
{
    int errs = 0;
    int size, isLeft;
    MPI_Comm intercomm, newcomm;

    MTest_Init( &argc, &argv );

    MPI_Comm_size( MPI_COMM_WORLD, &size );
    if (size < 4) {
	printf( "This test requires at least 4 processes\n" );
	MPI_Abort( MPI_COMM_WORLD, 1 );
    }

    while (MTestGetIntercomm( &intercomm, &isLeft, 2 )) {
	int key, color;

        if (intercomm == MPI_COMM_NULL) continue;

	/* Split this intercomm.  The new intercomms contain the 
	   processes that had odd (resp even) rank in their local group
	   in the original intercomm */
	MTestPrintfMsg( 1, "Created intercomm %s\n", MTestGetIntercommName() );
	MPI_Comm_rank( intercomm, &key );
	color = (key % 2);
	MPI_Comm_split( intercomm, color, key, &newcomm );
	/* Make sure that the new communicator has the appropriate pieces */
	if (newcomm != MPI_COMM_NULL) {
	    int orig_rsize, orig_size, new_rsize, new_size;
	    int predicted_size, flag, commok=1;

	    MPI_Comm_test_inter( intercomm, &flag );
	    if (!flag) {
		errs++;
		printf( "Output communicator is not an intercomm\n" );
		commok = 0;
	    }

	    MPI_Comm_remote_size( intercomm, &orig_rsize );
	    MPI_Comm_remote_size( newcomm, &new_rsize );
	    MPI_Comm_size( intercomm, &orig_size );
	    MPI_Comm_size( newcomm, &new_size );
	    /* The local size is 1/2 the original size, +1 if the 
	       size was odd and the color was even.  More precisely,
	       let n be the orig_size.  Then
	                        color 0     color 1
	       orig size even    n/2         n/2
	       orig size odd     (n+1)/2     n/2

	       However, since these are integer valued, if n is even,
	       then (n+1)/2 = n/2, so this table is much simpler:
	                        color 0     color 1
	       orig size even    (n+1)/2     n/2
	       orig size odd     (n+1)/2     n/2
	       
	    */
	    predicted_size = (orig_size + !color) / 2; 
	    if (predicted_size != new_size) {
		errs++;
		printf( "Predicted size = %d but found %d for %s (%d,%d)\n",
			predicted_size, new_size, MTestGetIntercommName(),
			orig_size, orig_rsize );
		commok = 0;
	    }
	    predicted_size = (orig_rsize + !color) / 2;
	    if (predicted_size != new_rsize) {
		errs++;
		printf( "Predicted remote size = %d but found %d for %s (%d,%d)\n",
			predicted_size, new_rsize, MTestGetIntercommName(), 
			orig_size, orig_rsize );
		commok = 0;
	    }
	    /* ... more to do */
	    if (commok) {
		errs += TestIntercomm( newcomm );
	    }
	}
	else {
	    int orig_rsize;
	    /* If the newcomm is null, then this means that remote group
	       for this color is of size zero (since all processes in this 
	       test have been given colors other than MPI_UNDEFINED).
	       Confirm that here */
	    /* FIXME: ToDo */
	    MPI_Comm_remote_size( intercomm, &orig_rsize );
	    if (orig_rsize == 1) {
		if (color == 0) {
		    errs++;
		    printf( "Returned null intercomm when non-null expected\n" );
		}
	    }
	}
	if (newcomm != MPI_COMM_NULL) 
	    MPI_Comm_free( &newcomm );
	MPI_Comm_free( &intercomm );
    }
    MTest_Finalize(errs);

    MPI_Finalize();

    return 0;
}
Exemplo n.º 12
0
/*@

MPI_Cart_create - Makes a new communicator to which topology information
                  has been attached

Input Parameters:
+ comm_old - input communicator (handle) 
. ndims - number of dimensions of cartesian grid (integer) 
. dims - integer array of size ndims specifying the number of processes in 
  each dimension 
. periods - logical array of size ndims specifying whether the grid is 
  periodic (true) or not (false) in each dimension 
- reorder - ranking may be reordered (true) or not (false) (logical) 

Output Parameter:
. comm_cart - communicator with new cartesian topology (handle) 

Algorithm:
We ignore 'reorder' info currently.

.N fortran

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_TOPOLOGY
.N MPI_ERR_DIMS
.N MPI_ERR_ARG
@*/
int MPI_Cart_create ( MPI_Comm comm_old, int ndims, int *dims, int *periods, 
		      int reorder, MPI_Comm *comm_cart )
{
  int range[1][3];
  MPI_Group group_old, group;
  int i, rank, num_ranks = 1;
  int mpi_errno = MPI_SUCCESS;
  int flag, size;
  MPIR_TOPOLOGY *topo;
  struct MPIR_COMMUNICATOR *comm_old_ptr;
  static char myname[] = "MPI_CART_CREATE";

  TR_PUSH(myname);
  comm_old_ptr = MPIR_GET_COMM_PTR(comm_old);

  /* Check validity of arguments */
#ifndef MPIR_NO_ERROR_CHECKING
  MPIR_TEST_MPI_COMM(comm_old,comm_old_ptr,comm_old_ptr,myname);
  MPIR_TEST_ARG(comm_cart);
  MPIR_TEST_ARG(periods);
  if (ndims < 1 || dims == (int *)0) mpi_errno = MPI_ERR_DIMS;
  if (mpi_errno)
	return MPIR_ERROR(comm_old_ptr, mpi_errno, myname );
  
  /* Check for Intra-communicator */
  MPI_Comm_test_inter ( comm_old, &flag );
  if (flag)
    return MPIR_ERROR(comm_old_ptr, 
            MPIR_ERRCLASS_TO_CODE(MPI_ERR_COMM,MPIR_ERR_COMM_INTER), myname );
#endif

  /* Determine number of ranks in topology */
  for ( i=0; i<ndims; i++ )
    num_ranks    *= (dims[i]>0)?dims[i]:-dims[i];
  if ( num_ranks < 1 ) {
    (*comm_cart)  = MPI_COMM_NULL;
    return MPIR_ERROR( comm_old_ptr, MPI_ERR_TOPOLOGY, myname );
  }

  /* Is the old communicator big enough? */
  MPIR_Comm_size (comm_old_ptr, &size);
  if (num_ranks > size) {
      mpi_errno = MPIR_Err_setmsg( MPI_ERR_TOPOLOGY, MPIR_ERR_TOPO_TOO_LARGE, 
				   myname, 
                  "Topology size is larger than size of communicator",
		  "Topology size %d is greater than communicator size %d", 
				   num_ranks, size );
	return MPIR_ERROR(comm_old_ptr, mpi_errno, myname );
  }
	
  /* Make new comm */
  range[0][0] = 0; range[0][1] = num_ranks - 1; range[0][2] = 1;
  MPI_Comm_group ( comm_old, &group_old );
  MPI_Group_range_incl ( group_old, 1, range, &group );
  MPI_Comm_create  ( comm_old, group, comm_cart );
  MPI_Group_free( &group );
  MPI_Group_free( &group_old );

  /* Store topology information in new communicator */
  if ( (*comm_cart) != MPI_COMM_NULL ) {
      MPIR_ALLOC(topo,(MPIR_TOPOLOGY *) MPIR_SBalloc ( MPIR_topo_els ),
		 comm_old_ptr,MPI_ERR_EXHAUSTED,myname);
      MPIR_SET_COOKIE(&topo->cart,MPIR_CART_TOPOL_COOKIE)
	  topo->cart.type         = MPI_CART;
      topo->cart.nnodes       = num_ranks;
      topo->cart.ndims        = ndims;
      MPIR_ALLOC(topo->cart.dims,(int *)MALLOC( sizeof(int) * 3 * ndims ),
		 comm_old_ptr,MPI_ERR_EXHAUSTED,myname);
      topo->cart.periods      = topo->cart.dims + ndims;
      topo->cart.position     = topo->cart.periods + ndims;
      for ( i=0; i<ndims; i++ ) {
	  topo->cart.dims[i]    = dims[i];
	  topo->cart.periods[i] = periods[i];
      }

    /* Compute my position */
    MPI_Comm_rank ( (*comm_cart), &rank );
    for ( i=0; i < ndims; i++ ) {
      num_ranks = num_ranks / dims[i];
      topo->cart.position[i]  = rank / num_ranks;
      rank   = rank % num_ranks;
    }

    /* cache topology information */
    MPI_Attr_put ( (*comm_cart), MPIR_TOPOLOGY_KEYVAL, (void *)topo );
  }
  TR_POP;
  return (mpi_errno);
}
Exemplo n.º 13
0
Arquivo: MPI-api.c Projeto: 8l/rose
void declareBindings (void)
{
  /* === Point-to-point === */
  void* buf;
  int count;
  MPI_Datatype datatype;
  int dest;
  int tag;
  MPI_Comm comm;
  MPI_Send (buf, count, datatype, dest, tag, comm); // L12
  int source;
  MPI_Status status;
  MPI_Recv (buf, count, datatype, source, tag, comm, &status); // L15
  MPI_Get_count (&status, datatype, &count);
  MPI_Bsend (buf, count, datatype, dest, tag, comm);
  MPI_Ssend (buf, count, datatype, dest, tag, comm);
  MPI_Rsend (buf, count, datatype, dest, tag, comm);
  void* buffer;
  int size;
  MPI_Buffer_attach (buffer, size); // L22
  MPI_Buffer_detach (buffer, &size);
  MPI_Request request;
  MPI_Isend (buf, count, datatype, dest, tag, comm, &request); // L25
  MPI_Ibsend (buf, count, datatype, dest, tag, comm, &request);
  MPI_Issend (buf, count, datatype, dest, tag, comm, &request);
  MPI_Irsend (buf, count, datatype, dest, tag, comm, &request);
  MPI_Irecv (buf, count, datatype, source, tag, comm, &request);
  MPI_Wait (&request, &status);
  int flag;
  MPI_Test (&request, &flag, &status); // L32
  MPI_Request_free (&request);
  MPI_Request* array_of_requests;
  int index;
  MPI_Waitany (count, array_of_requests, &index, &status); // L36
  MPI_Testany (count, array_of_requests, &index, &flag, &status);
  MPI_Status* array_of_statuses;
  MPI_Waitall (count, array_of_requests, array_of_statuses); // L39
  MPI_Testall (count, array_of_requests, &flag, array_of_statuses);
  int incount;
  int outcount;
  int* array_of_indices;
  MPI_Waitsome (incount, array_of_requests, &outcount, array_of_indices,
		array_of_statuses); // L44--45
  MPI_Testsome (incount, array_of_requests, &outcount, array_of_indices,
		array_of_statuses); // L46--47
  MPI_Iprobe (source, tag, comm, &flag, &status); // L48
  MPI_Probe (source, tag, comm, &status);
  MPI_Cancel (&request);
  MPI_Test_cancelled (&status, &flag);
  MPI_Send_init (buf, count, datatype, dest, tag, comm, &request);
  MPI_Bsend_init (buf, count, datatype, dest, tag, comm, &request);
  MPI_Ssend_init (buf, count, datatype, dest, tag, comm, &request);
  MPI_Rsend_init (buf, count, datatype, dest, tag, comm, &request);
  MPI_Recv_init (buf, count, datatype, source, tag, comm, &request);
  MPI_Start (&request);
  MPI_Startall (count, array_of_requests);
  void* sendbuf;
  int sendcount;
  MPI_Datatype sendtype;
  int sendtag;
  void* recvbuf;
  int recvcount;
  MPI_Datatype recvtype;
  MPI_Datatype recvtag;
  MPI_Sendrecv (sendbuf, sendcount, sendtype, dest, sendtag,
		recvbuf, recvcount, recvtype, source, recvtag,
		comm, &status); // L67--69
  MPI_Sendrecv_replace (buf, count, datatype, dest, sendtag, source, recvtag,
			comm, &status); // L70--71
  MPI_Datatype oldtype;
  MPI_Datatype newtype;
  MPI_Type_contiguous (count, oldtype, &newtype); // L74
  int blocklength;
  {
    int stride;
    MPI_Type_vector (count, blocklength, stride, oldtype, &newtype); // L78
  }
  {
    MPI_Aint stride;
    MPI_Type_hvector (count, blocklength, stride, oldtype, &newtype); // L82
  }
  int* array_of_blocklengths;
  {
    int* array_of_displacements;
    MPI_Type_indexed (count, array_of_blocklengths, array_of_displacements,
		      oldtype, &newtype); // L87--88
  }
  {
    MPI_Aint* array_of_displacements;
    MPI_Type_hindexed (count, array_of_blocklengths, array_of_displacements,
                       oldtype, &newtype); // L92--93
    MPI_Datatype* array_of_types;
    MPI_Type_struct (count, array_of_blocklengths, array_of_displacements,
                     array_of_types, &newtype); // L95--96
  }
  void* location;
  MPI_Aint address;
  MPI_Address (location, &address); // L100
  MPI_Aint extent;
  MPI_Type_extent (datatype, &extent); // L102
  MPI_Type_size (datatype, &size);
  MPI_Aint displacement;
  MPI_Type_lb (datatype, &displacement); // L105
  MPI_Type_ub (datatype, &displacement);
  MPI_Type_commit (&datatype);
  MPI_Type_free (&datatype);
  MPI_Get_elements (&status, datatype, &count);
  void* inbuf;
  void* outbuf;
  int outsize;
  int position;
  MPI_Pack (inbuf, incount, datatype, outbuf, outsize, &position, comm); // L114
  int insize;
  MPI_Unpack (inbuf, insize, &position, outbuf, outcount, datatype,
	      comm); // L116--117
  MPI_Pack_size (incount, datatype, comm, &size);

  /* === Collectives === */
  MPI_Barrier (comm); // L121
  int root;
  MPI_Bcast (buffer, count, datatype, root, comm); // L123
  MPI_Gather (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,
	      root, comm); // L124--125
  int* recvcounts;
  int* displs;
  MPI_Gatherv (sendbuf, sendcount, sendtype,
               recvbuf, recvcounts, displs, recvtype,
	       root, comm); // L128--130
  MPI_Scatter (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,
               root, comm); // L131--132
  int* sendcounts;
  MPI_Scatterv (sendbuf, sendcounts, displs, sendtype,
		recvbuf, recvcount, recvtype, root, comm); // L134--135
  MPI_Allgather (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,
                 comm); // L136--137
  MPI_Allgatherv (sendbuf, sendcount, sendtype,
		  recvbuf, recvcounts, displs, recvtype,
		  comm); // L138--140
  MPI_Alltoall (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,
		comm); // L141--142
  int* sdispls;
  int* rdispls;
  MPI_Alltoallv (sendbuf, sendcounts, sdispls, sendtype,
                 recvbuf, recvcounts, rdispls, recvtype,
		 comm); // L145--147
  MPI_Op op;
  MPI_Reduce (sendbuf, recvbuf, count, datatype, op, root, comm); // L149
#if 0
  MPI_User_function function;
  int commute;
  MPI_Op_create (function, commute, &op); // L153
#endif
  MPI_Op_free (&op); // L155
  MPI_Allreduce (sendbuf, recvbuf, count, datatype, op, comm);
  MPI_Reduce_scatter (sendbuf, recvbuf, recvcounts, datatype, op, comm);
  MPI_Scan (sendbuf, recvbuf, count, datatype, op, comm);

  /* === Groups, contexts, and communicators === */
  MPI_Group group;
  MPI_Group_size (group, &size); // L162
  int rank;
  MPI_Group_rank (group, &rank); // L164
  MPI_Group group1;
  int n;
  int* ranks1;
  MPI_Group group2;
  int* ranks2;
  MPI_Group_translate_ranks (group1, n, ranks1, group2, ranks2); // L170
  int result;
  MPI_Group_compare (group1, group2, &result); // L172
  MPI_Group newgroup;
  MPI_Group_union (group1, group2, &newgroup); // L174
  MPI_Group_intersection (group1, group2, &newgroup);
  MPI_Group_difference (group1, group2, &newgroup);
  int* ranks;
  MPI_Group_incl (group, n, ranks, &newgroup); // L178
  MPI_Group_excl (group, n, ranks, &newgroup);
  extern int ranges[][3];
  MPI_Group_range_incl (group, n, ranges, &newgroup); // L181
  MPI_Group_range_excl (group, n, ranges, &newgroup);
  MPI_Group_free (&group);
  MPI_Comm_size (comm, &size);
  MPI_Comm_rank (comm, &rank);
  MPI_Comm comm1;
  MPI_Comm comm2;
  MPI_Comm_compare (comm1, comm2, &result);
  MPI_Comm newcomm;
  MPI_Comm_dup (comm, &newcomm);
  MPI_Comm_create (comm, group, &newcomm);
  int color;
  int key;
  MPI_Comm_split (comm, color, key, &newcomm); // L194
  MPI_Comm_free (&comm);
  MPI_Comm_test_inter (comm, &flag);
  MPI_Comm_remote_size (comm, &size);
  MPI_Comm_remote_group (comm, &group);
  MPI_Comm local_comm;
  int local_leader;
  MPI_Comm peer_comm;
  int remote_leader;
  MPI_Comm newintercomm;
  MPI_Intercomm_create (local_comm, local_leader, peer_comm, remote_leader, tag,
			&newintercomm); // L204--205
  MPI_Comm intercomm;
  MPI_Comm newintracomm;
  int high;
  MPI_Intercomm_merge (intercomm, high, &newintracomm); // L209
  int keyval;
#if 0
  MPI_Copy_function copy_fn;
  MPI_Delete_function delete_fn;
  void* extra_state;
  MPI_Keyval_create (copy_fn, delete_fn, &keyval, extra_state); // L215
#endif
  MPI_Keyval_free (&keyval); // L217
  void* attribute_val;
  MPI_Attr_put (comm, keyval, attribute_val); // L219
  MPI_Attr_get (comm, keyval, attribute_val, &flag);
  MPI_Attr_delete (comm, keyval);

  /* === Environmental inquiry === */
  char* name;
  int resultlen;
  MPI_Get_processor_name (name, &resultlen); // L226
  MPI_Errhandler errhandler;
#if 0
  MPI_Handler_function function;
  MPI_Errhandler_create (function, &errhandler); // L230
#endif
  MPI_Errhandler_set (comm, errhandler); // L232
  MPI_Errhandler_get (comm, &errhandler);
  MPI_Errhandler_free (&errhandler);
  int errorcode;
  char* string;
  MPI_Error_string (errorcode, string, &resultlen); // L237
  int errorclass;
  MPI_Error_class (errorcode, &errorclass); // L239
  MPI_Wtime ();
  MPI_Wtick ();
  int argc;
  char** argv;
  MPI_Init (&argc, &argv); // L244
  MPI_Finalize ();
  MPI_Initialized (&flag);
  MPI_Abort (comm, errorcode);
}
Exemplo n.º 14
0
/*
 * Compute the aggregator-related parameters that are required in 2-phase
 * collective IO of ADIO.
 * The parameters are
 * 	. the number of aggregators (proxies) : fd->hints->cb_nodes
 *	. the ranks of the aggregators :        fd->hints->ranklist
 * If MP_IONODEFILE is defined, POE determines all tasks on every node listed
 * in the node file and defines MP_IOTASKLIST with them, making them all
 * aggregators.  Alternatively, the user can explictly set MP_IOTASKLIST
 * themselves.  The format of the MP_IOTASKLIST is a colon-delimited list of
 * task ids, the first entry being the total number of aggregators, for example
 * to specify 4 aggregators on task ids 0,8,16,24  the value would be:
 * 4:0:8:16:24.  If there is no MP_IONODEFILE, or MP_IOTASKLIST, then the
 * default aggregator selection is 1 task per node for every node of the job -
 * additionally, an environment variable MP_IOAGGR_CNT  can be specified, which
 * defines the total number of aggregators, spread evenly across all the nodes.
 * The romio_cb_nodes and romio_cb_config_list hint user settings are ignored.
 */
int
ADIOI_PE_gen_agg_ranklist(ADIO_File fd)
{

    int numAggs = 0;
    char *ioTaskList = getenv( "MP_IOTASKLIST" );
    char *ioAggrCount = getenv("MP_IOAGGR_CNT");
    int i,j;
    int inTERcommFlag = 0;

    int myRank,commSize;
    MPI_Comm_rank(fd->comm, &myRank);
    MPI_Comm_size(fd->comm, &commSize);

    MPI_Comm_test_inter(fd->comm, &inTERcommFlag);
    if (inTERcommFlag) {
      FPRINTF(stderr,"ERROR: ATTENTION: inTERcomms are not supported in MPI-IO - aborting....\n");
      perror("ADIOI_PE_gen_agg_ranklist:");
      MPI_Abort(MPI_COMM_WORLD, 1);
    }

    if (ioTaskList) {
      int ioTaskListLen = strlen(ioTaskList);
      int ioTaskListPos = 0;
      char tmpBuf[8];   /* Big enough for 1M tasks (7 digits task ID). */
      tmpBuf[7] = '\0';
      for (i=0; i<7; i++) {
         tmpBuf[i] = *ioTaskList++;      /* Maximum is 7 digits for 1 million. */
         ioTaskListPos++;
         if (*ioTaskList == ':') {       /* If the next char is a ':' ends it. */
             tmpBuf[i+1] = '\0';
             break;
         }
      }
      numAggs = atoi(tmpBuf);
      if (numAggs == 0)
        FPRINTF(stderr,"ERROR: ATTENTION: Number of aggregators specified in MP_IOTASKLIST set at 0 - default aggregator selection will be used.\n");
      else if (!((numAggs > 0 ) && (numAggs <= commSize))) {
        FPRINTF(stderr,"ERROR: ATTENTION: The number of aggregators (%s) specified in MP_IOTASKLIST is outside the communicator task range of %d.\n",tmpBuf,commSize);
        numAggs = commSize;
      }
      fd->hints->ranklist = (int *) ADIOI_Malloc (numAggs * sizeof(int));

      int aggIndex = 0;
      while (aggIndex < numAggs) {
         ioTaskList++;                /* Advance past the ':' */
         ioTaskListPos++;
         int allDigits=1;
         for (i=0; i<7; i++) {
            if (*ioTaskList < '0' || *ioTaskList > '9')
              allDigits=0;
            tmpBuf[i] = *ioTaskList++;
            ioTaskListPos++;
            if ( (*ioTaskList == ':') || (*ioTaskList == '\0') ) {
                tmpBuf[i+1] = '\0';
                break;
            }
         }
         if (allDigits) {
           int newAggRank = atoi(tmpBuf);
           if (!((newAggRank >= 0 ) && (newAggRank < commSize))) {
             FPRINTF(stderr,"ERROR: ATTENTION: The aggregator '%s' specified in MP_IOTASKLIST is not within the communicator task range of 0 to %d  - it will be ignored.\n",tmpBuf,commSize-1);
           }
           else {
             int aggAlreadyAdded = 0;
             for (i=0;i<aggIndex;i++)
               if (fd->hints->ranklist[i] == newAggRank) {
                 aggAlreadyAdded = 1;
                 break;
               }
             if (!aggAlreadyAdded)
               fd->hints->ranklist[aggIndex++] = newAggRank;
             else
               FPRINTF(stderr,"ERROR: ATTENTION: The aggregator '%d' is specified multiple times in MP_IOTASKLIST - duplicates are ignored.\n",newAggRank);
           }
         }
         else {
           FPRINTF(stderr,"ERROR: ATTENTION: The aggregator '%s' specified in MP_IOTASKLIST is not a valid integer task id  - it will be ignored.\n",tmpBuf);
         }

         /* At the end check whether the list is shorter than specified. */
         if (ioTaskListPos == ioTaskListLen) {
           if (aggIndex == 0) {
             FPRINTF(stderr,"ERROR: ATTENTION: No aggregators were correctly specified in MP_IOTASKLIST - default aggregator selection will be used.\n");
             ADIOI_Free(fd->hints->ranklist);
           }
           else if (aggIndex < numAggs)
             FPRINTF(stderr,"ERROR: ATTENTION: %d aggregators were specified in MP_IOTASKLIST but only %d were correctly specified - setting the number of aggregators to %d.\n",numAggs, aggIndex,aggIndex);
           numAggs = aggIndex;
         }
      }
    }
    if (numAggs == 0)  {
      MPID_Comm *mpidCommData;

      MPID_Comm_get_ptr(fd->comm,mpidCommData);
      int localSize = mpidCommData->local_size;

      // get my node rank
      int myNodeRank = mpidCommData->intranode_table[mpidCommData->rank];

      int *allNodeRanks = (int *) ADIOI_Malloc (localSize * sizeof(int));

      allNodeRanks[myRank] = myNodeRank;
      MPI_Allgather(MPI_IN_PLACE, 1, MPI_INT, allNodeRanks, 1, MPI_INT, fd->comm);

#ifdef AGG_DEBUG
      printf("MPID_Comm data: local_size is %d\nintranode_table entries:\n",mpidCommData->local_size);
      for (i=0;i<localSize;i++) {
        printf("%d ",mpidCommData->intranode_table[i]);
      }
      printf("\ninternode_table entries:\n");
      for (i=0;i<localSize;i++) {
        printf("%d ",mpidCommData->internode_table[i]);
      }
      printf("\n");

      printf("\nallNodeRanks entries:\n");
      for (i=0;i<localSize;i++) {
        printf("%d ",allNodeRanks[i]);
      }
      printf("\n");

#endif

      if (ioAggrCount) {
        int cntType = -1;

        if ( strcasecmp(ioAggrCount, "ALL") ) {
           if ( (cntType = atoi(ioAggrCount)) <= 0 ) {
              /* Input is other non-digit or less than 1 the  assume */
              /* 1 aggregator per node.  Note: atoi(-1) reutns -1.   */
              /* No warning message given here -- done earlier.      */
              cntType = -1;
           }
        }
        else {
           /* ALL is specified set aggr count to localSize */
           cntType = -2;
        }
        switch(cntType) {
           case -1:
              /* 1 aggr/node case */
            {
             int rankListIndex = 0;
             fd->hints->ranklist = (int *) ADIOI_Malloc (localSize * sizeof(int));
             for (i=0;i<localSize;i++) {
               if (allNodeRanks[i] == 0) {
                 fd->hints->ranklist[rankListIndex++] = i;
                 numAggs++;
               }
             }
            }
              break;
           case -2:
              /* ALL tasks case */
             fd->hints->ranklist = (int *) ADIOI_Malloc (localSize * sizeof(int));
             for (i=0;i<localSize;i++) {
               fd->hints->ranklist[i] = i;
               numAggs++;
             }
              break;
           default:
              /* Specific aggr count case -- MUST be less than localSize, otherwise set to localSize */
             if (cntType > localSize)
               cntType = localSize;

             numAggs = cntType;
             // Round-robin thru allNodeRanks - pick the 0's, then the 1's, etc
             int currentNodeRank = 0;  // node rank currently being selected as aggregator
             int rankListIndex = 0;
             int currentAllNodeIndex = 0;

             fd->hints->ranklist = (int *) ADIOI_Malloc (numAggs * sizeof(int));

             while (rankListIndex < numAggs) {
               int foundEntry = 0;
               while (!foundEntry && (currentAllNodeIndex < localSize)) {
                 if (allNodeRanks[currentAllNodeIndex] == currentNodeRank) {
                   fd->hints->ranklist[rankListIndex++] = currentAllNodeIndex;
                   foundEntry = 1;
                 }
                 currentAllNodeIndex++;
               }
               if (!foundEntry) {
                 currentNodeRank++;
                 currentAllNodeIndex = 0;
               }
             } // while
          break;
        } // switch(cntType)
      } // if (ioAggrCount)

      else { // default is 1 aggregator per node
        // take the 0 entries from allNodeRanks
        int rankListIndex = 0;
        fd->hints->ranklist = (int *) ADIOI_Malloc (localSize * sizeof(int));
        for (i=0;i<localSize;i++) {
          if (allNodeRanks[i] == 0) {
            fd->hints->ranklist[rankListIndex++] = i;
            numAggs++;
          }
        }
      }

      ADIOI_Free(allNodeRanks);

    }

    if ( getenv("MP_I_SHOW_AGGRS") ) {
      if (myRank == 0) {
        printf("Agg rank list of %d generated:\n", numAggs);
        for (i=0;i<numAggs;i++) {
          printf("%d ",fd->hints->ranklist[i]);
        }
        printf("\n");
      }
    }

    fd->hints->cb_nodes = numAggs;

    return 0;
}
Exemplo n.º 15
0
/*@
    MPI_File_open - Opens a file

Input Parameters:
. comm - communicator (handle)
. filename - name of file to open (string)
. amode - file access mode (integer)
. info - info object (handle)

Output Parameters:
. fh - file handle (handle)

.N fortran
@*/
int MPI_File_open(MPI_Comm comm, char *filename, int amode, 
                  MPI_Info info, MPI_File *fh)
{
    int error_code, file_system, flag, tmp_amode, rank, orig_amode;
#ifndef PRINT_ERR_MSG
    static char myname[] = "MPI_FILE_OPEN";
#endif
    int err, min_code;
    char *tmp;
    MPI_Comm dupcomm, dupcommself;
#ifdef MPI_hpux
    int fl_xmpi;

    HPMP_IO_OPEN_START(fl_xmpi, comm);
#endif /* MPI_hpux */

	error_code = MPI_SUCCESS;

    if (comm == MPI_COMM_NULL) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: Invalid communicator\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_COMM, MPIR_ERR_COMM_NULL,
				     myname, (char *) 0, (char *) 0);
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }

    MPI_Comm_test_inter(comm, &flag);
    if (flag) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: Intercommunicator cannot be passed to MPI_File_open\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_COMM, MPIR_ERR_COMM_INTER,
				     myname, (char *) 0, (char *) 0);
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }

    if ( ((amode&MPI_MODE_RDONLY)?1:0) + ((amode&MPI_MODE_RDWR)?1:0) +
	 ((amode&MPI_MODE_WRONLY)?1:0) != 1 ) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: Exactly one of MPI_MODE_RDONLY, MPI_MODE_WRONLY, or MPI_MODE_RDWR must be specified\n");
	MPI_Abort(MPI_COMM_WORLD, 1); 
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_AMODE, 3,
				     myname, (char *) 0, (char *) 0);
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }

    if ((amode & MPI_MODE_RDONLY) && 
            ((amode & MPI_MODE_CREATE) || (amode & MPI_MODE_EXCL))) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: It is erroneous to specify MPI_MODE_CREATE or MPI_MODE_EXCL with MPI_MODE_RDONLY\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_AMODE, 5,
				     myname, (char *) 0, (char *) 0);
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }

    if ((amode & MPI_MODE_RDWR) && (amode & MPI_MODE_SEQUENTIAL)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: It is erroneous to specify MPI_MODE_SEQUENTIAL with MPI_MODE_RDWR\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_AMODE, 7,
				     myname, (char *) 0, (char *) 0);
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }

/* check if amode is the same on all processes */
    MPI_Comm_dup(comm, &dupcomm);
    tmp_amode = amode;

/*  
    Removed this check because broadcast is too expensive. 
    MPI_Bcast(&tmp_amode, 1, MPI_INT, 0, dupcomm);
    if (amode != tmp_amode) {
	FPRINTF(stderr, "MPI_File_open: amode must be the same on all processes\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
    }
*/

/* check if ADIO has been initialized. If not, initialize it */
    if (ADIO_Init_keyval == MPI_KEYVAL_INVALID) {

/* check if MPI itself has been initialized. If not, flag an error.
   Can't initialize it here, because don't know argc, argv */
	MPI_Initialized(&flag);
	if (!flag) {
	    FPRINTF(stderr, "Error: MPI_Init() must be called before using MPI-IO\n");
	    MPI_Abort(MPI_COMM_WORLD, 1);
	}

	MPI_Keyval_create(MPI_NULL_COPY_FN, ADIOI_End_call, &ADIO_Init_keyval,
			  (void *) 0);  

/* put a dummy attribute on MPI_COMM_WORLD, because we want the delete
   function to be called when MPI_COMM_WORLD is freed. Hopefully the
   MPI library frees MPI_COMM_WORLD when MPI_Finalize is called,
   though the standard does not mandate this. */

	MPI_Attr_put(MPI_COMM_WORLD, ADIO_Init_keyval, (void *) 0);

/* initialize ADIO */

	ADIO_Init( (int *)0, (char ***)0, &error_code);
    }


    file_system = -1;
    tmp = strchr(filename, ':');
#ifdef WIN32
    // Unfortunately Windows uses ':' behind the drive letter.
    // So we check if there is only one letter before the ':'
    // Please do not use a single letter filesystem name!
    if(tmp && ((tmp-filename) == 1)) tmp = 0;
#endif
    if (!tmp) {
	ADIO_FileSysType(filename, &file_system, &err);
	if (err != MPI_SUCCESS) {
#ifdef PRINT_ERR_MSG
	    FPRINTF(stderr, "MPI_File_open: Can't determine the file-system type. Check the filename/path you provided and try again. Otherwise, prefix the filename with a string to indicate the type of file sytem (piofs:, pfs:, nfs:, ufs:, hfs:, xfs:, sfs:, pvfs:, svm:).\n");
	    MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_FSTYPE,
				     myname, (char *) 0, (char *) 0);
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
	}
	MPI_Allreduce(&file_system, &min_code, 1, MPI_INT, MPI_MIN, dupcomm);
	if (min_code == ADIO_NFS) file_system = ADIO_NFS;
    }


#ifndef PFS
    if (!strncmp(filename, "pfs:", 4) || !strncmp(filename, "PFS:", 4) || (file_system == ADIO_PFS)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: ROMIO has not been configured to use the PFS file system\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_PFS,
				     myname, (char *) 0, (char *) 0,"PFS");
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }
#endif
#ifndef PIOFS
    if (!strncmp(filename, "piofs:", 6) || !strncmp(filename, "PIOFS:", 6) || (file_system == ADIO_PIOFS)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: ROMIO has not been configured to use the PIOFS file system\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_PIOFS,
				     myname, (char *) 0, (char *) 0,"PIOFS");
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }
#endif
#ifndef UFS
    if (!strncmp(filename, "ufs:", 4) || !strncmp(filename, "UFS:", 4) || (file_system == ADIO_UFS)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: ROMIO has not been configured to use the UFS file system\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_UFS,
				     myname, (char *) 0, (char *) 0,"UFS");
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }
#endif
#ifndef NFS
    if (!strncmp(filename, "nfs:", 4) || !strncmp(filename, "NFS:", 4) || (file_system == ADIO_NFS)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: ROMIO has not been configured to use the NFS file system\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_NFS,
				     myname, (char *) 0, (char *) 0,"NFS");
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }
#endif
#ifndef HFS
    if (!strncmp(filename, "hfs:", 4) || !strncmp(filename, "HFS:", 4) || (file_system == ADIO_HFS)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: ROMIO has not been configured to use the HFS file system\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_HFS,
				     myname, (char *) 0, (char *) 0,"HFS");
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }
#endif
#ifndef XFS
    if (!strncmp(filename, "xfs:", 4) || !strncmp(filename, "XFS:", 4) || (file_system == ADIO_XFS)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: ROMIO has not been configured to use the XFS file system\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_XFS,
				     myname, (char *) 0, (char *) 0,"XFS");
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }
#endif
#ifndef SFS
    if (!strncmp(filename, "sfs:", 4) || !strncmp(filename, "SFS:", 4) || (file_system == ADIO_SFS)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: ROMIO has not been configured to use the SFS file system\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_SFS,
				     myname, (char *) 0, (char *) 0,"SFS");
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }
#endif
#ifndef PVFS
    if (!strncmp(filename, "pvfs:", 5) || !strncmp(filename, "PVFS:", 5) || (file_system == ADIO_PVFS)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: ROMIO has not been configured to use the PVFS file system\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_PVFS,
				     myname, (char *) 0, (char *) 0,"PVFS");
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }
#endif

#ifndef SVM
    if (!strncmp(filename, "svm:", 4) || !strncmp(filename, "SVM:", 4) || (file_system == ADIO_SVM)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: ROMIO has not been configured to use the SVM file system\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_SVM,
				     myname, (char *) 0, (char *) 0,"SVM");
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }
#endif

#ifndef NTFS
    if (!strncmp(filename, "svm:", 4) || !strncmp(filename, "NTFS:", 4) || (file_system == ADIO_SVM)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: ROMIO has not been configured to use the SVM file system\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_NTFS,
				     myname, (char *) 0, (char *) 0,"NTFS");
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }
#endif

    if (!strncmp(filename, "pfs:", 4) || !strncmp(filename, "PFS:", 4)) {
	file_system = ADIO_PFS;
	filename += 4;
    }
    else if (!strncmp(filename, "piofs:", 6) || !strncmp(filename, "PIOFS:", 6)) {
	file_system = ADIO_PIOFS;
	filename += 6;
    }
    else if (!strncmp(filename, "ufs:", 4) || !strncmp(filename, "UFS:", 4)) {
	file_system = ADIO_UFS;
	filename += 4;
    }
    else if (!strncmp(filename, "nfs:", 4) || !strncmp(filename, "NFS:", 4)) {
	file_system = ADIO_NFS;
	filename += 4;
    }
    else if (!strncmp(filename, "hfs:", 4) || !strncmp(filename, "HFS:", 4)) {
	file_system = ADIO_HFS;
	filename += 4;
    }
    else if (!strncmp(filename, "xfs:", 4) || !strncmp(filename, "XFS:", 4)) {
	file_system = ADIO_XFS;
	filename += 4;
    }
    else if (!strncmp(filename, "sfs:", 4) || !strncmp(filename, "SFS:", 4)) {
	file_system = ADIO_SFS;
	filename += 4;
    }
    else if (!strncmp(filename, "pvfs:", 5) || !strncmp(filename, "PVFS:", 5)) {
	file_system = ADIO_PVFS;
	filename += 5;
    }
    else if (!strncmp(filename, "svm:", 4) || !strncmp(filename, "SVM:", 4)) {
	file_system = ADIO_SVM;
	filename += 4;
    }
    else if (!strncmp(filename, "ntfs:", 4) || !strncmp(filename, "NTFS:", 4)) {
	file_system = ADIO_NTFS;
	filename += 5;
    }
    if (((file_system == ADIO_PIOFS) || (file_system == ADIO_PVFS)) && 
        (amode & MPI_MODE_SEQUENTIAL)) {
#ifdef PRINT_ERR_MSG
	FPRINTF(stderr, "MPI_File_open: MPI_MODE_SEQUENTIAL not supported on PIOFS and PVFS\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
#else
	error_code = MPIR_Err_setmsg(MPI_ERR_UNSUPPORTED_OPERATION, 
                    MPIR_ERR_NO_MODE_SEQ, myname, (char *) 0, (char *) 0);
	return ADIOI_Error(MPI_FILE_NULL, error_code, myname);
#endif
    }

    orig_amode = amode;
    MPI_Comm_rank(dupcomm, &rank);

    if ((amode & MPI_MODE_CREATE) && (amode & MPI_MODE_EXCL)) {
	/* the open should fail if the file exists. Only process 0 should
           check this. Otherwise, if all processes try to check and the file 
           does not exist, one process will create the file and others who 
           reach later will return error. */

	if (!rank) {
	    MPI_Comm_dup(MPI_COMM_SELF, &dupcommself);
	    /* this dup is freed either in ADIO_Open if the open fails,
               or in ADIO_Close */
	    *fh = ADIO_Open(dupcommself, filename, file_system, amode, 0, 
               MPI_BYTE, MPI_BYTE, M_ASYNC, info, ADIO_PERM_NULL, &error_code);
	    /* broadcast the error code to other processes */
	    MPI_Bcast(&error_code, 1, MPI_INT, 0, dupcomm);
	    /* if no error, close the file. It will be reopened normally 
               below. */
	    if (error_code == MPI_SUCCESS) ADIO_Close(*fh, &error_code);
	}
	else MPI_Bcast(&error_code, 1, MPI_INT, 0, dupcomm);

	if (error_code != MPI_SUCCESS) {
	    MPI_Comm_free(&dupcomm);
	    *fh = MPI_FILE_NULL;
#ifdef MPI_hpux
	    HPMP_IO_OPEN_END(fl_xmpi, *fh, comm);
#endif /* MPI_hpux */
	    return error_code;
	}
	else amode = amode ^ MPI_MODE_EXCL;  /* turn off MPI_MODE_EXCL */
    }

/* use default values for disp, etype, filetype */    
/* set iomode=M_ASYNC. It is used to implement the Intel PFS interface
   on top of ADIO. Not relevant for MPI-IO implementation */    

    *fh = ADIO_Open(dupcomm, filename, file_system, amode, 0, MPI_BYTE,
                     MPI_BYTE, M_ASYNC, info, ADIO_PERM_NULL, &error_code);

    /* if MPI_MODE_EXCL was removed, add it back */
    if ((error_code == MPI_SUCCESS) && (amode != orig_amode))
	(*fh)->access_mode = orig_amode;

    /* determine name of file that will hold the shared file pointer */
    /* can't support shared file pointers on a file system that doesn't
       support file locking, e.g., PIOFS, PVFS */
    if ((error_code == MPI_SUCCESS) && ((*fh)->file_system != ADIO_PIOFS)
          && ((*fh)->file_system != ADIO_PVFS)) {
	ADIOI_Shfp_fname(*fh, rank);

        /* if MPI_MODE_APPEND, set the shared file pointer to end of file.
           indiv. file pointer already set to end of file in ADIO_Open. 
           Here file view is just bytes. */
	if ((*fh)->access_mode & MPI_MODE_APPEND) {
	    if (!rank) ADIO_Set_shared_fp(*fh, (*fh)->fp_ind, &error_code);
	    MPI_Barrier(dupcomm);
	}
    }

#ifdef MPI_hpux
    HPMP_IO_OPEN_END(fl_xmpi, *fh, comm);
#endif /* MPI_hpux */
    return error_code;
}