Пример #1
/*
 * Fortran binding for MPI_Group_compare.
 *
 * group1, group2 - Fortran handles of the groups to compare
 * result         - receives the comparison outcome when the call succeeds
 * __ierr         - receives the return code of MPI_Group_compare
 */
FORTRAN_API void FORT_CALL mpi_group_compare_ ( MPI_Fint *group1, MPI_Fint *group2, MPI_Fint *result, MPI_Fint *__ierr )
{
    int l_result;
    int rc;

    /* MPI_Group_compare writes an int, so compare into a local int and
       convert afterwards instead of handing it the MPI_Fint directly. */
    rc = MPI_Group_compare( MPI_Group_f2c(*group1),
                            MPI_Group_f2c(*group2), &l_result );
    *__ierr = (MPI_Fint)rc;

    /* Only publish the outcome when it was actually produced; on failure
       l_result is unset and must not be read. */
    if (rc == MPI_SUCCESS) {
        *result = (MPI_Fint)l_result;
    }
}
Пример #2
/*
 * JNI implementation of mpi.Group.compare.
 *
 * group1, group2 - native MPI_Group handles carried as Java longs
 *
 * Returns the value produced by MPI_Group_compare.  The MPI return code is
 * handed to ompi_java_exceptionCheck (presumably it raises a Java exception
 * on failure -- confirm against its definition).
 */
JNIEXPORT jint JNICALL Java_mpi_Group_compare(
        JNIEnv *env, jclass jthis, jlong group1, jlong group2)
{
    /* Start from a defined value so a failed comparison never returns an
       indeterminate int to the JVM. */
    int result = MPI_UNEQUAL;
    int rc;

    rc = MPI_Group_compare((MPI_Group)group1, (MPI_Group)group2, &result);
    ompi_java_exceptionCheck(env, rc);
    return result;
}
Пример #3
int main(int argc, char *argv[])
	int rank;
	MPI_Status status;

	MPI_Init(&argc, &argv);

	MPI_Group gw, g1, g2, g3;
	MPI_Comm_group(MPI_COMM_WORLD, &gw);

	int new_ranks[] = {0, 2, 3};
	MPI_Group_incl(gw, 3, new_ranks, &g1);

	int new_ranks2[] = {2, 3, 0};
	MPI_Group_incl(gw, 3, new_ranks2, &g2);

	MPI_Group_incl(gw, 3, new_ranks2, &g3);

	int size1, size2, size3;
	MPI_Group_size(g1, &size1);
	MPI_Group_size(g1, &size2);
	MPI_Group_size(g1, &size3);

	if (size1 != size2 || size2 != size3 || size1 != 3) {
		return 1;

	int r1, r2;
	MPI_Group_compare(g1, g2, &r1);
	MPI_Group_compare(g2, g3, &r2);
	if (r1 != MPI_SIMILAR || r2 != MPI_IDENT) {
		return 1;


	return 0;
Пример #4
/*
 * Class:     mpi_Group
 * Method:    Compare
 * Signature: (Lmpi/Group;Lmpi/Group;)I
 */
/*
 * JNI implementation of mpi.Group.Compare taking two Java Group objects.
 *
 * As listed, the function calls ompi_java_clearFreeList(env) and returns
 * the local `result`.
 *
 * NOTE(review): the block delimiters and, apparently, the statement that
 * computes `result` from group1/group2 are missing from this listing --
 * restore them from the real file before using this code.
 */
JNIEXPORT jint JNICALL Java_mpi_Group_Compare(JNIEnv *env, jclass jthis,
                                              jobject group1, jobject group2)
    int result;

    ompi_java_clearFreeList(env) ;

    /* NOTE(review): `result` is never assigned in the visible code, so this
       returns an indeterminate value; group1 and group2 are unused. */
    return result;
Пример #5
/*
 * Ruby method body: tells whether two wrapped MPI groups are identical.
 *
 * self  - receiver, wrapping an MPI_Group
 * rgrp2 - Ruby object wrapping the MPI_Group to compare against
 *
 * Returns Qtrue when MPI_Group_compare reports MPI_IDENT; Qfalse for any
 * other outcome, including a failed comparison.
 */
static VALUE group_equal(VALUE self, VALUE rgrp2)
{
    int rv, flag;
    MPI_Group *grp1, *grp2;

    Data_Get_Struct(self, MPI_Group, grp1);
    /* Unwrap the Ruby argument (rgrp2); grp2 is only the output pointer and
       must not be passed as the object to unwrap. */
    Data_Get_Struct(rgrp2, MPI_Group, grp2);

    rv = MPI_Group_compare(*grp1, *grp2, &flag);
    if (rv != MPI_SUCCESS) {
        /* flag is not meaningful when the call failed. */
        return Qfalse;
    }

    return flag == MPI_IDENT ? Qtrue : Qfalse;
}
Пример #6
/*
 * Fortran-callable wrapper around MPI_Group_compare.
 *
 * group1, group2 - Fortran group handles, converted with MPI_Group_f2c
 * result         - output argument (its use is not visible in this listing)
 * ierr           - receives the MPI return code converted with OMPI_INT_2_FINT
 *
 * NOTE(review): this listing is truncated -- the MPI_Group_compare call is
 * cut off after its second argument and the body of the final `if` is
 * missing, as are the function's braces.  Restore from the real file.
 */
void mpi_group_compare_f(MPI_Fint *group1, MPI_Fint *group2,
                         MPI_Fint *result, MPI_Fint *ierr)
    ompi_group_t *c_group1, *c_group2;

    /* Convert the Fortran handles to their C representation. */
    c_group1 = MPI_Group_f2c(*group1);
    c_group2 = MPI_Group_f2c(*group2);

    /* NOTE(review): the third argument of MPI_Group_compare and the closing
       parentheses are missing on the next line. */
    *ierr = OMPI_INT_2_FINT(MPI_Group_compare(c_group1, c_group2, 
    /* Success branch; its body is not present in the listing. */
    if (MPI_SUCCESS == OMPI_FINT_2_INT(*ierr)) {
Пример #7
int c2ffile_ ( int *file )
    MPI_File cFile = MPI_File_f2c( *file );
    MPI_Group group, wgroup;
    int result;

    MPI_File_get_group( cFile, &group );
    MPI_Comm_group( MPI_COMM_WORLD, &wgroup );

    MPI_Group_compare( group, wgroup, &result );
    if (result != MPI_IDENT) {
	fprintf( stderr, "File: did not get expected group\n" );
	return 1;

    MPI_Group_free( &group );
    MPI_Group_free( &wgroup );
    return 0;
Пример #8
/*
 * Exercises MPI group routines on the group of MPI_COMM_WORLD:
 * MPI_Group_incl, MPI_Group_translate_ranks (also against the group of
 * MPI_COMM_SELF), MPI_Group_excl, MPI_Group_range_incl / range_excl with
 * positive and negative strides, MPI_Group_union, and MPI_Group_compare
 * against MPI_GROUP_EMPTY.  Failed checks are reported on stderr and rank 0
 * prints a summary at the end.  Returns 0.
 *
 * NOTE(review): in this listing the block delimiters are missing, no
 * MPI_Init / MPI_Finalize call is visible, and `errs` is never changed after
 * its initialisation to 0, so the summary cannot report a failure as
 * written.  Restore the missing lines from the real file.
 */
int main( int argc, char *argv[] )
    MPI_Group g1, g2, g4, g5, g45, selfgroup, g6;
    int ranks[16], size, rank, myrank, range[1][3];
    int errs = 0;
    int i, rin[16], rout[16], result;


	/* g1 is the group of MPI_COMM_WORLD; every other group is derived from it. */
	MPI_Comm_group( MPI_COMM_WORLD, &g1 );
	MPI_Comm_rank( MPI_COMM_WORLD, &myrank );
	MPI_Comm_size( MPI_COMM_WORLD, &size );
	if (size < 8) {
	    fprintf( stderr, 
		  "Test requires 8 processes (16 prefered) only %d provided\n",
		     size );

	/* 16 members, this process is rank 0, return in group 1 */
	ranks[0] = myrank; ranks[1] = 2; ranks[2] = 7;
	/* Keep the three entries distinct when the caller itself is rank 2 or 7. */
	if (myrank == 2) ranks[1] = 3;
	if (myrank == 7) ranks[2] = 6;
	MPI_Group_incl( g1, 3, ranks, &g2 );
	/* Check the resulting group */
	/* NOTE(review): this overwrites `size` (the world size read above) with
	   the size of g2; every later use of `size` sees that value. */
	MPI_Group_size( g2, &size );
	MPI_Group_rank( g2, &rank );
	if (size != 3) {
	    fprintf( stderr, "Size should be %d, is %d\n", 3, size );
	if (rank != 0) {
	    fprintf( stderr, "Rank should be %d, is %d\n", 0, rank );

	/* Translating g2 ranks 0..2 back into g1 must give the ranks used to build g2. */
	rin[0] = 0; rin[1] = 1; rin[2] = 2;
	MPI_Group_translate_ranks( g2, 3, rin, g1, rout );
	for (i=0; i<3; i++) {
	    if (rout[i] != ranks[i]) {
		fprintf( stderr, "translated rank[%d] %d should be %d\n", 
			 i, rout[i], ranks[i] );
	/* Translate the process of the self group against another group */
	MPI_Comm_group( MPI_COMM_SELF, &selfgroup );
	rin[0] = 0;
	MPI_Group_translate_ranks( selfgroup, 1, rin, g1, rout );
	if (rout[0] != myrank) {
	    fprintf( stderr, "translated of self is %d should be %d\n", 
			 rout[0], myrank );

	/* Reverse direction: g1 ranks 0..size-1 into the self group.  Only the
	   caller's own rank should map to 0; all others to MPI_UNDEFINED. */
	for (i=0; i<size; i++) 
	    rin[i] = i;
	MPI_Group_translate_ranks( g1, size, rin, selfgroup, rout );
	for (i=0; i<size; i++) {
	    if (i == myrank && rout[i] != 0) {
		fprintf( stderr, "translated world to self of %d is %d\n",
			 i, rout[i] );
	    else if (i != myrank && rout[i] != MPI_UNDEFINED) {
		fprintf( stderr, "translated world to self of %d should be undefined, is %d\n",
			 i, rout[i] );
	MPI_Group_free( &selfgroup );

	/* Exclude everyone in our group */
	    int ii, *lranks, g1size;

	    MPI_Group_size( g1, &g1size );
	    lranks = (int *)malloc( g1size * sizeof(int) );
	    for (ii=0; ii<g1size; ii++) lranks[ii] = ii;
	    MPI_Group_excl( g1, g1size, lranks, &g6 );
	    /* Excluding every rank is expected to yield the MPI_GROUP_EMPTY handle itself. */
	    if (g6 != MPI_GROUP_EMPTY) {
		fprintf( stderr, "Group formed by excluding all ranks not empty\n" );
		MPI_Group_free( &g6 );
	    free( lranks );
	/* Add tests for additional group operations */
	/* NOTE(review): the lines from "g2 = incl 1,3,7" through
	   "diff( w, g2 ) => g3" read like the body of a block comment whose
	   delimiters are missing from this listing. */
	   g2 = incl 1,3,7
	   g3 = excl 1,3,7
	   intersect ( w, g2 ) => g2
	   intersect ( w, g3 ) => g3
	   intersect ( g2, g3 ) => empty
	   g4 = rincl 1:n-1:2
	   g5 = rexcl 1:n-1:2
	   union( g4, g5 ) => world
	   g6 = rincl n-1:1:-1 
	   g7 = rexcl n-1:1:-1
	   union( g6, g7 ) => concat of entries, similar to world
	   diff( w, g2 ) => g3
	MPI_Group_free( &g2 );

	/* g5 = g1 without ranks 1, 3, ... up to size-1 (stride 2). */
	range[0][0] = 1;
	range[0][1] = size-1;
	range[0][2] = 2;
	MPI_Group_range_excl( g1, 1, range, &g5 );

	/* g4 = exactly those ranks. */
	range[0][0] = 1;
	range[0][1] = size-1;
	range[0][2] = 2;
	MPI_Group_range_incl( g1, 1, range, &g4 );
	MPI_Group_union( g4, g5, &g45 );
	/* Only g4 is compared here; g45 is built and later freed without being inspected. */
	MPI_Group_compare( MPI_GROUP_EMPTY, g4, &result );
	if (result != MPI_UNEQUAL) {
	    fprintf( stderr, "Comparison with empty group gave %d, not 3\n",
		     result );
	MPI_Group_free( &g4 );
	MPI_Group_free( &g5 );
	MPI_Group_free( &g45 );

	/* Now, duplicate the test, but using negative strides */
	range[0][0] = size-1;
	range[0][1] = 1;
	range[0][2] = -2;
	MPI_Group_range_excl( g1, 1, range, &g5 );

	range[0][0] = size-1;
	range[0][1] = 1;
	range[0][2] = -2;
	MPI_Group_range_incl( g1, 1, range, &g4 );

	MPI_Group_union( g4, g5, &g45 );

	MPI_Group_compare( MPI_GROUP_EMPTY, g4, &result );
	if (result != MPI_UNEQUAL) {
	    fprintf( stderr, "Comparison with empty group (formed with negative strides) gave %d, not 3\n",
		     result );
	MPI_Group_free( &g4 );
	MPI_Group_free( &g5 );
	MPI_Group_free( &g45 );
        MPI_Group_free( &g1 );

    /* Summary is printed by world rank 0 only. */
    if (myrank == 0) 
	if (errs == 0) {
	    printf( " No Errors\n" );
	else {
	    printf( "Found %d errors\n", errs );

    return 0;
Пример #9
/*
 * Test driver for MPI group routines, built on the group of MPI_COMM_WORLD
 * (basegroup): rank/size queries, MPI_Group_translate_ranks on a communicator
 * with inverted ranking, MPI_Group_compare (similar / ident / unequal),
 * MPI_Group_incl, MPI_Group_excl, MPI_Group_union, MPI_Group_range_excl,
 * MPI_Group_intersection and comparison with MPI_GROUP_EMPTY.
 *
 * The per-process error count `errs` is summed over MPI_COMM_WORLD into
 * `toterr`, which is also the return value.
 *
 * NOTE(review): in this listing the block delimiters are missing, `errs` is
 * never changed after its initialisation to 0, `ranks` / `ranks_out` are never
 * freed and no MPI_Finalize call is visible.  Restore from the real file.
 */
int main( int argc, char **argv )
    int errs=0, toterr;
    MPI_Group basegroup;
    MPI_Group g1, g2, g3, g4, g5, g6, g7, g8, g9, g10;
    MPI_Group g3a, g3b;
    MPI_Comm  comm, newcomm, splitcomm, dupcomm;
    int       i, grp_rank, rank, grp_size, size, result;
    int       nranks, *ranks, *ranks_out;
    int       range[1][3];
    int       worldrank;

    MPI_Init( &argc, &argv );
    MPI_Comm_rank( MPI_COMM_WORLD, &worldrank );

    comm = MPI_COMM_WORLD;

    MPI_Comm_group( comm, &basegroup );
    MPI_Comm_rank( comm, &rank );
    MPI_Comm_size( comm, &size );

/* Get the basic information on this group */
    MPI_Group_rank( basegroup, &grp_rank );
    if (grp_rank != rank) {
	fprintf( stdout, "group rank %d != comm rank %d\n", grp_rank, rank );

    MPI_Group_size( basegroup, &grp_size );
    if (grp_size != size) {
	fprintf( stdout, "group size %d != comm size %d\n", grp_size, size );

/* Form a new communicator with inverted ranking */
    /* The split key size - rank decreases as rank grows. */
    MPI_Comm_split( comm, 0, size - rank, &newcomm );
    MPI_Comm_group( newcomm, &g1 );
    ranks	  = (int *)malloc( size * sizeof(int) );
    ranks_out = (int *)malloc( size * sizeof(int) );
    for (i=0; i<size; i++) ranks[i] = i;
    nranks = size;
    /* Rank i of the inverted group is expected to be rank (size-1)-i of basegroup. */
    MPI_Group_translate_ranks( g1, nranks, ranks, basegroup, ranks_out );
    for (i=0; i<size; i++) {
	if (ranks_out[i] != (size - 1) - i) {
	    fprintf( stdout, "Translate ranks got %d expected %d\n", 
		     ranks_out[i], (size - 1) - i );

/* Check Compare */
    /* Same members, different order: MPI_SIMILAR expected. */
    MPI_Group_compare( basegroup, g1, &result );
    if (result != MPI_SIMILAR) {
	fprintf( stdout, "Group compare should have been similar, was %d\n",
		 result );
    /* Group of a duplicated communicator: MPI_IDENT expected. */
    MPI_Comm_dup( comm, &dupcomm );
    MPI_Comm_group( dupcomm, &g2 );
    MPI_Group_compare( basegroup, g2, &result );
    if (result != MPI_IDENT) {
	fprintf( stdout, "Group compare should have been ident, was %d\n",
		 result );
    /* Group of one half of the processes: MPI_UNEQUAL expected. */
    MPI_Comm_split( comm, rank < size/2, rank, &splitcomm );
    MPI_Comm_group( splitcomm, &g3 );
    MPI_Group_compare( basegroup, g3, &result );
    if (result != MPI_UNEQUAL) {
	fprintf( stdout, "Group compare should have been unequal, was %d\n",
		 result );

    /* Build two groups that have this process and one other, but do not
       have the same processes */
    ranks[0] = rank;
    ranks[1] = (rank + 1) % size;
    MPI_Group_incl( basegroup, 2, ranks, &g3a );
    ranks[1] = (rank + size - 1) % size;
    MPI_Group_incl( basegroup, 2, ranks, &g3b );
    MPI_Group_compare( g3a, g3b, &result );
    if (result != MPI_UNEQUAL) {
	fprintf( stdout, "Group compare of equal sized but different groups should have been unequal, was %d\n", result );

/* Build two new groups by excluding members; use Union to put them
   together again */

/* Exclude 0 */
    for (i=0; i<size; i++) ranks[i] = i;
    MPI_Group_excl( basegroup, 1, ranks, &g4 );
/* Exclude 1-(size-1) */
    MPI_Group_excl( basegroup, size-1, ranks+1, &g5 );
    /* g5 is passed first, so its members precede g4's in the union. */
    MPI_Group_union( g5, g4, &g6 );
    MPI_Group_compare( basegroup, g6, &result );
    if (result != MPI_IDENT) {
	int usize;
	/* See ordering requirements on union */
	fprintf( stdout, "Group excl and union did not give ident groups\n" );
	fprintf( stdout, "[%d] result of compare was %d\n", rank, result );
	MPI_Group_size( g6, &usize );
	fprintf( stdout, "Size of union is %d, should be %d\n", usize, size );
    /* Union with a subset of itself must leave basegroup unchanged. */
    MPI_Group_union( basegroup, g4, &g7 );
    MPI_Group_compare( basegroup, g7, &result );
    if (result != MPI_IDENT) {
	int usize;
	fprintf( stdout, "Group union of overlapping groups failed\n" );
	fprintf( stdout, "[%d] result of compare was %d\n", rank, result );
	MPI_Group_size( g7, &usize );
	fprintf( stdout, "Size of union is %d, should be %d\n", usize, size );

/* Use range_excl instead of ranks */
    /* printf ("range excl\n" ); fflush( stdout ); */
    range[0][0] = 1;
    range[0][1] = size-1;
    range[0][2] = 1;
    MPI_Group_range_excl( basegroup, 1, range, &g8 );
    /* printf( "out  of range excl\n" ); fflush( stdout ); */
    /* g8 excludes the same ranks 1..size-1 as g5, so the two must be identical. */
    MPI_Group_compare( g5, g8, &result );
    /* printf( "out of compare\n" ); fflush( stdout ); */
    if (result != MPI_IDENT) {
	fprintf( stdout, "Group range excl did not give ident groups\n" );

    /* printf( "intersection\n" ); fflush( stdout ); */
    MPI_Group_intersection( basegroup, g4, &g9 );
    MPI_Group_compare( g9, g4, &result );
    if (result != MPI_IDENT) {
	fprintf( stdout, "Group intersection did not give ident groups\n" );

/* Exclude EVERYTHING and check against MPI_GROUP_EMPTY */
    /* printf( "range excl all\n" ); fflush( stdout ); */
    range[0][0] = 0;
    range[0][1] = size-1;
    range[0][2] = 1;
    MPI_Group_range_excl( basegroup, 1, range, &g10 );

    /* printf( "done range excl all\n" ); fflush(stdout); */
    MPI_Group_compare( g10, MPI_GROUP_EMPTY, &result );
    /* printf( "done compare to MPI_GROUP_EMPTY\n" ); fflush(stdout); */

    if (result != MPI_IDENT) {
	fprintf( stdout, 
		 "MPI_GROUP_EMPTY didn't compare against empty group\n");

    /* printf( "freeing groups\n" ); fflush( stdout ); */
    MPI_Group_free( &basegroup );
    MPI_Group_free( &g1 );
    MPI_Group_free( &g2 );
    MPI_Group_free( &g3 );
    MPI_Group_free( &g3a );
    MPI_Group_free( &g3b );
    MPI_Group_free( &g4 );
    MPI_Group_free( &g5 );
    MPI_Group_free( &g6 );
    MPI_Group_free( &g7 );
    MPI_Group_free( &g8 );
    MPI_Group_free( &g9 );
    MPI_Group_free( &g10 );
    MPI_Comm_free( &dupcomm );
    MPI_Comm_free( &splitcomm );
    MPI_Comm_free( &newcomm );

    /* Sum the per-process error counts so every process returns the same total. */
    MPI_Allreduce( &errs, &toterr, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
    if (worldrank == 0) {
	/* NOTE(review): as listed, only the first printf is governed by the
	   `if`; the second one runs unconditionally (an `else` appears to be
	   missing from this listing). */
	if (toterr == 0) 
	    printf( " No Errors\n" );
	    printf( "Found %d errors in MPI Group routines\n", toterr );

    return toterr;
Пример #10
/*
 * Demo: splits MPI_COMM_WORLD into a lower half (world ranks 0-3) and an
 * upper half (world ranks 4-7), creates a communicator for the caller's half,
 * prints the caller's rank/size in the world, the new communicator and the
 * new group, sums the world ranks inside each half with MPI_Allreduce, and
 * finally prints how every world rank translates into the caller's group.
 *
 * The rank tables are hard-coded, so the program must be started with
 * exactly 8 processes.  Returns 0 on success and 1 when started with a
 * different process count.
 */
int main(int argc, char **argv) {

	int rankLeft[4] = {0, 1, 2, 3}, rankRight[4] = {4, 5, 6, 7};
	int i, result;
	char outStr[600];

	int nProcs, myRank;
	MPI_Group grpWorld, grpNew;
	MPI_Comm commNew;

	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &nProcs);
	MPI_Comm_rank(MPI_COMM_WORLD, &myRank);

	/* rankLeft/rankRight and ranks1/ranks2 below only describe 8 world
	   ranks; any other process count would index past them or name
	   ranks that do not exist. */
	if (nProcs != 8) {
		if (myRank == 0) {
			fprintf(stderr, "This example must be run with exactly 8 processes (got %d)\n", nProcs);
		}
		MPI_Finalize();
		return 1;
	}

	MPI_Comm_group(MPI_COMM_WORLD, &grpWorld);
	if (myRank < nProcs / 2) {
		MPI_Group_incl(grpWorld, nProcs / 2, rankLeft, &grpNew);
	} else {
		MPI_Group_incl(grpWorld, nProcs / 2, rankRight, &grpNew);
	}
	MPI_Comm_create(MPI_COMM_WORLD, grpNew, &commNew);

	int myRankCommNew, nProcsCommNew;
	int myRankGrpNew, nProcsGrpNew;

	MPI_Comm_rank(commNew, &myRankCommNew);
	MPI_Comm_size(commNew, &nProcsCommNew);
	MPI_Group_rank(grpNew, &myRankGrpNew);
	MPI_Group_size(grpNew, &nProcsGrpNew);

	fprintf(stdout, "WorldRank: %d in %d, NewCommRank: %d in %d, NewGrpRank: %d in %d\n",
		myRank, nProcs, myRankCommNew, nProcsCommNew, myRankGrpNew, nProcsGrpNew);

	/* Sum of the world ranks within the caller's half. */
	int sendBuf = myRank, recvBuf;

	MPI_Allreduce(&sendBuf, &recvBuf, 1, MPI_INT, MPI_SUM, commNew);

	fprintf(stdout, "WorldRank = %d, sendBuf = %d, recvBuf = %d\n", myRank, sendBuf, recvBuf);

	int ranks1[8] = {0, 1, 2, 3, 4, 5, 6, 7}, ranks2[8];

	MPI_Group_compare(grpWorld, grpNew, &result);
	MPI_Group_translate_ranks(grpWorld, nProcs, ranks1, grpNew, ranks2);
	if (myRank == 0) {
		fprintf(stdout, "result = %d\n", result);
	}

	/* Append at a running offset: formatting outStr into itself (buffer
	   used as both source and destination) is undefined behaviour. */
	int len = snprintf(outStr, sizeof outStr, "rank %d: ", myRank);
	for (i = 0; i < nProcs && len >= 0 && len < (int)sizeof outStr; i++) {
		int n = snprintf(outStr + len, sizeof outStr - (size_t)len,
				 "%d = %d ", ranks1[i], ranks2[i]);
		if (n < 0) {
			break;
		}
		len += n;
	}
	fprintf(stdout, "%s\n", outStr);

	/* Release every handle created above before shutting MPI down. */
	MPI_Comm_free(&commNew);
	MPI_Group_free(&grpNew);
	MPI_Group_free(&grpWorld);
	MPI_Finalize();

	return 0;
}
/*
 * Rebuilds a communicator after process failures.
 *
 * Parameters (as used in the visible code):
 *   broken              - communicator in which failures were detected
 *   repaired            - receives the communicator produced by the final MPI_Comm_split
 *   iteration           - only used to tag log messages
 *   listFails           - filled with the ranks of the failed processes
 *   numFails            - receives the number of failed processes
 *   numNodeFails        - receives the number of distinct hostfile lines
 *                         (failedRank / SLOTS) hit by failures
 *   sumPrevNumNodeFails - node failures from earlier repairs; used when picking a spare host line
 *   argc, argv          - command line reused to spawn the replacement processes
 *   verbosity           - > 0 enables progress/timing output on rank 0,
 *                         > 1 enables per-step SUCCESS messages
 *
 * Steps: shrink the communicator, derive the failed group, map failed ranks
 * to hostfile lines, spawn replacements with MPI_Comm_spawn_multiple, merge
 * the intercommunicator, send each replacement its old rank, then split to
 * restore the original rank order.  When the spawn, merge, send or split
 * step reports MPI_ERR_PROC_FAILED or MPI_ERR_REVOKED the function calls
 * itself to start over.
 *
 * NOTE(review): this listing has lost its block delimiters and some
 * preprocessor lines (two #else lines have no visible #if), so the nesting
 * must be confirmed against the real file.
 */
void repairComm(MPI_Comm * broken, MPI_Comm * repaired, int iteration, int * listFails, int * numFails,
                int * numNodeFails, int sumPrevNumNodeFails, int argc, char ** argv, int verbosity) {
    MPI_Comm tempShrink, unorderIntracomm, tempIntercomm;
    int i, ret, result, procsNeeded = 0, oldRank, newRank, oldGroupSize, rankKey = 0, flag;
    int * tempRanks, * failedRanks, * errCodes, rank, hostfileLineIndex;
    MPI_Group oldGroup, failedGroup, shrinkGroup;
    int hostfileLastLineIndex, tempLineIndex, * failedNodeList = NULL, * nodeList = NULL, totNodeFailed = 0;
    double startTime = 0.0, endTime;
    int nprocs, j, * shrinkMergeList;
    char hostName[128];
    gethostname(hostName, sizeof(hostName));

    // Per-replacement argument arrays for MPI_Comm_spawn_multiple
    char ** appToLaunch;
    char *** argvToLaunch;
    int * procsNeededToLaunch;
    MPI_Info * hostInfoToLaunch;
    char ** hostNameToLaunch;

    MPI_Comm_rank(*broken, &rank);
    if(rank == 0)
        startTime = MPI_Wtime();

    // First pass: take the failed group from the acknowledged failures
    MPI_Comm_size(*broken, &oldGroupSize);
    MPI_Comm_group(*broken, &oldGroup);
    MPI_Comm_rank(*broken, &oldRank);
    OMPI_Comm_failure_get_acked(*broken, &failedGroup);
    MPI_Group_size(failedGroup, &procsNeeded);
    errCodes = (int *) malloc(sizeof(int) * procsNeeded);

    // Figure out ranks of the processes which had failed
    tempRanks = (int *) malloc(sizeof(int) * oldGroupSize);
    failedRanks = (int *) malloc(sizeof(int) * oldGroupSize);
    #pragma omp parallel for default(shared)
    for(i = 0; i < oldGroupSize; i++)
        tempRanks[i] = i;

    MPI_Group_translate_ranks(failedGroup, procsNeeded, tempRanks, oldGroup, failedRanks);

    double shrinkTime = MPI_Wtime();
    // Shrink the broken communicator to remove failed procs
    if(MPI_SUCCESS != (ret = OMPI_Comm_shrink(*broken, &tempShrink)))
        printf("Iteration %d: OMPI_Comm_shrink (parent): ERROR!\n", iteration);
    else {
        if(verbosity > 1 )
            printf("Iteration %d: OMPI_Comm_shrink (parent): SUCCESS\n", iteration);
    if (verbosity > 0 && rank == 0)
        printf("OMPI_Comm_shrink takes %0.6f Sec\n", MPI_Wtime() - shrinkTime);

    // NOTE(review): oldGroup was already obtained above; this second
    // MPI_Comm_group overwrites the handle with no MPI_Group_free in between.
    MPI_Comm_group(*broken, &oldGroup);
    MPI_Comm_group(tempShrink, &shrinkGroup);
    MPI_Comm_size(*broken, &oldGroupSize);

    MPI_Group_compare(oldGroup, shrinkGroup, &result);

    // Second pass: when the shrunken group differs from the old one, the
    // failed group is recomputed as oldGroup minus shrinkGroup
    if(result != MPI_IDENT)
        MPI_Group_difference(oldGroup, shrinkGroup, &failedGroup);

    MPI_Comm_rank(*broken, &oldRank);
    MPI_Group_size(failedGroup, &procsNeeded);

    // NOTE(review): errCodes, tempRanks and failedRanks were already
    // allocated above; the assignments below replace those pointers without
    // a visible free.
    errCodes = (int *) malloc(sizeof(int)*procsNeeded);

    // Figure out ranks of the processes which had failed
    tempRanks = (int*)malloc(sizeof(int)*oldGroupSize);
    failedRanks = (int*)malloc(sizeof(int)*oldGroupSize);
    #pragma omp parallel for default(shared)
    for(i = 0; i < oldGroupSize; i++)
        tempRanks[i] = i;

    MPI_Group_translate_ranks(failedGroup, procsNeeded, tempRanks, oldGroup, failedRanks);


    // Assign number of failed processes
    *numFails = procsNeeded;

    hostNameToLaunch = (char **) malloc(procsNeeded * sizeof(char *));

    if(verbosity > 0 && rank == 0)
        printf("*** Iteration %d: Application: Number of process(es) failed in the corresponding "
               "communicator is %d ***\n", iteration, procsNeeded);

    if(rank == 0) {
        endTime = MPI_Wtime();
        printf("[%d]----- Creating failed process list takes %0.6f Sec (MPI_Wtime) -----\n", rank, endTime - startTime);

    // Determining total number of node failed, and a list of them
    hostfileLastLineIndex = getHostfileLastLineIndex(); //started from 0
    nodeList = (int *) malloc((hostfileLastLineIndex+1) * sizeof(int));
    memset(nodeList, 0, (hostfileLastLineIndex+1)*sizeof(int)); // initialize nodeList with 0's

    // Mark the hostfile line of every failed rank (SLOTS ranks per line)
    for(int i = 0; i < procsNeeded; ++i) {
        tempLineIndex = failedRanks[i]/SLOTS; //started from 0
        nodeList[tempLineIndex] = 1;

    for(int nodeCounter = 0; nodeCounter < (hostfileLastLineIndex+1); ++nodeCounter)
        totNodeFailed += nodeList[nodeCounter];
    *numNodeFails = totNodeFailed;

    // Check if there is sufficient spare node available for recovery
    if((hostfileLastLineIndex - totNodeFailed -sumPrevNumNodeFails) < (oldGroupSize-1)/SLOTS) {
        if(rank == 0)
            printf("[%d] There is no sufficient spare node available for recovery.\n", rank);

    failedNodeList = (int *) malloc(totNodeFailed * sizeof(int));
    memset(failedNodeList, 0, totNodeFailed * sizeof(int)); // initialize failedNodeList with 0's

    // Compact the marked lines into failedNodeList
    int failedNodeCounter = 0;
    for(int nodeCounter = 0; nodeCounter < (hostfileLastLineIndex+1); ++nodeCounter) {
        if(nodeList[nodeCounter] == 1)
            failedNodeList[failedNodeCounter++] = nodeCounter;

    char * hostNameFailed = NULL;
    // NOTE(review): tempLineIndex, hostfileLineIndex and hostNameFailed are
    // declared outside this loop and written by every iteration, while the
    // pragma marks all variables as shared.
    #pragma omp parallel for default(shared)
    for(i = 0; i < procsNeeded; ++i) {
        // Assign list of processes failed
        listFails[i] = failedRanks[i];

        tempLineIndex = failedRanks[i]/SLOTS; //started from 0
        // The j-th failed node is replaced by the j-th line counted back from
        // the end of the hostfile, skipping lines used by earlier repairs
        for(int j = 0; j < totNodeFailed; ++j) {
            if(failedNodeList[j] == tempLineIndex)
                hostfileLineIndex = hostfileLastLineIndex - j - sumPrevNumNodeFails;
        // NOTE(review): the two #else lines below have no visible matching
        // #if / #endif in this listing.
#else // Recovery on the same node (no node failure, only process failure)
        hostfileLineIndex = tempLineIndex;
        hostNameToLaunch[i] = getHostToLaunch(hostfileLineIndex);
        hostNameFailed = getHostToLaunch(tempLineIndex);
#else // Run on head node or personal machine
        hostNameToLaunch[i] = (char *)hostName;
        hostNameFailed = (char *)hostName;

        if(verbosity > 0 && rank == 0)
            printf("--- Iteration %d: Application: Process %d on node %s is failed! ---\n", iteration, failedRanks[i], hostNameFailed);
    // Release memory of hostNameFailed

    appToLaunch = (char **) malloc(procsNeeded * sizeof(char *));
    argvToLaunch = (char ***) malloc(procsNeeded * sizeof(char **));
    procsNeededToLaunch = (int *) malloc(procsNeeded * sizeof(int));
    hostInfoToLaunch = (MPI_Info *) malloc(procsNeeded * sizeof(MPI_Info));
    // Make sure the argv handed to the spawned processes is NULL-terminated
    argv[argc] = NULL;
    #pragma omp parallel for default(shared)
    for(i = 0; i < procsNeeded; i++) {
        appToLaunch[i] = (char *)argv[0];
        argvToLaunch[i] = (char **)argv;
        procsNeededToLaunch[i] = 1;
        // Host information where to spawn the processes
        // NOTE(review): no MPI_Info_create for hostInfoToLaunch[i] is visible
        // before this call.
        MPI_Info_set(hostInfoToLaunch[i], (char *)"host", hostNameToLaunch[i]);
        //MPI_Info_set(hostInfoToLaunch[i], "hostfile", "hostfile");

    double spawnTime = MPI_Wtime();
    OMPI_Comm_agree(tempShrink, &flag);
    // Spawn the new process(es)
    if(MPI_SUCCESS != (ret = MPI_Comm_spawn_multiple(procsNeeded, appToLaunch, argvToLaunch, procsNeededToLaunch,
                             hostInfoToLaunch, 0, tempShrink, &tempIntercomm, MPI_ERRCODES_IGNORE))) {
        if(MPI_ERR_COMM  == ret)
            printf("Iteration %d: MPI_Comm_spawn_multiple: Invalid communicator (parent)\n", iteration);
        if(MPI_ERR_ARG  == ret)
            printf("Iteration %d: MPI_Comm_spawn_multiple: Invalid argument (parent)\n", iteration);
        if(MPI_ERR_INFO  == ret)
            printf("Iteration %d: MPI_Comm_spawn_multiple: Invalid info (parent)\n", iteration);

        // A further failure during the spawn restarts the whole repair
        if((MPI_ERR_PROC_FAILED == ret) || (MPI_ERR_REVOKED == ret)) {
            return repairComm(broken, repaired, iteration, listFails, numFails, numNodeFails,
                              sumPrevNumNodeFails, argc, argv, verbosity);
        else {
            fprintf(stderr, "Iteration %d: Unknown error with MPI_Comm_spawn_multiple (parent): %d\n", iteration, ret);
    else {
        if(verbosity > 0 && rank == 0) {
            for(i = 0; i < procsNeeded; i++)
                printf("Iteration %d: MPI_Comm_spawn_multiple (parent) [spawning failed process %d on "
                       "node %s]: SUCCESS\n", iteration, failedRanks[i], hostNameToLaunch[i]);
        // Memory release. Moving the last two to the end of the function causes segmentation faults for 4 processes failure
    if (verbosity > 0 && rank == 0)
        printf("MPI_Comm_spawn_multiple takes %0.6f Sec\n", MPI_Wtime() - spawnTime);

    double mergeTime = MPI_Wtime();
    // Merge the new processes into a new communicator
    if(MPI_SUCCESS != (ret = MPI_Intercomm_merge(tempIntercomm, false, &unorderIntracomm))) {
        if((MPI_ERR_PROC_FAILED == ret) || (MPI_ERR_REVOKED == ret)) {
            // Start the recovery over again if there is a failure
            return repairComm(broken, repaired, iteration, listFails, numFails,
                              numNodeFails, sumPrevNumNodeFails, argc, argv, verbosity);
        else if(MPI_ERR_COMM == ret) {
            fprintf(stderr, "Iteration %d: Invalid communicator in MPI_Intercomm_merge (parent) %d\n", iteration, ret);
        else if(MPI_ERR_INTERN == ret) {
            fprintf(stderr, "Iteration %d: Acquaring memory error in MPI_Intercomm_merge ()%d\n", iteration, ret);
        else {
            fprintf(stderr, "Iteration %d: Unknown error with MPI_Intercomm_merge: %d\n", iteration, ret);
    else {
        if(verbosity > 1 )
            printf("Iteration %d: MPI_Intercomm_merge (parent): SUCCESS\n", iteration);
    if (verbosity > 0 && rank == 0)
        printf("MPI_Intercomm_merge takes %0.6f Sec\n", MPI_Wtime() - mergeTime);

    double agreeTime = MPI_Wtime();
    // Synchronize. sometimes hangs in without this
    // position of code and intercommunicator (not intra) is important
    OMPI_Comm_agree(tempIntercomm, &flag);// since some of the times MPI_Barrier hangs
    if (verbosity > 0 && rank == 0)
        printf("OMPI_Comm_agree takes %0.6f Sec\n", MPI_Wtime() - agreeTime);

    // Sending failed ranks and number of processes failed to the the newly created ranks.
    // oldGroupSize is the size of communicator before failure.
    // procsNeeded is the number of processes that are failed
    // child[i] is the destination rank in unorderIntracomm used for the i-th replacement
    int * child = (int *) malloc(procsNeeded*sizeof(int));
    #pragma omp parallel for default(shared)
    for(i = 0; i < procsNeeded; i++)
        child[i] = oldGroupSize - procsNeeded + i;

    MPI_Comm_rank(unorderIntracomm, &newRank);
    if(newRank == 0) {
        // send_val = { rank the replacement takes over, number of failed processes }
        int send_val[2];
        for(i = 0; i < procsNeeded; i++) {
            send_val[0] = failedRanks[i];
            send_val[1] = procsNeeded;
            if(MPI_SUCCESS != (ret = MPI_Send(&send_val, 2, MPI_INT, child[i], MERGE_TAG, unorderIntracomm))) {
                if((MPI_ERR_PROC_FAILED == ret) || (MPI_ERR_REVOKED == ret)) {
                    // Start the recovery over again if there is a failure
                    return repairComm(broken, repaired, iteration, listFails, numFails,
                                      numNodeFails, sumPrevNumNodeFails, argc, argv, verbosity);
                else {
                    fprintf(stderr, "Iteration %d: Unknown error with MPI_Send1 (parent): %d\n", iteration, ret);
            else {
                if(verbosity > 1 )
                    printf("Iteration %d: MPI_Send1 (parent): SUCCESS\n", iteration);

    // Split the current world (splitted from original) to order the ranks.
    MPI_Comm_rank(unorderIntracomm, &newRank);
    MPI_Comm_size(unorderIntracomm, &nprocs);

    // For one or more process failure (ordering)
    shrinkMergeList = (int *) malloc(nprocs*sizeof(int));

    // shrinkMergeList: first the ranks that are not on the failed list, in
    // ascending order, then the failed ranks
    j = 0;
    for(i = 0; i < nprocs; i++) {
        if(rankIsNotOnFailedList(i, failedRanks, procsNeeded))
            shrinkMergeList[j++] = i;

    for(i = j; i < nprocs; i++)
        shrinkMergeList[i] = failedRanks[i-j];

    // A process whose merged rank is below nprocs - procsNeeded uses the
    // matching list entry as its split key; otherwise rankKey stays 0
    for(i = 0; i < (nprocs - procsNeeded); i++) {
        if(newRank == i)
            rankKey = shrinkMergeList[i];

    // NOTE(review): the return value of MPI_Comm_split is not stored in ret,
    // so the checks and messages below use the value left by an earlier call.
    if(MPI_SUCCESS != (MPI_Comm_split(unorderIntracomm, 0, rankKey, repaired))) {
        if((MPI_ERR_PROC_FAILED == ret) || (MPI_ERR_REVOKED == ret)) {
            // Start the recovery over again if there is a failure
            return repairComm(broken, repaired, iteration, listFails, numFails,
                              numNodeFails, sumPrevNumNodeFails, argc, argv, verbosity);
        else {
            fprintf(stderr, "Iteration %d: Unknown error with MPI_Comm_split (parent): %d\n", iteration, ret);
    else {
        if(verbosity > 1 )
            printf("Iteration %d: MPI_Comm_split (parent): SUCCESS\n", iteration);

    // Release memory
    // NOTE(review): the statements guarded by the two checks below are not
    // present in this listing.
    if(failedNodeList != NULL)
    if(nodeList != NULL)
Пример #12
/*
 * This test makes sure that after a failure, the correct group of failed
 * processes is returned from MPIX_Comm_failure_ack/get_acked.
 */
/*
 * Fault-tolerance test for MPIX_Comm_failure_ack / MPIX_Comm_failure_get_acked.
 *
 * Rank 0 expects a receive from rank 1 to fail, acknowledges the failure and
 * checks that the acknowledged group is exactly {world rank 1}.  It then
 * receives one message from rank 2, expects a second receive from rank 2 to
 * fail, and checks that the acknowledged group is still {world rank 1}
 * (no second MPIX_Comm_failure_ack is issued).  Any mismatch aborts with
 * MPI_Abort; on success " No errors" is printed.
 *
 * NOTE(review): the block delimiters are missing from this listing, the
 * body of the `rank == 1` branch is absent, only the MPI_Ssend of the
 * `rank == 2` branch is visible, and no MPI_Finalize / return is visible.
 */
int main(int argc, char **argv)
    int rank, size, err, result, i;
    /* The initializer is exactly 10 characters, so buf holds no terminating
       NUL; it is only ever passed with an explicit count of 10. */
    char buf[10] = " No errors";
    char error[MPI_MAX_ERROR_STRING];
    MPI_Group failed_grp, one_grp, world_grp;
    int one[] = { 1 };
    int world_ranks[] = { 0, 1, 2 };
    int failed_ranks[3];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size < 3) {
        fprintf(stderr, "Must run with at least 3 processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);

    /* Errors must be returned to the caller so the failed receives can be inspected. */
    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);

    /* NOTE(review): nothing is listed inside this branch. */
    if (rank == 1) {

    if (rank == 0) {
        err = MPI_Recv(buf, 10, MPI_CHAR, 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        if (MPI_SUCCESS == err) {
            fprintf(stderr, "Expected a failure for receive from rank 1\n");
            MPI_Abort(MPI_COMM_WORLD, 1);

        err = MPIX_Comm_failure_ack(MPI_COMM_WORLD);
        if (MPI_SUCCESS != err) {
            int ec;
            MPI_Error_class(err, &ec);
            /* MPI_Error_string stores the message length in `size`, replacing
               the communicator size read earlier. */
            MPI_Error_string(err, error, &size);
            fprintf(stderr, "MPIX_Comm_failure_ack returned an error: %d\n%s", ec, error);
            MPI_Abort(MPI_COMM_WORLD, 1);
        err = MPIX_Comm_failure_get_acked(MPI_COMM_WORLD, &failed_grp);
        if (MPI_SUCCESS != err) {
            int ec;
            MPI_Error_class(err, &ec);
            MPI_Error_string(err, error, &size);
            fprintf(stderr, "MPIX_Comm_failure_get_acked returned an error: %d\n%s", ec, error);
            MPI_Abort(MPI_COMM_WORLD, 1);

        /* one_grp = {world rank 1}: the only process expected to be acknowledged as failed. */
        MPI_Comm_group(MPI_COMM_WORLD, &world_grp);
        MPI_Group_incl(world_grp, 1, one, &one_grp);
        MPI_Group_compare(one_grp, failed_grp, &result);
        if (MPI_IDENT != result) {
            fprintf(stderr, "First failed group contains incorrect processes\n");
            /* NOTE(review): world_ranks and failed_ranks have three entries;
               `size` here is the size of failed_grp. */
            MPI_Group_size(failed_grp, &size);
            MPI_Group_translate_ranks(failed_grp, size, world_ranks, world_grp, failed_ranks);
            for (i = 0; i < size; i++)
                fprintf(stderr, "DEAD: %d\n", failed_ranks[i]);
            MPI_Abort(MPI_COMM_WORLD, 1);

        /* First message from rank 2 must arrive; the second receive must fail. */
        err = MPI_Recv(buf, 10, MPI_CHAR, 2, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        if (MPI_SUCCESS != err) {
            fprintf(stderr, "First receive failed\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        err = MPI_Recv(buf, 10, MPI_CHAR, 2, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        if (MPI_SUCCESS == err) {
            fprintf(stderr, "Expected a failure for receive from rank 2\n");
            MPI_Abort(MPI_COMM_WORLD, 1);

        /* Queried again without a new acknowledgement; the comparison below
           still expects the group to be {world rank 1}. */
        err = MPIX_Comm_failure_get_acked(MPI_COMM_WORLD, &failed_grp);
        if (MPI_SUCCESS != err) {
            int ec;
            MPI_Error_class(err, &ec);
            MPI_Error_string(err, error, &size);
            fprintf(stderr, "MPIX_Comm_failure_get_acked returned an error: %d\n%s", ec, error);
            MPI_Abort(MPI_COMM_WORLD, 1);

        MPI_Group_compare(one_grp, failed_grp, &result);
        if (MPI_IDENT != result) {
            fprintf(stderr, "Second failed group contains incorrect processes\n");
            MPI_Group_size(failed_grp, &size);
            MPI_Group_translate_ranks(failed_grp, size, world_ranks, world_grp, failed_ranks);
            for (i = 0; i < size; i++)
                fprintf(stderr, "DEAD: %d\n", failed_ranks[i]);
            MPI_Abort(MPI_COMM_WORLD, 1);

        fprintf(stdout, " No errors\n");
    else if (rank == 2) {
        /* Supplies the one message rank 0 expects to receive successfully. */
        MPI_Ssend(buf, 10, MPI_CHAR, 0, 0, MPI_COMM_WORLD);


Пример #13
Файл: MPI-api.c Проект: 8l/rose
/*
 * declareBindings - issue one call to each MPI-1 C binding.
 *
 * Every argument is an uninitialized local, so this function only makes
 * sense as input for a compiler or source-analysis tool; it must never be
 * executed.  The calls are grouped by the section comments below.
 *
 * The trailing "// Lnn" tags are kept verbatim.
 * NOTE(review): they look like line markers from the file this function
 * was taken from -- confirm before relying on them.
 *
 * Bindings that need an object of function type (MPI_User_function,
 * MPI_Copy_function, MPI_Delete_function, MPI_Handler_function) are
 * compiled out with #if 0 / #endif.
 */
void declareBindings (void)
{
  /* === Point-to-point === */
  void* buf;
  int count;
  MPI_Datatype datatype;
  int dest;
  int tag;
  MPI_Comm comm;
  MPI_Send (buf, count, datatype, dest, tag, comm); // L12
  int source;
  MPI_Status status;
  MPI_Recv (buf, count, datatype, source, tag, comm, &status); // L15
  MPI_Get_count (&status, datatype, &count);
  MPI_Bsend (buf, count, datatype, dest, tag, comm);
  MPI_Ssend (buf, count, datatype, dest, tag, comm);
  MPI_Rsend (buf, count, datatype, dest, tag, comm);
  void* buffer;
  int size;
  MPI_Buffer_attach (buffer, size); // L22
  MPI_Buffer_detach (buffer, &size);
  MPI_Request request;
  MPI_Isend (buf, count, datatype, dest, tag, comm, &request); // L25
  MPI_Ibsend (buf, count, datatype, dest, tag, comm, &request);
  MPI_Issend (buf, count, datatype, dest, tag, comm, &request);
  MPI_Irsend (buf, count, datatype, dest, tag, comm, &request);
  MPI_Irecv (buf, count, datatype, source, tag, comm, &request);
  MPI_Wait (&request, &status);
  int flag;
  MPI_Test (&request, &flag, &status); // L32
  MPI_Request_free (&request);
  MPI_Request* array_of_requests;
  int index;
  MPI_Waitany (count, array_of_requests, &index, &status); // L36
  MPI_Testany (count, array_of_requests, &index, &flag, &status);
  MPI_Status* array_of_statuses;
  MPI_Waitall (count, array_of_requests, array_of_statuses); // L39
  MPI_Testall (count, array_of_requests, &flag, array_of_statuses);
  int incount;
  int outcount;
  int* array_of_indices;
  MPI_Waitsome (incount, array_of_requests, &outcount, array_of_indices,
                array_of_statuses); // L44--45
  MPI_Testsome (incount, array_of_requests, &outcount, array_of_indices,
                array_of_statuses); // L46--47
  MPI_Iprobe (source, tag, comm, &flag, &status); // L48
  MPI_Probe (source, tag, comm, &status);
  MPI_Cancel (&request);
  MPI_Test_cancelled (&status, &flag);
  MPI_Send_init (buf, count, datatype, dest, tag, comm, &request);
  MPI_Bsend_init (buf, count, datatype, dest, tag, comm, &request);
  MPI_Ssend_init (buf, count, datatype, dest, tag, comm, &request);
  MPI_Rsend_init (buf, count, datatype, dest, tag, comm, &request);
  MPI_Recv_init (buf, count, datatype, source, tag, comm, &request);
  MPI_Start (&request);
  MPI_Startall (count, array_of_requests);
  void* sendbuf;
  int sendcount;
  MPI_Datatype sendtype;
  int sendtag;
  void* recvbuf;
  int recvcount;
  MPI_Datatype recvtype;
  int recvtag; /* message tags are plain ints, not datatypes */
  MPI_Sendrecv (sendbuf, sendcount, sendtype, dest, sendtag,
                recvbuf, recvcount, recvtype, source, recvtag,
                comm, &status); // L67--69
  MPI_Sendrecv_replace (buf, count, datatype, dest, sendtag, source, recvtag,
                        comm, &status); // L70--71
  MPI_Datatype oldtype;
  MPI_Datatype newtype;
  MPI_Type_contiguous (count, oldtype, &newtype); // L74
  int blocklength;
  /* Nested scopes: `stride` is an int for the vector constructor and an
     MPI_Aint for the hvector one, so each needs its own declaration. */
  {
    int stride;
    MPI_Type_vector (count, blocklength, stride, oldtype, &newtype); // L78
  }
  {
    MPI_Aint stride;
    MPI_Type_hvector (count, blocklength, stride, oldtype, &newtype); // L82
  }
  int* array_of_blocklengths;
  /* Same reason: displacements are int* for indexed, MPI_Aint* otherwise. */
  {
    int* array_of_displacements;
    MPI_Type_indexed (count, array_of_blocklengths, array_of_displacements,
                      oldtype, &newtype); // L87--88
  }
  {
    MPI_Aint* array_of_displacements;
    MPI_Type_hindexed (count, array_of_blocklengths, array_of_displacements,
                       oldtype, &newtype); // L92--93
    MPI_Datatype* array_of_types;
    MPI_Type_struct (count, array_of_blocklengths, array_of_displacements,
                     array_of_types, &newtype); // L95--96
  }
  void* location;
  MPI_Aint address;
  MPI_Address (location, &address); // L100
  MPI_Aint extent;
  MPI_Type_extent (datatype, &extent); // L102
  MPI_Type_size (datatype, &size);
  MPI_Aint displacement;
  MPI_Type_lb (datatype, &displacement); // L105
  MPI_Type_ub (datatype, &displacement);
  MPI_Type_commit (&datatype);
  MPI_Type_free (&datatype);
  MPI_Get_elements (&status, datatype, &count);
  void* inbuf;
  void* outbuf;
  int outsize;
  int position;
  MPI_Pack (inbuf, incount, datatype, outbuf, outsize, &position, comm); // L114
  int insize;
  MPI_Unpack (inbuf, insize, &position, outbuf, outcount, datatype,
              comm); // L116--117
  MPI_Pack_size (incount, datatype, comm, &size);

  /* === Collectives === */
  MPI_Barrier (comm); // L121
  int root;
  MPI_Bcast (buffer, count, datatype, root, comm); // L123
  MPI_Gather (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,
              root, comm); // L124--125
  int* recvcounts;
  int* displs;
  MPI_Gatherv (sendbuf, sendcount, sendtype,
               recvbuf, recvcounts, displs, recvtype,
               root, comm); // L128--130
  MPI_Scatter (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,
               root, comm); // L131--132
  int* sendcounts;
  MPI_Scatterv (sendbuf, sendcounts, displs, sendtype,
                recvbuf, recvcount, recvtype, root, comm); // L134--135
  MPI_Allgather (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,
                 comm); // L136--137
  MPI_Allgatherv (sendbuf, sendcount, sendtype,
                  recvbuf, recvcounts, displs, recvtype,
                  comm); // L138--140
  MPI_Alltoall (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,
                comm); // L141--142
  int* sdispls;
  int* rdispls;
  MPI_Alltoallv (sendbuf, sendcounts, sdispls, sendtype,
                 recvbuf, recvcounts, rdispls, recvtype,
                 comm); // L145--147
  MPI_Op op;
  MPI_Reduce (sendbuf, recvbuf, count, datatype, op, root, comm); // L149
  /* Disabled: an object of function type cannot be declared as a local. */
#if 0
  MPI_User_function function;
  int commute;
  MPI_Op_create (function, commute, &op); // L153
#endif
  MPI_Op_free (&op); // L155
  MPI_Allreduce (sendbuf, recvbuf, count, datatype, op, comm);
  MPI_Reduce_scatter (sendbuf, recvbuf, recvcounts, datatype, op, comm);
  MPI_Scan (sendbuf, recvbuf, count, datatype, op, comm);

  /* === Groups, contexts, and communicators === */
  MPI_Group group;
  MPI_Group_size (group, &size); // L162
  int rank;
  MPI_Group_rank (group, &rank); // L164
  MPI_Group group1;
  int n;
  int* ranks1;
  MPI_Group group2;
  int* ranks2;
  MPI_Group_translate_ranks (group1, n, ranks1, group2, ranks2); // L170
  int result;
  MPI_Group_compare (group1, group2, &result); // L172
  MPI_Group newgroup;
  MPI_Group_union (group1, group2, &newgroup); // L174
  MPI_Group_intersection (group1, group2, &newgroup);
  MPI_Group_difference (group1, group2, &newgroup);
  int* ranks;
  MPI_Group_incl (group, n, ranks, &newgroup); // L178
  MPI_Group_excl (group, n, ranks, &newgroup);
  extern int ranges[][3];
  MPI_Group_range_incl (group, n, ranges, &newgroup); // L181
  MPI_Group_range_excl (group, n, ranges, &newgroup);
  MPI_Group_free (&group);
  MPI_Comm_size (comm, &size);
  MPI_Comm_rank (comm, &rank);
  MPI_Comm comm1;
  MPI_Comm comm2;
  MPI_Comm_compare (comm1, comm2, &result);
  MPI_Comm newcomm;
  MPI_Comm_dup (comm, &newcomm);
  MPI_Comm_create (comm, group, &newcomm);
  int color;
  int key;
  MPI_Comm_split (comm, color, key, &newcomm); // L194
  MPI_Comm_free (&comm);
  MPI_Comm_test_inter (comm, &flag);
  MPI_Comm_remote_size (comm, &size);
  MPI_Comm_remote_group (comm, &group);
  MPI_Comm local_comm;
  int local_leader;
  MPI_Comm peer_comm;
  int remote_leader;
  MPI_Comm newintercomm;
  MPI_Intercomm_create (local_comm, local_leader, peer_comm, remote_leader, tag,
                        &newintercomm); // L204--205
  MPI_Comm intercomm;
  MPI_Comm newintracomm;
  int high;
  MPI_Intercomm_merge (intercomm, high, &newintracomm); // L209
  int keyval;
  /* Disabled for the same reason as MPI_Op_create above. */
#if 0
  MPI_Copy_function copy_fn;
  MPI_Delete_function delete_fn;
  void* extra_state;
  MPI_Keyval_create (copy_fn, delete_fn, &keyval, extra_state); // L215
#endif
  MPI_Keyval_free (&keyval); // L217
  void* attribute_val;
  MPI_Attr_put (comm, keyval, attribute_val); // L219
  MPI_Attr_get (comm, keyval, attribute_val, &flag);
  MPI_Attr_delete (comm, keyval);

  /* === Environmental inquiry === */
  char* name;
  int resultlen;
  MPI_Get_processor_name (name, &resultlen); // L226
  MPI_Errhandler errhandler;
  /* Disabled for the same reason as MPI_Op_create above. */
#if 0
  MPI_Handler_function function;
  MPI_Errhandler_create (function, &errhandler); // L230
#endif
  MPI_Errhandler_set (comm, errhandler); // L232
  MPI_Errhandler_get (comm, &errhandler);
  MPI_Errhandler_free (&errhandler);
  int errorcode;
  char* string;
  MPI_Error_string (errorcode, string, &resultlen); // L237
  int errorclass;
  MPI_Error_class (errorcode, &errorclass); // L239
  MPI_Wtime ();
  MPI_Wtick ();
  int argc;
  char** argv;
  MPI_Init (&argc, &argv); // L244
  MPI_Finalize ();
  MPI_Initialized (&flag);
  MPI_Abort (comm, errorcode);
}
Пример #14
int main(int argc, char *argv[])
    int errs = 0;
    int rc, result;
    int ranks[1];
    MPI_Group group, outgroup;
    MPI_Comm comm;

    MTest_Init(&argc, &argv);
    /* To improve reporting of problems about operations, we
     * change the error handler to errors return */
    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);

    while (MTestGetComm(&comm, 1)) {
        if (comm == MPI_COMM_NULL)

        MPI_Comm_group(comm, &group);
        rc = MPI_Group_incl(group, 0, 0, &outgroup);
        if (rc) {
            printf("Error in creating an empty group with (0,0)\n");

            /* Some MPI implementations may reject a null "ranks" pointer */
            rc = MPI_Group_incl(group, 0, ranks, &outgroup);
            if (rc) {
                printf("Error in creating an empty group with (0,ranks)\n");

        if (outgroup != MPI_GROUP_EMPTY) {
            /* Is the group equivalent to group empty? */
            rc = MPI_Group_compare(outgroup, MPI_GROUP_EMPTY, &result);
            if (result != MPI_IDENT) {
                printf("Did not create a group equivalent to an empty group\n");
        rc = MPI_Group_free(&group);
        if (rc) {
        if (outgroup != MPI_GROUP_NULL) {
            rc = MPI_Group_free(&outgroup);
            if (rc) {


    return MTestReturnValue(errs);