/*
 * ADIOI_LUSTRE_SetInfo - process MPI_Info hints for a file on Lustre.
 *
 * On the first call for a file (fd->info == MPI_INFO_NULL, i.e. during open)
 * this installs default hints, validates that all ranks passed identical
 * striping hints, and lets rank 0 apply the striping to the file via the
 * LL_IOC_LOV_SETSTRIPE ioctl.  On every call it then processes the
 * Lustre-specific tuning hints (co_ratio, coll_threshold, ds_in_coll) and
 * finally defers to the generic hint processing.
 *
 * fd         - ADIO file handle; fd->info and fd->hints are updated in place.
 * users_info - hints supplied by the user (may be MPI_INFO_NULL).
 * error_code - set to MPI_SUCCESS, or to an "info not same" error if a hint
 *              differs across ranks.
 */
void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    char *value;
    int flag, stripe_val[3], str_factor = -1, str_unit = 0, start_iodev = -1;
    struct lov_user_md lum = { 0 };
    int err, myrank, fd_sys, perm, amode, old_mask;
    int int_val, tmp_val;
    static char myname[] = "ADIOI_LUSTRE_SETINFO";

    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL + 1) * sizeof(char));

    if ((fd->info) == MPI_INFO_NULL) {
        /* This must be part of the open call.  We can set striping
         * parameters if necessary. */
        MPI_Info_create(&(fd->info));

        ADIOI_Info_set(fd->info, "direct_read", "false");
        ADIOI_Info_set(fd->info, "direct_write", "false");
        fd->direct_read = fd->direct_write = 0;

        /* initialize lustre hints to their defaults */
        ADIOI_Info_set(fd->info, "romio_lustre_co_ratio", "1");
        fd->hints->fs_hints.lustre.co_ratio = 1;
        ADIOI_Info_set(fd->info, "romio_lustre_coll_threshold", "0");
        fd->hints->fs_hints.lustre.coll_threshold = 0;
        ADIOI_Info_set(fd->info, "romio_lustre_ds_in_coll", "enable");
        fd->hints->fs_hints.lustre.ds_in_coll = ADIOI_HINT_ENABLE;

        /* Has the user specified striping or server buffering parameters,
         * and do they have the same value on all processes? */
        if (users_info != MPI_INFO_NULL) {
            /* striping information */
            ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
                           value, &flag);
            if (flag)
                str_unit = atoi(value);

            ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
                           value, &flag);
            if (flag)
                str_factor = atoi(value);

            ADIOI_Info_get(users_info, "romio_lustre_start_iodevice",
                           MPI_MAX_INFO_VAL, value, &flag);
            if (flag)
                start_iodev = atoi(value);

            /* direct read and write */
            ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL,
                           value, &flag);
            if (flag && (!strcmp(value, "true") || !strcmp(value, "TRUE"))) {
                ADIOI_Info_set(fd->info, "direct_read", "true");
                fd->direct_read = 1;
            }
            ADIOI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL,
                           value, &flag);
            if (flag && (!strcmp(value, "true") || !strcmp(value, "TRUE"))) {
                ADIOI_Info_set(fd->info, "direct_write", "true");
                fd->direct_write = 1;
            }
        }

        /* Set striping information with ioctl.  Rank 0's values are
         * broadcast and every rank verifies its own hints match them. */
        MPI_Comm_rank(fd->comm, &myrank);
        if (myrank == 0) {
            stripe_val[0] = str_factor;
            stripe_val[1] = str_unit;
            stripe_val[2] = start_iodev;
        }
        MPI_Bcast(stripe_val, 3, MPI_INT, 0, fd->comm);

        if (stripe_val[0] != str_factor || stripe_val[1] != str_unit ||
            stripe_val[2] != start_iodev) {
            FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: All keys"
                    "-striping_factor:striping_unit:start_iodevice "
                    "need to be identical across all processes\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        } else if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) {
            /* if user has specified striping info, process 0 tries to set it */
            if (!myrank) {
                if (fd->perm == ADIO_PERM_NULL) {
                    /* no explicit permissions: derive them from the umask */
                    old_mask = umask(022);
                    umask(old_mask);
                    perm = old_mask ^ 0666;
                } else
                    perm = fd->perm;

                amode = 0;
                if (fd->access_mode & ADIO_CREATE)
                    amode = amode | O_CREAT;
                if (fd->access_mode & ADIO_RDONLY)
                    amode = amode | O_RDONLY;
                if (fd->access_mode & ADIO_WRONLY)
                    amode = amode | O_WRONLY;
                if (fd->access_mode & ADIO_RDWR)
                    amode = amode | O_RDWR;
                if (fd->access_mode & ADIO_EXCL)
                    amode = amode | O_EXCL;

                /* we need to create the file so ensure this is set;
                 * O_LOV_DELAY_CREATE postpones object allocation until the
                 * stripe ioctl below. */
                amode = amode | O_LOV_DELAY_CREATE | O_CREAT;

                fd_sys = open(fd->filename, amode, perm);
                if (fd_sys == -1) {
                    /* EEXIST is expected when the file already has objects */
                    if (errno != EEXIST)
                        fprintf(stderr,
                                "Failure to open file %s: %s (amode %d perm %d)\n",
                                fd->filename, strerror(errno), amode, perm);
                } else {
                    lum.lmm_magic = LOV_USER_MAGIC;
                    lum.lmm_pattern = 0;
                    lum.lmm_stripe_size = str_unit;
                    lum.lmm_stripe_count = str_factor;
                    lum.lmm_stripe_offset = start_iodev;
                    err = ioctl(fd_sys, LL_IOC_LOV_SETSTRIPE, &lum);
                    if (err == -1 && errno != EEXIST) {
                        fprintf(stderr, "Failure to set stripe info %s \n",
                                strerror(errno));
                    }
                    close(fd_sys);
                }
            } /* End of striping parameters validation */
        }
        MPI_Barrier(fd->comm);
    }

    /* get other hints; each is validated to be identical across ranks by
     * broadcasting rank 0's value and comparing */
    if (users_info != MPI_INFO_NULL) {
        /* CO: IO Clients/OST,
         * to keep the load balancing between clients and OSTs */
        ADIOI_Info_get(users_info, "romio_lustre_co_ratio", MPI_MAX_INFO_VAL,
                       value, &flag);
        if (flag && (int_val = atoi(value)) > 0) {
            tmp_val = int_val;
            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            if (tmp_val != int_val) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "romio_lustre_co_ratio",
                                                   error_code);
                ADIOI_Free(value);
                return;
            }
            ADIOI_Info_set(fd->info, "romio_lustre_co_ratio", value);
            fd->hints->fs_hints.lustre.co_ratio = atoi(value);
        }

        /* coll_threshold:
         * if the req size is bigger than this, collective IO may not be
         * performed. */
        ADIOI_Info_get(users_info, "romio_lustre_coll_threshold",
                       MPI_MAX_INFO_VAL, value, &flag);
        if (flag && (int_val = atoi(value)) > 0) {
            tmp_val = int_val;
            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            if (tmp_val != int_val) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "romio_lustre_coll_threshold",
                                                   error_code);
                ADIOI_Free(value);
                return;
            }
            ADIOI_Info_set(fd->info, "romio_lustre_coll_threshold", value);
            fd->hints->fs_hints.lustre.coll_threshold = atoi(value);
        }

        /* ds_in_coll: disable data sieving in collective IO */
        ADIOI_Info_get(users_info, "romio_lustre_ds_in_coll",
                       MPI_MAX_INFO_VAL, value, &flag);
        if (flag && (!strcmp(value, "disable") || !strcmp(value, "DISABLE"))) {
            tmp_val = int_val = 2;
            /* BUGFIX: count was 2, overrunning the single-int tmp_val buffer */
            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            if (tmp_val != int_val) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "romio_lustre_ds_in_coll",
                                                   error_code);
                ADIOI_Free(value);
                return;
            }
            ADIOI_Info_set(fd->info, "romio_lustre_ds_in_coll", "disable");
            fd->hints->fs_hints.lustre.ds_in_coll = ADIOI_HINT_DISABLE;
        }
    }

    /* set the values for collective I/O and data sieving parameters */
    ADIOI_GEN_SetInfo(fd, users_info, error_code);

    /* environment-level direct I/O overrides */
    if (ADIOI_Direct_read)
        fd->direct_read = 1;
    if (ADIOI_Direct_write)
        fd->direct_write = 1;

    ADIOI_Free(value);
    *error_code = MPI_SUCCESS;
}
/*
 * repairComm - rebuild a communicator after process failure (ULFM).
 *
 * Shrinks the broken communicator, determines which ranks failed, respawns
 * that many processes (optionally on spare nodes), merges them back in, and
 * splits the merged communicator so the surviving + replacement ranks end up
 * in their original order in *repaired.  On a failure during recovery the
 * function revokes the affected communicator and recurses to start over.
 *
 * broken             - communicator that experienced the failure (in)
 * repaired           - reordered, repaired communicator (out)
 * iteration          - caller's iteration number, used in log messages
 * listFails          - out: ranks (in the old communicator) that failed
 * numFails           - out: number of failed processes
 * numNodeFails       - out: number of failed nodes (spare-node recovery only)
 * sumPrevNumNodeFails- total node failures from previous recoveries
 * argc/argv          - command line, re-used to spawn replacements
 * verbosity          - 0 = quiet, higher values print progress
 */
void repairComm(MPI_Comm * broken, MPI_Comm * repaired, int iteration, int * listFails,
                int * numFails, int * numNodeFails, int sumPrevNumNodeFails,
                int argc, char ** argv, int verbosity)
{
    MPI_Comm tempShrink, unorderIntracomm, tempIntercomm;
    int i, ret, result, procsNeeded = 0, oldRank, newRank, oldGroupSize, rankKey = 0, flag;
    int * tempRanks, * failedRanks, * errCodes, rank, hostfileLineIndex;
    MPI_Group oldGroup, failedGroup, shrinkGroup;
    int hostfileLastLineIndex, tempLineIndex, * failedNodeList = NULL, * nodeList = NULL, totNodeFailed = 0;
    double startTime = 0.0, endTime;
    int nprocs, j, * shrinkMergeList;
    char hostName[128];
    gethostname(hostName, sizeof(hostName));

    char ** appToLaunch;
    char *** argvToLaunch;
    int * procsNeededToLaunch;
    MPI_Info * hostInfoToLaunch;
    char ** hostNameToLaunch;

    MPI_Comm_rank(*broken, &rank);
    if (rank == 0)
        startTime = MPI_Wtime();

#ifndef GLOBAL_DETECTION
    /* Local detection: use the acknowledged-failure group reported by ULFM */
    MPI_Comm_size(*broken, &oldGroupSize);
    MPI_Comm_group(*broken, &oldGroup);
    MPI_Comm_rank(*broken, &oldRank);
    OMPI_Comm_failure_ack(*broken);
    OMPI_Comm_failure_get_acked(*broken, &failedGroup);
    MPI_Group_size(failedGroup, &procsNeeded);
    errCodes = (int *) malloc(sizeof(int) * procsNeeded);

    /* Figure out ranks of the processes which had failed */
    tempRanks = (int *) malloc(sizeof(int) * oldGroupSize);
    failedRanks = (int *) malloc(sizeof(int) * oldGroupSize);
#pragma omp parallel for default(shared)
    for (i = 0; i < oldGroupSize; i++)
        tempRanks[i] = i;
    MPI_Group_translate_ranks(failedGroup, procsNeeded, tempRanks, oldGroup, failedRanks);
#endif

    double shrinkTime = MPI_Wtime();
    /* Shrink the broken communicator to remove failed procs */
    if (MPI_SUCCESS != (ret = OMPI_Comm_shrink(*broken, &tempShrink)))
        printf("Iteration %d: OMPI_Comm_shrink (parent): ERROR!\n", iteration);
    else {
        if (verbosity > 1)
            printf("Iteration %d: OMPI_Comm_shrink (parent): SUCCESS\n", iteration);
    }
    if (verbosity > 0 && rank == 0)
        printf("OMPI_Comm_shrink takes %0.6f Sec\n", MPI_Wtime() - shrinkTime);

#ifdef GLOBAL_DETECTION
    /* Global detection: failed group = old group minus shrunken group */
    MPI_Comm_group(*broken, &oldGroup);
    MPI_Comm_group(tempShrink, &shrinkGroup);
    MPI_Comm_size(*broken, &oldGroupSize);

    MPI_Group_compare(oldGroup, shrinkGroup, &result);
    if (result != MPI_IDENT)
        MPI_Group_difference(oldGroup, shrinkGroup, &failedGroup);

    MPI_Comm_rank(*broken, &oldRank);
    MPI_Group_size(failedGroup, &procsNeeded);
    errCodes = (int *) malloc(sizeof(int) * procsNeeded);

    /* Figure out ranks of the processes which had failed */
    tempRanks = (int *) malloc(sizeof(int) * oldGroupSize);
    failedRanks = (int *) malloc(sizeof(int) * oldGroupSize);
#pragma omp parallel for default(shared)
    for (i = 0; i < oldGroupSize; i++)
        tempRanks[i] = i;
    MPI_Group_translate_ranks(failedGroup, procsNeeded, tempRanks, oldGroup, failedRanks);
    MPI_Group_free(&shrinkGroup);
#endif

    /* Assign number of failed processes */
    *numFails = procsNeeded;
    hostNameToLaunch = (char **) malloc(procsNeeded * sizeof(char *));

    if (verbosity > 0 && rank == 0)
        printf("*** Iteration %d: Application: Number of process(es) failed in the corresponding "
               "communicator is %d ***\n", iteration, procsNeeded);

    if (rank == 0) {
        endTime = MPI_Wtime();
        printf("[%d]----- Creating failed process list takes %0.6f Sec (MPI_Wtime) -----\n",
               rank, endTime - startTime);
    }

#ifdef RECOV_ON_SPARE_NODES
    /* Determining total number of node failed, and a list of them */
    hostfileLastLineIndex = getHostfileLastLineIndex();     /* started from 0 */
    nodeList = (int *) malloc((hostfileLastLineIndex + 1) * sizeof(int));
    memset(nodeList, 0, (hostfileLastLineIndex + 1) * sizeof(int));  /* initialize with 0's */

    for (int i = 0; i < procsNeeded; ++i) {
        tempLineIndex = failedRanks[i] / SLOTS;             /* started from 0 */
        nodeList[tempLineIndex] = 1;
    }

    for (int nodeCounter = 0; nodeCounter < (hostfileLastLineIndex + 1); ++nodeCounter)
        totNodeFailed += nodeList[nodeCounter];
    *numNodeFails = totNodeFailed;

    /* Check if there is sufficient spare node available for recovery */
    if ((hostfileLastLineIndex - totNodeFailed - sumPrevNumNodeFails) < (oldGroupSize - 1) / SLOTS) {
        if (rank == 0)
            printf("[%d] There is no sufficient spare node available for recovery.\n", rank);
        exit(0);
    }

    failedNodeList = (int *) malloc(totNodeFailed * sizeof(int));
    memset(failedNodeList, 0, totNodeFailed * sizeof(int)); /* initialize with 0's */

    int failedNodeCounter = 0;
    for (int nodeCounter = 0; nodeCounter < (hostfileLastLineIndex + 1); ++nodeCounter) {
        if (nodeList[nodeCounter] == 1)
            failedNodeList[failedNodeCounter++] = nodeCounter;
    }
#endif

    char * hostNameFailed = NULL;
    /* BUGFIX: this loop used to run under "#pragma omp parallel for", but it
     * writes the shared pointer hostNameFailed from every iteration (a data
     * race) and MPI calls below are not thread-safe here; run it serially. */
    for (i = 0; i < procsNeeded; ++i) {
        /* Assign list of processes failed */
        listFails[i] = failedRanks[i];
#ifdef RUN_ON_COMPUTE_NODES
        tempLineIndex = failedRanks[i] / SLOTS;             /* started from 0 */
#ifdef RECOV_ON_SPARE_NODES
        for (int k = 0; k < totNodeFailed; ++k) {
            if (failedNodeList[k] == tempLineIndex)
                hostfileLineIndex = hostfileLastLineIndex - k - sumPrevNumNodeFails;
        }
#else
        /* Recovery on the same node (no node failure, only process failure) */
        hostfileLineIndex = tempLineIndex;
#endif
        hostNameToLaunch[i] = getHostToLaunch(hostfileLineIndex);
        hostNameFailed = getHostToLaunch(tempLineIndex);
#else
        /* Run on head node or personal machine; point at the local stack
         * buffer, which must NOT be freed */
        hostNameToLaunch[i] = (char *) hostName;
        hostNameFailed = (char *) hostName;
#endif
        if (verbosity > 0 && rank == 0)
            printf("--- Iteration %d: Application: Process %d on node %s is failed! ---\n",
                   iteration, failedRanks[i], hostNameFailed);
#ifdef RUN_ON_COMPUTE_NODES
        /* BUGFIX: getHostToLaunch() allocates; freeing once after the loop
         * leaked every allocation but the last.  Free per iteration. */
        free(hostNameFailed);
        hostNameFailed = NULL;
#endif
    }
    /* BUGFIX: the old unconditional free(hostNameFailed) here invoked
     * undefined behavior when RUN_ON_COMPUTE_NODES was not defined, since
     * hostNameFailed then pointed into the stack array hostName. */

    appToLaunch = (char **) malloc(procsNeeded * sizeof(char *));
    argvToLaunch = (char ***) malloc(procsNeeded * sizeof(char **));
    procsNeededToLaunch = (int *) malloc(procsNeeded * sizeof(int));
    hostInfoToLaunch = (MPI_Info *) malloc(procsNeeded * sizeof(MPI_Info));
    argv[argc] = NULL;
    /* BUGFIX: MPI_Info_create/MPI_Info_set are not safe to call from an OpenMP
     * parallel loop unless MPI_THREAD_MULTIPLE is guaranteed; run serially. */
    for (i = 0; i < procsNeeded; i++) {
        appToLaunch[i] = (char *) argv[0];
        argvToLaunch[i] = (char **) argv;
        procsNeededToLaunch[i] = 1;
        /* Host information where to spawn the processes */
        MPI_Info_create(&hostInfoToLaunch[i]);
        MPI_Info_set(hostInfoToLaunch[i], (char *) "host", hostNameToLaunch[i]);
        /* MPI_Info_set(hostInfoToLaunch[i], "hostfile", "hostfile"); */
    }

    double spawnTime = MPI_Wtime();
#ifdef HANG_ON_REMOVE
    OMPI_Comm_agree(tempShrink, &flag);
#endif
    /* Spawn the new process(es) */
    if (MPI_SUCCESS != (ret = MPI_Comm_spawn_multiple(procsNeeded, appToLaunch,
                                                      argvToLaunch, procsNeededToLaunch,
                                                      hostInfoToLaunch, 0, tempShrink,
                                                      &tempIntercomm, MPI_ERRCODES_IGNORE))) {
        free(tempRanks);
        free(failedRanks);
        free(errCodes);
        if (MPI_ERR_COMM == ret)
            printf("Iteration %d: MPI_Comm_spawn_multiple: Invalid communicator (parent)\n", iteration);
        if (MPI_ERR_ARG == ret)
            printf("Iteration %d: MPI_Comm_spawn_multiple: Invalid argument (parent)\n", iteration);
        if (MPI_ERR_INFO == ret)
            printf("Iteration %d: MPI_Comm_spawn_multiple: Invalid info (parent)\n", iteration);

        if ((MPI_ERR_PROC_FAILED == ret) || (MPI_ERR_REVOKED == ret)) {
            /* Start the recovery over again if there is a failure */
            OMPI_Comm_revoke(tempShrink);
            return repairComm(broken, repaired, iteration, listFails, numFails,
                              numNodeFails, sumPrevNumNodeFails, argc, argv, verbosity);
        } else {
            fprintf(stderr, "Iteration %d: Unknown error with MPI_Comm_spawn_multiple (parent): %d\n",
                    iteration, ret);
            exit(1);
        }
    } else {
        if (verbosity > 0 && rank == 0) {
            for (i = 0; i < procsNeeded; i++)
                printf("Iteration %d: MPI_Comm_spawn_multiple (parent) [spawning failed process %d on "
                       "node %s]: SUCCESS\n", iteration, failedRanks[i], hostNameToLaunch[i]);
        }
        /* Memory release. Moving the last two to the end of the function
         * causes segmentation faults for 4 processes failure */
    }
    if (verbosity > 0 && rank == 0)
        printf("MPI_Comm_spawn_multiple takes %0.6f Sec\n", MPI_Wtime() - spawnTime);

    double mergeTime = MPI_Wtime();
    /* Merge the new processes into a new communicator */
    if (MPI_SUCCESS != (ret = MPI_Intercomm_merge(tempIntercomm, false, &unorderIntracomm))) {
        free(tempRanks);
        free(failedRanks);
        if ((MPI_ERR_PROC_FAILED == ret) || (MPI_ERR_REVOKED == ret)) {
            /* Start the recovery over again if there is a failure */
            OMPI_Comm_revoke(tempIntercomm);
            return repairComm(broken, repaired, iteration, listFails, numFails,
                              numNodeFails, sumPrevNumNodeFails, argc, argv, verbosity);
        } else if (MPI_ERR_COMM == ret) {
            fprintf(stderr, "Iteration %d: Invalid communicator in MPI_Intercomm_merge (parent) %d\n",
                    iteration, ret);
            exit(1);
        } else if (MPI_ERR_INTERN == ret) {
            fprintf(stderr, "Iteration %d: Acquaring memory error in MPI_Intercomm_merge ()%d\n",
                    iteration, ret);
            exit(1);
        } else {
            fprintf(stderr, "Iteration %d: Unknown error with MPI_Intercomm_merge: %d\n",
                    iteration, ret);
            exit(1);
        }
    } else {
        if (verbosity > 1)
            printf("Iteration %d: MPI_Intercomm_merge (parent): SUCCESS\n", iteration);
    }
    if (verbosity > 0 && rank == 0)
        printf("MPI_Intercomm_merge takes %0.6f Sec\n", MPI_Wtime() - mergeTime);

    double agreeTime = MPI_Wtime();
    /* Synchronize. sometimes hangs in without this;
     * position of code and intercommunicator (not intra) is important */
#ifdef HANG_ON_REMOVE
    /* MPI_Barrier(tempIntercomm); */
    OMPI_Comm_agree(tempIntercomm, &flag);  /* since sometimes MPI_Barrier hangs */
#endif
    if (verbosity > 0 && rank == 0)
        printf("OMPI_Comm_agree takes %0.6f Sec\n", MPI_Wtime() - agreeTime);

    /* Sending failed ranks and number of processes failed to the newly
     * created ranks.  oldGroupSize is the size of communicator before
     * failure; procsNeeded is the number of processes that failed. */
    int * child = (int *) malloc(procsNeeded * sizeof(int));
#pragma omp parallel for default(shared)
    for (i = 0; i < procsNeeded; i++)
        child[i] = oldGroupSize - procsNeeded + i;

    MPI_Comm_rank(unorderIntracomm, &newRank);
    if (newRank == 0) {
        int send_val[2];
        for (i = 0; i < procsNeeded; i++) {
            send_val[0] = failedRanks[i];
            send_val[1] = procsNeeded;
            if (MPI_SUCCESS != (ret = MPI_Send(&send_val, 2, MPI_INT, child[i],
                                               MERGE_TAG, unorderIntracomm))) {
                free(tempRanks);
                free(failedRanks);
                if ((MPI_ERR_PROC_FAILED == ret) || (MPI_ERR_REVOKED == ret)) {
                    /* Start the recovery over again if there is a failure */
                    OMPI_Comm_revoke(unorderIntracomm);
                    return repairComm(broken, repaired, iteration, listFails, numFails,
                                      numNodeFails, sumPrevNumNodeFails, argc, argv, verbosity);
                } else {
                    fprintf(stderr, "Iteration %d: Unknown error with MPI_Send1 (parent): %d\n",
                            iteration, ret);
                    exit(1);
                }
            } else {
                if (verbosity > 1)
                    printf("Iteration %d: MPI_Send1 (parent): SUCCESS\n", iteration);
            }
        }
    }

    /* Split the current world (splitted from original) to order the ranks. */
    MPI_Comm_rank(unorderIntracomm, &newRank);
    MPI_Comm_size(unorderIntracomm, &nprocs);

    /* For one or more process failure (ordering) */
    shrinkMergeList = (int *) malloc(nprocs * sizeof(int));
    j = 0;
    for (i = 0; i < nprocs; i++) {
        if (rankIsNotOnFailedList(i, failedRanks, procsNeeded))
            shrinkMergeList[j++] = i;
    }
    for (i = j; i < nprocs; i++)
        shrinkMergeList[i] = failedRanks[i - j];
    for (i = 0; i < (nprocs - procsNeeded); i++) {
        if (newRank == i)
            rankKey = shrinkMergeList[i];
    }

    /* BUGFIX: ret was never assigned here, so the error branch below
     * dispatched on a stale value; capture the MPI_Comm_split result. */
    if (MPI_SUCCESS != (ret = MPI_Comm_split(unorderIntracomm, 0, rankKey, repaired))) {
        if ((MPI_ERR_PROC_FAILED == ret) || (MPI_ERR_REVOKED == ret)) {
            /* Start the recovery over again if there is a failure */
            OMPI_Comm_revoke(unorderIntracomm);
            return repairComm(broken, repaired, iteration, listFails, numFails,
                              numNodeFails, sumPrevNumNodeFails, argc, argv, verbosity);
        } else {
            fprintf(stderr, "Iteration %d: Unknown error with MPI_Comm_split (parent): %d\n",
                    iteration, ret);
            exit(1);
        }
    } else {
        if (verbosity > 1)
            printf("Iteration %d: MPI_Comm_split (parent): SUCCESS\n", iteration);
    }

    /* Release memory */
    free(appToLaunch);
    free(argvToLaunch);
    free(procsNeededToLaunch);
    free(hostInfoToLaunch);
    free(hostNameToLaunch);
    free(shrinkMergeList);
    free(errCodes);
    MPI_Comm_free(&tempShrink);
    free(tempRanks);
    free(failedRanks);
    free(child);
    MPI_Group_free(&failedGroup);
    MPI_Group_free(&oldGroup);
    MPI_Comm_free(&tempIntercomm);
    MPI_Comm_free(&unorderIntracomm);
#ifdef RECOV_ON_SPARE_NODES
    if (failedNodeList != NULL)
        free(failedNodeList);
    if (nodeList != NULL)
        free(nodeList);
#endif
}   /* repairComm() */
/* This test spawns two child jobs and has them open a port and connect to * each other. * The two children repeatedly connect, accept, and disconnect from each other. */ int main(int argc, char *argv[]) { int error; int rank, size; int numprocs = 3; char *argv1[2] = { (char*)"connector", NULL }; char *argv2[2] = { (char*)"acceptor", NULL }; MPI_Comm comm_connector, comm_acceptor, comm_parent, comm; char port[MPI_MAX_PORT_NAME] = {0}; MPI_Status status; MPI_Info spawn_path = MPI_INFO_NULL; int i, num_loops = 100; int data; int verbose = 0; if (getenv("MPITEST_VERBOSE")) { verbose = 1; } IF_VERBOSE(("init.\n")); error = MPI_Init(&argc, &argv); check_error(error, "MPI_Init"); IF_VERBOSE(("size.\n")); error = MPI_Comm_size(MPI_COMM_WORLD, &size); check_error(error, "MPI_Comm_size"); IF_VERBOSE(("rank.\n")); error = MPI_Comm_rank(MPI_COMM_WORLD, &rank); check_error(error, "MPI_Comm_rank"); if (argc == 1) { /* Make sure that the current directory is in the path. Not all implementations may honor or understand this, but it is highly recommended as it gives users a clean way to specify the location of the executable without specifying a particular directory format (e.g., this should work with both Windows and Unix implementations) */ MPI_Info_create( &spawn_path ); MPI_Info_set( spawn_path, (char*)"path", (char*)"." 
); IF_VERBOSE(("spawn connector.\n")); error = MPI_Comm_spawn((char*)"disconnect_reconnect2", argv1, numprocs, spawn_path, 0, MPI_COMM_WORLD, &comm_connector, MPI_ERRCODES_IGNORE); check_error(error, "MPI_Comm_spawn"); IF_VERBOSE(("spawn acceptor.\n")); error = MPI_Comm_spawn((char*)"disconnect_reconnect2", argv2, numprocs, spawn_path, 0, MPI_COMM_WORLD, &comm_acceptor, MPI_ERRCODES_IGNORE); check_error(error, "MPI_Comm_spawn"); MPI_Info_free( &spawn_path ); if (rank == 0) { IF_VERBOSE(("recv port.\n")); error = MPI_Recv(port, MPI_MAX_PORT_NAME, MPI_CHAR, 0, 0, comm_acceptor, &status); check_error(error, "MPI_Recv"); IF_VERBOSE(("send port.\n")); error = MPI_Send(port, MPI_MAX_PORT_NAME, MPI_CHAR, 0, 0, comm_connector); check_error(error, "MPI_Send"); } IF_VERBOSE(("barrier acceptor.\n")); error = MPI_Barrier(comm_acceptor); check_error(error, "MPI_Barrier"); IF_VERBOSE(("barrier connector.\n")); error = MPI_Barrier(comm_connector); check_error(error, "MPI_Barrier"); error = MPI_Comm_free(&comm_acceptor); check_error(error, "MPI_Comm_free"); error = MPI_Comm_free(&comm_connector); check_error(error, "MPI_Comm_free"); if (rank == 0) { printf(" No Errors\n"); fflush(stdout); } } else if ((argc == 2) && (strcmp(argv[1], "acceptor") == 0)) { IF_VERBOSE(("get_parent.\n")); error = MPI_Comm_get_parent(&comm_parent); check_error(error, "MPI_Comm_get_parent"); if (comm_parent == MPI_COMM_NULL) { printf("acceptor's parent is NULL.\n");fflush(stdout); MPI_Abort(MPI_COMM_WORLD, -1); } if (rank == 0) { IF_VERBOSE(("open_port.\n")); error = MPI_Open_port(MPI_INFO_NULL, port); check_error(error, "MPI_Open_port"); IF_VERBOSE(("0: opened port: <%s>\n", port)); IF_VERBOSE(("send.\n")); error = MPI_Send(port, MPI_MAX_PORT_NAME, MPI_CHAR, 0, 0, comm_parent); check_error(error, "MPI_Send"); } for (i=0; i<num_loops; i++) { IF_VERBOSE(("accept.\n")); error = MPI_Comm_accept(port, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &comm); check_error(error, "MPI_Comm_accept"); if (rank == 0) { data = i; 
error = MPI_Send(&data, 1, MPI_INT, 0, 0, comm); check_error(error, "MPI_Send"); error = MPI_Recv(&data, 1, MPI_INT, 0, 0, comm, &status); check_error(error, "MPI_Recv"); if (data != i) { printf("expected %d but received %d\n", i, data); fflush(stdout); MPI_Abort(MPI_COMM_WORLD, 1); } } IF_VERBOSE(("disconnect.\n")); error = MPI_Comm_disconnect(&comm); check_error(error, "MPI_Comm_disconnect"); } if (rank == 0) { IF_VERBOSE(("close_port.\n")); error = MPI_Close_port(port); check_error(error, "MPI_Close_port"); } IF_VERBOSE(("barrier.\n")); error = MPI_Barrier(comm_parent); check_error(error, "MPI_Barrier"); MPI_Comm_free( &comm_parent ); } else if ((argc == 2) && (strcmp(argv[1], "connector") == 0)) { IF_VERBOSE(("get_parent.\n")); error = MPI_Comm_get_parent(&comm_parent); check_error(error, "MPI_Comm_get_parent"); if (comm_parent == MPI_COMM_NULL) { printf("acceptor's parent is NULL.\n");fflush(stdout); MPI_Abort(MPI_COMM_WORLD, -1); } if (rank == 0) { IF_VERBOSE(("recv.\n")); error = MPI_Recv(port, MPI_MAX_PORT_NAME, MPI_CHAR, 0, 0, comm_parent, &status); check_error(error, "MPI_Recv"); IF_VERBOSE(("1: received port: <%s>\n", port)); } for (i=0; i<num_loops; i++) { IF_VERBOSE(("connect.\n")); error = MPI_Comm_connect(port, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &comm); check_error(error, "MPI_Comm_connect"); if (rank == 0) { data = -1; error = MPI_Recv(&data, 1, MPI_INT, 0, 0, comm, &status); check_error(error, "MPI_Recv"); if (data != i) { printf("expected %d but received %d\n", i, data); fflush(stdout); MPI_Abort(MPI_COMM_WORLD, 1); } error = MPI_Send(&data, 1, MPI_INT, 0, 0, comm); check_error(error, "MPI_Send"); } IF_VERBOSE(("disconnect.\n")); error = MPI_Comm_disconnect(&comm); check_error(error, "MPI_Comm_disconnect"); } IF_VERBOSE(("barrier.\n")); error = MPI_Barrier(comm_parent); check_error(error, "MPI_Barrier"); MPI_Comm_free( &comm_parent ); } else { printf("invalid command line.\n");fflush(stdout); { int ii; for (ii=0; ii<argc; ii++) { printf("argv[%d] = 
<%s>\n", ii, argv[ii]); } } fflush(stdout); MPI_Abort(MPI_COMM_WORLD, -2); } MPI_Finalize(); return 0; }
/* Verify MPI_Info_dup: the duplicate must carry the same keys, in the same
 * order, with the same values, and must be independent of later changes to
 * the original info object. */
int main( int argc, char *argv[] )
{
    int errs = 0;
    MPI_Info original, duplicate;
    int numKeys, numKeysDup, idx, vallen, found, foundDup;
    char curKey[MPI_MAX_INFO_KEY], curKeyDup[MPI_MAX_INFO_KEY];
    char curVal[MPI_MAX_INFO_VAL], curValDup[MPI_MAX_INFO_VAL];

    MTest_Init( &argc, &argv );

    MPI_Info_create( &original );
    /* Use only named keys incase the info implementation only supports
       the predefined keys (e.g., IBM) */
    MPI_Info_set( original, (char*)"host", (char*)"myhost.myorg.org" );
    MPI_Info_set( original, (char*)"file", (char*)"runfile.txt" );
    MPI_Info_set( original, (char*)"soft", (char*)"2:1000:4,3:1000:7" );

    MPI_Info_dup( original, &duplicate );

    /* key counts must agree */
    MPI_Info_get_nkeys( duplicate, &numKeysDup );
    MPI_Info_get_nkeys( original, &numKeys );
    if (numKeys != numKeysDup) {
        errs++;
        printf( "Dup'ed info has a different number of keys; is %d should be %d\n",
                numKeysDup, numKeys );
    }

    vallen = MPI_MAX_INFO_VAL;
    for (idx = 0; idx < numKeys; idx++) {
        /* MPI requires that the keys are in the same order after the dup */
        MPI_Info_get_nthkey( original, idx, curKey );
        MPI_Info_get_nthkey( duplicate, idx, curKeyDup );
        if (strcmp(curKey, curKeyDup) != 0) {
            errs++;
            printf( "keys do not match: %s should be %s\n", curKeyDup, curKey );
        }

        vallen = MPI_MAX_INFO_VAL;
        MPI_Info_get( original, curKey, vallen, curVal, &found );
        MPI_Info_get( duplicate, curKeyDup, vallen, curValDup, &foundDup );
        if (!found || !foundDup) {
            errs++;
            printf( "Info get failed for key %s\n", curKey );
        }
        else if (strcmp( curVal, curValDup ) != 0) {
            errs++;
            printf( "Info values for key %s not the same after dup\n", curKey );
        }
    }

    /* Change info and check that infodup does NOT have the new value
       (ensure that lazy dups are still duped) */
    MPI_Info_set( original, (char*)"path", (char*)"/a:/b:/c/d" );
    MPI_Info_get( duplicate, (char*)"path", vallen, curVal, &found );
    if (found) {
        errs++;
        printf( "inserting path into info changed infodup\n" );
    }

    MPI_Info_free( &original );
    MPI_Info_free( &duplicate );

    MTest_Finalize( errs );
    MPI_Finalize();
    return 0;
}
/*
 * MPI+SHM matrix transpose (Parallel Research Kernels style): B = A^T.
 *
 * Ranks are first split into groups of `group_size` consecutive ranks
 * (MPI_Comm_split keyed by my_ID/group_size), then each group derives a
 * shared-memory communicator with MPI_Comm_split_type(MPI_COMM_TYPE_SHARED);
 * a sanity check aborts if the SHM communicator is smaller than the group.
 * Only the root of each shared-memory domain (shm_ID == 0) allocates the
 * A/B column blocks and, when Num_groups > 1, the Work_in/Work_out staging
 * buffers via MPI_Win_allocate_shared (with a "no_locks" window info hint);
 * the other ranks of the domain attach to the same memory through
 * MPI_Win_shared_query.  A and B are padded by `offset` (32) doubles.
 *
 * The order x order matrix is decomposed into one column block per group
 * (Block_order = order/Num_groups columns each).  Every iteration does the
 * local (diagonal) block transpose, then Num_groups-1 communication phases:
 * the group root exchanges Block_order^2 blocks with peer group roots --
 * nonblocking MPI_Irecv/MPI_Isend by default, MPI_Sendrecv when SYNCHRONOUS
 * is defined -- while all ranks of the domain cooperatively pack Work_out
 * and scatter Work_in, each handling a chunk of Block_order/group_size
 * columns, tiled by Tile_order when 0 < Tile_order < order.  Iteration 0 is
 * an untimed warmup; timing starts at iter == 1 after a global barrier.
 * Finally the max time over ranks and the summed error are reduced to the
 * root, which validates against epsilon and prints MB/s and average time.
 *
 * NOTE(review): the "NEED A ... FENCE HERE" comments mark shared-memory
 * ordering points currently realized only by MPI_Barrier(shm_comm) --
 * confirm that is sufficient memory ordering on the target platforms.
 * NOTE(review): the group-size error message misspells "total" as "toal",
 * and the failure message says "squared error" although the test sums
 * absolute differences (ABS).  Left untouched: a documentation-only pass
 * must not alter runtime strings.
 * A(), B(), Work_in(), Work_out(), MIN, ABS, bail_out() and wtime() are
 * macros/functions defined elsewhere in this file/project.
 */
int main(int argc, char ** argv) { int Block_order; size_t Block_size; size_t Colblock_size; int Tile_order=32; int tiling; int Num_procs; /* Number of ranks */ int order; /* overall matrix order */ int send_to, recv_from; /* communicating ranks */ size_t bytes; /* total amount of data to be moved */ int my_ID; /* rank */ int root=0; /* root rank of a communicator */ int iterations; /* number of times to run the pipeline algorithm */ int i, j, it, jt, ID;/* dummies */ int iter; /* index of iteration */ int phase; /* phase in the staged communication */ size_t colstart; /* sequence number of first column owned by calling rank */ int error=0; /* error flag */ double *A_p; /* original matrix column block */ double *B_p; /* transposed matrix column block */ double *Work_in_p; /* workspace for the transpose function */ double *Work_out_p;/* workspace for the transpose function */ double abserr, abserr_tot; /* computed error */ double epsilon = 1.e-8; /* error tolerance */ double local_trans_time, /* timing parameters */ trans_time, avgtime; MPI_Status status; /* completion status of message */ MPI_Win shm_win_A; /* Shared Memory window object */ MPI_Win shm_win_B; /* Shared Memory window object */ MPI_Win shm_win_Work_in; /* Shared Memory window object */ MPI_Win shm_win_Work_out; /* Shared Memory window object */ MPI_Info rma_winfo;/* info for window */ MPI_Comm shm_comm_prep;/* Shared Memory prep Communicator */ MPI_Comm shm_comm; /* Shared Memory Communicator */ int shm_procs; /* # of ranks in shared domain */ int shm_ID; /* MPI rank within coherence domain */ int group_size; /* number of ranks per shared memory group */ int Num_groups; /* number of shared memory group */ int group_ID; /* sequence number of shared memory group */ int size_mul; /* size multiplier; 0 for non-root ranks in coherence domain*/ int istart; MPI_Request send_req, recv_req; /********************************************************************************* ** Initialize the MPI environment 
**********************************************************************************/ MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD, &my_ID); MPI_Comm_size(MPI_COMM_WORLD, &Num_procs); root = 0; /********************************************************************* ** process, test and broadcast input parameter *********************************************************************/ if (my_ID == root){ if (argc != 4 && argc !=5){ printf("Usage: %s <#ranks per coherence domain> <# iterations> <matrix order> [tile size]\n", *argv); error = 1; goto ENDOFTESTS; } group_size = atoi(*++argv); if (group_size < 1) { printf("ERROR: # ranks per coherence domain must be >= 1 : %d \n",group_size); error = 1; goto ENDOFTESTS; } if (Num_procs%group_size) { printf("ERROR: toal # %d ranks not divisible by ranks per coherence domain %d\n", Num_procs, group_size); error = 1; goto ENDOFTESTS; } iterations = atoi(*++argv); if (iterations < 1){ printf("ERROR: iterations must be >= 1 : %d \n",iterations); error = 1; goto ENDOFTESTS; } order = atoi(*++argv); if (order < Num_procs) { printf("ERROR: matrix order %d should at least # procs %d\n", order, Num_procs); error = 1; goto ENDOFTESTS; } if (order%Num_procs) { printf("ERROR: matrix order %d should be divisible by # procs %d\n", order, Num_procs); error = 1; goto ENDOFTESTS; } if (argc == 5) Tile_order = atoi(*++argv); ENDOFTESTS:; } bail_out(error); /* Broadcast input data to all ranks */ MPI_Bcast(&order, 1, MPI_INT, root, MPI_COMM_WORLD); MPI_Bcast(&iterations, 1, MPI_INT, root, MPI_COMM_WORLD); MPI_Bcast(&Tile_order, 1, MPI_INT, root, MPI_COMM_WORLD); MPI_Bcast(&group_size, 1, MPI_INT, root, MPI_COMM_WORLD); if (my_ID == root) { printf("MPI+SHM Matrix transpose: B = A^T\n"); printf("Number of ranks = %d\n", Num_procs); printf("Rank group size = %d\n", group_size); printf("Matrix order = %d\n", order); printf("Number of iterations = %d\n", iterations); if ((Tile_order > 0) && (Tile_order < order)) printf("Tile size = %d\n", 
Tile_order); else printf("Untiled\n"); #ifndef SYNCHRONOUS printf("Non-"); #endif printf("Blocking messages\n"); } /* Setup for Shared memory regions */ /* first divide WORLD in groups of size group_size */ MPI_Comm_split(MPI_COMM_WORLD, my_ID/group_size, my_ID%group_size, &shm_comm_prep); /* derive from that a SHM communicator */ MPI_Comm_split_type(shm_comm_prep, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shm_comm); MPI_Comm_rank(shm_comm, &shm_ID); MPI_Comm_size(shm_comm, &shm_procs); /* do sanity check, making sure groups did not shrink in second comm split */ if (shm_procs != group_size) MPI_Abort(MPI_COMM_WORLD, 666); /* a non-positive tile size means no tiling of the local transpose */ tiling = (Tile_order > 0) && (Tile_order < order); bytes = 2 * sizeof(double) * order * order; /********************************************************************* ** The matrix is broken up into column blocks that are mapped one to a ** rank. Each column block is made up of Num_procs smaller square ** blocks of order block_order. *********************************************************************/ Num_groups = Num_procs/group_size; Block_order = order/Num_groups; group_ID = my_ID/group_size; colstart = Block_order * group_ID; Colblock_size = order * Block_order; Block_size = Block_order * Block_order; /********************************************************************* ** Create the column block of the test matrix, the column block of the ** transposed matrix, and workspace (workspace only if #procs>1) *********************************************************************/ /* RMA win info */ MPI_Info_create(&rma_winfo); /* This key indicates that passive target RMA will not be used. * It is the one info key that MPICH actually uses for optimization. 
*/ MPI_Info_set(rma_winfo, "no_locks", "true"); /* only the root of each SHM domain specifies window of nonzero size */ size_mul = (shm_ID==0); int offset = 32; MPI_Aint size= (Colblock_size+offset)*sizeof(double)*size_mul; int disp_unit; MPI_Win_allocate_shared(size, sizeof(double), rma_winfo, shm_comm, (void *) &A_p, &shm_win_A); MPI_Win_shared_query(shm_win_A, MPI_PROC_NULL, &size, &disp_unit, (void *)&A_p); if (A_p == NULL){ printf(" Error allocating space for original matrix on node %d\n",my_ID); error = 1; } bail_out(error); A_p += offset; MPI_Win_allocate_shared(size, sizeof(double), rma_winfo, shm_comm, (void *) &B_p, &shm_win_B); MPI_Win_shared_query(shm_win_B, MPI_PROC_NULL, &size, &disp_unit, (void *)&B_p); if (B_p == NULL){ printf(" Error allocating space for transposed matrix by group %d\n",group_ID); error = 1; } bail_out(error); B_p += offset; if (Num_groups>1) { size = Block_size*sizeof(double)*size_mul; MPI_Win_allocate_shared(size, sizeof(double),rma_winfo, shm_comm, (void *) &Work_in_p, &shm_win_Work_in); MPI_Win_shared_query(shm_win_Work_in, MPI_PROC_NULL, &size, &disp_unit, (void *)&Work_in_p); if (Work_in_p == NULL){ printf(" Error allocating space for in block by group %d\n",group_ID); error = 1; } bail_out(error); MPI_Win_allocate_shared(size, sizeof(double), rma_winfo, shm_comm, (void *) &Work_out_p, &shm_win_Work_out); MPI_Win_shared_query(shm_win_Work_out, MPI_PROC_NULL, &size, &disp_unit, (void *)&Work_out_p); if (Work_out_p == NULL){ printf(" Error allocating space for out block by group %d\n",group_ID); error = 1; } bail_out(error); } /* Fill the original column matrix */ istart = 0; int chunk_size = Block_order/group_size; if (tiling) { for (j=shm_ID*chunk_size;j<(shm_ID+1)*chunk_size;j+=Tile_order) { for (i=0;i<order; i+=Tile_order) for (jt=j; jt<MIN((shm_ID+1)*chunk_size,j+Tile_order); jt++) for (it=i; it<MIN(order,i+Tile_order); it++) { A(it,jt) = (double) (order*(jt+colstart) + it); B(it,jt) = -1.0; } } } else { for 
(j=shm_ID*chunk_size;j<(shm_ID+1)*chunk_size;j++) for (i=0;i<order; i++) { A(i,j) = (double) (order*(j+colstart) + i); B(i,j) = -1.0; } } /* NEED A STORE FENCE HERE */ MPI_Barrier(shm_comm); for (iter=0; iter<=iterations; iter++) { /* start timer after a warmup iteration */ if (iter == 1) { MPI_Barrier(MPI_COMM_WORLD); local_trans_time = wtime(); } /* do the local transpose */ istart = colstart; if (!tiling) { for (i=shm_ID*chunk_size; i<(shm_ID+1)*chunk_size; i++) { for (j=0; j<Block_order; j++) B(j,i) = A(i,j); } } else { for (i=shm_ID*chunk_size; i<(shm_ID+1)*chunk_size; i+=Tile_order) { for (j=0; j<Block_order; j+=Tile_order) for (it=i; it<MIN(Block_order,i+Tile_order); it++) for (jt=j; jt<MIN(Block_order,j+Tile_order);jt++) { B(jt,it) = A(it,jt); } } } for (phase=1; phase<Num_groups; phase++){ recv_from = ((group_ID + phase )%Num_groups); send_to = ((group_ID - phase + Num_groups)%Num_groups); #ifndef SYNCHRONOUS if (shm_ID==0) { MPI_Irecv(Work_in_p, Block_size, MPI_DOUBLE, recv_from*group_size, phase, MPI_COMM_WORLD, &recv_req); } #endif istart = send_to*Block_order; if (!tiling) { for (i=shm_ID*chunk_size; i<(shm_ID+1)*chunk_size; i++) for (j=0; j<Block_order; j++){ Work_out(j,i) = A(i,j); } } else { for (i=shm_ID*chunk_size; i<(shm_ID+1)*chunk_size; i+=Tile_order) for (j=0; j<Block_order; j+=Tile_order) for (it=i; it<MIN(Block_order,i+Tile_order); it++) for (jt=j; jt<MIN(Block_order,j+Tile_order);jt++) { Work_out(jt,it) = A(it,jt); } } /* NEED A LOAD/STORE FENCE HERE */ MPI_Barrier(shm_comm); if (shm_ID==0) { #ifndef SYNCHRONOUS MPI_Isend(Work_out_p, Block_size, MPI_DOUBLE, send_to*group_size, phase, MPI_COMM_WORLD, &send_req); MPI_Wait(&recv_req, &status); MPI_Wait(&send_req, &status); #else MPI_Sendrecv(Work_out_p, Block_size, MPI_DOUBLE, send_to*group_size, phase, Work_in_p, Block_size, MPI_DOUBLE, recv_from*group_size, phase, MPI_COMM_WORLD, &status); #endif } /* NEED A LOAD FENCE HERE */ MPI_Barrier(shm_comm); istart = recv_from*Block_order; /* scatter 
received block to transposed matrix; no need to tile */ for (j=shm_ID*chunk_size; j<(shm_ID+1)*chunk_size; j++) for (i=0; i<Block_order; i++) B(i,j) = Work_in(i,j); } /* end of phase loop */ } /* end of iterations */ local_trans_time = wtime() - local_trans_time; MPI_Reduce(&local_trans_time, &trans_time, 1, MPI_DOUBLE, MPI_MAX, root, MPI_COMM_WORLD); abserr = 0.0; istart = 0; /* for (j=shm_ID;j<Block_order;j+=group_size) for (i=0;i<order; i++) { */ for (j=shm_ID*chunk_size; j<(shm_ID+1)*chunk_size; j++) for (i=0;i<order; i++) { abserr += ABS(B(i,j) - (double)(order*i + j+colstart)); } MPI_Reduce(&abserr, &abserr_tot, 1, MPI_DOUBLE, MPI_SUM, root, MPI_COMM_WORLD); if (my_ID == root) { if (abserr_tot < epsilon) { printf("Solution validates\n"); avgtime = trans_time/(double)iterations; printf("Rate (MB/s): %lf Avg time (s): %lf\n",1.0E-06*bytes/avgtime, avgtime); #ifdef VERBOSE printf("Summed errors: %f \n", abserr_tot); #endif } else { printf("ERROR: Aggregate squared error %e exceeds threshold %e\n", abserr_tot, epsilon); error = 1; } } bail_out(error); MPI_Win_free(&shm_win_A); MPI_Win_free(&shm_win_B); if (Num_groups>1) { MPI_Win_free(&shm_win_Work_in); MPI_Win_free(&shm_win_Work_out); } MPI_Info_free(&rma_winfo); MPI_Finalize(); exit(EXIT_SUCCESS); } /* end of main */
/*
 * Test changing the "access_style" hint on an open file via
 * MPI_File_set_info.
 *
 * Each rank writes its rank number to "testfile" with
 * MPI_File_write_ordered under the hint "write_once,random", switches
 * the hint to "read_once", rewinds the shared file pointer with
 * MPI_File_seek_shared, and reads the value back with
 * MPI_File_read_ordered, checking both the count and the data.
 * Because an implementation is allowed to ignore MPI_File_set_info,
 * the final MPI_File_get_info check accepts either the original hint
 * value, the updated one, or no stored value at all.  Rank 0 deletes
 * the test file at the end.  Errors are reported via the MTest harness.
 */
/* * access style is explicitly described as modifiable. values include * read_once, read_mostly, write_once, write_mostlye, random * * */ int main(int argc, char *argv[]) { int errs = 0, err; int buf[10]; int rank; MPI_Comm comm; MPI_Status status; MPI_File fh; MPI_Info infoin, infoout; char value[1024]; int flag, count; MTest_Init(&argc, &argv); comm = MPI_COMM_WORLD; MPI_Comm_rank(comm, &rank); MPI_Info_create(&infoin); MPI_Info_set(infoin, (char *) "access_style", (char *) "write_once,random"); MPI_File_open(comm, (char *) "testfile", MPI_MODE_RDWR | MPI_MODE_CREATE, infoin, &fh); buf[0] = rank; err = MPI_File_write_ordered(fh, buf, 1, MPI_INT, &status); if (err) { errs++; MTestPrintError(err); } MPI_Info_set(infoin, (char *) "access_style", (char *) "read_once"); err = MPI_File_seek_shared(fh, 0, MPI_SEEK_SET); if (err) { errs++; MTestPrintError(err); } err = MPI_File_set_info(fh, infoin); if (err) { errs++; MTestPrintError(err); } MPI_Info_free(&infoin); buf[0] = -1; err = MPI_File_read_ordered(fh, buf, 1, MPI_INT, &status); if (err) { errs++; MTestPrintError(err); } MPI_Get_count(&status, MPI_INT, &count); if (count != 1) { errs++; printf("Expected to read one int, read %d\n", count); } if (buf[0] != rank) { errs++; printf("Did not read expected value (%d)\n", buf[0]); } err = MPI_File_get_info(fh, &infoout); if (err) { errs++; MTestPrintError(err); } MPI_Info_get(infoout, (char *) "access_style", 1024, value, &flag); /* Note that an implementation is allowed to ignore the set_info, * so we'll accept either the original or the updated version */ if (!flag) { ; /* * errs++; * printf("Access style hint not saved\n"); */ } else { if (strcmp(value, "read_once") != 0 && strcmp(value, "write_once,random") != 0) { errs++; printf("value for access_style unexpected; is %s\n", value); } } MPI_Info_free(&infoout); err = MPI_File_close(&fh); if (err) { errs++; MTestPrintError(err); } MPI_Barrier(comm); MPI_Comm_rank(comm, &rank); if (rank == 0) { err = 
MPI_File_delete((char *) "testfile", MPI_INFO_NULL); if (err) { errs++; MTestPrintError(err); } } MTest_Finalize(errs); return MTestReturnValue(errs); }
int main(int argc, char **argv) { int i, len, nkeys, flag, mynod, default_striping_factor=0, nprocs, errs = 0; MPI_File fh; MPI_Info info, info_used; char *filename, key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL]; MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD, &mynod); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); /* process 0 takes the file name as a command-line argument and broadcasts it to other processes */ if (!mynod) { i = 1; while ((i < argc) && strcmp("-fname", *argv)) { if (!strcmp("-v", *argv)) verbose = 1; i++; argv++; } if (i >= argc) { fprintf(stderr, "\n*# Usage: file_info [-v] -fname filename\n\n"); MPI_Abort(MPI_COMM_WORLD, 1); } argv++; len = strlen(*argv); filename = (char *) malloc(len+1); strcpy(filename, *argv); MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(filename, len+1, MPI_CHAR, 0, MPI_COMM_WORLD); MPI_Bcast(&verbose, 1, MPI_INT, 0, MPI_COMM_WORLD); } else { MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD); filename = (char *) malloc(len+1); MPI_Bcast(filename, len+1, MPI_CHAR, 0, MPI_COMM_WORLD); MPI_Bcast(&verbose, 1, MPI_INT, 0, MPI_COMM_WORLD); } /* open the file with MPI_INFO_NULL */ MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh); /* check the default values set by ROMIO */ MPI_File_get_info(fh, &info_used); MPI_Info_get_nkeys(info_used, &nkeys); for (i=0; i<nkeys; i++) { MPI_Info_get_nthkey(info_used, i, key); MPI_Info_get(info_used, key, MPI_MAX_INFO_VAL-1, value, &flag); #ifdef INFO_DEBUG if (!mynod) fprintf(stderr, "Process %d, Default: key = %s, value = %s\n", mynod, key, value); #endif if (!strcmp("striping_factor", key)) { default_striping_factor = atoi(value); /* no check */ } else if (!strcmp("cb_buffer_size", key)) { if (atoi(value) != DFLT_CB_BUFFER_SIZE) { errs++; if (verbose) fprintf(stderr, "cb_buffer_size is %d; should be %d\n", atoi(value), DFLT_CB_BUFFER_SIZE); } } else if (!strcmp("romio_cb_read", key)) { if (strcmp(DFLT_ROMIO_CB_READ, value)) { errs++; if 
(verbose) fprintf(stderr, "romio_cb_read is set to %s; should be %s\n", value, DFLT_ROMIO_CB_READ); } } else if (!strcmp("romio_cb_write", key)) { if (strcmp(DFLT_ROMIO_CB_WRITE, value)) { errs++; if (verbose) fprintf(stderr, "romio_cb_write is set to %s; should be %s\n", value, DFLT_ROMIO_CB_WRITE); } } else if (!strcmp("cb_nodes", key)) { /* unreliable test -- just ignore value */ } else if (!strcmp("romio_no_indep_rw", key)) { if (strcmp("false", value)) { errs++; if (verbose) fprintf(stderr, "romio_no_indep_rw is set to %s; should be %s\n", value, "false"); } } else if (!strcmp("ind_rd_buffer_size", key)) { if (atoi(value) != DFLT_IND_RD_BUFFER_SIZE) { errs++; if (verbose) fprintf(stderr, "ind_rd_buffer_size is %d; should be %d\n", atoi(value), DFLT_IND_RD_BUFFER_SIZE); } } else if (!strcmp("ind_wr_buffer_size", key)) { if (atoi(value) != DFLT_IND_WR_BUFFER_SIZE) { errs++; if (verbose) fprintf(stderr, "ind_wr_buffer_size is %d; should be %d\n", atoi(value), DFLT_IND_WR_BUFFER_SIZE); } } else if (!strcmp("romio_ds_read", key)) { if (strcmp("automatic", value)) { errs++; if (verbose) fprintf(stderr, "romio_ds_read is set to %s; should be %s\n", value, "automatic"); } } else if (!strcmp("romio_ds_write", key)) { /* Unreliable test -- value is file system dependent. Ignore. 
*/ } else if (!strcmp("cb_config_list", key)) { #ifndef SKIP_CB_CONFIG_LIST_TEST if (strcmp("*:1", value)) { errs++; if (verbose) fprintf(stderr, "cb_config_list is set to %s; should be %s\n", value, "*:1"); } #endif } /* don't care about the defaults for these keys */ else if (!strcmp("romio_cb_pfr", key)) { } else if (!strcmp("romio_cb_fr_types", key)) { } else if (!strcmp("romio_cb_fr_alignment", key)) { } else if (!strcmp("romio_cb_ds_threshold", key)) { } else if (!strcmp("romio_cb_alltoall", key)) { } else { if (verbose) fprintf(stderr, "unexpected key %s (not counted as an error)\n", key); } } MPI_Info_free(&info_used); MPI_File_close(&fh); /* delete the file */ if (!mynod) MPI_File_delete(filename, MPI_INFO_NULL); MPI_Barrier(MPI_COMM_WORLD); /* set new info values. */ MPI_Info_create(&info); /* The following four hints are accepted on all machines. They can be specified at file-open time or later (any number of times). */ /* buffer size for collective I/O */ MPI_Info_set(info, "cb_buffer_size", "8388608"); /* number of processes that actually perform I/O in collective I/O */ sprintf(value, "%d", nprocs/2); MPI_Info_set(info, "cb_nodes", value); /* buffer size for data sieving in independent reads */ MPI_Info_set(info, "ind_rd_buffer_size", "2097152"); /* buffer size for data sieving in independent writes */ MPI_Info_set(info, "ind_wr_buffer_size", "1048576"); /* The following three hints related to file striping are accepted only on Intel PFS and IBM PIOFS file systems and are ignored elsewhere. They can be specified only at file-creation time; if specified later they will be ignored. */ /* number of I/O devices across which the file will be striped. 
accepted only if 0 < value < default_striping_factor; ignored otherwise */ if (default_striping_factor - 1 > 0) { sprintf(value, "%d", default_striping_factor-1); MPI_Info_set(info, "striping_factor", value); } else { sprintf(value, "%d", default_striping_factor); MPI_Info_set(info, "striping_factor", value); } /* the striping unit in bytes */ MPI_Info_set(info, "striping_unit", "131072"); #ifndef SKIP_CB_CONFIG_LIST_TEST /* set the cb_config_list so we'll get deterministic cb_nodes output */ MPI_Info_set(info, "cb_config_list", "*:*"); #endif /* the I/O device number from which to start striping the file. accepted only if 0 <= value < default_striping_factor; ignored otherwise */ sprintf(value, "%d", default_striping_factor-2); MPI_Info_set(info, "start_iodevice", value); /* The following hint about PFS server buffering is accepted only on Intel PFS. It can be specified anytime. */ MPI_Info_set(info, "pfs_svr_buf", "true"); /* open the file and set new info */ MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, info, &fh); /* check the values set */ MPI_File_get_info(fh, &info_used); MPI_Info_get_nkeys(info_used, &nkeys); for (i=0; i<nkeys; i++) { MPI_Info_get_nthkey(info_used, i, key); MPI_Info_get(info_used, key, MPI_MAX_INFO_VAL-1, value, &flag); #ifdef INFO_DEBUG if (!mynod) fprintf(stderr, "Process %d, key = %s, value = %s\n", mynod, key, value); #endif if (!strcmp("striping_factor", key)) { if ((default_striping_factor - 1 > 0) && (atoi(value) != default_striping_factor-1)) { errs++; if (verbose) fprintf(stderr, "striping_factor is %d; should be %d\n", atoi(value), default_striping_factor-1); } else if (atoi(value) != default_striping_factor) { errs++; if (verbose) fprintf(stderr, "striping_factor is %d; should be %d\n", atoi(value), default_striping_factor); } } else if (!strcmp("cb_buffer_size", key)) { if (atoi(value) != 8388608) { errs++; if (verbose) fprintf(stderr, "cb_buffer_size is %d; should be %d\n", atoi(value), 8388608); } } 
else if (!strcmp("romio_cb_read", key)) { if (strcmp(DFLT_ROMIO_CB_READ, value)) { errs++; if (verbose) fprintf(stderr, "romio_cb_read is set to %s; should be %s\n", value, DFLT_ROMIO_CB_READ); } } else if (!strcmp("romio_cb_write", key)) { if (strcmp(DFLT_ROMIO_CB_WRITE, value)) { errs++; if (verbose) fprintf(stderr, "romio_cb_write is set to %s; should be %s\n", value, DFLT_ROMIO_CB_WRITE); } } else if (!strcmp("cb_nodes", key)) { if (atoi(value) != (nprocs/2)) { errs++; if (verbose) fprintf(stderr, "cb_nodes is %d; should be %d\n", atoi(value), nprocs/2); } } else if (!strcmp("romio_no_indep_rw", key)) { if (strcmp("false", value)) { errs++; if (verbose) fprintf(stderr, "romio_no_indep_rw is set to %s; should be %s\n", value, "false"); } } else if (!strcmp("ind_rd_buffer_size", key)) { if (atoi(value) != 2097152) { errs++; if (verbose) fprintf(stderr, "ind_rd_buffer_size is %d; should be %d\n", atoi(value), 2097152); } } else if (!strcmp("ind_wr_buffer_size", key)) { if (atoi(value) != 1048576) { errs++; if (verbose) fprintf(stderr, "ind_wr_buffer_size is %d; should be %d\n", atoi(value), 1048576); } } else if (!strcmp("romio_ds_read", key)) { if (strcmp("automatic", value)) { errs++; if (verbose) fprintf(stderr, "romio_ds_read is set to %s; should be %s\n", value, "automatic"); } } else if (!strcmp("romio_ds_write", key)) { /* Unreliable test -- value is file system dependent. Ignore. 
*/ } else if (!strcmp("cb_config_list", key)) { #ifndef SKIP_CB_CONFIG_LIST_TEST if (strcmp("*:*", value)) { errs++; if (verbose) fprintf(stderr, "cb_config_list is set to %s; should be %s\n", value, "*:*"); } #endif } else if (!strcmp("romio_cb_pfr", key)) { if(strcmp("disable", value)) { errs++; if (verbose) fprintf(stderr, "romio_cb_pfr is set to %s; should be %s\n", value, "automatic"); } } else if (!strcmp("romio_cb_fr_types", key)) { if(strcmp("aar", value)) { errs++; if (verbose) fprintf(stderr, "romio_cb_fr_types is set to %s; should be %s\n", value, "aar"); } } else if (!strcmp("romio_cb_fr_alignment", key)) { if(strcmp("1", value)) { errs++; if (verbose) fprintf(stderr, "romio_cb_fr_alignment is set to %s; should be %s\n", value, "1"); } } else if (!strcmp("romio_cb_ds_threshold", key)) { if(strcmp("0", value)) { errs++; if (verbose) fprintf(stderr, "romio_cb_ds_threshold is set to %s; should be %s\n", value, "0"); } } else if (!strcmp("romio_cb_alltoall", key)) { if(strcmp("automatic", value)) { errs++; if (verbose) fprintf(stderr, "romio_cb_alltoall is set to %s; should be %s\n", value, "automatic"); } } else { if (verbose) fprintf(stderr, "unexpected key %s (not counted as an error)\n", key); } } /* Q: SHOULD WE BOTHER LOOKING AT THE OTHER PROCESSES? */ if (!mynod) { if (errs) fprintf(stderr, "Found %d errors.\n", errs); else printf(" No Errors\n"); } MPI_File_close(&fh); free(filename); MPI_Info_free(&info_used); MPI_Info_free(&info); MPI_Finalize(); return 0; }
/*
 * Regression test: changing a PnetCDF-created file's header size must
 * not corrupt variable offsets.
 *
 * Effectively single-process (ranks > 0 jump to fn_exit): create a file
 * with nc_create_par containing NVARS variables -- odd indices are
 * fixed-size over dimension X, even indices are record variables over
 * (Y unlimited, X) -- write a known pattern (i*10 + j) to each with
 * independent access, and close.  Then re-open the file in parallel,
 * enter redef mode and attach a text attribute to every variable so the
 * header grows, leave define mode, and read every variable back,
 * printing a diagnostic for any value that no longer matches.
 *
 * When DISABLE_PNETCDF_ALIGNMENT is defined, the nc_header_align_size
 * and nc_var_align_size hints are forced to 1 so that no alignment
 * padding masks the offset problem; otherwise info stays MPI_INFO_NULL.
 * Note MPI_Info_free sets info back to MPI_INFO_NULL, so passing it to
 * the second nc_open_par is safe.
 * NOTE(review): `buf` from malloc is never freed before MPI_Finalize --
 * harmless for a test, but worth confirming intent.
 */
int main(int argc, char* argv[]) { int i, j, rank, nprocs, ncid, cmode, varid[NVARS], dimid[2], *buf; int err = 0; char str[32]; size_t start[2], count[2]; MPI_Comm comm=MPI_COMM_SELF; MPI_Info info=MPI_INFO_NULL; printf("\n*** Testing bug fix with changing pnetcdf variable offsets..."); MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (nprocs > 1 && rank == 0) printf("This test program is intended to run on ONE process\n"); if (rank > 0) goto fn_exit; /* first, use PnetCDF to create a file with default header/variable alignment */ #ifdef DISABLE_PNETCDF_ALIGNMENT MPI_Info_create(&info); MPI_Info_set(info, "nc_header_align_size", "1"); MPI_Info_set(info, "nc_var_align_size", "1"); #endif cmode = NC_PNETCDF | NC_CLOBBER; if (nc_create_par(FILENAME, cmode, comm, info, &ncid)) ERR_RET; /* define dimension */ if (nc_def_dim(ncid, "Y", NC_UNLIMITED, &dimid[0])) ERR; if (nc_def_dim(ncid, "X", NX, &dimid[1])) ERR; /* Odd numbers are fixed variables, even numbers are record variables */ for (i=0; i<NVARS; i++) { if (i%2) { sprintf(str,"fixed_var_%d",i); if (nc_def_var(ncid, str, NC_INT, 1, dimid+1, &varid[i])) ERR; } else { sprintf(str,"record_var_%d",i); if (nc_def_var(ncid, str, NC_INT, 2, dimid, &varid[i])) ERR; } } if (nc_enddef(ncid)) ERR; for (i=0; i<NVARS; i++) { /* Note NC_INDEPENDENT is the default */ if (nc_var_par_access(ncid, varid[i], NC_INDEPENDENT)) ERR; } /* write all variables */ buf = (int*) malloc(NX * sizeof(int)); for (i=0; i<NVARS; i++) { for (j=0; j<NX; j++) buf[j] = i*10 + j; if (i%2) { start[0] = 0; count[0] = NX; if (nc_put_vara_int(ncid, varid[i], start, count, buf)) ERR; } else { start[0] = 0; start[1] = 0; count[0] = 1; count[1] = NX; if (nc_put_vara_int(ncid, varid[i], start, count, buf)) ERR; } } if (nc_close(ncid)) ERR; if (info != MPI_INFO_NULL) MPI_Info_free(&info); /* re-open the file with netCDF (parallel) and enter define mode */ if (nc_open_par(FILENAME, NC_WRITE|NC_PNETCDF, comm, 
info, &ncid)) ERR_RET; if (nc_redef(ncid)) ERR; /* add attributes to make header grow */ for (i=0; i<NVARS; i++) { sprintf(str, "annotation_for_var_%d",i); if (nc_put_att_text(ncid, varid[i], "text_attr", strlen(str), str)) ERR; } if (nc_enddef(ncid)) ERR; /* read variables and check their contents */ for (i=0; i<NVARS; i++) { for (j=0; j<NX; j++) buf[j] = -1; if (i%2) { start[0] = 0; count[0] = NX; if (nc_get_var_int(ncid, varid[i], buf)) ERR; for (j=0; j<NX; j++) if (buf[j] != i*10 + j) printf("unexpected read value var i=%d buf[j=%d]=%d should be %d\n",i,j,buf[j],i*10+j); } else { start[0] = 0; start[1] = 0; count[0] = 1; count[1] = NX; if (nc_get_vara_int(ncid, varid[i], start, count, buf)) ERR; for (j=0; j<NX; j++) if (buf[j] != i*10+j) printf("unexpected read value var i=%d buf[j=%d]=%d should be %d\n",i,j,buf[j],i*10+j); } } if (nc_close(ncid)) ERR; fn_exit: MPI_Finalize(); SUMMARIZE_ERR; FINAL_RESULTS; return 0; }
int main(int argc, char *argv[]) { MPI_Info infos[MAX_INFOS]; char key[64], value[64]; int errs = 0; int i, j; MTest_Init(&argc, &argv); for (i = 0; i < MAX_INFOS; i++) { MPI_Info_create(&infos[i]); #ifdef DBG printf("Info handle is %x\n", infos[i]); #endif for (j = 0; j < info_list; j++) { sprintf(key, "key%d-%d", i, j); sprintf(value, "value%d-%d", i, j); #ifdef DBG printf("Creating key/value %s=%s\n", key, value); #endif MPI_Info_set(infos[i], key, value); } #ifdef DBG { int nkeys; MPI_Info_get_nkeys(infos[0], &nkeys); if (nkeys != info_list) { printf("infos[0] changed at %d info\n", i); } } #endif } for (i = 0; i < MAX_INFOS; i++) { int nkeys; /*printf("info = %x\n", infos[i]); * print_handle(infos[i]); printf("\n"); */ MPI_Info_get_nkeys(infos[i], &nkeys); if (nkeys != info_list) { errs++; if (errs < MAX_ERRORS) { printf("Wrong number of keys for info %d; got %d, should be %d\n", i, nkeys, info_list); } } for (j = 0; j < nkeys; j++) { char keystr[64]; char valstr[64]; int flag; MPI_Info_get_nthkey(infos[i], j, key); sprintf(keystr, "key%d-%d", i, j); if (strcmp(keystr, key) != 0) { errs++; if (errs < MAX_ERRORS) { printf("Wrong key for info %d; got %s expected %s\n", i, key, keystr); } continue; } MPI_Info_get(infos[i], key, sizeof(value), value, &flag); if (!flag) { errs++; if (errs < MAX_ERRORS) { printf("Get failed to return value for info %d\n", i); } continue; } sprintf(valstr, "value%d-%d", i, j); if (strcmp(valstr, value) != 0) { errs++; if (errs < MAX_ERRORS) { printf("Wrong value for info %d; got %s expected %s\n", i, value, valstr); } } } } for (i = 0; i < MAX_INFOS; i++) { MPI_Info_free(&infos[i]); } MTest_Finalize(errs); return MTestReturnValue(errs); }
/*
 * MTestGetWin - return the next RMA window configuration in a fixed
 * cycle, so callers can run the same test over several window flavors:
 *
 *   0: active-target window on a static 1024-byte buffer
 *   1: passive-target window on MPI_Alloc_mem memory
 *   2: active-target window where each rank contributes rank*64 bytes
 *      (rank 0 contributes a zero-size window)
 *   3: like 2, but created with the "no_locks" info hint
 *
 * A window attribute keyed by mem_keyval records how the memory was
 * obtained (0 = static, 1 = malloc, 2 = MPI_Alloc_mem) so the matching
 * free routine can release it correctly.  win_index and winName are
 * file-scope state shared with the rest of the harness.
 *
 * Returns the incremented window index; returns 0 (and creates no
 * window) once all configurations have been handed out.
 * The mustBePassive argument is currently unused.
 *
 * Fix: case 3 previously set the info key "nolocks", which is not the
 * key reserved by the MPI standard for window creation; the hint was
 * therefore silently ignored.  The reserved key is "no_locks".
 */
int MTestGetWin( MPI_Win *win, int mustBePassive )
{
    static char actbuf[1024];
    static char *pasbuf;
    char *buf;
    int n, rank, merr;
    MPI_Info info;

    if (mem_keyval == MPI_KEYVAL_INVALID) {
        /* Create the keyval used to record how window memory was allocated */
        merr = MPI_Win_create_keyval( MPI_WIN_NULL_COPY_FN,
                                      MPI_WIN_NULL_DELETE_FN, &mem_keyval, 0 );
        if (merr) MTestPrintError( merr );
    }

    switch (win_index) {
    case 0:
        /* Active target window */
        merr = MPI_Win_create( actbuf, 1024, 1, MPI_INFO_NULL,
                               MPI_COMM_WORLD, win );
        if (merr) MTestPrintError( merr );
        winName = "active-window";
        merr = MPI_Win_set_attr( *win, mem_keyval, (void *)0 );
        if (merr) MTestPrintError( merr );
        break;
    case 1:
        /* Passive target window */
        merr = MPI_Alloc_mem( 1024, MPI_INFO_NULL, &pasbuf );
        if (merr) MTestPrintError( merr );
        merr = MPI_Win_create( pasbuf, 1024, 1, MPI_INFO_NULL,
                               MPI_COMM_WORLD, win );
        if (merr) MTestPrintError( merr );
        winName = "passive-window";
        merr = MPI_Win_set_attr( *win, mem_keyval, (void *)2 );
        if (merr) MTestPrintError( merr );
        break;
    case 2:
        /* Active target; all windows different sizes */
        merr = MPI_Comm_rank( MPI_COMM_WORLD, &rank );
        if (merr) MTestPrintError( merr );
        n = rank * 64;
        if (n)
            buf = (char *)malloc( n );
        else
            buf = 0;
        merr = MPI_Win_create( buf, n, 1, MPI_INFO_NULL,
                               MPI_COMM_WORLD, win );
        if (merr) MTestPrintError( merr );
        winName = "active-all-different-win";
        merr = MPI_Win_set_attr( *win, mem_keyval, (void *)1 );
        if (merr) MTestPrintError( merr );
        break;
    case 3:
        /* Active target, no locks set */
        merr = MPI_Comm_rank( MPI_COMM_WORLD, &rank );
        if (merr) MTestPrintError( merr );
        n = rank * 64;
        if (n)
            buf = (char *)malloc( n );
        else
            buf = 0;
        merr = MPI_Info_create( &info );
        if (merr) MTestPrintError( merr );
        /* FIX: "no_locks" is the reserved key; "nolocks" was ignored */
        merr = MPI_Info_set( info, (char*)"no_locks", (char*)"true" );
        if (merr) MTestPrintError( merr );
        merr = MPI_Win_create( buf, n, 1, info, MPI_COMM_WORLD, win );
        if (merr) MTestPrintError( merr );
        merr = MPI_Info_free( &info );
        if (merr) MTestPrintError( merr );
        winName = "active-nolocks-all-different-win";
        merr = MPI_Win_set_attr( *win, mem_keyval, (void *)1 );
        if (merr) MTestPrintError( merr );
        break;
    default:
        /* All configurations exhausted; reset so the post-increment
           below returns 0 */
        win_index = -1;
    }
    win_index++;
    return win_index;
}
/*
 * MPI_Put latency benchmark (OSU-style) between exactly two ranks,
 * using post/start/complete/wait (PSCW) active-target synchronization.
 *
 * For each power-of-two message size from 1 to MAX_SIZE (plus size 0),
 * a window is created over rank-local r_buf and, after `skip` warmup
 * rounds, rank 0 times `loop` rounds of: start an access epoch to the
 * peer, put `size` bytes, complete, then expose its own window via
 * post/wait for the peer's put.  Rank 1 performs the mirror sequence.
 * The reported figure is one-way latency in microseconds:
 * (t_end - t_start) * 1e6 / loop / 2.  loop/skip are reduced to
 * LOOP_LARGE/SKIP_LARGE once size exceeds LARGE_MESSAGE_SIZE.
 *
 * The --no-hints (-n) option skips the MVAPICH2-specific "alloc_shm"
 * info hint on the receive-buffer allocation; otherwise MPI_Alloc_mem
 * is asked for shared memory that MVAPICH2 can optimize.  Both buffers
 * are rounded up to the next page boundary before use, so the raw
 * allocations A and B are kept for MPI_Free_mem.  Exits with an error
 * unless run on exactly two processes.
 */
int main (int argc, char *argv[]) { int rank, destrank, nprocs, i; int page_size; char *A, *B; char *s_buf, *r_buf; MPI_Group comm_group, group; MPI_Win win; MPI_Info win_info; int size, no_hints = 0; double t_start=0.0, t_end=0.0; int skip = 1000; int loop = 10000; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MPI_Comm_rank(MPI_COMM_WORLD, &rank); if(nprocs != 2) { if(rank == 0) { fprintf(stderr, "This test requires exactly two processes\n"); } MPI_Finalize(); return EXIT_FAILURE; } while (1) { static struct option long_options[] = {{"no-hints", no_argument, NULL, 'n'}, {0, 0, 0, 0}}; int option, index; option = getopt_long (argc, argv, "n::", long_options, &index); if (option == -1) { break; } switch (option) { case 'n': no_hints = 1; break; default: if (rank == 0) { fprintf(stderr, "Invalid Option \n"); } MPI_Finalize(); return EXIT_FAILURE; } } MPI_Alloc_mem (MYBUFSIZE, MPI_INFO_NULL, &A); if (NULL == A) { fprintf(stderr, "[%d] Buffer Allocation Failed \n", rank); exit(-1); } if (no_hints == 0) { /* Providing MVAPICH2 specific hint to allocate memory * in shared space. MVAPICH2 optimizes communication * on windows created in this memory */ MPI_Info_create(&win_info); MPI_Info_set(win_info, "alloc_shm", "true"); MPI_Alloc_mem (MYBUFSIZE, win_info, &B); } else { MPI_Alloc_mem (MYBUFSIZE, MPI_INFO_NULL, &B); } if (NULL == B) { fprintf(stderr, "[%d] Buffer Allocation Failed \n", rank); exit(-1); } page_size = getpagesize(); assert(page_size <= MAX_ALIGNMENT); s_buf = (char *) (((unsigned long) A + (page_size - 1)) / page_size * page_size); r_buf = (char *) (((unsigned long) B + (page_size - 1)) / page_size * page_size); memset(s_buf, 0, MAX_SIZE); memset(r_buf, 1, MAX_SIZE); if(rank == 0) { fprintf(stdout, HEADER); fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, "Latency (us)"); fflush(stdout); } MPI_Comm_group(MPI_COMM_WORLD, &comm_group); for (size = 0; size <= MAX_SIZE; size = (size ? 
size * 2 : 1)) { if (size > LARGE_MESSAGE_SIZE) { loop = LOOP_LARGE; skip = SKIP_LARGE; } MPI_Win_create(r_buf, size, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win); if (rank == 0) { destrank = 1; MPI_Group_incl(comm_group, 1, &destrank, &group); MPI_Barrier(MPI_COMM_WORLD); for (i = 0; i < skip + loop; i++) { MPI_Win_start (group, 0, win); if (i == skip) { t_start = MPI_Wtime (); } MPI_Put(s_buf, size, MPI_CHAR, 1, 0, size, MPI_CHAR, win); MPI_Win_complete(win); MPI_Win_post(group, 0, win); MPI_Win_wait(win); } t_end = MPI_Wtime (); } else { /* rank=1 */ destrank = 0; MPI_Group_incl(comm_group, 1, &destrank, &group); MPI_Barrier(MPI_COMM_WORLD); for (i = 0; i < skip + loop; i++) { MPI_Win_post(group, 0, win); MPI_Win_wait(win); MPI_Win_start(group, 0, win); MPI_Put(s_buf, size, MPI_CHAR, 0, 0, size, MPI_CHAR, win); MPI_Win_complete(win); } } MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) { fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, FLOAT_PRECISION, (t_end - t_start) * 1.0e6 / loop / 2); } MPI_Group_free(&group); MPI_Win_free(&win); } if (no_hints == 0) { MPI_Info_free(&win_info); } MPI_Free_mem(A); MPI_Free_mem(B); MPI_Group_free(&comm_group); MPI_Finalize(); return EXIT_SUCCESS; }
/*
 * Two-process ROMIO test of independent I/O with three access patterns:
 *   1. noncontiguous in memory, noncontiguous in file (vector type both sides)
 *   2. noncontiguous in memory, contiguous in file (default view, offsets)
 *   3. contiguous in memory, noncontiguous in file (vector file view only)
 * Each process writes an interleaved stride-2 pattern, reads it back, and
 * verifies that untouched buffer slots still hold the -1 sentinel.
 * Returns 0; the error count is reported on stdout/stderr.
 */
int main(int argc, char **argv)
{
    int *buf, i, mynod, nprocs, len, b[3];
    int errs=0, toterrs;
    MPI_Aint d[3];
    MPI_File fh;
    MPI_Status status;
    char *filename;
    MPI_Datatype typevec, newtype, t[3];
    MPI_Info info;

    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &mynod);

    if (nprocs != 2) {
        fprintf(stderr, "Run this program on two processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* process 0 takes the file name as a command-line argument and
       broadcasts it to other processes (length first, then string) */
    if (!mynod) {
        i = 1;
        while ((i < argc) && strcmp("-fname", *argv)) {
            i++;
            argv++;
        }
        if (i >= argc) {
            fprintf(stderr, "\n*# Usage: noncontig -fname filename\n\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        argv++;
        len = strlen(*argv);
        filename = (char *) malloc(len+1);
        strcpy(filename, *argv);
        MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
        MPI_Bcast(filename, len+1, MPI_CHAR, 0, MPI_COMM_WORLD);
    }
    else {
        MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
        filename = (char *) malloc(len+1);
        MPI_Bcast(filename, len+1, MPI_CHAR, 0, MPI_COMM_WORLD);
    }

    buf = (int *) malloc(SIZE*sizeof(int));

    /* Every other int: SIZE/2 blocks of 1 int with stride 2. */
    MPI_Type_vector(SIZE/2, 1, 2, MPI_INT, &typevec);

    /* create a struct type with explicitly set LB and UB; displacements
     * of typevec are such that the types for the two processes won't
     * overlap.
     * NOTE(review): MPI_LB/MPI_UB and MPI_Type_struct were removed in
     * MPI-3; a modern rewrite would use MPI_Type_create_resized and
     * MPI_Type_create_struct.  Kept as-is since this is a legacy test. */
    b[0] = b[1] = b[2] = 1;
    d[0] = 0;
    d[1] = mynod*sizeof(int);   /* rank 1 shifted by one int: interleave */
    d[2] = SIZE*sizeof(int);    /* extent covers the whole SIZE-int region */
    t[0] = MPI_LB;
    t[1] = typevec;
    t[2] = MPI_UB;

    /* keep the struct, ditch the vector */
    MPI_Type_struct(3, b, d, t, &newtype);
    MPI_Type_commit(&newtype);
    MPI_Type_free(&typevec);

    MPI_Info_create(&info);
    /* I am setting these info values for testing purposes only. It is
       better to use the default values in practice. */
    MPI_Info_set(info, "ind_rd_buffer_size", "1209");
    MPI_Info_set(info, "ind_wr_buffer_size", "1107");

    if (!mynod) {
#if VERBOSE
        fprintf(stderr, "\ntesting noncontiguous in memory, noncontiguous in file using independent I/O\n");
#endif
        MPI_File_delete(filename, MPI_INFO_NULL);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, info, &fh);

    /* set the file view for each process -- now writes go into the non-
     * overlapping but interleaved region defined by the struct type up above */
    MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", info);

    /* fill our buffer with a pattern and write, using our type again */
    for (i=0; i<SIZE; i++) buf[i] = i + mynod*SIZE;
    MPI_File_write(fh, buf, 1, newtype, &status);

    MPI_Barrier(MPI_COMM_WORLD);

    /* fill the entire buffer with -1's.  read back with type.
     * note that the result of this read should be that every other value
     * in the buffer is still -1, as defined by our type. */
    for (i=0; i<SIZE; i++) buf[i] = -1;

    MPI_File_read_at(fh, 0, buf, 1, newtype, &status);

    /* check that all the values read are correct and also that we didn't
     * overwrite any of the -1 values that we shouldn't have.
     * (rank 0's data sits in even slots, rank 1's in odd slots) */
    for (i=0; i<SIZE; i++) {
        if (!mynod) {
            if ((i%2) && (buf[i] != -1)) {
                errs++;
                fprintf(stderr, "Process %d: buf %d is %d, should be -1\n", mynod, i, buf[i]);
            }
            if (!(i%2) && (buf[i] != i)) {
                errs++;
                fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", mynod, i, buf[i], i);
            }
        }
        else {
            if ((i%2) && (buf[i] != i + mynod*SIZE)) {
                errs++;
                fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", mynod, i, buf[i], i + mynod*SIZE);
            }
            if (!(i%2) && (buf[i] != -1)) {
                errs++;
                fprintf(stderr, "Process %d: buf %d is %d, should be -1\n", mynod, i, buf[i]);
            }
        }
    }

    MPI_File_close(&fh);

    MPI_Barrier(MPI_COMM_WORLD);

    if (!mynod) {
#if VERBOSE
        fprintf(stderr, "\ntesting noncontiguous in memory, contiguous in file using independent I/O\n");
#endif
        MPI_File_delete(filename, MPI_INFO_NULL);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, info, &fh);

    /* in this case we write to either the first half or the second half
     * of the file space, so the regions are not interleaved.  this is done
     * by leaving the file view at its default. */
    for (i=0; i<SIZE; i++) buf[i] = i + mynod*SIZE;
    MPI_File_write_at(fh, mynod*(SIZE/2)*sizeof(int), buf, 1, newtype, &status);

    MPI_Barrier(MPI_COMM_WORLD);

    /* same as before; fill buffer with -1's and then read; every other
     * value should still be -1 after the read */
    for (i=0; i<SIZE; i++) buf[i] = -1;

    MPI_File_read_at(fh, mynod*(SIZE/2)*sizeof(int), buf, 1, newtype, &status);

    /* verify that the buffer looks like it should */
    for (i=0; i<SIZE; i++) {
        if (!mynod) {
            if ((i%2) && (buf[i] != -1)) {
                errs++;
                fprintf(stderr, "Process %d: buf %d is %d, should be -1\n", mynod, i, buf[i]);
            }
            if (!(i%2) && (buf[i] != i)) {
                errs++;
                fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", mynod, i, buf[i], i);
            }
        }
        else {
            if ((i%2) && (buf[i] != i + mynod*SIZE)) {
                errs++;
                fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", mynod, i, buf[i], i + mynod*SIZE);
            }
            if (!(i%2) && (buf[i] != -1)) {
                errs++;
                fprintf(stderr, "Process %d: buf %d is %d, should be -1\n", mynod, i, buf[i]);
            }
        }
    }

    MPI_File_close(&fh);

    MPI_Barrier(MPI_COMM_WORLD);

    if (!mynod) {
#if VERBOSE
        fprintf(stderr, "\ntesting contiguous in memory, noncontiguous in file using independent I/O\n");
#endif
        MPI_File_delete(filename, MPI_INFO_NULL);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, info, &fh);

    /* set the file view so that we have interleaved access again */
    MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", info);

    /* this time write a contiguous buffer */
    for (i=0; i<SIZE; i++) buf[i] = i + mynod*SIZE;
    MPI_File_write(fh, buf, SIZE, MPI_INT, &status);

    MPI_Barrier(MPI_COMM_WORLD);

    /* fill buffer with -1's; this time they will all be overwritten */
    for (i=0; i<SIZE; i++) buf[i] = -1;

    MPI_File_read_at(fh, 0, buf, SIZE, MPI_INT, &status);

    for (i=0; i<SIZE; i++) {
        if (!mynod) {
            if (buf[i] != i) {
                errs++;
                fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", mynod, i, buf[i], i);
            }
        }
        else {
            if (buf[i] != i + mynod*SIZE) {
                errs++;
                fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", mynod, i, buf[i], i + mynod*SIZE);
            }
        }
    }

    MPI_File_close(&fh);

    /* Aggregate per-process error counts and print the verdict once. */
    MPI_Allreduce( &errs, &toterrs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
    if (mynod == 0) {
        if( toterrs > 0) {
            fprintf( stderr, "Found %d errors\n", toterrs );
        }
        else {
            fprintf( stdout, " No Errors\n" );
        }
    }

    MPI_Type_free(&newtype);
    MPI_Info_free(&info);
    free(buf);
    free(filename);
    MPI_Finalize();
    return 0;
}
/*
 * Set MPI-IO hints for the PLFS ADIO driver.
 *
 * Validates that each PLFS-specific hint, when present in users_info, has
 * the same value on every process (rank 0's value is broadcast and compared),
 * copies the hint into fd->info, then delegates the generic hint processing
 * to ADIOI_GEN_SetInfo (or ADIOI_CRAY_SetInfo on Cray builds).
 * On success *error_code carries the generic SetInfo result.
 */
void ADIOI_PLFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    static char myname[] = "ADIOI_PLFS_SETINFO";
    char *value;
    int flag, tmp_val = -1, save_val = -1;
    int rank, i, gen_error_code;

    MPI_Comm_rank( fd->comm, &rank );
    *error_code = MPI_SUCCESS;

#ifdef ROMIO_CRAY
    /* Process any hints set with the MPICH_MPIIO_HINTS environment variable. */
    ADIOI_CRAY_getenv_mpiio_hints(&users_info, fd);
#endif /* ROMIO_CRAY */

    // here's the way to check whether we're in container mode
    // in case we want to error out if people try non-sensical hints
    if (plfs_get_filetype(fd->filename) != CONTAINER) {
        // not currently do any checking here
    }

    // if the hint structure hasn't already been created
    // however, if users_info==MPI_INFO_NULL, maybe we don't need to do this
    if ((fd->info) == MPI_INFO_NULL) {
        MPI_Info_create(&(fd->info));
    }

    /*
     * For every plfs hint, go through and check whether every rank
     * got the same value.  If not, abort.  Then copy the value from
     * the incoming MPI_Info into the ADIO_File info.
     *
     * NOTE(review): the MPI_Bcast below is only executed by ranks where
     * the hint is present (flag != 0).  If a hint were set on some ranks
     * but not others, the broadcast would mismatch and presumably hang —
     * verify callers always supply hints uniformly.
     */
    if (users_info != MPI_INFO_NULL) {
        static const char *phints[] = {
            "plfs_disable_broadcast", /* don't have 0 broadcast to all */
            "plfs_disable_paropen",   /* don't do par_index_read */
            "plfs_uniform_restart",   /* only read one index file each */
            NULL                      /* last must be NULL */
        };
        value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
        for(i = 0; phints[i] != NULL; i++) {
            MPI_Info_get(users_info, (char *)phints[i], MPI_MAX_INFO_VAL,
                         value, &flag);
            if (flag) {
                save_val = tmp_val = atoi(value);
                MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
                if (tmp_val != save_val) { /* same for all? */
                    FPRINTF(stderr, "%s: "
                            "the value for key \"%s\" must be "
                            "the same on all processes\n",
                            myname, phints[i]);
                    MPI_Abort(MPI_COMM_WORLD, 1);
                }
                MPI_Info_set(fd->info, (char *)phints[i], value);
                //fprintf(stderr, "rank %d: set %s -> %s\n",rank,phints[i],value);
            }
        }
        ADIOI_Free(value);
    }

#ifdef ROMIO_CRAY
    /* --BEGIN CRAY ADDITION-- */
    /* Calling the CRAY SetInfo() will add the Cray supported features:
     * - set the number of aggregators to the number of compute nodes
     * - MPICH_MPIIO_HINTS environment variable
     * - MPICH_MPIIO_HINTS_DISPLAY env var to display of hints values
     * - etc */
    ADIOI_CRAY_SetInfo(fd, users_info, &gen_error_code);
#else
    ADIOI_GEN_SetInfo(fd, users_info, &gen_error_code);
#endif
    /* --END CRAY ADDITION-- */

    /* If this function is successful, use the error code
     * returned from ADIOI_GEN_SetInfo
     * otherwise use the error_code generated by this function */
    if(*error_code == MPI_SUCCESS) {
        *error_code = gen_error_code;
    }
}
/*
 * PnetCDF nonblocking-I/O test.
 *
 * The global array is NY x (NX * nprocs).  After zero-filling it with one
 * blocking collective write, each process posts myNX nonblocking single-
 * column writes (iput), waits for them, closes and reopens the file, reads
 * the columns back with nonblocking gets, and verifies every value equals
 * the writer's rank.  Finally reports any PnetCDF-internal heap residue.
 */
int main(int argc, char** argv)
{
    extern int optind;
    char filename[256];
    int i, j, verbose=1, rank, nprocs, err, nerrs=0;
    int myNX, G_NX, myOff, num_reqs;
    int ncid, cmode, varid, dimid[2], *reqs, *sts, **buf;
    MPI_Offset start[2], count[2];
    MPI_Info info;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    /* get command-line arguments */
    while ((i = getopt(argc, argv, "hq")) != EOF)
        switch(i) {
            case 'q': verbose = 0;
                      break;
            case 'h':
            default:  if (rank==0) usage(argv[0]);
                      MPI_Finalize();
                      return 1;
        }
    if (argv[optind] == NULL) strcpy(filename, "testfile.nc");
    else                      snprintf(filename, 256, "%s", argv[optind]);

    /* set an MPI-IO hint to disable file offset alignment for fixed-size
     * variables */
    MPI_Info_create(&info);
    MPI_Info_set(info, "nc_var_align_size", "1");

    cmode = NC_CLOBBER | NC_64BIT_DATA;
    err = ncmpi_create(MPI_COMM_WORLD, filename, cmode, info, &ncid); ERR
    /* MPI_Info_free sets info to MPI_INFO_NULL, so the later reuse of
     * "info" in ncmpi_open below legally passes MPI_INFO_NULL. */
    MPI_Info_free(&info);

    /* the global array is NY * (NX * nprocs) */
    G_NX  = NX * nprocs;
    myOff = NX * rank;
    myNX  = NX;
    if (verbose) printf("%2d: myOff=%3d myNX=%3d\n",rank,myOff,myNX);

    err = ncmpi_def_dim(ncid, "Y", NY, &dimid[0]); ERR
    err = ncmpi_def_dim(ncid, "X", G_NX, &dimid[1]); ERR
    err = ncmpi_def_var(ncid, "var", NC_INT, 2, dimid, &varid); ERR
    err = ncmpi_enddef(ncid); ERR

    /* First, fill the entire array with zeros, using a blocking I/O.
       Every process writes a subarray of size NY * myNX */
    buf    = (int**) malloc(myNX * sizeof(int*));
    buf[0] = (int*)  calloc(NY * myNX, sizeof(int));
    start[0] = 0;  start[1] = myOff;
    count[0] = NY; count[1] = myNX;
    err = ncmpi_put_vara_int_all(ncid, varid, start, count, buf[0]);
    free(buf[0]);

    /* initialize the buffer with rank ID.  Also make the case interesting,
       by allocating buffers separately (one malloc per column) */
    for (i=0; i<myNX; i++) {
        buf[i] = (int*) malloc(NY * sizeof(int));
        for (j=0; j<NY; j++) buf[i][j] = rank;
    }

    reqs = (int*) malloc(myNX * sizeof(int));
    sts  = (int*) malloc(myNX * sizeof(int));

    /* each proc writes myNX single columns of the 2D array */
    start[0] = 0;  start[1] = rank;
    count[0] = NY; count[1] = 1;
    if (verbose)
        printf("%2d: start=%3lld %3lld count=%3lld %3lld\n",
               rank, start[0],start[1], count[0],count[1]);

    num_reqs = 0;
    for (i=0; i<myNX; i++) {
        err = ncmpi_iput_vara_int(ncid, varid, start, count, buf[i],
                                  &reqs[num_reqs++]); ERR
        start[1] += nprocs;   /* round-robin column assignment */
    }
    err = ncmpi_wait_all(ncid, num_reqs, reqs, sts); ERR

    /* check status of all requests */
    for (i=0; i<num_reqs; i++)
        if (sts[i] != NC_NOERR)
            printf("Error at line %d in %s: nonblocking write fails on request %d (%s)\n",
                   __LINE__,__FILE__,i, ncmpi_strerror(sts[i]));

    err = ncmpi_close(ncid); ERR

    /* read back using the same access pattern */
    err = ncmpi_open(MPI_COMM_WORLD, filename, NC_NOWRITE, info, &ncid); ERR
    err = ncmpi_inq_varid(ncid, "var", &varid); ERR

    for (i=0; i<myNX; i++) for (j=0; j<NY; j++) buf[i][j] = -1;

    /* each proc reads myNX single columns of the 2D array */
    start[0] = 0;  start[1] = rank;
    count[0] = NY; count[1] = 1;

    num_reqs = 0;
    for (i=0; i<myNX; i++) {
        err = ncmpi_iget_vara_int(ncid, varid, start, count, buf[i],
                                  &reqs[num_reqs++]); ERR
        start[1] += nprocs;
    }
    err = ncmpi_wait_all(ncid, num_reqs, reqs, sts); ERR

    /* check status of all requests */
    for (i=0; i<num_reqs; i++)
        if (sts[i] != NC_NOERR)
            printf("Error at line %d in %s: nonblocking write fails on request %d (%s)\n",
                   __LINE__,__FILE__,i, ncmpi_strerror(sts[i]));

    /* every read-back value must equal this process's rank */
    for (i=0; i<myNX; i++) {
        for (j=0; j<NY; j++)
            if (buf[i][j] != rank)
                printf("Error at line %d in %s: expect buf[%d][%d]=%d but got %d\n",
                       __LINE__,__FILE__,i,j,rank,buf[i][j]);
    }

    err = ncmpi_close(ncid); ERR

    free(sts);
    free(reqs);
    for (i=0; i<myNX; i++) free(buf[i]);
    free(buf);

    /* check if there is any PnetCDF internal malloc residue */
    MPI_Offset malloc_size, sum_size;
    err = ncmpi_inq_malloc_size(&malloc_size);
    if (err == NC_NOERR) {
        MPI_Reduce(&malloc_size, &sum_size, 1, MPI_OFFSET, MPI_SUM, 0, MPI_COMM_WORLD);
        if (rank == 0 && sum_size > 0)
            printf("heap memory allocated by PnetCDF internally has %lld bytes yet to be freed\n",
                   sum_size);
    }

    MPI_Finalize();
    return (nerrs > 0);
}
int PIOc_Init_Intracomm(const MPI_Comm comp_comm, const int num_iotasks, const int stride, const int base,const int rearr, int *iosysidp) { iosystem_desc_t *iosys; int ierr = PIO_NOERR; int ustride; int lbase; int mpierr; iosys = (iosystem_desc_t *) malloc(sizeof(iosystem_desc_t)); /* Copy the computation communicator into union_comm. */ mpierr = MPI_Comm_dup(comp_comm, &iosys->union_comm); CheckMPIReturn(mpierr, __FILE__, __LINE__); if (mpierr) ierr = PIO_EIO; /* Copy the computation communicator into comp_comm. */ if (!ierr) { mpierr = MPI_Comm_dup(comp_comm, &iosys->comp_comm); CheckMPIReturn(mpierr, __FILE__, __LINE__); if (mpierr) ierr = PIO_EIO; } if (!ierr) { iosys->my_comm = iosys->comp_comm; iosys->io_comm = MPI_COMM_NULL; iosys->intercomm = MPI_COMM_NULL; iosys->error_handler = PIO_INTERNAL_ERROR; iosys->async_interface= false; iosys->compmaster = false; iosys->iomaster = false; iosys->ioproc = false; iosys->default_rearranger = rearr; iosys->num_iotasks = num_iotasks; ustride = stride; /* Find MPI rank and number of tasks in comp_comm communicator. */ CheckMPIReturn(MPI_Comm_rank(iosys->comp_comm, &(iosys->comp_rank)),__FILE__,__LINE__); CheckMPIReturn(MPI_Comm_size(iosys->comp_comm, &(iosys->num_comptasks)),__FILE__,__LINE__); if(iosys->comp_rank==0) iosys->compmaster = true; /* Ensure that settings for number of computation tasks, number * of IO tasks, and the stride are reasonable. */ if((iosys->num_comptasks == 1) && (num_iotasks*ustride > 1)) { // This is a serial run with a bad configuration. Set up a single task. 
fprintf(stderr, "PIO_TP PIOc_Init_Intracomm reset stride and tasks.\n"); iosys->num_iotasks = 1; ustride = 1; } if((iosys->num_iotasks < 1) || (((iosys->num_iotasks-1)*ustride+1) > iosys->num_comptasks)){ fprintf(stderr, "PIO_TP PIOc_Init_Intracomm error\n"); fprintf(stderr, "num_iotasks=%d, ustride=%d, num_comptasks=%d\n", num_iotasks, ustride, iosys->num_comptasks); return PIO_EBADID; } /* Create an array that holds the ranks of the tasks to be used for IO. */ iosys->ioranks = (int *) calloc(sizeof(int), iosys->num_iotasks); for(int i=0;i< iosys->num_iotasks; i++){ iosys->ioranks[i] = (base + i*ustride) % iosys->num_comptasks; if(iosys->ioranks[i] == iosys->comp_rank) iosys->ioproc = true; } iosys->ioroot = iosys->ioranks[0]; /* Create an MPI info object. */ CheckMPIReturn(MPI_Info_create(&(iosys->info)),__FILE__,__LINE__); iosys->info = MPI_INFO_NULL; if(iosys->comp_rank == iosys->ioranks[0]) iosys->iomaster = true; /* Create a group for the computation tasks. */ CheckMPIReturn(MPI_Comm_group(iosys->comp_comm, &(iosys->compgroup)),__FILE__,__LINE__); /* Create a group for the IO tasks. */ CheckMPIReturn(MPI_Group_incl(iosys->compgroup, iosys->num_iotasks, iosys->ioranks, &(iosys->iogroup)),__FILE__,__LINE__); /* Create an MPI communicator for the IO tasks. */ CheckMPIReturn(MPI_Comm_create(iosys->comp_comm, iosys->iogroup, &(iosys->io_comm)),__FILE__,__LINE__); /* For the tasks that are doing IO, get their rank. */ if(iosys->ioproc) CheckMPIReturn(MPI_Comm_rank(iosys->io_comm, &(iosys->io_rank)),__FILE__,__LINE__); else iosys->io_rank = -1; iosys->union_rank = iosys->comp_rank; /* Add this iosys struct to the list in the PIO library. */ *iosysidp = pio_add_to_iosystem_list(iosys); pio_get_env(); /* allocate buffer space for compute nodes */ compute_buffer_init(*iosys); } return ierr; }
/*
 * Collectively write density (2-D) and velocity (3-D) arrays to an HDF5
 * file using MPI-IO, plus a few attributes and scalar datasets.
 *
 * Each rank writes its local (localnx x localny) block into the global
 * (globalnx x globalny) dataset via a hyperslab selection; velocities get
 * a leading component dimension of size 2.
 *
 * Fixes vs. previous revision:
 *  - "&timestep" had been mangled to the mojibake "×tep" (HTML &times;),
 *    which did not compile;
 *  - the MPI_Info object was never freed (H5Pset_fapl_mpio keeps its own
 *    copy, so it is freed right after that call);
 *  - fap_id/dist_id leaked on the early-return error path;
 *  - the local and global (H5Dget_space) dataspaces were never closed;
 *  - tm_mon is 0-based, so the YYYYMM attribute was one month off.
 *
 * NOTE(review): the H5Dwrite calls assume dens/vel point to contiguously
 * allocated 2-D/3-D arrays (&dens[0][0] spans the whole block) — confirm
 * against the allocator.
 */
void writehdf5file(rundata_t rundata, double **dens, double ***vel) {
    /* identifiers */
    hid_t file_id, arr_group_id, dens_dataset_id, vel_dataset_id;
    hid_t dens_dataspace_id, vel_dataspace_id;
    hid_t loc_dens_dataspace_id, loc_vel_dataspace_id;
    hid_t globaldensspace,globalvelspace;
    hid_t dist_id;
    hid_t fap_id;

    /* sizes */
    hsize_t densdims[2], veldims[3];
    hsize_t locdensdims[2], locveldims[3];

    /* status */
    herr_t status;

    /* MPI-IO hints for performance */
    MPI_Info info;

    /* parameters of the hyperslab */
    hsize_t counts[3];
    hsize_t strides[3];
    hsize_t offsets[3];
    hsize_t blocks[3];

    /* set the MPI-IO hints for better performance on GPFS */
    MPI_Info_create(&info);
    MPI_Info_set(info,"IBM_largeblock_io","true");

    /* Set up the parallel environment for file access*/
    fap_id = H5Pcreate(H5P_FILE_ACCESS);
    /* Include the file access property with IBM hint */
    H5Pset_fapl_mpio(fap_id, MPI_COMM_WORLD, info);
    /* The property list keeps its own copy of the info object, so the
     * original can be freed now (previously leaked). */
    MPI_Info_free(&info);

    /* Set up the parallel environment */
    dist_id = H5Pcreate(H5P_DATASET_XFER);
    /* we'll be writing collectively */
    H5Pset_dxpl_mpio(dist_id, H5FD_MPIO_COLLECTIVE);

    /* Create a new file - truncate anything existing, use default properties */
    file_id = H5Fcreate(rundata.filename, H5F_ACC_TRUNC, H5P_DEFAULT, fap_id);

    /* HDF5 routines generally return a negative number on failure.
     * Should check return values! */
    if (file_id < 0) {
        fprintf(stderr,"Could not open file %s\n", rundata.filename);
        /* release the property lists before bailing out (previously leaked) */
        H5Pclose(fap_id);
        H5Pclose(dist_id);
        return;
    }

    /* Create a new group within the new file */
    arr_group_id = H5Gcreate(file_id,"/ArrayData", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);

    /* Give this group an attribute listing the time of calculation */
    {
        hid_t attr_id,attr_sp_id;
        struct tm *t;
        time_t now;
        int yyyymm;

        now = time(NULL);
        t = localtime(&now);
        /* tm_mon is 0-based (Jan == 0); add 1 for a human YYYYMM stamp */
        yyyymm = (1900+t->tm_year)*100+(t->tm_mon+1);

        attr_sp_id = H5Screate(H5S_SCALAR);
        attr_id = H5Acreate(arr_group_id, "Calculated on (YYYYMM)",
                            H5T_STD_U32LE, attr_sp_id, H5P_DEFAULT, H5P_DEFAULT);
        printf("yymm = %d\n",yyyymm);
        H5Awrite(attr_id, H5T_NATIVE_INT, &yyyymm);
        H5Aclose(attr_id);
        H5Sclose(attr_sp_id);
    }

    /* Create the data space for the two global datasets. */
    densdims[0] = rundata.globalnx;
    densdims[1] = rundata.globalny;
    veldims[0] = 2;
    veldims[1] = rundata.globalnx;
    veldims[2] = rundata.globalny;

    dens_dataspace_id = H5Screate_simple(2, densdims, NULL);
    vel_dataspace_id  = H5Screate_simple(3, veldims, NULL);

    /* Create the datasets within the file.
     * H5T_IEEE_F64LE is a standard (IEEE) double precision (64 bit) floating (F) data type
     * and will work on any machine.  H5T_NATIVE_DOUBLE would work too, but would give
     * different results on GPC and TCS */
    dens_dataset_id = H5Dcreate(file_id, "/ArrayData/dens", H5T_IEEE_F64LE,
                                dens_dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
    vel_dataset_id  = H5Dcreate(file_id, "/ArrayData/vel", H5T_IEEE_F64LE,
                                vel_dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);

    /* Now create the data space for our sub-regions.  These are the data spaces
     * of our actual local data in memory. */
    locdensdims[0] = rundata.localnx;
    locdensdims[1] = rundata.localny;
    locveldims[0] = 2;
    locveldims[1] = rundata.localnx;
    locveldims[2] = rundata.localny;

    loc_dens_dataspace_id = H5Screate_simple(2, locdensdims, NULL);
    loc_vel_dataspace_id  = H5Screate_simple(3, locveldims, NULL);

    /*
     *
     * Now we have to figure out the `hyperslab' within the global
     * data that corresponds to our local data.
     *
     * Hyperslabs are described by an array of counts, strides, offsets,
     * and block sizes.
     *
     *       |-offx--|
     *       +-------|----|-------+   -+-
     *       |                    |    |
     *       |                    |   offy
     *       |                    |    |
     *       -       +----+       -   -+-
     *       |       |    |       |    |
     *       |       |    |       |  localny
     *       |       |    |       |    |
     *       -       +----+       -   -+-
     *       |                    |
     *       |                    |
     *       +-------|----|-------+
     *              localnx
     *
     * In this case the blocksizes are (localnx,localny) and the offsets are
     * (offx,offy) = ((myx)/nxp*globalnx, (myy/nyp)*globalny)
     */
    offsets[0] = (rundata.globalnx/rundata.npx)*rundata.myx;
    offsets[1] = (rundata.globalny/rundata.npy)*rundata.myy;
    blocks[0]  = rundata.localnx;
    blocks[1]  = rundata.localny;
    strides[0] = strides[1] = 1;
    counts[0]  = counts[1]  = 1;

    /* select this subset of the density variable's space in the file */
    globaldensspace = H5Dget_space(dens_dataset_id);
    H5Sselect_hyperslab(globaldensspace,H5S_SELECT_SET, offsets, strides, counts, blocks);

    /* For the velocities, it's the same thing but there's a count of two,
     * (one for each velocity component) */
    offsets[1] = (rundata.globalnx/rundata.npx)*rundata.myx;
    offsets[2] = (rundata.globalny/rundata.npy)*rundata.myy;
    blocks[1]  = rundata.localnx;
    blocks[2]  = rundata.localny;
    strides[0] = strides[1] = strides[2] = 1;
    counts[0]  = 2;
    counts[1]  = counts[2] = 1;
    offsets[0] = 0;
    blocks[0]  = 1;

    globalvelspace = H5Dget_space(vel_dataset_id);
    H5Sselect_hyperslab(globalvelspace,H5S_SELECT_SET, offsets, strides, counts, blocks);

    /* Write the data.  We're writing it from memory, where it is saved
     * in NATIVE_DOUBLE format */
    status = H5Dwrite(dens_dataset_id, H5T_NATIVE_DOUBLE, loc_dens_dataspace_id,
                      globaldensspace, dist_id, &(dens[0][0]));
    status = H5Dwrite(vel_dataset_id, H5T_NATIVE_DOUBLE, loc_vel_dataspace_id,
                      globalvelspace, dist_id, &(vel[0][0][0]));

    /* We'll create another group for related info and put some things in there */
    {
        hid_t other_group_id;
        hid_t timestep_id, timestep_space;
        hid_t comptime_id, comptime_space;
        hid_t author_id, author_space, author_type;

        char *authorname="Jonathan Dursi";
        int timestep=13;
        float comptime=81.773;

        /* create group */
        other_group_id = H5Gcreate(file_id,"/OtherStuff", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);

        /* scalar space, data for integer timestep */
        timestep_space = H5Screate(H5S_SCALAR);
        timestep_id = H5Dcreate(other_group_id, "Timestep", H5T_STD_U32LE,
                                timestep_space, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
        /* was "×tep" (mojibake for &timestep) — did not compile */
        status = H5Dwrite(timestep_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL,
                          H5P_DEFAULT, &timestep);
        H5Dclose(timestep_id);
        H5Sclose(timestep_space);

        /* scalar space, data for floating compute time */
        comptime_space = H5Screate(H5S_SCALAR);
        comptime_id = H5Dcreate(other_group_id, "Compute Time", H5T_IEEE_F32LE,
                                comptime_space, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
        status = H5Dwrite(comptime_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL,
                          H5P_DEFAULT, &comptime);
        H5Dclose(comptime_id);
        H5Sclose(comptime_space);

        /* scalar space, data for author name */
        author_space = H5Screate(H5S_SCALAR);
        author_type = H5Tcopy(H5T_C_S1);  /* copy the character type.. */
        status = H5Tset_size (author_type, strlen(authorname));  /* and make it longer */
        author_id = H5Dcreate(other_group_id, "Simulator Name", author_type,
                              author_space, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
        status = H5Dwrite(author_id, author_type, H5S_ALL, H5S_ALL,
                          H5P_DEFAULT, authorname);
        H5Dclose(author_id);
        H5Sclose(author_space);
        H5Tclose(author_type);
        H5Gclose(other_group_id);
    }

    /* End access to groups & data sets and release resources used by them */
    status = H5Sclose(dens_dataspace_id);
    status = H5Dclose(dens_dataset_id);
    status = H5Sclose(vel_dataspace_id);
    status = H5Dclose(vel_dataset_id);
    /* close the local and file-side selection dataspaces (previously leaked) */
    status = H5Sclose(loc_dens_dataspace_id);
    status = H5Sclose(loc_vel_dataspace_id);
    status = H5Sclose(globaldensspace);
    status = H5Sclose(globalvelspace);
    status = H5Gclose(arr_group_id);
    status = H5Pclose(fap_id);
    status = H5Pclose(dist_id);

    /* Close the file */
    status = H5Fclose(file_id);
    return;
}
/*-------------------------------------------------------------------------*/ int FTI_InitMpiICP(FTIT_configuration* FTI_Conf, FTIT_execution* FTI_Exec, FTIT_topology* FTI_Topo, FTIT_checkpoint* FTI_Ckpt, FTIT_dataset* FTI_Data) { int res; FTI_Print("I/O mode: MPI-IO.", FTI_DBUG); char str[FTI_BUFS], mpi_err[FTI_BUFS]; // enable collective buffer optimization MPI_Info info; MPI_Info_create(&info); MPI_Info_set(info, "romio_cb_write", "enable"); /* * update ckpt file name (neccessary for the restart!) * not very nice TODO we should think about another mechanism */ snprintf(FTI_Exec->meta[0].ckptFile, FTI_BUFS, "Ckpt%d-Rank%d.fti", FTI_Exec->ckptID, FTI_Topo->myRank); // TODO enable to set stripping unit in the config file (Maybe also other hints) // set stripping unit to 4MB MPI_Info_set(info, "stripping_unit", "4194304"); char gfn[FTI_BUFS], ckptFile[FTI_BUFS]; snprintf(ckptFile, FTI_BUFS, "Ckpt%d-mpiio.fti", FTI_Exec->ckptID); snprintf(gfn, FTI_BUFS, "%s/%s", FTI_Conf->gTmpDir, ckptFile); // open parallel file (collective call) MPI_File pfh; #ifdef LUSTRE if (FTI_Topo->splitRank == 0) { res = llapi_file_create(gfn, FTI_Conf->stripeUnit, FTI_Conf->stripeOffset, FTI_Conf->stripeFactor, 0); if (res) { char error_msg[FTI_BUFS]; error_msg[0] = 0; strerror_r(-res, error_msg, FTI_BUFS); snprintf(str, FTI_BUFS, "[Lustre] %s.", error_msg); FTI_Print(str, FTI_WARN); } else { snprintf(str, FTI_BUFS, "[LUSTRE] file:%s striping_unit:%i striping_factor:%i striping_offset:%i", ckptFile, FTI_Conf->stripeUnit, FTI_Conf->stripeFactor, FTI_Conf->stripeOffset); FTI_Print(str, FTI_DBUG); } } #endif res = MPI_File_open(FTI_COMM_WORLD, gfn, MPI_MODE_WRONLY|MPI_MODE_CREATE, info, &pfh); // check if successful if (res != 0) { errno = 0; int reslen; MPI_Error_string(res, mpi_err, &reslen); snprintf(str, FTI_BUFS, "unable to create file %s [MPI ERROR - %i] %s", gfn, res, mpi_err); FTI_Print(str, FTI_EROR); return FTI_NSCS; } MPI_Offset chunkSize = FTI_Exec->ckptSize; // collect chunksizes 
of other ranks MPI_Offset* chunkSizes = talloc(MPI_Offset, FTI_Topo->nbApprocs * FTI_Topo->nbNodes); MPI_Allgather(&chunkSize, 1, MPI_OFFSET, chunkSizes, 1, MPI_OFFSET, FTI_COMM_WORLD); // set file offset MPI_Offset offset = 0; int i; for (i = 0; i < FTI_Topo->splitRank; i++) { offset += chunkSizes[i]; } free(chunkSizes); FTI_Exec->iCPInfo.offset = offset; memcpy( FTI_Exec->iCPInfo.fh, &pfh, sizeof(FTI_MI_FH) ); MPI_Info_free(&info); return FTI_SCES; }
/*
 * One-sided put bandwidth benchmark for exactly two MPI processes.
 *
 * Rank 0 issues window_size MPI_Put operations per PSCW epoch and times
 * `loop` epochs; rank 1 only posts/waits the exposure epochs.  Message
 * sizes are swept from 1 to MAX_MSG_SIZE in powers of two.  With
 * --no-hints the MVAPICH2-specific "alloc_shm" window-memory hint is
 * skipped.  Bandwidth is reported in MB/s.
 */
int main (int argc, char *argv[])
{
    int myid, numprocs, i, j;
    int size, page_size;
    char *s_buf, *r_buf;          /* page-aligned views of s_buf1/r_buf1 */
    char *s_buf1, *r_buf1;
    double t_start = 0.0, t_end = 0.0, t = 0.0;
    int destrank, no_hints = 0;
    MPI_Group comm_group, group;
    MPI_Win win;
    MPI_Info win_info;
    int loop = 100;
    int window_size = 32;         /* puts issued per epoch */
    int skip = 20;                /* warm-up epochs excluded from timing */

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_group(MPI_COMM_WORLD, &comm_group);

    if (numprocs != 2) {
        if (myid == 0) {
            fprintf(stderr, "This test requires exactly two processes\n");
        }
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    /* Parse the single supported long option: --no-hints / -n. */
    while (1) {
        static struct option long_options[] =
            {{"no-hints", no_argument, NULL, 'n'}, {0, 0, 0, 0}};
        int option, index;
        option = getopt_long (argc, argv, "n::", long_options, &index);
        if (option == -1) {
            break;
        }
        switch (option) {
            case 'n':
                no_hints = 1;
                break;
            default:
                if (myid == 0) {
                    fprintf(stderr, "Invalid Option \n");
                }
                MPI_Finalize();
                return EXIT_FAILURE;
        }
    }

    page_size = getpagesize();
    assert(page_size <= MAX_ALIGNMENT);

    MPI_Alloc_mem (MAX_MSG_SIZE*window_size + MAX_ALIGNMENT, MPI_INFO_NULL, &s_buf1);
    if (NULL == s_buf1) {
        fprintf(stderr, "[%d] Buffer Allocation Failed \n", myid);
        exit(-1);
    }

    if (no_hints == 0) {
        /* Providing MVAPICH2 specific hint to allocate memory
         * in shared space.  MVAPICH2 optimizes communication
         * on windows created in this memory */
        MPI_Info_create(&win_info);
        MPI_Info_set(win_info, "alloc_shm", "true");
        MPI_Alloc_mem (MAX_MSG_SIZE*window_size + MAX_ALIGNMENT, win_info, &r_buf1);
    } else {
        MPI_Alloc_mem (MAX_MSG_SIZE*window_size + MAX_ALIGNMENT, MPI_INFO_NULL, &r_buf1);
    }
    if (NULL == r_buf1) {
        fprintf(stderr, "[%d] Buffer Allocation Failed \n", myid);
        exit(-1);
    }

    /* Align both buffers to a page boundary. */
    s_buf = (char *) (((unsigned long) s_buf1 + (page_size - 1)) / page_size * page_size);
    r_buf = (char *) (((unsigned long) r_buf1 + (page_size - 1)) / page_size * page_size);
    assert((s_buf != NULL) && (r_buf != NULL));

    memset(s_buf, 0, MAX_MSG_SIZE*window_size);
    memset(r_buf, 1, MAX_MSG_SIZE*window_size);

    if (myid == 0) {
        fprintf(stdout, HEADER);
        fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, "Bandwidth (MB/s)");
        fflush(stdout);
    }

    /* Bandwidth test */
    for (size = 1; size <= MAX_MSG_SIZE; size *= 2) {
        if (size > LARGE_MESSAGE_SIZE) {
            loop = LOOP_LARGE;
            skip = SKIP_LARGE;
            window_size = WINDOW_SIZE_LARGE;
        }

        /* Window creation and warming-up.
         * NOTE(review): the warm-up puts use offset i*size with i < skip;
         * this assumes skip (and SKIP_LARGE) <= window_size so the target
         * offset stays inside the size*window_size window — TODO confirm
         * the header constants. */
        MPI_Win_create(r_buf, size * window_size, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);

        if (myid == 0) {
            destrank = 1;
            MPI_Group_incl (comm_group, 1, &destrank, &group);
            for (i = 0; i < skip; i++) {
                MPI_Win_start(group, 0, win);
                MPI_Put(s_buf + i*size, size, MPI_CHAR, 1, i*size, size, MPI_CHAR, win);
                MPI_Win_complete(win);
            }
        } else {
            /*rank 1*/
            destrank = 0;
            MPI_Group_incl(comm_group, 1, &destrank, &group);
            for (i = 0; i < skip; i++) {
                MPI_Win_post(group, 0, win);
                MPI_Win_wait(win);
            }
        }

        MPI_Barrier(MPI_COMM_WORLD);

        if (myid == 0) {
            /* Timed phase: loop epochs of window_size puts each. */
            t_start = MPI_Wtime();
            for (i = 0; i < loop; i++) {
                MPI_Win_start(group, 0, win);
                for(j = 0; j < window_size; j++) {
                    MPI_Put(s_buf + j*size, size, MPI_CHAR, 1, j*size, size, MPI_CHAR, win);
                }
                MPI_Win_complete(win);
            }
            t_end = MPI_Wtime();
            t = t_end - t_start;
        } else {
            for (i = 0; i < loop; i++) {
                MPI_Win_post(group, 0, win);
                MPI_Win_wait(win);
            }
        }

        MPI_Barrier(MPI_COMM_WORLD);

        if (myid == 0) {
            /* total megabytes moved divided by elapsed seconds */
            double tmp = size / 1e6 * loop * window_size;
            fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH,
                    FLOAT_PRECISION, tmp / t);
            fflush(stdout);
        }

        MPI_Group_free(&group);
        MPI_Win_free(&win);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    if (no_hints == 0) {
        MPI_Info_free(&win_info);
    }

    MPI_Free_mem(s_buf1);
    MPI_Free_mem(r_buf1);

    MPI_Group_free(&comm_group);
    MPI_Finalize();
    return EXIT_SUCCESS;
}
/* Independent-write test for parallel-netCDF.
 *
 * Creates a dataset with 4 dimensions (x, y, z, and an unlimited "time"),
 * defines 4 variables (square, cube, time, xytime), writes block-decomposed
 * data from every rank in independent data mode, rewrites single elements and
 * whole records, then closes the file.
 *
 * NOTE(review): the data-partition arithmetic ("Assume 4 processors" below)
 * only tiles the full arrays when run with 4 MPI processes — confirm the
 * test harness always launches 4 ranks.
 * `params`, `parse_write_args`, and `handle_error` are project-declared
 * elsewhere in this file/project.
 */
int main(int argc, char **argv) {
    int i, j, k;
    int status;                                    /* pnetcdf return code, checked after every call */
    int ncid;                                      /* dataset handle */
    int dimid1, dimid2, dimid3, udimid;            /* x, y, z, time(unlimited) dim ids */
    int square_dim[2], cube_dim[3], xytime_dim[3], time_dim[1];
    MPI_Offset square_start[2], cube_start[3] = {0, 0, 0};
    MPI_Offset square_count[2] = {50, 50}, cube_count[3] = {100, 50, 50};
    MPI_Offset xytime_start[3] = {0, 0, 0};
    MPI_Offset xytime_count[3] = {100, 50, 50};
    MPI_Offset time_start[1], time_count[1] = {25};
    int square_id, cube_id, xytime_id, time_id;    /* variable ids */
    static char title[] = "example netCDF dataset";
    static char description[] = "2-D integer array";
    double data[100][50][50], buffer[100];         /* per-rank blocks to be written */
    int rank;
    int nprocs;
    MPI_Comm comm = MPI_COMM_WORLD;
    MPI_Info info;
    params opts;                                   /* filled by parse_write_args */

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank == 0)
        fprintf(stderr, "Testing independent write ... ");
    parse_write_args(argc, argv, rank, &opts);

    /********** START OF NETCDF ACCESS **************/

    /* Striping hints handed down to the MPI-IO layer (e.g. Lustre). */
    MPI_Info_create(&info);
    MPI_Info_set(info, "striping_factor", "4");
    MPI_Info_set(info, "striping_unit", "20000");
    MPI_Info_set(info, "start_iodevice", "0");

    /**
     * Create the dataset
     *   File name: opts.outfname
     *   Dataset API: Collective
     */
    status = ncmpi_create(comm, opts.outfname, NC_CLOBBER, info, &ncid);
    if (status != NC_NOERR) handle_error(status);

    /**
     * Create a global attribute:
     *    :title = "example netCDF dataset";
     */
    status = ncmpi_put_att_text (ncid, NC_GLOBAL, "title", strlen(title), title);
    if (status != NC_NOERR) handle_error(status);

    /**
     * Add 4 pre-defined dimensions:
     *   x = 100, y = 100, z = 100, time = NC_UNLIMITED
     */
    status = ncmpi_def_dim(ncid, "x", 100L, &dimid1);
    if (status != NC_NOERR) handle_error(status);
    status = ncmpi_def_dim(ncid, "y", 100L, &dimid2);
    if (status != NC_NOERR) handle_error(status);
    status = ncmpi_def_dim(ncid, "z", 100L, &dimid3);
    if (status != NC_NOERR) handle_error(status);
    status = ncmpi_def_dim(ncid, "time", NC_UNLIMITED, &udimid);
    if (status != NC_NOERR) handle_error(status);

    /**
     * Define the dimensionality and then add 4 variables:
     *   square(x, y), cube(x,y,z), time(time), xytime(time, x, y)
     */
    square_dim[0] = cube_dim[0] = xytime_dim[1] = dimid1;
    square_dim[1] = cube_dim[1] = xytime_dim[2] = dimid2;
    cube_dim[2] = dimid3;
    xytime_dim[0] = udimid;    /* record (unlimited) dimension must come first */
    time_dim[0] = udimid;
    status = ncmpi_def_var (ncid, "square", NC_DOUBLE, 2, square_dim, &square_id);
    if (status != NC_NOERR) handle_error(status);
    status = ncmpi_def_var (ncid, "cube", NC_DOUBLE, 3, cube_dim, &cube_id);
    if (status != NC_NOERR) handle_error(status);
    status = ncmpi_def_var (ncid, "time", NC_DOUBLE, 1, time_dim, &time_id);
    if (status != NC_NOERR) handle_error(status);
    status = ncmpi_def_var (ncid, "xytime", NC_DOUBLE, 3, xytime_dim, &xytime_id);
    if (status != NC_NOERR) handle_error(status);

    /**
     * Add an attribute for variable:
     *   square: description = "2-D integer array"
     */
    status = ncmpi_put_att_text (ncid, square_id, "description", strlen(description), description);
    if (status != NC_NOERR) handle_error(status);

    /**
     * End Define Mode (switch to data mode)
     * Dataset API: Collective
     */
    status = ncmpi_enddef(ncid);
    if (status != NC_NOERR) handle_error(status);

    /**
     * Data Partition (Assume 4 processors):
     *   square: 2-D, (Block, Block),    50*50     from 100*100
     *   cube:   3-D, (*, Block, Block), 100*50*50 from 100*100*100
     *   xytime: 3-D, (*, Block, Block), 100*50*50 from 100*100*100
     *   time:   1-D, Block-wise,        25        from 100
     */
    square_start[0] = cube_start[1] = xytime_start[1] = (rank/2) * 50;
    square_start[1] = cube_start[2] = xytime_start[2] = (rank%2) * 50;
    time_start[0] = (rank%4) * 25;

    /**
     * Packing data in the buffer
     */
    /* Data for variable: time */
    for ( i = time_start[0]; i < time_start[0] + time_count[0]; i++ )
        buffer[i - time_start[0]] = i;
    /* Data for variable: square, cube and xytime (value encodes global index) */
    for ( i = 0; i < 100; i++ )
        for ( j = square_start[0]; j < square_start[0]+square_count[0]; j++ )
            for ( k = square_start[1]; k < square_start[1]+square_count[1]; k++ )
                data[i][j-square_start[0]][k-square_start[1]] = i*100*100 + j*100 + k;

    /**
     * Write data into variables: square, cube, time and xytime
     *   Access Method: subarray
     *   Data Mode API: non-collective
     */
    status = ncmpi_begin_indep_data(ncid);
    if (status != NC_NOERR) handle_error(status);
    status = ncmpi_put_vara_double(ncid, square_id, square_start, square_count, &data[0][0][0]);
    if (status != NC_NOERR) handle_error(status);
    status = ncmpi_put_vara_double(ncid, cube_id, cube_start, cube_count, &data[0][0][0]);
    if (status != NC_NOERR) handle_error(status);
    status = ncmpi_put_vara_double(ncid, time_id, time_start, time_count, (double *)buffer);
    if (status != NC_NOERR) handle_error(status);
    status = ncmpi_put_vara_double(ncid, xytime_id, xytime_start, xytime_count, &data[0][0][0]);
    if (status != NC_NOERR) handle_error(status);

    {
        /**
         * Change a single element and then change it back
         *   Access Method: single value
         *   Data Mode API: non-collective
         */
        double singlevalue = 0;
        ncmpi_sync(ncid);
        status = ncmpi_put_var1_double(ncid, square_id, square_start, &singlevalue);
        if (status != NC_NOERR) handle_error(status);
        status = ncmpi_put_var1_double(ncid, time_id, time_start, &singlevalue);
        if (status != NC_NOERR) handle_error(status);
        ncmpi_sync(ncid);
        /* restore the original values computed from the global index */
        singlevalue = square_start[0]*100 + square_start[1];
        status = ncmpi_put_var1_double(ncid, square_id, square_start, &singlevalue);
        if (status != NC_NOERR) handle_error(status);
        singlevalue = time_start[0];
        status = ncmpi_put_var1_double(ncid, time_id, time_start, &singlevalue);
        if (status != NC_NOERR) handle_error(status);
    }

    {
        /**
         * Change the whole array for time[] and then change it back
         *   Access Method: whole array
         *   Data Mode API: non-collective
         */
        ncmpi_sync(ncid);
        for (i = 0; i < 100; i++ )
            buffer[i] = 0;
        if (rank == 0) {
            status = ncmpi_put_var_double(ncid, time_id, buffer);
            if (status != NC_NOERR) handle_error(status);
        }
        ncmpi_sync(ncid);
        for (i=0; i<100; i++)
            buffer[i] = i;
        if (rank == 1) {
            status = ncmpi_put_var_double(ncid, time_id, buffer);
            if (status != NC_NOERR) handle_error(status);
        }
    }

    status = ncmpi_end_indep_data(ncid);
    if (status != NC_NOERR) handle_error(status);

    /**
     * Close the dataset
     * Dataset API: collective
     */
    status = ncmpi_close(ncid);
    if (status != NC_NOERR) handle_error(status);
    MPI_Info_free(&info);

    /******************* END OF NETCDF ACCESS ****************/

    if (rank == 0)
        fprintf(stderr, "OK\nFile written to: %s!\n", opts.outfname);
    MPI_Finalize();
    return 0;
}
/* Create the HDF5 output file and write the static metadata: the /Info group
 * (file names, version, config), the /Constants group (parser constants), and
 * the packed-table layout for the CFL table.
 *
 * NOTE(review): `file`, `parallel`, `outputFileName`, `inputFileName`,
 * `overwriteFile`, `info`, `cfl_table`, `cfl_offset`, and `cfl_sizes` appear
 * to be class members declared elsewhere — confirm against the class header.
 * `check(...)` presumably aborts or throws on an HDF5 error code.
 */
int FileIO::create(Setup *setup) {
    hid_t file_plist = H5Pcreate(H5P_FILE_ACCESS);
#ifdef GKC_PARALLEL_MPI
    /* pass some information onto the underlying MPI_File_open call */
    MPI_Info file_info;
    check(MPI_Info_create(&file_info), DMESG("File info"));
    /* Tuning hints, currently disabled:
       H5Pset_sieve_buf_size(file_plist, 262144);
       H5Pset_alignment(file_plist, 524288, 262144);
       MPI_Info_set(file_info, (char *) "access_style"        , (char *) "write_once");
       MPI_Info_set(file_info, (char *) "collective_buffering", (char *) "true");
       MPI_Info_set(file_info, (char *) "cb_block_size"       , (char *) "1048576");
       MPI_Info_set(file_info, (char *) "cb_buffer_size"      , (char *) "4194304");
    */
    check( H5Pset_fapl_mpio(file_plist, parallel->Comm[DIR_ALL], file_info), DMESG("Set MPI Property"));
#endif
    /* H5F_ACC_EXCL makes creation fail if the file already exists, unless
     * the user asked to overwrite (-f). */
    file = check(H5Fcreate(outputFileName.c_str(), (overwriteFile ? H5F_ACC_TRUNC : H5F_ACC_EXCL),
                           H5P_DEFAULT, file_plist ),
                 DMESG("H5FCreate : HDF5 File (File already exists ? use -f to overwrite) : " + outputFileName));
    /* the property list is no longer needed once the file handle exists */
    check( H5Pclose(file_plist), DMESG("H5Pclose"));
#ifdef GKC_PARALLEL_MPI
    MPI_Info_free(&file_info);
#endif

    //////////////////////////////////// Info Group ////////////////////////////////////
    hid_t infoGroup = check(H5Gcreate(file, "/Info",H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT),
                            DMESG("Error creating group file for Phasespace : H5Gcreate"));
    check(H5LTset_attribute_string(infoGroup, ".", "Output", outputFileName.c_str()), DMESG("H5LTset_attribute"));
    check(H5LTset_attribute_string(infoGroup, ".", "Input", inputFileName.c_str()), DMESG("H5LTset_attribute"));
    check(H5LTset_attribute_string(infoGroup, ".", "Version", PACKAGE_VERSION), DMESG("H5LTset_attribute"));
    // Some simulation-specific attributes, currently disabled:
    //check(H5LTset_attribute_string(infoGroup, ".", "Solver", ((setup->Solver & VL_LIN) ? "Linear" : "Non-Linear")), DMESG("H5LTset_attribute"));
    //heck(H5LTset_attribute_string(infoGroup, ".", "Type", ((setup->VlasovType & VLASOV_LOCAL ) ? "Local" : "Global" )), DMESG("H5LTset_attribute"));
    //heck(H5LTset_attribute_string(infoGroup, ".", "FFTSolverS", ((setup->VlasovType & VLASOV_LOCAL ) ? "Local" : "Global" )), DMESG("H5LTset_attribute"));
    //check(H5LTset_attribute_string(infoGroup, ".", "Initial Condition", setup->PerturbationMethod.c_str()), DMESG("H5LTset_attribute"));
    check(H5LTset_attribute_string(infoGroup, ".", "Info", info.c_str()), DMESG("H5LTset_attribute"));
    check(H5LTset_attribute_string(infoGroup, ".", "Config", setup->configFileString.c_str()), DMESG("H5LTset_attribute"));
    H5Gclose(infoGroup);

    /// Write setup constants ("name=value,name=value" pairs), ugly here ////
    hid_t constantsGroup = check(H5Gcreate(file, "/Constants",H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT),
                                 DMESG("Error creating group file for Phasespace : H5Gcreate"));
    //
    if (!setup->parser_constants.empty()) {
        std::vector<std::string> const_vec = Setup::split(setup->parser_constants, ",");
        for(int s = 0; s < const_vec.size(); s++) {
            /* each entry is "key=value"; store as a double attribute */
            std::vector<std::string> key_value = Setup::split(const_vec[s],"=");
            double value = Setup::string_to_double(key_value[1]);
            int dim[] = { 1 };
            // check(H5LTmake_dataset_double(constantsGroup, Setup::trimLower(key_value[0], false).c_str(), 1, dim, &value ), DMESG("Write Constants Attributes"));
            check(H5LTset_attribute_double(constantsGroup, ".", Setup::trimLower(key_value[0], false).c_str(), &value, 1), DMESG("H5LTset_attribute"));
            //check(H5LTset_attribute_double(constantsGroup, ".", Setup::trimLower(key_value[0], false).c_str(), &(Setup::string_to_double(key_value[1])), 1), DMESG("H5LTset_attribute"));
        };
    }
    H5Gclose(constantsGroup);

    // ********************* setup packed-record table for CFL data *****************
    cfl_table = new CFLTable();
    /* byte offsets of each field inside the CFLTable struct */
    cfl_offset[0] = HOFFSET( CFLTable, timeStep );
    cfl_offset[1] = HOFFSET( CFLTable, time );
    cfl_offset[2] = HOFFSET( CFLTable, Fx );
    cfl_offset[3] = HOFFSET( CFLTable, Fy );
    cfl_offset[4] = HOFFSET( CFLTable, Fz );
    cfl_offset[5] = HOFFSET( CFLTable, Fv );
    cfl_offset[6] = HOFFSET( CFLTable, total );
    /* field 0 is the int timestep; fields 1..6 are doubles */
    for(int i = 1; i < 7; i++) cfl_sizes[i] = sizeof(double);
    cfl_sizes[0] = sizeof(int);
    hid_t cfl_type[7];
    for(int i = 1; i < 7; i++) cfl_type [i] = H5T_NATIVE_DOUBLE;
    cfl_type[0] = H5T_NATIVE_INT;
    const char *cfl_names[7];
    cfl_names[0] = "timeStep";
    cfl_names[1] = "time";
    cfl_names[2] = "Fx";
    cfl_names[3] = "Fy";
    cfl_names[4] = "Fz";
    cfl_names[5] = "Fv";
    cfl_names[6] = "Total";
    /* create an initially-empty table with chunk size 32, no fill, no compression */
    check(H5TBmake_table("cflTable", file, "cfl", (hsize_t) 7, (hsize_t) 0, sizeof(CFLTable),
                         (const char**) cfl_names, cfl_offset, cfl_type, 32, NULL, 0, cfl_table ),
          DMESG("H5Tmake_table : cfl"));
    return HELIOS_SUCCESS;
}
/* One-time initialization of the ADIO layer: sets up the flattened-datatype
 * list, reads the direct-I/O environment switches (XFS/Lustre builds only),
 * processes system-wide hints, registers MPE logging states when enabled,
 * and creates the reduction op used to check for consistent amodes.
 *
 * argc/argv are accepted for interface symmetry but unused.
 */
void ADIO_Init(int *argc, char ***argv, int *error_code)
{
#if defined(ROMIO_XFS) || defined(ROMIO_LUSTRE)
    char *c;
#endif

    ADIOI_UNREFERENCED_ARG(argc);
    ADIOI_UNREFERENCED_ARG(argv);

    /* initialize the linked list containing flattened datatypes */
    ADIOI_Flatlist = (ADIOI_Flatlist_node *) ADIOI_Malloc(sizeof(ADIOI_Flatlist_node));
    ADIOI_Flatlist->type = MPI_DATATYPE_NULL;
    ADIOI_Flatlist->next = NULL;
    ADIOI_Flatlist->blocklens = NULL;
    ADIOI_Flatlist->indices = NULL;

#if defined(ROMIO_XFS) || defined(ROMIO_LUSTRE)
    /* environment variables toggle O_DIRECT-style I/O on these filesystems */
    c = getenv("MPIO_DIRECT_READ");
    if (c && (!strcmp(c, "true") || !strcmp(c, "TRUE")))
        ADIOI_Direct_read = 1;
    else
        ADIOI_Direct_read = 0;
    c = getenv("MPIO_DIRECT_WRITE");
    if (c && (!strcmp(c, "true") || !strcmp(c, "TRUE")))
        ADIOI_Direct_write = 1;
    else
        ADIOI_Direct_write = 0;
#endif

    /* Assume system-wide hints won't change between runs: move hint processing
     * from ADIO_Open to here */
    /* FIXME should be checking error code from MPI_Info_create here */
    MPI_Info_create(&ADIOI_syshints);
    ADIOI_process_system_hints(ADIOI_syshints);

#ifdef ADIOI_MPE_LOGGING
    {
        /* allocate an MPE state (begin/end event id pair) per traced operation */
        MPE_Log_get_state_eventIDs( &ADIOI_MPE_open_a, &ADIOI_MPE_open_b );
        MPE_Log_get_state_eventIDs( &ADIOI_MPE_read_a, &ADIOI_MPE_read_b );
        MPE_Log_get_state_eventIDs( &ADIOI_MPE_write_a, &ADIOI_MPE_write_b );
        MPE_Log_get_state_eventIDs( &ADIOI_MPE_lseek_a, &ADIOI_MPE_lseek_b );
        MPE_Log_get_state_eventIDs( &ADIOI_MPE_close_a, &ADIOI_MPE_close_b );
        MPE_Log_get_state_eventIDs( &ADIOI_MPE_writelock_a, &ADIOI_MPE_writelock_b );
        MPE_Log_get_state_eventIDs( &ADIOI_MPE_readlock_a, &ADIOI_MPE_readlock_b );
        MPE_Log_get_state_eventIDs( &ADIOI_MPE_unlock_a, &ADIOI_MPE_unlock_b );
        MPE_Log_get_state_eventIDs( &ADIOI_MPE_postwrite_a, &ADIOI_MPE_postwrite_b );
        MPE_Log_get_state_eventIDs( &ADIOI_MPE_openinternal_a, &ADIOI_MPE_openinternal_b);
        MPE_Log_get_state_eventIDs( &ADIOI_MPE_stat_a, &ADIOI_MPE_stat_b);
        MPE_Log_get_state_eventIDs( &ADIOI_MPE_iread_a, &ADIOI_MPE_iread_b);
        MPE_Log_get_state_eventIDs( &ADIOI_MPE_iwrite_a, &ADIOI_MPE_iwrite_b);

        int comm_world_rank;
        MPI_Comm_rank( MPI_COMM_WORLD, &comm_world_rank );
        /* state names/colors only need to be described once, by rank 0 */
        if ( comm_world_rank == 0 ) {
            MPE_Describe_state( ADIOI_MPE_open_a, ADIOI_MPE_open_b, "open", "orange" );
            MPE_Describe_state( ADIOI_MPE_read_a, ADIOI_MPE_read_b, "read", "green" );
            MPE_Describe_state( ADIOI_MPE_write_a, ADIOI_MPE_write_b, "write", "blue" );
            MPE_Describe_state( ADIOI_MPE_lseek_a, ADIOI_MPE_lseek_b, "lseek", "red" );
            MPE_Describe_state( ADIOI_MPE_close_a, ADIOI_MPE_close_b, "close", "grey" );
            MPE_Describe_state( ADIOI_MPE_writelock_a, ADIOI_MPE_writelock_b, "writelock", "plum" );
            MPE_Describe_state( ADIOI_MPE_readlock_a, ADIOI_MPE_readlock_b, "readlock", "magenta" );
            MPE_Describe_state( ADIOI_MPE_unlock_a, ADIOI_MPE_unlock_b, "unlock", "purple" );
            MPE_Describe_state( ADIOI_MPE_postwrite_a, ADIOI_MPE_postwrite_b, "postwrite", "ivory" );
            MPE_Describe_state( ADIOI_MPE_openinternal_a, ADIOI_MPE_openinternal_b, "open system", "blue");
            MPE_Describe_state( ADIOI_MPE_stat_a, ADIOI_MPE_stat_b, "stat", "purple");
            MPE_Describe_state( ADIOI_MPE_iread_a, ADIOI_MPE_iread_b, "iread", "purple");
            MPE_Describe_state( ADIOI_MPE_iwrite_a, ADIOI_MPE_iwrite_b, "iwrite", "purple");
        }
    }
#endif

    *error_code = MPI_SUCCESS;

    /* reduction op used to verify all ranks opened a file with the same amode */
    MPI_Op_create(my_consensus, 1, &ADIO_same_amode);
}
/* Verify that an MPI_Info object returns correct values for a fixed set of
 * keys regardless of the order in which the keys were inserted.  All six
 * permutations of three keys are exercised.
 *
 * Refactor note: the original repeated the identical 3-key verification loop
 * six times inline; it is factored into check_keys() with behavior (messages
 * and error counting) unchanged.
 */

/* Check that every keys[i] is present in info with value values[i].
 * Returns the number of mismatches found (0 on success); prints a
 * diagnostic for each failure, exactly as the original inline loops did. */
static int check_keys(MPI_Info info, char *keys[], char *values[])
{
    char value[MPI_MAX_INFO_VAL];
    int i, flag, errs = 0;

    for (i = 0; i < NKEYS; i++) {
        MPI_Info_get(info, keys[i], MPI_MAX_INFO_VAL, value, &flag);
        if (!flag) {
            errs++;
            printf( "No value for key %s\n", keys[i] );
        }
        if (strcmp( value, values[i] )) {
            errs++;
            printf( "Incorrect value for key %s\n", keys[i] );
        }
    }
    return errs;
}

int main( int argc, char *argv[] )
{
    int errs = 0;
    MPI_Info info;
    /* Use only named (predefined) keys in case the info implementation only
       supports the predefined keys (e.g., IBM) */
    char *keys1[NKEYS] = { (char*)"file", (char*)"soft", (char*)"host" };
    char *values1[NKEYS] = { (char*)"runfile.txt", (char*)"2:1000:4,3:1000:7",
                             (char*)"myhost.myorg.org" };
    int i;

    MTest_Init( &argc, &argv );

    /* insertion order 1,2,3 */
    MPI_Info_create( &info );
    for (i = 0; i < NKEYS; i++)
        MPI_Info_set( info, keys1[i], values1[i] );
    errs += check_keys( info, keys1, values1 );
    MPI_Info_free( &info );

    /* insertion order 3,2,1 */
    MPI_Info_create( &info );
    for (i = NKEYS - 1; i >= 0; i--)
        MPI_Info_set( info, keys1[i], values1[i] );
    errs += check_keys( info, keys1, values1 );
    MPI_Info_free( &info );

    /* insertion order 1,3,2 */
    MPI_Info_create( &info );
    MPI_Info_set( info, keys1[0], values1[0] );
    MPI_Info_set( info, keys1[2], values1[2] );
    MPI_Info_set( info, keys1[1], values1[1] );
    errs += check_keys( info, keys1, values1 );
    MPI_Info_free( &info );

    /* insertion order 2,1,3 */
    MPI_Info_create( &info );
    MPI_Info_set( info, keys1[1], values1[1] );
    MPI_Info_set( info, keys1[0], values1[0] );
    MPI_Info_set( info, keys1[2], values1[2] );
    errs += check_keys( info, keys1, values1 );
    MPI_Info_free( &info );

    /* insertion order 2,3,1 */
    MPI_Info_create( &info );
    MPI_Info_set( info, keys1[1], values1[1] );
    MPI_Info_set( info, keys1[2], values1[2] );
    MPI_Info_set( info, keys1[0], values1[0] );
    errs += check_keys( info, keys1, values1 );
    MPI_Info_free( &info );

    /* insertion order 3,1,2 */
    MPI_Info_create( &info );
    MPI_Info_set( info, keys1[2], values1[2] );
    MPI_Info_set( info, keys1[0], values1[0] );
    MPI_Info_set( info, keys1[1], values1[1] );
    errs += check_keys( info, keys1, values1 );
    MPI_Info_free( &info );

    MTest_Finalize( errs );
    MPI_Finalize();
    return 0;
}
int PIOc_Init_Intracomm(const MPI_Comm comp_comm, const int num_iotasks, const int stride, const int base,const int rearr, int *iosysidp) { iosystem_desc_t *iosys; int ierr; int ustride; int lbase; iosys = (iosystem_desc_t *) malloc(sizeof(iosystem_desc_t)); iosys->union_comm = comp_comm; iosys->comp_comm = comp_comm; iosys->my_comm = comp_comm; iosys->io_comm = MPI_COMM_NULL; iosys->intercomm = MPI_COMM_NULL; iosys->error_handler = PIO_INTERNAL_ERROR; iosys->async_interface= false; iosys->compmaster = false; iosys->iomaster = false; iosys->ioproc = false; iosys->default_rearranger = rearr; iosys->num_iotasks = num_iotasks; ustride = stride; CheckMPIReturn(MPI_Comm_rank(comp_comm, &(iosys->comp_rank)),__FILE__,__LINE__); CheckMPIReturn(MPI_Comm_size(comp_comm, &(iosys->num_comptasks)),__FILE__,__LINE__); if(iosys->comp_rank==0) iosys->compmaster = true; #ifdef BGQxxx lbase = base; determineiotasks(comp_comm, &(iosys->num_iotasks), &lbase, &stride, &rearr, &(iosys->ioproc)); if(iosys->comp_rank==0) printf("%s %d %d\n",__FILE__,__LINE__,iosys->num_iotasks); if(iosys->ioproc) printf("%s %d %d\n",__FILE__,__LINE__,iosys->comp_rank); #else if((iosys->num_comptasks == 1) && (num_iotasks*ustride > 1)) { // This is a serial run with a bad configuration. Set up a single task. 
fprintf(stderr, "PIO_TP PIOc_Init_Intracomm reset stride and tasks.\n"); iosys->num_iotasks = 1; ustride = 1; } if((iosys->num_iotasks < 1) || ((iosys->num_iotasks*ustride) > iosys->num_comptasks)){ fprintf(stderr, "PIO_TP PIOc_Init_Intracomm error\n"); fprintf(stderr, "num_iotasks=%d, ustride=%d, num_comptasks=%d\n", num_iotasks, ustride, iosys->num_comptasks); return PIO_EBADID; } iosys->ioranks = (int *) calloc(sizeof(int), iosys->num_iotasks); for(int i=0;i< iosys->num_iotasks; i++){ iosys->ioranks[i] = (base + i*ustride) % iosys->num_comptasks; if(iosys->ioranks[i] == iosys->comp_rank) iosys->ioproc = true; } iosys->ioroot = iosys->ioranks[0]; #endif CheckMPIReturn(MPI_Info_create(&(iosys->info)),__FILE__,__LINE__); iosys->info = MPI_INFO_NULL; if(iosys->comp_rank == iosys->ioranks[0]) iosys->iomaster = true; CheckMPIReturn(MPI_Comm_group(comp_comm, &(iosys->compgroup)),__FILE__,__LINE__); CheckMPIReturn(MPI_Group_incl(iosys->compgroup, iosys->num_iotasks, iosys->ioranks, &(iosys->iogroup)),__FILE__,__LINE__); CheckMPIReturn(MPI_Comm_create(comp_comm, iosys->iogroup, &(iosys->io_comm)),__FILE__,__LINE__); if(iosys->ioproc) CheckMPIReturn(MPI_Comm_rank(iosys->io_comm, &(iosys->io_rank)),__FILE__,__LINE__); else iosys->io_rank = -1; iosys->union_rank = iosys->comp_rank; *iosysidp = pio_add_to_iosystem_list(iosys); pio_get_env(); /* allocate buffer space for compute nodes */ compute_buffer_init(*iosys); return PIO_NOERR; }
/* Generic ROMIO hint processing.
 *
 * If fd->info is null, create a new info object.  Initialize fd->info and
 * fd->hints to default values, then examine the info object passed by the
 * user and override defaults for any hints ROMIO understands.  Finally,
 * post-process hints that take precedence over or conflict with others.
 *
 * On exit *error_code is MPI_SUCCESS, or an MPI error on allocation failure.
 */
void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    MPI_Info info;
    char *value;
    int flag, nprocs=0, len;
    int ok_to_override_cb_nodes=0;
    static char myname[] = "ADIOI_GEN_SETINFO";

    /* if we've already set up default hints and the user has not asked us to
     * process any hints (MPI_INFO_NULL), then we can short-circuit hint
     * processing */
    if (fd->hints->initialized && fd->info == MPI_INFO_NULL) {
        *error_code = MPI_SUCCESS;
        return;
    }

    if (fd->info == MPI_INFO_NULL)
        MPI_Info_create(&(fd->info));
    info = fd->info;
    MPI_Comm_size(fd->comm, &nprocs);

    /* Note that fd->hints is allocated at file open time; thus it is
     * not necessary to allocate it, or check for allocation, here. */
    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
    if (value == NULL) {
        *error_code = MPIO_Err_create_code(*error_code, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_OTHER,
                                           "**nomem2",0);
        return;
    }

    /* initialize info and hints to default values if they haven't been
     * previously initialized */
    if (!fd->hints->initialized) {
        /* buffer size for collective I/O */
        ADIOI_Info_set(info, "cb_buffer_size", ADIOI_CB_BUFFER_SIZE_DFLT);
        fd->hints->cb_buffer_size = atoi(ADIOI_CB_BUFFER_SIZE_DFLT);

        /* default is to let romio automatically decide when to use
         * collective buffering */
        ADIOI_Info_set(info, "romio_cb_read", "automatic");
        fd->hints->cb_read = ADIOI_HINT_AUTO;
        ADIOI_Info_set(info, "romio_cb_write", "automatic");
        fd->hints->cb_write = ADIOI_HINT_AUTO;

        fd->hints->cb_config_list = NULL;

        /* number of processes that perform I/O in collective I/O */
        ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", nprocs);
        ADIOI_Info_set(info, "cb_nodes", value);
        fd->hints->cb_nodes = nprocs;

        /* hint indicating that no indep. I/O will be performed on this file */
        ADIOI_Info_set(info, "romio_no_indep_rw", "false");
        fd->hints->no_indep_rw = 0;

        /* hint instructing the use of persistent file realms */
        ADIOI_Info_set(info, "romio_cb_pfr", "disable");
        fd->hints->cb_pfr = ADIOI_HINT_DISABLE;

        /* hint guiding the assignment of persistent file realms */
        ADIOI_Info_set(info, "romio_cb_fr_types", "aar");
        fd->hints->cb_fr_type = ADIOI_FR_AAR;

        /* hint to align file realms with a certain byte value */
        ADIOI_Info_set(info, "romio_cb_fr_alignment", "1");
        fd->hints->cb_fr_alignment = 1;

        /* hint to set a threshold percentage for a datatype's size/extent at
         * which data sieving should be done in collective I/O */
        ADIOI_Info_set(info, "romio_cb_ds_threshold", "0");
        fd->hints->cb_ds_threshold = 0;

        /* hint to switch between point-to-point or all-to-all for two-phase */
        ADIOI_Info_set(info, "romio_cb_alltoall", "automatic");
        fd->hints->cb_alltoall = ADIOI_HINT_AUTO;

        /* deferred_open derived from no_indep_rw and cb_{read,write} */
        fd->hints->deferred_open = 0;

        /* buffer size for data sieving in independent reads */
        ADIOI_Info_set(info, "ind_rd_buffer_size", ADIOI_IND_RD_BUFFER_SIZE_DFLT);
        fd->hints->ind_rd_buffer_size = atoi(ADIOI_IND_RD_BUFFER_SIZE_DFLT);

        /* buffer size for data sieving in independent writes */
        ADIOI_Info_set(info, "ind_wr_buffer_size", ADIOI_IND_WR_BUFFER_SIZE_DFLT);
        fd->hints->ind_wr_buffer_size = atoi(ADIOI_IND_WR_BUFFER_SIZE_DFLT);

        /* default is to let romio automatically decide when to use data
         * sieving */
        ADIOI_Info_set(info, "romio_ds_read", "automatic");
        fd->hints->ds_read = ADIOI_HINT_AUTO;
        ADIOI_Info_set(info, "romio_ds_write", "automatic");
        fd->hints->ds_write = ADIOI_HINT_AUTO;

        /* still to do: tune this a bit for a variety of file systems. there's
         * no good default value so just leave it unset */
        fd->hints->min_fdomain_size = 0;
        fd->hints->striping_unit = 0;

        fd->hints->initialized = 1;

        /* ADIO_Open sets up collective buffering arrays.  If we are in this
         * path from say set_file_view, then we don't want to adjust the
         * array: we'd get a segfault during collective i/o.  We only want to
         * look at the user's cb_nodes at open time. */
        ok_to_override_cb_nodes = 1;
    }

    /* add in user's info if supplied */
    if (users_info != MPI_INFO_NULL) {
        ADIOI_Info_check_and_install_int(fd, users_info, "cb_buffer_size",
                &(fd->hints->cb_buffer_size), myname, error_code);

        /* aligning file realms to certain sizes (e.g. stripe sizes)
         * may benefit I/O performance */
        ADIOI_Info_check_and_install_int(fd, users_info, "romio_cb_fr_alignment",
                &(fd->hints->cb_fr_alignment), myname, error_code);

        /* for collective I/O, try to be smarter about when to do data sieving
         * using a specific threshold for the datatype size/extent
         * (percentage 0-100%) */
        ADIOI_Info_check_and_install_int(fd, users_info, "romio_cb_ds_threshold",
                &(fd->hints->cb_ds_threshold), myname, error_code);

        ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_cb_alltoall",
                &(fd->hints->cb_alltoall), myname, error_code);

        /* new hints for enabling/disabling coll. buffering on reads/writes */
        ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_cb_read",
                &(fd->hints->cb_read), myname, error_code);
        if (fd->hints->cb_read == ADIOI_HINT_DISABLE) {
            /* romio_cb_read overrides no_indep_rw */
            ADIOI_Info_set(info, "romio_no_indep_rw", "false");
            fd->hints->no_indep_rw = ADIOI_HINT_DISABLE;
        }

        ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_cb_write",
                &(fd->hints->cb_write), myname, error_code);
        if (fd->hints->cb_write == ADIOI_HINT_DISABLE) {
            /* romio_cb_write overrides no_indep_rw */
            ADIOI_Info_set(info, "romio_no_indep_rw", "false");
            fd->hints->no_indep_rw = ADIOI_HINT_DISABLE;
        }

        /* enable/disable persistent file realms for collective I/O */
        /* may want to check for no_indep_rdwr hint as well */
        ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_cb_pfr",
                &(fd->hints->cb_pfr), myname, error_code);

        /* file realm assignment types ADIOI_FR_AAR(0),
           ADIOI_FR_FSZ(-1), ADIOI_FR_USR_REALMS(-2), all others specify
           a regular fr size in bytes. probably not the best way... */
        ADIOI_Info_check_and_install_int(fd, users_info, "romio_cb_fr_type",
                &(fd->hints->cb_fr_type), myname, error_code);

        /* Has the user indicated all I/O will be done collectively? */
        ADIOI_Info_check_and_install_true(fd, users_info, "romio_no_indep_rw",
                &(fd->hints->no_indep_rw), myname, error_code);
        if (fd->hints->no_indep_rw == 1) {
            /* if 'no_indep_rw' set, also hint that we will do
             * collective buffering: if we aren't doing independent io,
             * then we have to do collective */
            ADIOI_Info_set(info, "romio_cb_write", "enable");
            ADIOI_Info_set(info, "romio_cb_read", "enable");
            fd->hints->cb_read = 1;
            fd->hints->cb_write = 1;
        }

        /* new hints for enabling/disabling data sieving on reads/writes */
        ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_ds_read",
                &(fd->hints->ds_read), myname, error_code);
        ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_ds_write",
                &(fd->hints->ds_write), myname, error_code);

        if (ok_to_override_cb_nodes) {
            /* MPI_File_open path sets up some data structures that don't
             * get resized in the MPI_File_set_view path, so ignore
             * cb_nodes in the set_view case */
            ADIOI_Info_check_and_install_int(fd, users_info, "cb_nodes",
                    &(fd->hints->cb_nodes), myname, error_code);
            if ((fd->hints->cb_nodes <= 0) || (fd->hints->cb_nodes > nprocs)) {
                /* can't ask for more aggregators than mpi processes, though it
                 * might be interesting to think what such oversubscription
                 * might mean... someday */
                ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", nprocs);
                ADIOI_Info_set(info, "cb_nodes", value);
                fd->hints->cb_nodes = nprocs;
            }
        } /* if (ok_to_override_cb_nodes) */

        ADIOI_Info_check_and_install_int(fd, users_info, "ind_wr_buffer_size",
                &(fd->hints->ind_wr_buffer_size), myname, error_code);
        ADIOI_Info_check_and_install_int(fd, users_info, "ind_rd_buffer_size",
                &(fd->hints->ind_rd_buffer_size), myname, error_code);

        if (fd->hints->cb_config_list == NULL) {
            /* only set cb_config_list if it isn't already set.  Note that
             * since we set it below, this ensures that the cb_config_list hint
             * will be set at file open time either by the user or to the
             * default.  If it has been set already, we ignore it the second
             * time: otherwise we would get an error if someone used the same
             * info value with a cb_config_list value in it in a couple of
             * calls, which would be irritating. */
            ADIOI_Info_check_and_install_str(fd, users_info, "cb_config_list",
                    &(fd->hints->cb_config_list), myname, error_code);
        }
        ADIOI_Info_check_and_install_int(fd, users_info, "romio_min_fdomain_size",
                &(fd->hints->min_fdomain_size), myname, error_code);

        /* Now we use striping unit in common code so we should
           process hints for it. */
        ADIOI_Info_check_and_install_int(fd, users_info, "striping_unit",
                &(fd->hints->striping_unit), myname, error_code);
    }

    /* Begin hint post-processing: some hints take precedence over or conflict
     * with others, or aren't supported by some file systems */

    /* handle cb_config_list default value here; avoids an extra
     * free/alloc and insures it is always set */
    if (fd->hints->cb_config_list == NULL) {
        ADIOI_Info_set(info, "cb_config_list", ADIOI_CB_CONFIG_LIST_DFLT);
        len = (strlen(ADIOI_CB_CONFIG_LIST_DFLT)+1) * sizeof(char);
        fd->hints->cb_config_list = ADIOI_Malloc(len);
        if (fd->hints->cb_config_list == NULL) {
            ADIOI_Free(value);
            *error_code = MPIO_Err_create_code(*error_code, MPIR_ERR_RECOVERABLE,
                                               myname, __LINE__, MPI_ERR_OTHER,
                                               "**nomem2",0);
            return;
        }
        ADIOI_Strncpy(fd->hints->cb_config_list, ADIOI_CB_CONFIG_LIST_DFLT, len);
    }

    /* deferred_open won't be set by callers, but if the user doesn't
     * explicitly disable collective buffering (two-phase) and does hint that
     * io w/o independent io is going on, we'll set this internal hint as a
     * convenience */
    if ( ( (fd->hints->cb_read != ADIOI_HINT_DISABLE)
           && (fd->hints->cb_write != ADIOI_HINT_DISABLE)
           && fd->hints->no_indep_rw ) ) {
        fd->hints->deferred_open = 1;
    } else {
        /* setting romio_no_indep_rw enable and romio_cb_{read,write}
         * disable at the same time doesn't make sense.  honor
         * romio_cb_{read,write} and force the no_indep_rw hint to
         * 'disable' */
        ADIOI_Info_set(info, "romio_no_indep_rw", "false");
        fd->hints->no_indep_rw = 0;
        fd->hints->deferred_open = 0;
    }

    if (ADIO_Feature(fd, ADIO_DATA_SIEVING_WRITES) == 0) {
        /* disable data sieving for fs that do not support file locking */
        ADIOI_Info_get(info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value, &flag);
        if (flag) {
            /* get rid of this value if it is set */
            ADIOI_Info_delete(info, "ind_wr_buffer_size");
        }
        /* note: leave ind_wr_buffer_size alone; used for other cases
         * as well. -- Rob Ross, 04/22/2003 */
        ADIOI_Info_set(info, "romio_ds_write", "disable");
        fd->hints->ds_write = ADIOI_HINT_DISABLE;
    }

    ADIOI_Free(value);
    *error_code = MPI_SUCCESS;
}
/* Driver for a one-sided (fence-epoch) RMA test: allocates a window of
 * nprocs doubles, then runs run_test() over a geometric sweep of sleep
 * times from min_time to max_time (multiplied by iter_time each step).
 *
 * Command-line layout differs per build:
 *   MTCORE build: argv[1] is the helper count ("nh"); times are argv[2..4],
 *                 optional op count NOP is argv[5].
 *   plain build:  times are argv[1..3], optional NOP is argv[4].
 *
 * NOTE(review): `nprocs`, `rank`, `NOP`, `locbuf`, `winbuf`, `win`,
 * `run_test`, `debug_printf`, and D_SLEEP_TIME appear to be file-scope
 * definitions elsewhere in this file — confirm.  `win` must be initialized
 * to MPI_WIN_NULL at file scope for the cleanup path to be safe when the
 * early `goto exit` is taken.
 */
int main(int argc, char *argv[])
{
    int i, errs;
    int min_time = D_SLEEP_TIME, max_time = D_SLEEP_TIME, iter_time = 2, time;
    MPI_Info win_info = MPI_INFO_NULL;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    debug_printf("[%d]init done, %d/%d\n", rank, rank, nprocs);

    if (nprocs < 2) {
        fprintf(stderr, "Please run using at least 2 processes\n");
        goto exit;
    }

#ifdef MTCORE
    /* first argv is nh; time arguments are shifted by one */
    if (argc >= 5) {
        min_time = atoi(argv[2]);
        max_time = atoi(argv[3]);
        iter_time = atoi(argv[4]);
    }
    if (argc >= 6) {
        NOP = atoi(argv[5]);
    }
#else
    if (argc >= 4) {
        min_time = atoi(argv[1]);
        max_time = atoi(argv[2]);
        iter_time = atoi(argv[3]);
    }
    if (argc >= 5) {
        NOP = atoi(argv[4]);
    }
#endif

    /* source operands for the RMA operations; NOTE(review): malloc result
     * is not checked here */
    locbuf = malloc(sizeof(double) * NOP);
    for (i = 0; i < NOP; i++) {
        locbuf[i] = 1.0;
    }

    MPI_Info_create(&win_info);
    MPI_Info_set(win_info, (char *) "epoch_type", (char *) "fence");

    // size in byte
    MPI_Win_allocate(sizeof(double) * nprocs, sizeof(double), win_info,
                     MPI_COMM_WORLD, &winbuf, &win);
    debug_printf("[%d]win_allocate done\n", rank);

    /* geometric sweep over sleep times; stop on first failure, and after a
     * single iteration when time == 0 (would otherwise loop forever) */
    for (time = min_time; time <= max_time; time *= iter_time) {
        for (i = 0; i < nprocs; i++) {
            winbuf[i] = 0.0;
        }
        MPI_Barrier(MPI_COMM_WORLD);
        errs = run_test(time);
        if (errs > 0)
            break;
        if (time == 0)
            break;
    }

  exit:
    /* release whatever was actually created before the jump/fall-through */
    if (win_info != MPI_INFO_NULL)
        MPI_Info_free(&win_info);
    if (win != MPI_WIN_NULL)
        MPI_Win_free(&win);
    if (locbuf)
        free(locbuf);
    MPI_Finalize();
    return 0;
}
int test_file(char *filename, int mynod, int nprocs, char * cb_hosts, char *msg, int verbose) { MPI_Datatype typevec, newtype, t[3]; int *buf, i, b[3], errcode, errors=0; MPI_File fh; MPI_Aint d[3]; MPI_Status status; int SIZE = (STARTING_SIZE/nprocs)*nprocs; MPI_Info info; if (mynod==0 && verbose) fprintf(stderr, "%s\n", msg); buf = (int *) malloc(SIZE*sizeof(int)); if (buf == NULL) { perror("test_file"); MPI_Abort(MPI_COMM_WORLD, -1); } if (cb_hosts != NULL ) { MPI_Info_create(&info); MPI_Info_set(info, "cb_config_list", cb_hosts); } else { info = MPI_INFO_NULL; } MPI_Type_vector(SIZE/nprocs, 1, nprocs, MPI_INT, &typevec); b[0] = b[1] = b[2] = 1; d[0] = 0; d[1] = mynod*sizeof(int); d[2] = SIZE*sizeof(int); t[0] = MPI_LB; t[1] = typevec; t[2] = MPI_UB; MPI_Type_struct(3, b, d, t, &newtype); MPI_Type_commit(&newtype); MPI_Type_free(&typevec); if (!mynod) { if(verbose) fprintf(stderr, "\ntesting noncontiguous in memory, noncontiguous in file using collective I/O\n"); MPI_File_delete(filename, info); } MPI_Barrier(MPI_COMM_WORLD); errcode = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, info, &fh); if (errcode != MPI_SUCCESS) { handle_error(errcode, "MPI_File_open"); } MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", info); for (i=0; i<SIZE; i++) buf[i] = SEEDER(mynod,i,SIZE); errcode = MPI_File_write_all(fh, buf, 1, newtype, &status); if (errcode != MPI_SUCCESS) { handle_error(errcode, "nc mem - nc file: MPI_File_write_all"); } MPI_Barrier(MPI_COMM_WORLD); for (i=0; i<SIZE; i++) buf[i] = -1; errcode = MPI_File_read_at_all(fh, 0, buf, 1, newtype, &status); if (errcode != MPI_SUCCESS) { handle_error(errcode, "nc mem - nc file: MPI_File_read_at_all"); } /* the verification for N compute nodes is tricky. Say we have 3 * processors. * process 0 sees: 0 -1 -1 3 -1 -1 ... * process 1 sees: -1 34 -1 -1 37 -1 ... * process 2 sees: -1 -1 68 -1 -1 71 ... 
*/ /* verify those leading -1s exist if they should */ for (i=0; i<mynod; i++ ) { if ( buf[i] != -1 ) { if(verbose) fprintf(stderr, "Process %d: buf is %d, should be -1\n", mynod, buf[i]); errors++; } } /* now the modulo games are hairy. processor 0 sees real data in the 0th, * 3rd, 6th... elements of the buffer (assuming nprocs==3 ). proc 1 sees * the data in 1st, 4th, 7th..., and proc 2 sees it in 2nd, 5th, 8th */ for(/* 'i' set in above loop */; i<SIZE; i++) { if ( ((i-mynod)%nprocs) && buf[i] != -1) { if(verbose) fprintf(stderr, "Process %d: buf %d is %d, should be -1\n", mynod, i, buf[i]); errors++; } if ( !((i-mynod)%nprocs) && buf[i] != SEEDER(mynod,i,SIZE) ) { if(verbose) fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", mynod, i, buf[i], SEEDER(mynod,i,SIZE)); errors++; } } MPI_File_close(&fh); MPI_Barrier(MPI_COMM_WORLD); if (!mynod) { if(verbose) fprintf(stderr, "\ntesting noncontiguous in memory, contiguous in file using collective I/O\n"); MPI_File_delete(filename, info); } MPI_Barrier(MPI_COMM_WORLD); MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, info, &fh); for (i=0; i<SIZE; i++) buf[i] = SEEDER(mynod,i,SIZE); errcode = MPI_File_write_at_all(fh, mynod*(SIZE/nprocs)*sizeof(int), buf, 1, newtype, &status); if (errcode != MPI_SUCCESS) handle_error(errcode, "nc mem - c file: MPI_File_write_at_all"); MPI_Barrier(MPI_COMM_WORLD); for (i=0; i<SIZE; i++) buf[i] = -1; errcode = MPI_File_read_at_all(fh, mynod*(SIZE/nprocs)*sizeof(int), buf, 1, newtype, &status); if (errcode != MPI_SUCCESS) handle_error(errcode, "nc mem - c file: MPI_File_read_at_all"); /* just like as above */ for (i=0; i<mynod; i++ ) { if ( buf[i] != -1 ) { if(verbose) fprintf(stderr, "Process %d: buf is %d, should be -1\n", mynod, buf[i]); errors++; } } for(/* i set in above loop */; i<SIZE; i++) { if ( ((i-mynod)%nprocs) && buf[i] != -1) { if(verbose) fprintf(stderr, "Process %d: buf %d is %d, should be -1\n", mynod, i, buf[i]); errors++; } if ( 
!((i-mynod)%nprocs) && buf[i] != SEEDER(mynod,i,SIZE)) { if(verbose) fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", mynod, i, buf[i], SEEDER(mynod,i,SIZE) ); errors++; } } MPI_File_close(&fh); MPI_Barrier(MPI_COMM_WORLD); if (!mynod) { if(verbose) fprintf(stderr, "\ntesting contiguous in memory, noncontiguous in file using collective I/O\n"); MPI_File_delete(filename, info); } MPI_Barrier(MPI_COMM_WORLD); MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, info, &fh); MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", info); for (i=0; i<SIZE; i++) buf[i] = SEEDER(mynod, i, SIZE); errcode = MPI_File_write_all(fh, buf, SIZE, MPI_INT, &status); if (errcode != MPI_SUCCESS) handle_error(errcode, "c mem - nc file: MPI_File_write_all"); MPI_Barrier(MPI_COMM_WORLD); for (i=0; i<SIZE; i++) buf[i] = -1; errcode = MPI_File_read_at_all(fh, 0, buf, SIZE, MPI_INT, &status); if (errcode != MPI_SUCCESS) handle_error(errcode, "c mem - nc file: MPI_File_read_at_all"); /* same crazy checking */ for (i=0; i<SIZE; i++) { if (buf[i] != SEEDER(mynod, i, SIZE)) { if(verbose) fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", mynod, i, buf[i], SEEDER(mynod, i, SIZE)); errors++; } } MPI_File_close(&fh); MPI_Type_free(&newtype); free(buf); if (info != MPI_INFO_NULL) MPI_Info_free(&info); return errors; }
/*
 * ADIOI_PFS_SetInfo - process MPI-IO hints for the Intel PFS file system.
 *
 * Called both at open time (fd->info not yet created) and later via
 * MPI_File_set_info (fd->fd_sys already valid).  At open time it validates
 * the striping hints collectively, lets rank 0 apply them with fcntl, and
 * records the "pfs_svr_buf" hint; after open, only "pfs_svr_buf" can still
 * be toggled on the live descriptor.
 *
 * Collective-ordering note: every rank calls the same sequence of MPI_Bcast
 * operations (one per hint that the user actually set), so hints must be set
 * consistently on all ranks or the Bcasts mismatch; the explicit value
 * comparison below then aborts with a diagnostic.
 */
void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    char *value, *value_in_fd;
    int flag, tmp_val, str_factor=-1, str_unit=-1, start_iodev=-1;
    struct sattr attr;          /* PFS striping attributes (platform struct) */
    int err, myrank, fd_sys, perm, amode, old_mask;

    if (!(fd->info)) {
        /* This must be part of the open call. can set striping parameters
           if necessary. */
        MPI_Info_create(&(fd->info));

        /* has user specified striping or server buffering parameters
           and do they have the same value on all processes? */
        if (users_info != MPI_INFO_NULL) {
            value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));

            MPI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
                         value, &flag);
            if (flag) {
                str_factor=atoi(value);
                tmp_val = str_factor;
                /* rank 0 broadcasts its value; every rank compares it
                 * against its own to enforce hint consistency */
                MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
                if (tmp_val != str_factor) {
                    FPRINTF(stderr, "ADIOI_PFS_SetInfo: the value for key \"striping_factor\" must be the same on all processes\n");
                    MPI_Abort(MPI_COMM_WORLD, 1);
                }
            }

            MPI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
                         value, &flag);
            if (flag) {
                str_unit=atoi(value);
                tmp_val = str_unit;
                MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
                if (tmp_val != str_unit) {
                    FPRINTF(stderr, "ADIOI_PFS_SetInfo: the value for key \"striping_unit\" must be the same on all processes\n");
                    MPI_Abort(MPI_COMM_WORLD, 1);
                }
            }

            MPI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL,
                         value, &flag);
            if (flag) {
                start_iodev=atoi(value);
                tmp_val = start_iodev;
                MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
                if (tmp_val != start_iodev) {
                    FPRINTF(stderr, "ADIOI_PFS_SetInfo: the value for key \"start_iodevice\" must be the same on all processes\n");
                    MPI_Abort(MPI_COMM_WORLD, 1);
                }
            }

            /* if user has specified striping info, process 0 tries to set it */
            if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) {
                MPI_Comm_rank(fd->comm, &myrank);
                if (!myrank) {
                    if (fd->perm == ADIO_PERM_NULL) {
                        /* derive default permissions from the umask */
                        old_mask = umask(022);
                        umask(old_mask);
                        perm = old_mask ^ 0666;
                    }
                    else perm = fd->perm;

                    /* translate ADIO access mode into open(2) flags */
                    amode = 0;
                    if (fd->access_mode & ADIO_CREATE)
                        amode = amode | O_CREAT;
                    if (fd->access_mode & ADIO_RDONLY)
                        amode = amode | O_RDONLY;
                    if (fd->access_mode & ADIO_WRONLY)
                        amode = amode | O_WRONLY;
                    if (fd->access_mode & ADIO_RDWR)
                        amode = amode | O_RDWR;
                    if (fd->access_mode & ADIO_EXCL)
                        amode = amode | O_EXCL;

                    /* NOTE(review): open() result is not checked here; if it
                     * fails, the fcntl below fails too and striping is simply
                     * not applied — confirm this best-effort behavior is
                     * intended. */
                    fd_sys = open(fd->filename, amode, perm);
                    err = fcntl(fd_sys, F_GETSATTR, &attr);
                    if (!err) {
                        /* only shrink/adjust within what the FS reports */
                        if (str_unit > 0) attr.s_sunitsize = str_unit;
                        if ((start_iodev >= 0) &&
                            (start_iodev < attr.s_sfactor))
                            attr.s_start_sdir = start_iodev;
                        if ((str_factor > 0) &&
                            (str_factor < attr.s_sfactor))
                            attr.s_sfactor = str_factor;
                        err = fcntl(fd_sys, F_SETSATTR, &attr);
                    }
                    close(fd_sys);
                }
                /* everyone waits until rank 0 has (tried to) set striping */
                MPI_Barrier(fd->comm);
            }

            /* Has user asked for pfs server buffering to be turned on?
               If so, mark it as true in fd->info and turn it on in
               ADIOI_PFS_Open after the file is opened */
            MPI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL,
                         value, &flag);
            if (flag && (!strcmp(value, "true")))
                MPI_Info_set(fd->info, "pfs_svr_buf", "true");
            else MPI_Info_set(fd->info, "pfs_svr_buf", "false");

            ADIOI_Free(value);
        }
        else MPI_Info_set(fd->info, "pfs_svr_buf", "false");

        /* set the values for collective I/O and data sieving parameters */
        ADIOI_GEN_SetInfo(fd, users_info, error_code);
    }

    else {
        /* The file has been opened previously and fd->fd_sys is a valid
           file descriptor. cannot set striping parameters now. */

        /* set the values for collective I/O and data sieving parameters */
        ADIOI_GEN_SetInfo(fd, users_info, error_code);

        /* has user specified value for pfs_svr_buf? */
        if (users_info != MPI_INFO_NULL) {
            value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
            MPI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL,
                         value, &flag);
            if (flag && (!strcmp(value, "true") || !strcmp(value, "false"))) {
                /* only toggle the live descriptor when the requested value
                 * differs from what fd->info already records */
                value_in_fd = (char *)
                    ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
                MPI_Info_get(fd->info, "pfs_svr_buf", MPI_MAX_INFO_VAL,
                             value_in_fd, &flag);
                if (strcmp(value, value_in_fd)) {
                    if (!strcmp(value, "true")) {
                        err = fcntl(fd->fd_sys, F_PFS_SVR_BUF, TRUE);
                        /* update the recorded hint only on success */
                        if (!err)
                            MPI_Info_set(fd->info, "pfs_svr_buf", "true");
                    }
                    else {
                        err = fcntl(fd->fd_sys, F_PFS_SVR_BUF, FALSE);
                        if (!err)
                            MPI_Info_set(fd->info, "pfs_svr_buf", "false");
                    }
                }
                ADIOI_Free(value_in_fd);
            }
            ADIOI_Free(value);
        }
    }

    *error_code = MPI_SUCCESS;
}
int main(int argc, char **argv) { int *writebuf, *readbuf, i, mynod, nprocs, len, err; char *filename; MPI_Datatype newtype; MPI_File fh; MPI_Status status; MPI_Info info; MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD, &mynod); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); /* process 0 takes the file name as a command-line argument and broadcasts it to other processes */ if (!mynod) { i = 1; while ((i < argc) && strcmp("-fname", *argv)) { i++; argv++; } if (i >= argc) { printf("\n*# Usage: atmoicity <mpiparameter> -- -fname filename\n\n"); MPI_Abort(MPI_COMM_WORLD, 1); } argv++; len = strlen(*argv); filename = (char *) malloc(len+1); strcpy(filename, *argv); MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(filename, len+1, MPI_CHAR, 0, MPI_COMM_WORLD); } else { MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD); filename = (char *) malloc(len+1); MPI_Bcast(filename, len+1, MPI_CHAR, 0, MPI_COMM_WORLD); } writebuf = (int *) malloc(BUFSIZE*sizeof(int)); readbuf = (int *) malloc(BUFSIZE*sizeof(int)); /* test atomicity of contiguous accesses */ /* initialize file to all zeros */ if (!mynod) { MPI_File_delete(filename, MPI_INFO_NULL); MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh); for (i=0; i<BUFSIZE; i++) writebuf[i] = 0; MPI_File_write(fh, writebuf, BUFSIZE, MPI_INT, &status); MPI_File_close(&fh); printf("\ntesting contiguous accesses\n"); fflush(stdout); } MPI_Barrier(MPI_COMM_WORLD); for (i=0; i<BUFSIZE; i++) writebuf[i] = 10; for (i=0; i<BUFSIZE; i++) readbuf[i] = 20; MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh); /* set atomicity to true */ err = MPI_File_set_atomicity(fh, 1); if (err != MPI_SUCCESS) { printf("Atomic mode not supported on this file system.\n"); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Barrier(MPI_COMM_WORLD); /* process 0 writes and others concurrently read. In atomic mode, the data read must be either all old values or all new values; nothing in between. 
*/ if (!mynod) MPI_File_write(fh, writebuf, BUFSIZE, MPI_INT, &status); else { err = MPI_File_read(fh, readbuf, BUFSIZE, MPI_INT, &status); if (err == MPI_SUCCESS) { if (readbuf[0] == 0) { /* the rest must also be 0 */ for (i=1; i<BUFSIZE; i++) if (readbuf[i] != 0) { printf("Process %d: readbuf[%d] is %d, should be 0\n", mynod, i, readbuf[i]); MPI_Abort(MPI_COMM_WORLD, 1); } } else if (readbuf[0] == 10) { /* the rest must also be 10 */ for (i=1; i<BUFSIZE; i++) if (readbuf[i] != 10) { printf("Process %d: readbuf[%d] is %d, should be 10\n", mynod, i, readbuf[i]); MPI_Abort(MPI_COMM_WORLD, 1); } } else printf("Process %d: readbuf[0] is %d, should be either 0 or 10\n", mynod, readbuf[0]); } } MPI_File_close(&fh); MPI_Barrier(MPI_COMM_WORLD); /* repeat the same test with a noncontiguous filetype */ MPI_Type_vector(BUFSIZE, 1, 2, MPI_INT, &newtype); MPI_Type_commit(&newtype); MPI_Info_create(&info); /* I am setting these info values for testing purposes only. It is better to use the default values in practice. 
*/ MPI_Info_set(info, "ind_rd_buffer_size", "1209"); MPI_Info_set(info, "ind_wr_buffer_size", "1107"); if (!mynod) { MPI_File_delete(filename, MPI_INFO_NULL); MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, info, &fh); for (i=0; i<BUFSIZE; i++) writebuf[i] = 0; MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", info); MPI_File_write(fh, writebuf, BUFSIZE, MPI_INT, &status); MPI_File_close(&fh); printf("\ntesting noncontiguous accesses\n"); fflush(stdout); } MPI_Barrier(MPI_COMM_WORLD); for (i=0; i<BUFSIZE; i++) writebuf[i] = 10; for (i=0; i<BUFSIZE; i++) readbuf[i] = 20; MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, info, &fh); MPI_File_set_atomicity(fh, 1); MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", info); MPI_Barrier(MPI_COMM_WORLD); if (!mynod) MPI_File_write(fh, writebuf, BUFSIZE, MPI_INT, &status); else { err = MPI_File_read(fh, readbuf, BUFSIZE, MPI_INT, &status); if (err == MPI_SUCCESS) { if (readbuf[0] == 0) { for (i=1; i<BUFSIZE; i++) if (readbuf[i] != 0) { printf("Process %d: readbuf[%d] is %d, should be 0\n", mynod, i, readbuf[i]); MPI_Abort(MPI_COMM_WORLD, 1); } } else if (readbuf[0] == 10) { for (i=1; i<BUFSIZE; i++) if (readbuf[i] != 10) { printf("Process %d: readbuf[%d] is %d, should be 10\n", mynod, i, readbuf[i]); MPI_Abort(MPI_COMM_WORLD, 1); } } else printf("Process %d: readbuf[0] is %d, should be either 0 or 10\n", mynod, readbuf[0]); } } MPI_File_close(&fh); MPI_Barrier(MPI_COMM_WORLD); MPI_Type_free(&newtype); MPI_Info_free(&info); free(writebuf); free(readbuf); free(filename); MPI_Finalize(); return 0; }
/*
 * ADIOI_PIOFS_SetInfo - process MPI-IO hints for the IBM PIOFS file system.
 *
 * At open time (fd->info still MPI_INFO_NULL) it collectively validates the
 * user's striping hints, then has rank 0 query the number of I/O servers
 * and pre-create the file with the requested striping via piofsioctl.
 * Finally it always delegates the generic collective-I/O / data-sieving
 * hints to ADIOI_GEN_SetInfo.
 *
 * Collective-ordering note: each hint that the user set triggers one
 * MPI_Bcast on every rank, so the hints must be set identically everywhere;
 * the comparison below aborts otherwise.
 */
void ADIOI_PIOFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    piofs_create_t piofs_create;   /* args for the PIOFS_CREATE ioctl */
    piofs_statfs_t piofs_statfs;   /* args for the PIOFS_STATFS ioctl */
    char *value, *path, *slash;
    int flag, tmp_val, str_factor=-1, str_unit=-1, start_iodev=-1;
    int err, myrank, perm, old_mask, nioservers;

    if ((fd->info) == MPI_INFO_NULL) {
        /* This must be part of the open call. can set striping parameters
           if necessary. */
        MPI_Info_create(&(fd->info));

        /* has user specified striping parameters
           and do they have the same value on all processes? */
        if (users_info != MPI_INFO_NULL) {
            value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));

            MPI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
                         value, &flag);
            if (flag) {
                str_factor=atoi(value);
                tmp_val = str_factor;
                /* rank 0 broadcasts its value; all ranks compare against
                 * their own to enforce hint consistency */
                MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
                if (tmp_val != str_factor) {
                    FPRINTF(stderr, "ADIOI_PIOFS_SetInfo: the value for key \"striping_factor\" must be the same on all processes\n");
                    MPI_Abort(MPI_COMM_WORLD, 1);
                }
            }

            MPI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
                         value, &flag);
            if (flag) {
                str_unit=atoi(value);
                tmp_val = str_unit;
                MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
                if (tmp_val != str_unit) {
                    FPRINTF(stderr, "ADIOI_PIOFS_SetInfo: the value for key \"striping_unit\" must be the same on all processes\n");
                    MPI_Abort(MPI_COMM_WORLD, 1);
                }
            }

            MPI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL,
                         value, &flag);
            if (flag) {
                start_iodev=atoi(value);
                tmp_val = start_iodev;
                MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
                if (tmp_val != start_iodev) {
                    FPRINTF(stderr, "ADIOI_PIOFS_SetInfo: the value for key \"start_iodevice\" must be the same on all processes\n");
                    MPI_Abort(MPI_COMM_WORLD, 1);
                }
            }

            ADIOI_Free(value);

            /* if user has specified striping info, process 0 tries to set it */
            if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) {
                MPI_Comm_rank(fd->comm, &myrank);
                if (!myrank) {
                    if (fd->perm == ADIO_PERM_NULL) {
                        /* derive default permissions from the umask */
                        old_mask = umask(022);
                        umask(old_mask);
                        perm = old_mask ^ 0666;
                    }
                    else perm = fd->perm;

                    /* to find out the number of I/O servers, I need
                       the path to the directory containing the file */
                    path = strdup(fd->filename);
                    slash = strrchr(path, '/');
                    if (!slash) strcpy(path, ".");
                    else {
                        /* keep "/" for a file in the root directory,
                         * otherwise truncate at the last slash */
                        if (slash == path) *(path + 1) = '\0';
                        else *slash = '\0';
                    }

                    /* NOTE(review): unbounded strcpy into the fixed-size
                     * ioctl name fields here and below — presumed safe for
                     * PIOFS path limits, but confirm against the
                     * piofs_statfs_t/piofs_create_t declarations. */
                    strcpy(piofs_statfs.name, path);
                    err = piofsioctl(0, PIOFS_STATFS, &piofs_statfs);
                    /* nioservers == -1 signals "unknown" on ioctl failure */
                    nioservers = (err) ? -1 : piofs_statfs.f_nodes;

                    free(path);

                    /* clamp the requested striping to what is available */
                    str_factor = ADIOI_MIN(nioservers, str_factor);
                    if (start_iodev >= nioservers) start_iodev = -1;

                    /* -1 fields mean "use the PIOFS default" */
                    strcpy(piofs_create.name, fd->filename);
                    piofs_create.bsu = (str_unit > 0) ? str_unit : -1;
                    piofs_create.cells = (str_factor > 0) ? str_factor : -1;
                    piofs_create.permissions = perm;
                    piofs_create.base_node = (start_iodev >= 0) ?
                        start_iodev : -1;
                    piofs_create.flags = 0;

                    err = piofsioctl(0, PIOFS_CREATE, &piofs_create);
                }
                /* everyone waits until rank 0 has (tried to) create the
                 * file with the requested striping */
                MPI_Barrier(fd->comm);
            }
        }
    }

    /* set the values for collective I/O and data sieving parameters */
    ADIOI_GEN_SetInfo(fd, users_info, error_code);

    *error_code = MPI_SUCCESS;
}
int main(int argc, char **argv) { int rank, nproc; MPI_Info info_in, info_out; int errors = 0, all_errors = 0; MPI_Win win; void *base; char invalid_key[] = "invalid_test_key"; char buf[MPI_MAX_INFO_VAL]; int flag; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nproc); /* Test#1: setting a valid key at window-create time */ MPI_Info_create(&info_in); MPI_Info_set(info_in, (char *)"no_locks", (char *)"true"); MPI_Win_allocate(sizeof(int), sizeof(int), info_in, MPI_COMM_WORLD, &base, &win); MPI_Win_get_info(win, &info_out); MPI_Info_get(info_out, (char *)"no_locks", MPI_MAX_INFO_VAL, buf, &flag); if (!flag || strncmp(buf, "true", strlen("true")) != 0) { if (!flag) printf("%d: no_locks is not defined\n", rank); else printf("%d: no_locks = %s, expected true\n", rank, buf); errors++; } MPI_Info_free(&info_in); MPI_Info_free(&info_out); /* We create a new window with no info argument for the next text to ensure that we have the * default settings */ MPI_Win_free(&win); MPI_Win_allocate(sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &base, &win); /* Test#2: setting and getting invalid key */ MPI_Info_create(&info_in); MPI_Info_set(info_in, invalid_key, (char *)"true"); MPI_Win_set_info(win, info_in); MPI_Win_get_info(win, &info_out); MPI_Info_get(info_out, invalid_key, MPI_MAX_INFO_VAL, buf, &flag); #ifdef USE_STRICT_MPI /* Check if our invalid key was ignored. Note, this check's MPICH's * behavior, but this behavior may not be required for a standard * conforming MPI implementation. 
*/ if (flag) { printf("%d: %s was not ignored\n", rank, invalid_key); errors++; } #endif MPI_Info_free(&info_in); MPI_Info_free(&info_out); /* Test#3: setting info key "no_lock" to false and getting the key */ MPI_Info_create(&info_in); MPI_Info_set(info_in, (char *)"no_locks", (char *)"false"); MPI_Win_set_info(win, info_in); MPI_Win_get_info(win, &info_out); MPI_Info_get(info_out, (char *)"no_locks", MPI_MAX_INFO_VAL, buf, &flag); if (!flag || strncmp(buf, "false", strlen("false")) != 0) { if (!flag) printf("%d: no_locks is not defined\n", rank); else printf("%d: no_locks = %s, expected false\n", rank, buf); errors++; } if (flag && VERBOSE) printf("%d: no_locks = %s\n", rank, buf); MPI_Info_free(&info_in); MPI_Info_free(&info_out); /* Test#4: setting info key "no_lock" to true and getting the key */ MPI_Info_create(&info_in); MPI_Info_set(info_in, (char *)"no_locks", (char *)"true"); MPI_Win_set_info(win, info_in); MPI_Win_get_info(win, &info_out); MPI_Info_get(info_out, (char *)"no_locks", MPI_MAX_INFO_VAL, buf, &flag); if (!flag || strncmp(buf, "true", strlen("true")) != 0) { if (!flag) printf("%d: no_locks is not defined\n", rank); else printf("%d: no_locks = %s, expected true\n", rank, buf); errors++; } if (flag && VERBOSE) printf("%d: no_locks = %s\n", rank, buf); MPI_Info_free(&info_in); MPI_Info_free(&info_out); /* Test#4: getting other info keys */ MPI_Win_get_info(win, &info_out); MPI_Info_get(info_out, (char *)"accumulate_ordering", MPI_MAX_INFO_VAL, buf, &flag); if (flag && VERBOSE) printf("%d: accumulate_ordering = %s\n", rank, buf); MPI_Info_get(info_out, (char *)"accumulate_ops", MPI_MAX_INFO_VAL, buf, &flag); if (flag && VERBOSE) printf("%d: accumulate_ops = %s\n", rank, buf); MPI_Info_get(info_out, (char *)"same_size", MPI_MAX_INFO_VAL, buf, &flag); if (flag && VERBOSE) printf("%d: same_size = %s\n", rank, buf); MPI_Info_get(info_out, (char *)"alloc_shm", MPI_MAX_INFO_VAL, buf, &flag); if (flag && VERBOSE) printf("%d: alloc_shm = %s\n", rank, 
buf); MPI_Info_free(&info_out); MPI_Win_free(&win); MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); if (rank == 0 && all_errors == 0) printf(" No Errors\n"); MPI_Finalize(); return 0; }