void getOpSamples(int compIters, int innerIters, int testIters, double** samples, double* sendBuf, double* recvBuf, int messSize) { double start, diff, compResult; int innerx, testx; *samples = (double*)malloc(testIters*sizeof(double)); for ( testx = 0; testx < testIters; testx++ ) { compResult = doComp(compIters, sendBuf, recvBuf); generic_barrier(MPI_COMM_WORLD); MPI_Allreduce(sendBuf, recvBuf, messSize, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); start = MPI_Wtime(); for ( innerx = 0; innerx < innerIters; innerx++ ) { MPI_Allreduce(sendBuf, recvBuf, messSize, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); compResult = doComp(compIters, sendBuf, recvBuf); } diff = MPI_Wtime() - start; MPI_Reduce(&diff, &((*samples)[testx]), 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); } return; }
/*
 * Time a blocking ping-pong exchange between paired ranks.
 *
 * The lower rank of each pair sends first and waits for the echo; the
 * higher rank mirrors it.  A fixed warm-up phase runs before the timed
 * loop so connection setup cost is excluded.  Returns the raw elapsed
 * wall time of the timed loop (NOT divided by the iteration count);
 * inactive ranks return 0.0.
 */
double runLatencyTest ( int bufsize, int iters, MPI_Comm * activeComm )
{
  const int warmupIters = 1000;   /* untimed round trips before measuring */
  int idx, partner;
  double begin = 0.0;
  double elapsed = 0.0;
  MPI_Status status;
  char *msgBuf = NULL;

  if ( bufsize > 0 )
  {
    msgBuf = ( char * ) malloc ( bufsize );
    if ( msgBuf == NULL )
      prestaAbort ( "Failed to allocate latency buffer.\n" );
  }

  partner = getTargetRank ( rank, argStruct.procsPerNode,
                            argStruct.useNearestRank );

  if ( isActiveProc ( activeComm ) )
  {
    /* Ensure pair communication has been initialized */
    for ( idx = 0; idx < warmupIters; idx++ )
    {
      if ( rank < partner )
      {
        MPI_Send ( msgBuf, bufsize, MPI_BYTE, partner, 0, MPI_COMM_WORLD );
        MPI_Recv ( msgBuf, bufsize, MPI_BYTE, partner, 0, MPI_COMM_WORLD,
                   &status );
      }
      else
      {
        MPI_Recv ( msgBuf, bufsize, MPI_BYTE, partner, 0, MPI_COMM_WORLD,
                   &status );
        MPI_Send ( msgBuf, bufsize, MPI_BYTE, partner, 0, MPI_COMM_WORLD );
      }
    }

    generic_barrier ( *activeComm );
    generic_barrier ( *activeComm );

    /* Time operation loop; the rank comparison stays outside the loop so
       no per-iteration branching is added to the measured region. */
    if ( rank < partner )
    {
      begin = MPI_Wtime ( );
      for ( idx = 0; idx < iters; idx++ )
      {
        MPI_Send ( msgBuf, bufsize, MPI_BYTE, partner, 0, MPI_COMM_WORLD );
        MPI_Recv ( msgBuf, bufsize, MPI_BYTE, partner, 0, MPI_COMM_WORLD,
                   &status );
      }
    }
    else
    {
      begin = MPI_Wtime ( );
      for ( idx = 0; idx < iters; idx++ )
      {
        MPI_Recv ( msgBuf, bufsize, MPI_BYTE, partner, 0, MPI_COMM_WORLD,
                   &status );
        MPI_Send ( msgBuf, bufsize, MPI_BYTE, partner, 0, MPI_COMM_WORLD );
      }
    }

    if ( argStruct.useBarrier )
      generic_barrier ( *activeComm );

    elapsed = MPI_Wtime ( ) - begin;
  }

  MPI_Barrier ( MPI_COMM_WORLD );

  free ( msgBuf );   /* free(NULL) is a no-op */

  return elapsed;
}
/*
 * Time a bidirectional exchange between paired ranks using nonblocking
 * operations.
 *
 * Two code paths exist: with FINAL_WAITALL defined, all receives and
 * sends are posted up front and completed with MPI_Waitall; otherwise
 * each iteration posts an Isend, blocks in MPI_Recv, then waits on the
 * send.  Returns local bandwidth (bufsize*iters/elapsed, bytes/sec)
 * when argStruct.sumLocalBW is set and timing succeeded, otherwise the
 * raw elapsed time (0.0 on inactive ranks or allocation failure).
 *
 * Fixes vs. the original:
 *  - the allocation-failure early return leaked whichever buffers had
 *    already been allocated; all are now released (free(NULL) is safe);
 *  - MPI_Waitall waited on argStruct.iters requests although only
 *    `iters` were posted — waiting on uninitialized MPI_Request objects
 *    is undefined behavior whenever the two counts differ.  The request
 *    arrays are still sized by argStruct.iters, which is assumed to be
 *    an upper bound on `iters` — TODO confirm against the caller.
 */
double runNonblockBicomTest ( int bufsize, int iters, MPI_Comm * activeComm )
{
  int i, currtarg;
  double diff = 0.0;
  double start;
  MPI_Status stat;
  char *sendBuf = NULL, *recvBuf = NULL;
  MPI_Request *sendRequests = NULL, *recvRequests = NULL;
  MPI_Status *sendStatuses = NULL, *recvStatuses = NULL;

  currtarg = getTargetRank ( rank, argStruct.procsPerNode,
                             argStruct.useNearestRank );

  sendBuf = ( char * ) malloc ( bufsize );
  recvBuf = ( char * ) malloc ( bufsize );
  sendRequests = malloc ( sizeof ( MPI_Request ) * argStruct.iters );
  recvRequests = malloc ( sizeof ( MPI_Request ) * argStruct.iters );
  sendStatuses = malloc ( sizeof ( MPI_Status ) * argStruct.iters );
  recvStatuses = malloc ( sizeof ( MPI_Status ) * argStruct.iters );

  if ( sendBuf == NULL || recvBuf == NULL ||
       sendRequests == NULL || recvRequests == NULL ||
       sendStatuses == NULL || recvStatuses == NULL )
  {
    /* Release whatever was allocated before bailing out (the original
       returned here without freeing, leaking the partial set). */
    free ( sendBuf );
    free ( recvBuf );
    free ( sendRequests );
    free ( recvRequests );
    free ( sendStatuses );
    free ( recvStatuses );
    return 0;
  }

  memset ( sendBuf, 0, bufsize );
  memset ( recvBuf, 0, bufsize );

  if ( isActiveProc ( activeComm ) )
  {
    /* Ensure communication paths have been initialized */
    MPI_Irecv ( recvBuf, bufsize, MPI_BYTE, currtarg, 0, MPI_COMM_WORLD,
                recvRequests );
    MPI_Isend ( sendBuf, bufsize, MPI_BYTE, currtarg, 0, MPI_COMM_WORLD,
                sendRequests );
    MPI_Wait ( recvRequests, recvStatuses );
    MPI_Wait ( sendRequests, sendStatuses );

    generic_barrier ( *activeComm );
    generic_barrier ( *activeComm );

    /* Time operation loop */
    start = MPI_Wtime ( );

#ifdef FINAL_WAITALL
    for ( i = 0; i < iters; i++ )
    {
      MPI_Irecv ( recvBuf, bufsize, MPI_BYTE, currtarg, MPI_ANY_TAG,
                  MPI_COMM_WORLD, &recvRequests[i] );
    }
    for ( i = 0; i < iters; i++ )
    {
      MPI_Isend ( sendBuf, bufsize, MPI_BYTE, currtarg, i,
                  MPI_COMM_WORLD, &sendRequests[i] );
    }
    /* Wait only on the `iters` requests actually posted, not
       argStruct.iters, which may exceed what was initialized. */
    MPI_Waitall ( iters, sendRequests, sendStatuses );
    MPI_Waitall ( iters, recvRequests, recvStatuses );
#else
    for ( i = 0; i < iters; i++ )
    {
      MPI_Isend ( sendBuf, bufsize, MPI_BYTE, currtarg, i,
                  MPI_COMM_WORLD, &sendRequests[0] );
      MPI_Recv ( recvBuf, bufsize, MPI_BYTE, currtarg, MPI_ANY_TAG,
                 MPI_COMM_WORLD, &stat );
      MPI_Wait ( sendRequests, sendStatuses );
    }
#endif

    if ( argStruct.useBarrier )
      generic_barrier ( *activeComm );

    diff = MPI_Wtime ( ) - start;
  }

  free ( sendBuf );
  free ( recvBuf );
  free ( sendRequests );
  free ( recvRequests );
  free ( sendStatuses );
  free ( recvStatuses );

  MPI_Barrier ( MPI_COMM_WORLD );

  if ( diff > 0 && argStruct.sumLocalBW == 1 )
    return ( ( double ) bufsize * ( double ) iters ) / diff;
  else
    return diff;
}
/*
 * Time a simultaneous bidirectional exchange between paired ranks using
 * MPI_Sendrecv.
 *
 * Optionally (presta_check_data == 1) fills the send buffer with known
 * values each iteration and validates the received data, accumulating
 * any mismatches into presta_data_err_total.  Returns local bandwidth
 * (bufsize*iters/elapsed, bytes/sec) when argStruct.sumLocalBW is set
 * and timing succeeded, otherwise the raw elapsed time (0.0 on inactive
 * ranks).
 *
 * Fixes vs. the original:
 *  - all mallocs are now checked (the buffers were used unconditionally);
 *  - validate_buf was allocated AFTER the MPI_Wtime() start stamp,
 *    charging the allocation to the timed region — it is now allocated
 *    before timing begins;
 *  - err_count is a long long but was printed with %d (undefined
 *    behavior in the varargs call) — now %lld.
 */
double runBicomTest ( int bufsize, int iters, MPI_Comm * activeComm )
{
  int i, currtarg;
  double start, diff;
  char *sendbuf, *recvbuf, *validate_buf = NULL;
  MPI_Status stat;
  long long err_count = 0;

  currtarg = getTargetRank ( rank, argStruct.procsPerNode,
                             argStruct.useNearestRank );

  diff = 0.0;

  if ( currtarg != -1 && isActiveProc ( activeComm ) )
  {
    sendbuf = ( char * ) malloc ( bufsize );
    recvbuf = ( char * ) malloc ( bufsize );
    if ( sendbuf == NULL || recvbuf == NULL )
      prestaAbort ( "Failed to allocate bidirectional buffers.\n" );

    memset ( sendbuf, 0, bufsize );
    memset ( recvbuf, 0, bufsize );

    /* Allocate the validation copy outside the timed region so the
       malloc cost is not charged to the measurement. */
    if ( presta_check_data == 1 )
    {
      validate_buf = malloc ( bufsize );
      if ( validate_buf == NULL )
        prestaAbort ( "Failed to allocate validation buffer.\n" );
    }

    /* Ensure communication paths have been initialized */
    MPI_Sendrecv ( sendbuf, bufsize, MPI_BYTE, currtarg, 0,
                   recvbuf, bufsize, MPI_BYTE, currtarg, 0,
                   MPI_COMM_WORLD, &stat );

    generic_barrier ( *activeComm );
    generic_barrier ( *activeComm );

    /* Time operation loop */
    start = MPI_Wtime ( );

    for ( i = 0; i < iters; i++ )
    {
      if ( presta_check_data == 1 )
      {
        set_data_values ( bufsize, sendbuf );
        memcpy ( validate_buf, sendbuf, bufsize );
      }

      MPI_Sendrecv ( sendbuf, bufsize, MPI_BYTE, currtarg, 0,
                     recvbuf, bufsize, MPI_BYTE, currtarg, 0,
                     MPI_COMM_WORLD, &stat );

      if ( presta_check_data == 1 )
      {
        err_count = check_data_values ( bufsize, recvbuf, validate_buf,
                                        MPI_BYTE, PRESTA_OP_P2P );
        if ( err_count > 0 )
        {
          /* %lld matches the long long err_count (was %d: UB). */
          prestaWarn
            ( "Bidirectional receive data check failed with %lld errors\n",
              err_count );
          presta_data_err_total += err_count;
        }
      }
    }

    if ( presta_check_data == 1 )
      free ( validate_buf );

    if ( argStruct.useBarrier )
      generic_barrier ( *activeComm );

    diff = MPI_Wtime ( ) - start;

    free ( sendbuf );
    free ( recvbuf );
  }

  MPI_Barrier ( MPI_COMM_WORLD );

  if ( diff > 0 && argStruct.sumLocalBW == 1 )
    return ( ( double ) bufsize * ( double ) iters ) / diff;
  else
    return diff;
}
/*
 * Time a unidirectional stream of messages between paired ranks: the
 * lower rank of each pair sends `iters` messages, the higher rank
 * receives them.
 *
 * Optionally (presta_check_data == 1) the sender stamps known values
 * into the buffer and the receiver regenerates the expected pattern
 * locally and validates each message, accumulating mismatches into
 * presta_data_err_total.  Returns local bandwidth when
 * argStruct.sumLocalBW is set and timing succeeded — note the elapsed
 * time is doubled in the denominator (presumably to halve the credited
 * per-direction bandwidth; verify against the reporting code) —
 * otherwise the raw elapsed time (0.0 on inactive ranks).
 *
 * Fix vs. the original: comBuf and validate_buf mallocs were unchecked;
 * they now abort on failure, consistent with runLatencyTest.
 */
double runUnicomTest ( int bufsize, int iters, MPI_Comm * activeComm )
{
  int i, currtarg;
  double diff = 0.0;
  double start;
  MPI_Status stat;
  char *comBuf;

  currtarg = getTargetRank ( rank, argStruct.procsPerNode,
                             argStruct.useNearestRank );

  if ( isActiveProc ( activeComm ) )
  {
    comBuf = ( char * ) malloc ( bufsize );
    if ( comBuf == NULL )
      prestaAbort ( "Failed to allocate unidirectional buffer.\n" );
    memset ( comBuf, 0, bufsize );

    /* Ensure communication paths have been initialized */
    if ( rank < currtarg )
      MPI_Send ( comBuf, bufsize, MPI_BYTE, currtarg, 0, MPI_COMM_WORLD );
    else
    {
      MPI_Recv ( comBuf, bufsize, MPI_BYTE, currtarg, 0, MPI_COMM_WORLD,
                 &stat );
    }

    generic_barrier ( *activeComm );
    generic_barrier ( *activeComm );

    if ( rank < currtarg )
    {
      /* Sender: time operation loop */
      start = MPI_Wtime ( );
      for ( i = 0; i < iters; i++ )
      {
        if ( presta_check_data == 1 )
          set_data_values ( bufsize, comBuf );
        MPI_Send ( comBuf, bufsize, MPI_BYTE, currtarg, 0, MPI_COMM_WORLD );
      }
    }
    else
    {
      /* Receiver: regenerate the sender's expected pattern locally so
         each received message can be compared against it. */
      void *validate_buf = NULL;
      int err_count = 0;

      if ( presta_check_data == 1 )
      {
        validate_buf = malloc ( bufsize );
        if ( validate_buf == NULL )
          prestaAbort ( "Failed to allocate validation buffer.\n" );
      }

      start = MPI_Wtime ( );
      for ( i = 0; i < iters; i++ )
      {
        if ( presta_check_data == 1 )
          set_data_values ( bufsize, validate_buf );

        MPI_Recv ( comBuf, bufsize, MPI_BYTE, currtarg, 0, MPI_COMM_WORLD,
                   &stat );

        if ( presta_check_data == 1 )
        {
          err_count = check_data_values ( bufsize, comBuf, validate_buf,
                                          MPI_BYTE, PRESTA_OP_P2P );
          if ( err_count > 0 )
          {
            prestaWarn
              ( "Unidirectional receive data check failed with %d errors\n",
                err_count );
            presta_data_err_total += err_count;
          }
        }
      }

      if ( presta_check_data == 1 )
      {
        free ( validate_buf );
      }
    }

    if ( argStruct.useBarrier )
      generic_barrier ( *activeComm );

    diff = MPI_Wtime ( ) - start;

    free ( comBuf );
  }

  MPI_Barrier ( MPI_COMM_WORLD );

  if ( diff > 0 && argStruct.sumLocalBW == 1 )
    return ( ( double ) bufsize * ( double ) iters ) / ( diff * 2 );
  else
    return diff;
}