Ejemplo n.º 1
0
/**
 * Run the single operation selected by cfg->opID against the Kinetic device
 * and report the outcome on stdout/stderr.
 *
 * Key/value operations (NoOp, Put, Get, GetNext, GetPrevious, Delete) are
 * issued on cfg->session; administrative operations are issued on
 * cfg->adminSession (UpdateFirmware is the one exception, see the note in
 * that case).
 *
 * @param cfg  Utility configuration: holds the sessions, the operation ID and
 *             the per-operation arguments (entry, pins, log type, file, ...).
 *
 * @return The status returned by the underlying client call. A
 *         KINETIC_STATUS_NOT_FOUND result from the device-specific log
 *         request is reported to the user and then mapped to
 *         KINETIC_STATUS_SUCCESS.
 *
 * The process is terminated via exit() when the device-specific log name is
 * empty (exit code 1) or when cfg->opID is not a known operation (exit
 * code -1).
 */
KineticStatus ExecuteOperation(
    struct UtilConfig * cfg)
{
    KineticStatus status = KINETIC_STATUS_INVALID;
    /* Start from NULL so a call that fails without touching the out-parameter
     * never leaves an indeterminate pointer behind. */
    KineticLogInfo * logInfo = NULL;
    ByteArray tmpArray;

    switch (cfg->opID) {

        case OPT_NOOP:
            status = KineticClient_NoOp(cfg->session);
            if (status == KINETIC_STATUS_SUCCESS) {
                printf("NoOp operation completed successfully."
                    " Kinetic Device is alive and well!\n");
            }
            break;

        case OPT_PUT:
            status = KineticClient_Put(cfg->session, &cfg->entry, NULL);
            if (status == KINETIC_STATUS_SUCCESS) {
                printf("Put operation completed successfully."
                       " Your data has been stored!\n");
                PrintEntry(&cfg->entry);
            }
            break;

        case OPT_GET:
            status = KineticClient_Get(cfg->session, &cfg->entry, NULL);
            if (status == KINETIC_STATUS_SUCCESS) {
                printf("Get executed successfully.\n");
                PrintEntry(&cfg->entry);
            }
            break;

        case OPT_GETNEXT:
            status = KineticClient_GetNext(cfg->session, &cfg->entry, NULL);
            if (status == KINETIC_STATUS_SUCCESS) {
                printf("GetNext executed successfully.\n");
                PrintEntry(&cfg->entry);
            }
            break;

        case OPT_GETPREVIOUS:
            status = KineticClient_GetPrevious(cfg->session, &cfg->entry, NULL);
            if (status == KINETIC_STATUS_SUCCESS) {
                printf("GetPrevious executed successfully.\n");
                PrintEntry(&cfg->entry);
            }
            break;

        case OPT_DELETE:
            status = KineticClient_Delete(cfg->session, &cfg->entry, NULL);
            if (status == KINETIC_STATUS_SUCCESS) {
                printf("Delete executed successfully. The entry has been destroyed!\n");
                PrintEntry(&cfg->entry);
            }
            break;

        case OPT_GETLOG:
            /* NOTE(review): logInfo is not released in this function; confirm
             * who owns the structure returned through the out-parameter. */
            status = KineticAdminClient_GetLog(cfg->adminSession, cfg->logType, &logInfo, NULL);
            if (status == KINETIC_STATUS_SUCCESS) {
                printf("GetLog executed successfully.\n"
                       "The device log info has been retrieved!\n");
                PrintLogInfo(cfg->logType, logInfo);
            }
            break;

        case OPT_GETDEVICESPECIFICLOG: {
            /* Measure the name once; it is needed for both the emptiness
             * check and the ByteArray length. */
            size_t nameLen = strlen(cfg->deviceLogName);
            if (nameLen == 0) {
                fprintf(stderr, "Device-specific log type requires '--devicelogname' to be set!\n");
                exit(1);
            }
            tmpArray.data = (uint8_t*)cfg->deviceLogName;
            tmpArray.len = nameLen;
            status = KineticAdminClient_GetDeviceSpecificLog(cfg->adminSession, tmpArray, &logInfo, NULL);
            if (status == KINETIC_STATUS_SUCCESS) {
                printf("GetDeviceSpecificLog executed successfully.\n"
                       "The device-specific device log info has been retrieved!\n");
                printf("Device-Specific Log Info:\n");
                /* The payload is a length-delimited byte array, not a C
                 * string. A bounded %s prints it without a temporary heap
                 * copy (and without an unchecked allocation). Output still
                 * stops at the first NUL byte, if any. */
                if (logInfo != NULL && logInfo->device != NULL
                    && logInfo->device->name.data != NULL) {
                    printf("  %.*s\n", (int)logInfo->device->name.len,
                        (const char*)logInfo->device->name.data);
                }
                else {
                    printf("  \n");
                }
            }
            else if (status == KINETIC_STATUS_NOT_FOUND) {
                fprintf(stderr, "The specified device-specific log '%s' was not found on the device!\n", cfg->deviceLogName);
                /* A missing log is reported to the user but is not treated
                 * as a failure of the utility itself. */
                status = KINETIC_STATUS_SUCCESS;
            }
            break;
        }

        case OPT_SETERASEPIN:
            status = KineticAdminClient_SetErasePin(cfg->adminSession,
                ByteArray_Create(cfg->pin, strlen(cfg->pin)),
                ByteArray_Create(cfg->newPin, strlen(cfg->newPin)));
            if (status == KINETIC_STATUS_SUCCESS) {
                printf("SetErasePin executed successfully.\n"
                       "The kinetic device erase pin has been changed!\n");
            }
            break;

        case OPT_INSTANTERASE:
            status = KineticAdminClient_InstantErase(cfg->adminSession,
                ByteArray_Create(cfg->pin, strlen(cfg->pin)));
            if (status == KINETIC_STATUS_SUCCESS) {
                printf("InstantErase executed successfully.\n"
                       "The kinetic device has been erased!\n");
            }
            break;

        case OPT_SECUREERASE:
            status = KineticAdminClient_SecureErase(cfg->adminSession,
                ByteArray_Create(cfg->pin, strlen(cfg->pin)));
            if (status == KINETIC_STATUS_SUCCESS) {
                printf("SecureErase executed successfully.\n"
                       "The kinetic device has been erased!\n");
            }
            break;

        case OPT_SETLOCKPIN:
            status = KineticAdminClient_SetLockPin(cfg->adminSession,
                ByteArray_Create(cfg->pin, strlen(cfg->pin)),
                ByteArray_Create(cfg->newPin, strlen(cfg->newPin)));
            if (status == KINETIC_STATUS_SUCCESS) {
                printf("SetLockPin executed successfully.\n"
                       "The kinetic device lock/unlock pin has been changed!\n");
            }
            break;

        case OPT_LOCKDEVICE:
            status = KineticAdminClient_LockDevice(cfg->adminSession,
                ByteArray_Create(cfg->pin, strlen(cfg->pin)));
            if (status == KINETIC_STATUS_SUCCESS) {
                printf("LockDevice executed successfully.\n"
                       "The kinetic device is now locked!\n");
            }
            break;

        case OPT_UNLOCKDEVICE:
            status = KineticAdminClient_UnlockDevice(cfg->adminSession,
                ByteArray_Create(cfg->pin, strlen(cfg->pin)));
            if (status == KINETIC_STATUS_SUCCESS) {
                printf("UnlockDevice executed successfully.\n"
                       "The kinetic device is now unlocked!\n");
            }
            break;

        case OPT_SETCLUSTERVERSION:
            status = KineticAdminClient_SetClusterVersion(cfg->adminSession, cfg->newClusterVersion);
            if (status == KINETIC_STATUS_SUCCESS) {
                printf("SetClusterVersion executed successfully.\n"
                       "The kinetic device's cluster version has been updated!\n");
            }
            break;

        case OPT_SETACL:
            status = KineticAdminClient_SetACL(cfg->adminSession, cfg->file);
            if (status == KINETIC_STATUS_SUCCESS) {
                printf("SetACL executed successfully.\n"
                       "The kinetic device ACLs have been replaced!\n");
            }
            break;

        case OPT_UPDATEFIRMWARE:
            /* NOTE(review): every other admin call uses cfg->adminSession;
             * this one uses cfg->session. Left unchanged - confirm whether
             * that is intentional. */
            status = KineticAdminClient_UpdateFirmware(cfg->session, cfg->file);
            if (status == KINETIC_STATUS_SUCCESS) {
                printf("UpdateFirmware executed successfully.\n"
                       "The kinetic device firmware has been updated!\n");
            }
            break;

        default:
            fprintf(stderr, "Specified operation '%d' is invalid!\n",
                (int)cfg->opID);
            exit(-1);
    }

    // Print out status code description if operation was not successful
    if (status != KINETIC_STATUS_SUCCESS) {
        fprintf(stderr, "\nERROR: Operation '%s' failed with status '%s'\n",
            GetOptString(cfg->opID), Kinetic_GetStatusDescription(status));
    }

    return status;
}
Ejemplo n.º 2
0
int main(int argc, char *argv[])
{
    FILE *out=0;		/* Output data file 			*/
    char s[255]; 		/* Generic string			*/
    char *memtmp;
    char *memtmp1;
    MPI_Status status;

    int ii, i, j, k, n, nq,	/* Loop indices				*/
	bufoffset = 0,		/* Align buffer to this			*/
	bufalign = 16*1024,	/* Boundary to align buffer to		*/
	nrepeat01, nrepeat12,	/* Number of time to do the transmission*/
	nrepeat012,
	len,			/* Number of bytes to be transmitted	*/
	inc = 1,		/* Increment value			*/
	pert,			/* Perturbation value			*/
        ipert,                  /* index of the perturbation loop	*/
	start = 0,		/* Starting value for signature curve 	*/
	end = MAXINT,		/* Ending value for signature curve	*/
	printopt = 1,		/* Debug print statements flag		*/
	middle_rank = 0,        /* rank 0, 1 or 2 where 2-0-1 or 0-1-2 or 1-2-0 */
	tint;
    
    ArgStruct	args01, args12, args012;/* Argumentsfor all the calls	*/
    
    double t, t0, t1,           /* Time variables			*/
	tlast01, tlast12, tlast012,/* Time for the last transmission	*/
	latency01, latency12,	/* Network message latency		*/
	latency012, tdouble;    /* Network message latency to go from 0 -> 1 -> 2 */
#ifdef CREATE_DIFFERENCE_CURVES
    int itrial, ntrials;
    double *dtrials;
#endif

    Data *bwdata01, *bwdata12, *bwdata012;/* Bandwidth curve data 	*/
    
    BOOL bNoCache = FALSE;
    BOOL bSavePert = FALSE;
    BOOL bUseMegaBytes = FALSE;

    MPI_Init(&argc, &argv);
    
    MPI_Comm_size(MPI_COMM_WORLD, &g_nNproc);
    MPI_Comm_rank(MPI_COMM_WORLD, &g_nIproc);
    
    if (g_nNproc != 3)
    {
	if (g_nIproc == 0)
	    PrintOptions();
	MPI_Finalize();
	exit(0);
    }

    GetOptDouble(&argc, &argv, "-time", &g_STOPTM);
    GetOptInt(&argc, &argv, "-reps", &g_NSAMP);
    GetOptInt(&argc, &argv, "-start", &start);
    GetOptInt(&argc, &argv, "-end", &end);
    bNoCache = GetOpt(&argc, &argv, "-nocache");
    bUseMegaBytes = GetOpt(&argc, &argv, "-mb");
    if (GetOpt(&argc, &argv, "-noprint"))
	printopt = 0;
    bSavePert = GetOpt(&argc, &argv, "-pert");
    GetOptInt(&argc, &argv, "-middle", &middle_rank);
    if (middle_rank < 0 || middle_rank > 2)
	middle_rank = 0;

    bwdata01 = malloc((g_NSAMP+1) * sizeof(Data));
    bwdata12 = malloc((g_NSAMP+1) * sizeof(Data));
    bwdata012 = malloc((g_NSAMP+1) * sizeof(Data));

    if (g_nIproc == 0)
	strcpy(s, "adapt.out");
    GetOptString(&argc, &argv, "-out", s);
    
    if (start > end)
    {
	fprintf(stdout, "Start MUST be LESS than end\n");
	exit(420132);
    }

    Setup(middle_rank, &args01, &args12, &args012);

    if (g_nIproc == 0)
    {
	if ((out = fopen(s, "w")) == NULL)
	{
	    fprintf(stdout,"Can't open %s for output\n", s);
	    exit(1);
	}
    }

    /* Calculate latency */
    switch (g_proc_loc)
    {
    case LEFT_PROCESS:
	latency01 = TestLatency(&args01);
	/*printf("[0] latency01 = %0.9f\n", latency01);fflush(stdout);*/
	RecvTime(&args01, &latency12);
	/*printf("[0] latency12 = %0.9f\n", latency12);fflush(stdout);*/
	break;
    case MIDDLE_PROCESS:
	latency01 = TestLatency(&args01);
	/*printf("[1] latency01 = %0.9f\n", latency01);fflush(stdout);*/
	SendTime(&args12, &latency01);
	latency12 = TestLatency(&args12);
	/*printf("[1] latency12 = %0.9f\n", latency12);fflush(stdout);*/
	SendTime(&args01, &latency12);
	break;
    case RIGHT_PROCESS:
	RecvTime(&args12, &latency01);
	/*printf("[2] latency01 = %0.9f\n", latency01);fflush(stdout);*/
	latency12 = TestLatency(&args12);
	/*printf("[2] latency12 = %0.9f\n", latency12);fflush(stdout);*/
	break;
    }

    latency012 = TestLatency012(&args012);

    if ((g_nIproc == 0) && printopt)
    {
	printf("Latency%d%d_ : %0.9f\n", g_left_rank, g_middle_rank, latency01);
	printf("Latency_%d%d : %0.9f\n", g_middle_rank, g_right_rank, latency12);
	printf("Latency%d%d%d : %0.9f\n", g_left_rank, g_middle_rank, g_right_rank, latency012);
	fflush(stdout);
	printf("Now starting main loop\n");
	fflush(stdout);
    }
    tlast01 = latency01;
    tlast12 = latency12;
    tlast012 = latency012;
    inc = (start > 1) ? start/2: inc;
    args01.bufflen = start;
    args12.bufflen = start;
    args012.bufflen = start;

#ifdef CREATE_DIFFERENCE_CURVES
    /* print the header line of the output file */
    if (g_nIproc == 0)
    {
	fprintf(out, "bytes\tMbits/s\ttime\tMbits/s\ttime");
	for (ii=1, itrial=0; itrial<MAX_NUM_O12_TRIALS; ii <<= 1, itrial++)
	    fprintf(out, "\t%d", ii);
	fprintf(out, "\n");
	fflush(out);
    }
    ntrials = MAX_NUM_O12_TRIALS;
    dtrials = malloc(sizeof(double)*ntrials);
#endif

    /* Main loop of benchmark */
    for (nq = n = 0, len = start; 
         n < g_NSAMP && tlast012 < g_STOPTM && len <= end; 
	 len = len + inc, nq++)
    {
	if (nq > 2)
	    inc = (nq % 2) ? inc + inc : inc;

	/* clear the old values */
	for (itrial = 0; itrial < ntrials; itrial++)
	{
	    dtrials[itrial] = LONGTIME;
	}

	/* This is a perturbation loop to test nearby values */
	for (ipert = 0, pert = (inc > PERT + 1) ? -PERT : 0;
	     pert <= PERT; 
	     ipert++, n++, pert += (inc > PERT + 1) ? PERT : PERT + 1)
	{


	    /*****************************************************/
	    /*         Run a trial between rank 0 and 1          */
	    /*****************************************************/

	    MPI_Barrier(MPI_COMM_WORLD);


	    if (g_proc_loc == RIGHT_PROCESS)
		goto skip_01_trial;

	    /* Calculate howmany times to repeat the experiment. */
	    if (args01.tr)
	    {
		if (args01.bufflen == 0)
		    nrepeat01 = args01.latency_reps;
		else
		    nrepeat01 = (int)(MAX((RUNTM / ((double)args01.bufflen /
			           (args01.bufflen - inc + 1.0) * tlast01)), TRIALS));
		SendReps(&args01, &nrepeat01);
	    }
	    else
	    {
		RecvReps(&args01, &nrepeat01);
	    }

	    /* Allocate the buffer */
	    args01.bufflen = len + pert;
	    /* printf("allocating %d bytes\n", args01.bufflen * nrepeat01 + bufalign); */
	    if (bNoCache)
	    {
		if ((args01.sbuff = (char *)malloc(args01.bufflen * nrepeat01 + bufalign)) == (char *)NULL)
		{
		    fprintf(stdout,"Couldn't allocate memory\n");
		    fflush(stdout);
		    break;
		}
	    }
	    else
	    {
		if ((args01.sbuff = (char *)malloc(args01.bufflen + bufalign)) == (char *)NULL)
		{
		    fprintf(stdout,"Couldn't allocate memory\n");
		    fflush(stdout);
		    break;
		}
	    }
	    /* if ((args01.rbuff = (char *)malloc(args01.bufflen * nrepeat01 + bufalign)) == (char *)NULL) */
	    if ((args01.rbuff = (char *)malloc(args01.bufflen + bufalign)) == (char *)NULL)
	    {
		fprintf(stdout,"Couldn't allocate memory\n");
		fflush(stdout);
		break;
	    }

	    /* save the original pointers in case alignment moves them */
	    memtmp = args01.sbuff;
	    memtmp1 = args01.rbuff;

	    /* Possibly align the data buffer */
	    if (!bNoCache)
	    {
		if (bufalign != 0)
		{
		    args01.sbuff += (bufalign - (POINTER_TO_INT(args01.sbuff) % bufalign) + bufoffset) % bufalign;
		    /* args01.rbuff += (bufalign - ((MPI_Aint)args01.rbuff % bufalign) + bufoffset) % bufalign; */
		}
	    }
	    args01.rbuff += (bufalign - (POINTER_TO_INT(args01.rbuff) % bufalign) + bufoffset) % bufalign;
	    
	    if (args01.tr && printopt)
	    {
		fprintf(stdout,"%3d: %9d bytes %4d times --> ",
		    n, args01.bufflen, nrepeat01);
		fflush(stdout);
	    }
	    
	    /* Finally, we get to transmit or receive and time */
	    if (args01.tr)
	    {
		bwdata01[n].t = LONGTIME;
		t1 = 0;
		for (i = 0; i < TRIALS; i++)
		{
		    if (bNoCache)
		    {
			if (bufalign != 0)
			{
			    args01.sbuff = memtmp + ((bufalign - (POINTER_TO_INT(args01.sbuff) % bufalign) + bufoffset) % bufalign);
			    /* args01.rbuff = memtmp1 + ((bufalign - ((MPI_Aint)args01.rbuff % bufalign) + bufoffset) % bufalign); */
			}
			else
			{
			    args01.sbuff = memtmp;
			    /* args01.rbuff = memtmp1; */
			}
		    }
		    
		    Sync(&args01);
		    t0 = MPI_Wtime();
		    for (j = 0; j < nrepeat01; j++)
		    {
			MPI_Send(args01.sbuff,  args01.bufflen, MPI_BYTE,  args01.nbor, MSG_TAG_01, MPI_COMM_WORLD);
			MPI_Recv(args01.rbuff,  args01.bufflen, MPI_BYTE,  args01.nbor, MSG_TAG_01, MPI_COMM_WORLD, &status);
			if (bNoCache)
			{
			    args01.sbuff += args01.bufflen;
			    /* args01.rbuff += args01.bufflen; */
			}
		    }
		    t = (MPI_Wtime() - t0)/(2 * nrepeat01);

		    t1 += t;
		    bwdata01[n].t = MIN(bwdata01[n].t, t);
		}
		SendTime(&args01, &bwdata01[n].t);
	    }
	    else
	    {
		bwdata01[n].t = LONGTIME;
		t1 = 0;
		for (i = 0; i < TRIALS; i++)
		{
		    if (bNoCache)
		    {
			if (bufalign != 0)
			{
			    args01.sbuff = memtmp + ((bufalign - (POINTER_TO_INT(args01.sbuff) % bufalign) + bufoffset) % bufalign);
			    /* args01.rbuff = memtmp1 + ((bufalign - ((MPI_Aint)args01.rbuff % bufalign) + bufoffset) % bufalign); */
			}
			else
			{
			    args01.sbuff = memtmp;
			    /* args01.rbuff = memtmp1; */
			}
		    }
		    
		    Sync(&args01);
		    t0 = MPI_Wtime();
		    for (j = 0; j < nrepeat01; j++)
		    {
			MPI_Recv(args01.rbuff,  args01.bufflen, MPI_BYTE,  args01.nbor, MSG_TAG_01, MPI_COMM_WORLD, &status);
			MPI_Send(args01.sbuff,  args01.bufflen, MPI_BYTE,  args01.nbor, MSG_TAG_01, MPI_COMM_WORLD);
			if (bNoCache)
			{
			    args01.sbuff += args01.bufflen;
			    /* args01.rbuff += args01.bufflen; */
			}
		    }
		    t = (MPI_Wtime() - t0)/(2 * nrepeat01);
		}
		RecvTime(&args01, &bwdata01[n].t);
	    }
	    tlast01 = bwdata01[n].t;
	    bwdata01[n].bits = args01.bufflen * CHARSIZE;
	    bwdata01[n].bps = bwdata01[n].bits / (bwdata01[n].t * 1024 * 1024);
	    bwdata01[n].repeat = nrepeat01;
	    
	    if (args01.tr)
	    {
		if (bSavePert)
		{
		    if (args01.iproc == 0)
		    {
			if (bUseMegaBytes)
			    fprintf(out, "%d\t%f\t%0.9f\t", bwdata01[n].bits / 8, bwdata01[n].bps / 8, bwdata01[n].t);
			else
			    fprintf(out, "%d\t%f\t%0.9f\t", bwdata01[n].bits / 8, bwdata01[n].bps, bwdata01[n].t);
			fflush(out);
		    }
		    else
		    {
			MPI_Send(&bwdata01[n].bits, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
			MPI_Send(&bwdata01[n].bps, 1, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD);
			MPI_Send(&bwdata01[n].t, 1, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD);
		    }
		}
	    }
	    
	    free(memtmp);
	    free(memtmp1);
	    
	    if (args01.tr && printopt)
	    {
		if (bUseMegaBytes)
		    printf(" %6.2f MBps in %0.9f sec\n", bwdata01[n].bps / 8, tlast01);
		else
		    printf(" %6.2f Mbps in %0.9f sec\n", bwdata01[n].bps, tlast01);
		fflush(stdout);
	    }

skip_01_trial:
	    if (g_proc_loc == RIGHT_PROCESS && g_nIproc == 0 && bSavePert)
	    {
		MPI_Recv(&tint, 1, MPI_INT, g_left_rank, 1, MPI_COMM_WORLD, &status);
		fprintf(out, "%d\t", tint/8);
		MPI_Recv(&tdouble, 1, MPI_DOUBLE, g_left_rank, 1, MPI_COMM_WORLD, &status);
		if (bUseMegaBytes)
		    tdouble = tdouble / 8.0;
		fprintf(out, "%f\t", tdouble);
		MPI_Recv(&tdouble, 1, MPI_DOUBLE, g_left_rank, 1, MPI_COMM_WORLD, &status);
		fprintf(out, "%0.9f\t", tdouble);
		fflush(out);
	    }


	    /*****************************************************/
	    /*         Run a trial between rank 1 and 2          */
	    /*****************************************************/

	    MPI_Barrier(MPI_COMM_WORLD);


	    if (g_proc_loc == LEFT_PROCESS)
		goto skip_12_trial;

	    /* Calculate howmany times to repeat the experiment. */
	    if (args12.tr)
	    {
		if (args12.bufflen == 0)
		    nrepeat12 = args12.latency_reps;
		else
		    nrepeat12 = (int)(MAX((RUNTM / ((double)args12.bufflen /
			           (args12.bufflen - inc + 1.0) * tlast12)), TRIALS));
		SendReps(&args12, &nrepeat12);
	    }
	    else
	    {
		RecvReps(&args12, &nrepeat12);
	    }
	    
	    /* Allocate the buffer */
	    args12.bufflen = len + pert;
	    /* printf("allocating %d bytes\n", args12.bufflen * nrepeat12 + bufalign); */
	    if (bNoCache)
	    {
		if ((args12.sbuff = (char *)malloc(args12.bufflen * nrepeat12 + bufalign)) == (char *)NULL)
		{
		    fprintf(stdout,"Couldn't allocate memory\n");
		    fflush(stdout);
		    break;
		}
	    }
	    else
	    {
		if ((args12.sbuff = (char *)malloc(args12.bufflen + bufalign)) == (char *)NULL)
		{
		    fprintf(stdout,"Couldn't allocate memory\n");
		    fflush(stdout);
		    break;
		}
	    }
	    /* if ((args12.rbuff = (char *)malloc(args12.bufflen * nrepeat12 + bufalign)) == (char *)NULL) */
	    if ((args12.rbuff = (char *)malloc(args12.bufflen + bufalign)) == (char *)NULL)
	    {
		fprintf(stdout,"Couldn't allocate memory\n");
		fflush(stdout);
		break;
	    }

	    /* save the original pointers in case alignment moves them */
	    memtmp = args12.sbuff;
	    memtmp1 = args12.rbuff;
	    
	    /* Possibly align the data buffer */
	    if (!bNoCache)
	    {
		if (bufalign != 0)
		{
		    args12.sbuff += (bufalign - (POINTER_TO_INT(args12.sbuff) % bufalign) + bufoffset) % bufalign;
		    /* args12.rbuff += (bufalign - ((MPI_Aint)args12.rbuff % bufalign) + bufoffset) % bufalign; */
		}
	    }
	    args12.rbuff += (bufalign - (POINTER_TO_INT(args12.rbuff) % bufalign) + bufoffset) % bufalign;
	    
	    if (args12.tr && printopt)
	    {
		printf("%3d: %9d bytes %4d times --> ", n, args12.bufflen, nrepeat12);
		fflush(stdout);
	    }
	    
	    /* Finally, we get to transmit or receive and time */
	    if (args12.tr)
	    {
		bwdata12[n].t = LONGTIME;
		t1 = 0;
		for (i = 0; i < TRIALS; i++)
		{
		    if (bNoCache)
		    {
			if (bufalign != 0)
			{
			    args12.sbuff = memtmp + ((bufalign - (POINTER_TO_INT(args12.sbuff) % bufalign) + bufoffset) % bufalign);
			    /* args12.rbuff = memtmp1 + ((bufalign - ((MPI_Aint)args12.rbuff % bufalign) + bufoffset) % bufalign); */
			}
			else
			{
			    args12.sbuff = memtmp;
			    /* args12.rbuff = memtmp1; */
			}
		    }
		    
		    Sync(&args12);
		    t0 = MPI_Wtime();
		    for (j = 0; j < nrepeat12; j++)
		    {
			MPI_Send(args12.sbuff,  args12.bufflen, MPI_BYTE,  args12.nbor, MSG_TAG_12, MPI_COMM_WORLD);
			MPI_Recv(args12.rbuff,  args12.bufflen, MPI_BYTE,  args12.nbor, MSG_TAG_12, MPI_COMM_WORLD, &status);
			if (bNoCache)
			{
			    args12.sbuff += args12.bufflen;
			    /* args12.rbuff += args12.bufflen; */
			}
		    }
		    t = (MPI_Wtime() - t0)/(2 * nrepeat12);

		    t1 += t;
		    bwdata12[n].t = MIN(bwdata12[n].t, t);
		}
		SendTime(&args12, &bwdata12[n].t);
	    }
	    else
	    {
		bwdata12[n].t = LONGTIME;
		t1 = 0;
		for (i = 0; i < TRIALS; i++)
		{
		    if (bNoCache)
		    {
			if (bufalign != 0)
			{
			    args12.sbuff = memtmp + ((bufalign - (POINTER_TO_INT(args12.sbuff) % bufalign) + bufoffset) % bufalign);
			    /* args12.rbuff = memtmp1 + ((bufalign - ((MPI_Aint)args12.rbuff % bufalign) + bufoffset) % bufalign); */
			}
			else
			{
			    args12.sbuff = memtmp;
			    /* args12.rbuff = memtmp1; */
			}
		    }
		    
		    Sync(&args12);
		    t0 = MPI_Wtime();
		    for (j = 0; j < nrepeat12; j++)
		    {
			MPI_Recv(args12.rbuff,  args12.bufflen, MPI_BYTE,  args12.nbor, MSG_TAG_12, MPI_COMM_WORLD, &status);
			MPI_Send(args12.sbuff,  args12.bufflen, MPI_BYTE,  args12.nbor, MSG_TAG_12, MPI_COMM_WORLD);
			if (bNoCache)
			{
			    args12.sbuff += args12.bufflen;
			    /* args12.rbuff += args12.bufflen; */
			}
		    }
		    t = (MPI_Wtime() - t0)/(2 * nrepeat12);
		}
		RecvTime(&args12, &bwdata12[n].t);
	    }
	    tlast12 = bwdata12[n].t;
	    bwdata12[n].bits = args12.bufflen * CHARSIZE;
	    bwdata12[n].bps = bwdata12[n].bits / (bwdata12[n].t * 1024 * 1024);
	    bwdata12[n].repeat = nrepeat12;

	    if (args12.tr)
	    {
		if (bSavePert)
		{
		    if (g_nIproc == 0)
		    {
			if (bUseMegaBytes)
			    fprintf(out,"%f\t%0.9f\t", bwdata12[n].bps / 8, bwdata12[n].t);
			else
			    fprintf(out,"%f\t%0.9f\t", bwdata12[n].bps, bwdata12[n].t);
			fflush(out);
		    }
		    else
		    {
			MPI_Send(&bwdata12[n].bps, 1, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD);
			MPI_Send(&bwdata12[n].t, 1, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD);
		    }
		}
	    }
	    
	    free(memtmp);
	    free(memtmp1);
	    
	    if (args12.tr && printopt)
	    {
		if (bUseMegaBytes)
		    printf(" %6.2f MBps in %0.9f sec\n", bwdata12[n].bps / 8, tlast12);
		else
		    printf(" %6.2f Mbps in %0.9f sec\n", bwdata12[n].bps, tlast12);
		fflush(stdout);
	    }

skip_12_trial:
	    if (g_proc_loc == LEFT_PROCESS && g_nIproc == 0 && bSavePert)
	    {
		MPI_Recv(&tdouble, 1, MPI_DOUBLE, g_middle_rank, 1, MPI_COMM_WORLD, &status);
		if (bUseMegaBytes)
		    tdouble = tdouble / 8.0;
		fprintf(out, "%f\t", tdouble);
		MPI_Recv(&tdouble, 1, MPI_DOUBLE, g_middle_rank, 1, MPI_COMM_WORLD, &status);
		fprintf(out, "%0.9f\t", tdouble);
		fflush(out);
	    }


#ifdef CREATE_DIFFERENCE_CURVES
	    /*****************************************************/
	    /*         Run a trial between rank 0, 1 and 2       */
	    /*****************************************************/

	    MPI_Barrier(MPI_COMM_WORLD);


	    /* Calculate howmany times to repeat the experiment. */
	    if (g_nIproc == 0)
	    {
		if (args012.bufflen == 0)
		    nrepeat012 = g_latency012_reps;
		else
		    nrepeat012 = (int)(MAX((RUNTM / ((double)args012.bufflen /
			           (args012.bufflen - inc + 1.0) * tlast012)), TRIALS));
		MPI_Bcast(&nrepeat012, 1, MPI_INT, 0, MPI_COMM_WORLD);
	    }
	    else
	    {
		MPI_Bcast(&nrepeat012, 1, MPI_INT, 0, MPI_COMM_WORLD);
	    }

	    /* Allocate the buffer */
	    args012.bufflen = len + pert;
	    /* printf("allocating %d bytes\n", args12.bufflen * nrepeat012 + bufalign); */
	    if (bNoCache)
	    {
		if ((args012.sbuff = (char *)malloc(args012.bufflen * nrepeat012 + bufalign)) == (char *)NULL)
		{
		    fprintf(stdout,"Couldn't allocate memory\n");
		    fflush(stdout);
		    break;
		}
	    }
	    else
	    {
		if ((args012.sbuff = (char *)malloc(args012.bufflen + bufalign)) == (char *)NULL)
		{
		    fprintf(stdout,"Couldn't allocate memory\n");
		    fflush(stdout);
		    break;
		}
	    }
	    /* if ((args012.rbuff = (char *)malloc(args012.bufflen * nrepeat012 + bufalign)) == (char *)NULL) */
	    if ((args012.rbuff = (char *)malloc(args012.bufflen + bufalign)) == (char *)NULL)
	    {
		fprintf(stdout,"Couldn't allocate memory\n");
		fflush(stdout);
		break;
	    }

	    /* save the original pointers in case alignment moves them */
	    memtmp = args012.sbuff;
	    memtmp1 = args012.rbuff;
	    
	    /* Possibly align the data buffer */
	    if (!bNoCache)
	    {
		if (bufalign != 0)
		{
		    args012.sbuff += (bufalign - (POINTER_TO_INT(args012.sbuff) % bufalign) + bufoffset) % bufalign;
		    /* args12.rbuff += (bufalign - ((MPI_Aint)args12.rbuff % bufalign) + bufoffset) % bufalign; */
		}
	    }
	    args012.rbuff += (bufalign - (POINTER_TO_INT(args012.rbuff) % bufalign) + bufoffset) % bufalign;
	    
	    if (g_nIproc == 0 && printopt)
	    {
		printf("%3d: %9d bytes %4d times --> ", n, args012.bufflen, nrepeat012);
		fflush(stdout);
	    }

	    for (itrial=0, ii=1; ii <= nrepeat012 && itrial < ntrials; ii <<= 1, itrial++)
	    {
		/* Finally, we get to transmit or receive and time */
		switch (g_proc_loc)
		{
		case LEFT_PROCESS:
		    bwdata012[n].t = LONGTIME;
		    t1 = 0;
		    for (i = 0; i < TRIALS; i++)
		    {
			if (bNoCache)
			{
			    if (bufalign != 0)
			    {
				args012.sbuff = memtmp + ((bufalign - (POINTER_TO_INT(args012.sbuff) % bufalign) + bufoffset) % bufalign);
				/* args012.rbuff = memtmp1 + ((bufalign - ((MPI_Aint)args012.rbuff % bufalign) + bufoffset) % bufalign); */
			    }
			    else
			    {
				args012.sbuff = memtmp;
				/* args012.rbuff = memtmp1; */
			    }
			}

			Sync012(&args012);
			t0 = MPI_Wtime();
			for (j = 0; j < nrepeat012; j++)
			{
			    MPI_Send(args012.sbuff, args012.bufflen, MPI_BYTE, args012.nbor, MSG_TAG_012, MPI_COMM_WORLD);
			    MPI_Recv(args012.rbuff, args012.bufflen, MPI_BYTE, args012.nbor, MSG_TAG_012, MPI_COMM_WORLD, &status);
			    if (bNoCache)
			    {
				args012.sbuff += args012.bufflen;
				/* args012.rbuff += args012.bufflen; */
			    }
			}
			t = (MPI_Wtime() - t0)/(2 * nrepeat012);

			t1 += t;
			bwdata012[n].t = MIN(bwdata012[n].t, t);
		    }
		    MPI_Bcast(&bwdata012[n].t, 1, MPI_DOUBLE, g_left_rank, MPI_COMM_WORLD);
		    break;
		case MIDDLE_PROCESS:
		    bwdata012[n].t = LONGTIME;
		    t1 = 0;
		    for (i = 0; i < TRIALS; i++)
		    {
			if (bNoCache)
			{
			    if (bufalign != 0)
			    {
				args012.sbuff = memtmp + ((bufalign - (POINTER_TO_INT(args012.sbuff) % bufalign) + bufoffset) % bufalign);
				/* args012.rbuff = memtmp1 + ((bufalign - ((MPI_Aint)args012.rbuff % bufalign) + bufoffset) % bufalign); */
			    }
			    else
			    {
				args012.sbuff = memtmp;
				/* args012.rbuff = memtmp1; */
			    }
			}

			Sync012(&args012);
			t0 = MPI_Wtime();

			/******* use the ii variable here !!! ******/

			for (j = 0; j <= nrepeat012-ii; j+=ii)
			{
			    for (k=0; k<ii; k++)
			    {
				MPI_Send(args012.sbuff, args012.bufflen, MPI_BYTE, args012.nbor2, MSG_TAG_012, MPI_COMM_WORLD);
				MPI_Recv(args012.rbuff, args012.bufflen, MPI_BYTE, args012.nbor2, MSG_TAG_012, MPI_COMM_WORLD, &status);
			    }
			    /* do the left process second because it does the timing and needs to include time to send to the right process. */
			    for (k=0; k<ii; k++)
			    {
				MPI_Recv(args012.rbuff, args012.bufflen, MPI_BYTE, args012.nbor, MSG_TAG_012, MPI_COMM_WORLD, &status);
				MPI_Send(args012.sbuff, args012.bufflen, MPI_BYTE, args012.nbor, MSG_TAG_012, MPI_COMM_WORLD);
			    }
			    if (bNoCache)
			    {
				args012.sbuff += args012.bufflen;
				/* args012.rbuff += args012.bufflen; */
			    }
			}
			j = nrepeat012 % ii;
			for (k=0; k < j; k++)
			{
			    MPI_Send(args012.sbuff, args012.bufflen, MPI_BYTE, args012.nbor2, MSG_TAG_012, MPI_COMM_WORLD);
			    MPI_Recv(args012.rbuff, args012.bufflen, MPI_BYTE, args012.nbor2, MSG_TAG_012, MPI_COMM_WORLD, &status);
			}
			/* do the left process second because it does the timing and needs to include time to send to the right process. */
			for (k=0; k < j; k++)
			{
			    MPI_Recv(args012.rbuff, args012.bufflen, MPI_BYTE, args012.nbor, MSG_TAG_012, MPI_COMM_WORLD, &status);
			    MPI_Send(args012.sbuff, args012.bufflen, MPI_BYTE, args012.nbor, MSG_TAG_012, MPI_COMM_WORLD);
			}
			t = (MPI_Wtime() - t0)/(2 * nrepeat012);
		    }
		    MPI_Bcast(&bwdata012[n].t, 1, MPI_DOUBLE, g_left_rank, MPI_COMM_WORLD);
		    break;
		case RIGHT_PROCESS:
		    bwdata012[n].t = LONGTIME;
		    t1 = 0;
		    for (i = 0; i < TRIALS; i++)
		    {
			if (bNoCache)
			{
			    if (bufalign != 0)
			    {
				args012.sbuff = memtmp + ((bufalign - (POINTER_TO_INT(args012.sbuff) % bufalign) + bufoffset) % bufalign);
				/* args012.rbuff = memtmp1 + ((bufalign - ((MPI_Aint)args012.rbuff % bufalign) + bufoffset) % bufalign); */
			    }
			    else
			    {
				args012.sbuff = memtmp;
				/* args012.rbuff = memtmp1; */
			    }
			}

			Sync012(&args012);
			t0 = MPI_Wtime();
			for (j = 0; j < nrepeat012; j++)
			{
			    MPI_Recv(args012.rbuff, args012.bufflen, MPI_BYTE, args012.nbor, MSG_TAG_012, MPI_COMM_WORLD, &status);
			    MPI_Send(args012.sbuff, args012.bufflen, MPI_BYTE, args012.nbor, MSG_TAG_012, MPI_COMM_WORLD);
			    if (bNoCache)
			    {
				args012.sbuff += args012.bufflen;
				/* args012.rbuff += args012.bufflen; */
			    }
			}
			t = (MPI_Wtime() - t0)/(2 * nrepeat012);
		    }
		    MPI_Bcast(&bwdata012[n].t, 1, MPI_DOUBLE, g_left_rank, MPI_COMM_WORLD);
		    break;
		}
		tlast012 = bwdata012[n].t;
		bwdata012[n].bits = args012.bufflen * CHARSIZE;
		bwdata012[n].bps = bwdata012[n].bits / (bwdata012[n].t * 1024 * 1024);
		bwdata012[n].repeat = nrepeat012;
		if (itrial < ntrials)
		{
		    dtrials[itrial] = MIN(dtrials[itrial], bwdata012[n].t);
		}

		if (g_nIproc == 0)
		{
		    if (bSavePert)
		    {
			fprintf(out, "\t%0.9f", bwdata012[n].t);
			fflush(out);
		    }
		    if (printopt)
		    {
			printf(" %0.9f", tlast012);
			fflush(stdout);
		    }
		}
	    }
	    if (g_nIproc == 0)
	    {
		if (bSavePert)
		{
		    fprintf(out, "\n");
		    fflush(out);
		}
		if (printopt)
		{
		    printf("\n");
		    fflush(stdout);
		}
	    }
	    
	    free(memtmp);
	    free(memtmp1);
#endif

#ifdef CREATE_SINGLE_CURVE
	    /*****************************************************/
	    /*         Run a trial between rank 0, 1 and 2       */
	    /*****************************************************/

	    MPI_Barrier(MPI_COMM_WORLD);


	    /* Calculate how many times to repeat the experiment. */
	    if (g_nIproc == 0)
	    {
		if (args012.bufflen == 0)
		    nrepeat012 = g_latency012_reps;
		else
		    nrepeat012 = (int)(MAX((RUNTM / ((double)args012.bufflen /
			           (args012.bufflen - inc + 1.0) * tlast012)), TRIALS));
		MPI_Bcast(&nrepeat012, 1, MPI_INT, 0, MPI_COMM_WORLD);
	    }
	    else
	    {
		MPI_Bcast(&nrepeat012, 1, MPI_INT, 0, MPI_COMM_WORLD);
	    }

	    /* Allocate the buffer */
	    args012.bufflen = len + pert;
	    /* printf("allocating %d bytes\n", args12.bufflen * nrepeat012 + bufalign); */
	    if (bNoCache)
	    {
		if ((args012.sbuff = (char *)malloc(args012.bufflen * nrepeat012 + bufalign)) == (char *)NULL)
		{
		    fprintf(stdout,"Couldn't allocate memory\n");
		    fflush(stdout);
		    break;
		}
	    }
	    else
	    {
		if ((args012.sbuff = (char *)malloc(args012.bufflen + bufalign)) == (char *)NULL)
		{
		    fprintf(stdout,"Couldn't allocate memory\n");
		    fflush(stdout);
		    break;
		}
	    }
	    /* if ((args012.rbuff = (char *)malloc(args012.bufflen * nrepeat012 + bufalign)) == (char *)NULL) */
	    if ((args012.rbuff = (char *)malloc(args012.bufflen + bufalign)) == (char *)NULL)
	    {
		fprintf(stdout,"Couldn't allocate memory\n");
		fflush(stdout);
		break;
	    }

	    /* save the original pointers in case alignment moves them */
	    memtmp = args012.sbuff;
	    memtmp1 = args012.rbuff;
	    
	    /* Possibly align the data buffer */
	    if (!bNoCache)
	    {
		if (bufalign != 0)
		{
		    args012.sbuff += (bufalign - (POINTER_TO_INT(args012.sbuff) % bufalign) + bufoffset) % bufalign;
		    /* args12.rbuff += (bufalign - ((MPI_Aint)args12.rbuff % bufalign) + bufoffset) % bufalign; */
		}
	    }
	    args012.rbuff += (bufalign - (POINTER_TO_INT(args012.rbuff) % bufalign) + bufoffset) % bufalign;
	    
	    if (g_nIproc == 0 && printopt)
	    {
		printf("%3d: %9d bytes %4d times --> ", n, args012.bufflen, nrepeat012);
		fflush(stdout);
	    }
	    
	    /* Finally, we get to transmit or receive and time */
	    switch (g_proc_loc)
	    {
	    case LEFT_PROCESS:
		bwdata012[n].t = LONGTIME;
		t1 = 0;
		for (i = 0; i < TRIALS; i++)
		{
		    if (bNoCache)
		    {
			if (bufalign != 0)
			{
			    args012.sbuff = memtmp + ((bufalign - (POINTER_TO_INT(args012.sbuff) % bufalign) + bufoffset) % bufalign);
			    /* args012.rbuff = memtmp1 + ((bufalign - ((MPI_Aint)args012.rbuff % bufalign) + bufoffset) % bufalign); */
			}
			else
			{
			    args012.sbuff = memtmp;
			    /* args012.rbuff = memtmp1; */
			}
		    }
		    
		    Sync012(&args012);
		    t0 = MPI_Wtime();
		    for (j = 0; j < nrepeat012; j++)
		    {
			MPI_Send(args012.sbuff, args012.bufflen, MPI_BYTE, args012.nbor, MSG_TAG_012, MPI_COMM_WORLD);
			MPI_Recv(args012.rbuff, args012.bufflen, MPI_BYTE, args012.nbor, MSG_TAG_012, MPI_COMM_WORLD, &status);
			if (bNoCache)
			{
			    args012.sbuff += args012.bufflen;
			    /* args012.rbuff += args012.bufflen; */
			}
		    }
		    t = (MPI_Wtime() - t0)/(2 * nrepeat012);

		    t1 += t;
		    bwdata012[n].t = MIN(bwdata012[n].t, t);
		}
		MPI_Bcast(&bwdata012[n].t, 1, MPI_DOUBLE, g_left_rank, MPI_COMM_WORLD);
		break;
	    case MIDDLE_PROCESS:
		bwdata012[n].t = LONGTIME;
		t1 = 0;
		for (i = 0; i < TRIALS; i++)
		{
		    if (bNoCache)
		    {
			if (bufalign != 0)
			{
			    args012.sbuff = memtmp + ((bufalign - (POINTER_TO_INT(args012.sbuff) % bufalign) + bufoffset) % bufalign);
			    /* args012.rbuff = memtmp1 + ((bufalign - ((MPI_Aint)args012.rbuff % bufalign) + bufoffset) % bufalign); */
			}
			else
			{
			    args012.sbuff = memtmp;
			    /* args012.rbuff = memtmp1; */
			}
		    }
		    
		    Sync012(&args012);
		    t0 = MPI_Wtime();
		    for (j = 0; j < nrepeat012; j++)
		    {
			MPI_Recv(args012.rbuff, args012.bufflen, MPI_BYTE, args012.nbor, MSG_TAG_012, MPI_COMM_WORLD, &status);
			MPI_Send(args012.sbuff, args012.bufflen, MPI_BYTE, args012.nbor2, MSG_TAG_012, MPI_COMM_WORLD);
			MPI_Recv(args012.rbuff, args012.bufflen, MPI_BYTE, args012.nbor2, MSG_TAG_012, MPI_COMM_WORLD, &status);
			MPI_Send(args012.sbuff, args012.bufflen, MPI_BYTE, args012.nbor, MSG_TAG_012, MPI_COMM_WORLD);
			if (bNoCache)
			{
			    args012.sbuff += args012.bufflen;
			    /* args012.rbuff += args012.bufflen; */
			}
		    }
		    t = (MPI_Wtime() - t0)/(2 * nrepeat012);
		}
		MPI_Bcast(&bwdata012[n].t, 1, MPI_DOUBLE, g_left_rank, MPI_COMM_WORLD);
		break;
	    case RIGHT_PROCESS:
		bwdata012[n].t = LONGTIME;
		t1 = 0;
		for (i = 0; i < TRIALS; i++)
		{
		    if (bNoCache)
		    {
			if (bufalign != 0)
			{
			    args012.sbuff = memtmp + ((bufalign - (POINTER_TO_INT(args012.sbuff) % bufalign) + bufoffset) % bufalign);
			    /* args012.rbuff = memtmp1 + ((bufalign - ((MPI_Aint)args012.rbuff % bufalign) + bufoffset) % bufalign); */
			}
			else
			{
			    args012.sbuff = memtmp;
			    /* args012.rbuff = memtmp1; */
			}
		    }
		    
		    Sync012(&args012);
		    t0 = MPI_Wtime();
		    for (j = 0; j < nrepeat012; j++)
		    {
			MPI_Recv(args012.rbuff, args012.bufflen, MPI_BYTE, args012.nbor, MSG_TAG_012, MPI_COMM_WORLD, &status);
			MPI_Send(args012.sbuff, args012.bufflen, MPI_BYTE, args012.nbor, MSG_TAG_012, MPI_COMM_WORLD);
			if (bNoCache)
			{
			    args012.sbuff += args012.bufflen;
			    /* args012.rbuff += args012.bufflen; */
			}
		    }
		    t = (MPI_Wtime() - t0)/(2 * nrepeat012);
		}
		MPI_Bcast(&bwdata012[n].t, 1, MPI_DOUBLE, g_left_rank, MPI_COMM_WORLD);
		break;
	    }
	    tlast012 = bwdata012[n].t;
	    bwdata012[n].bits = args012.bufflen * CHARSIZE;
	    bwdata012[n].bps = bwdata012[n].bits / (bwdata012[n].t * 1024 * 1024);
	    bwdata012[n].repeat = nrepeat012;

	    if (g_nIproc == 0)
	    {
		if (bSavePert)
		{
		    if (bUseMegaBytes)
			fprintf(out, "%f\t%0.9f\n", bwdata012[n].bps / 8, bwdata012[n].t);
		    else
			fprintf(out, "%f\t%0.9f\n", bwdata012[n].bps, bwdata012[n].t);
		    fflush(out);
		}
	    }
	    
	    free(memtmp);
	    free(memtmp1);
	    
	    if (g_nIproc == 0 && printopt)
	    {
		if (bUseMegaBytes)
		    printf(" %6.2f MBps in %0.9f sec\n", bwdata012[n].bps / 8, tlast012);
		else
		    printf(" %6.2f Mbps in %0.9f sec\n", bwdata012[n].bps, tlast012);
		fflush(stdout);
	    }
#endif

	} /* End of perturbation loop */

	if (!bSavePert)/* && g_nIproc == 0)*/
	{
	    /* if we didn't save all of the perturbation loops, find the max and save it */
	    int index01 = 1, index12 = 1;
	    double dmax01 = bwdata01[n-1].bps;
	    double dmax12 = bwdata12[n-1].bps;
#ifdef CREATE_SINGLE_CURVE
	    int index012 = 1;
	    double dmax012 = bwdata012[n-1].bps;
#endif
	    for (; ipert > 1; ipert--)
	    {
		if (bwdata01[n-ipert].bps > dmax01)
		{
		    index01 = ipert;
		    dmax01 = bwdata01[n-ipert].bps;
		}
		if (bwdata12[n-ipert].bps > dmax12)
		{
		    index12 = ipert;
		    dmax12 = bwdata12[n-ipert].bps;
		}
#ifdef CREATE_SINGLE_CURVE
		if (bwdata012[n-ipert].bps > dmax012)
		{
		    index012 = ipert;
		    dmax012 = bwdata012[n-ipert].bps;
		}
#endif
	    }
	    /* broadcast the left rank's chosen bwdata01 entry (index, bits, bps, t) to all ranks */
	    MPI_Bcast(&index01, 1, MPI_INT, g_left_rank, MPI_COMM_WORLD);
	    MPI_Bcast(&bwdata01[n-index01].bits, 1, MPI_INT, g_left_rank, MPI_COMM_WORLD);
	    MPI_Bcast(&bwdata01[n-index01].bps, 1, MPI_DOUBLE, g_left_rank, MPI_COMM_WORLD);
	    MPI_Bcast(&bwdata01[n-index01].t, 1, MPI_DOUBLE, g_left_rank, MPI_COMM_WORLD);
	    /* broadcast the middle rank's chosen bwdata12 entry (index, bps, t) to all ranks */
	    MPI_Bcast(&index12, 1, MPI_INT, g_middle_rank, MPI_COMM_WORLD);
	    MPI_Bcast(&bwdata12[n-index12].bps, 1, MPI_DOUBLE, g_middle_rank, MPI_COMM_WORLD);
	    MPI_Bcast(&bwdata12[n-index12].t, 1, MPI_DOUBLE, g_middle_rank, MPI_COMM_WORLD);
	    if (g_nIproc == 0)
	    {
		if (bUseMegaBytes)
		{
		    fprintf(out, "%d\t%f\t%0.9f\t", bwdata01[n-index01].bits / 8, bwdata01[n-index01].bps / 8, bwdata01[n-index01].t);
		    fprintf(out, "%f\t%0.9f\t", bwdata12[n-index12].bps / 8, bwdata12[n-index12].t);
#ifdef CREATE_SINGLE_CURVE
		    fprintf(out, "%f\t%0.9f\n", bwdata012[n-index012].bps / 8, bwdata012[n-index012].t);
#endif
		}
		else
		{
		    fprintf(out, "%d\t%f\t%0.9f\t", bwdata01[n-index01].bits / 8, bwdata01[n-index01].bps, bwdata01[n-index01].t);
		    fprintf(out, "%f\t%0.9f\t", bwdata12[n-index12].bps, bwdata12[n-index12].t);
#ifdef CREATE_SINGLE_CURVE
		    fprintf(out, "%f\t%0.9f\n", bwdata012[n-index012].bps, bwdata012[n-index012].t);
#endif
		}
#ifdef CREATE_DIFFERENCE_CURVES
		for (itrial = 0; itrial < ntrials && dtrials[itrial] != LONGTIME; itrial++)
		{
		    fprintf(out, "%0.9f\t", dtrials[itrial]);
		}
		fprintf(out, "\n");
#endif
		fflush(out);
	    }
	}
    } /* End of main loop  */
	
    if (g_nIproc == 0)
	fclose(out);
    /* THE_END:		 */
    MPI_Finalize();
    free(bwdata01);
    free(bwdata12);
    free(bwdata012);
    return 0;
}