int main() { int i, j, rank, nranks, msgsize, dest; int xdim, ydim; long bufsize; double **buffer; double t_start, t_stop, t_total, d_total, bw; int count[2], src_stride, trg_stride, stride_level; OSP_handle_t osp_handle; OSP_Initialize(OSP_THREAD_SINGLE); rank = OSP_Process_id(OSP_GROUP_WORLD); nranks = OSP_Process_total(OSP_GROUP_WORLD); OSP_Barrier_group(OSP_GROUP_WORLD); bufsize = MAX_XDIM * MAX_YDIM * sizeof(double); buffer = (double **) malloc(sizeof(double *) * nranks); OSP_Alloc_segment((void **) &(buffer[rank]), bufsize); OSP_Exchange_segments(OSP_GROUP_WORLD, (void **) buffer); for (i = 0; i < bufsize / sizeof(double); i++) { *(buffer[rank] + i) = 1.0 + rank; } OSP_Allocate_handle(&osp_handle); OSP_Barrier_group(OSP_GROUP_WORLD); if (rank == 0) { printf("OSP_PutS Bandwidth in MBPS \n"); printf("%30s %22s \n", "Dimensions(array of doubles)", "Latency"); fflush(stdout); dest = 1; src_stride = MAX_YDIM * sizeof(double); trg_stride = MAX_YDIM * sizeof(double); stride_level = 1; for (xdim = 1; xdim <= MAX_XDIM; xdim *= 2) { count[1] = xdim; for (ydim = 1; ydim <= MAX_YDIM; ydim *= 2) { count[0] = ydim * sizeof(double); for (i = 0; i < ITERATIONS + SKIP; i++) { if (i == SKIP) t_start = OSP_Time_seconds(); OSP_NbPutS(1, stride_level, count, (void *) buffer[dest], &src_stride, (void *) buffer[rank], &trg_stride, osp_handle); } OSP_Wait_handle(osp_handle); t_stop = OSP_Time_seconds(); OSP_Flush(1); char temp[10]; sprintf(temp, "%dX%d", xdim, ydim); t_total = t_stop - t_start; d_total = (xdim*ydim*sizeof(double)*ITERATIONS)/(1024*1024); bw = d_total/t_total; printf("%30s %20.2f \n", temp, bw); fflush(stdout); } } } OSP_Barrier_group(OSP_GROUP_WORLD); OSP_Release_segments(OSP_GROUP_WORLD, (void *) buffer[rank]); OSP_Free_segment((void *) buffer[rank]); OSP_Finalize(); return 0; }
int main() { int i, j, rank, nranks, msgsize; int xdim, ydim; long bufsize; double **buffer; double t_start, t_stop, t_latency; int count[2], src_stride, trg_stride, stride_level, peer; double expected, actual; OSP_Initialize(OSP_THREAD_SINGLE); rank = OSP_Process_id(OSP_GROUP_WORLD); nranks = OSP_Process_total(OSP_GROUP_WORLD); buffer = (double **) malloc (sizeof(double *) * nranks); OSP_Barrier_group(OSP_GROUP_WORLD); bufsize = MAX_XDIM * MAX_YDIM * sizeof(double); OSP_Alloc_segment((void **) &(buffer[rank]), bufsize); OSP_Exchange_segments(OSP_GROUP_WORLD, (void **) buffer); for(i=0; i< bufsize/sizeof(double); i++) { *(buffer[rank] + i) = 1.0 + rank; } if(rank == 0) { printf("OSP_PutS Latency - local and remote completions - in usec \n"); printf("%30s %22s \n", "Dimensions(array of doubles)", "Latency-LocalCompeltion", "Latency-RemoteCompletion"); fflush(stdout); } src_stride = MAX_YDIM*sizeof(double); trg_stride = MAX_YDIM*sizeof(double); stride_level = 1; for(xdim=1; xdim<=MAX_XDIM; xdim*=2) { count[1] = xdim; for(ydim=1; ydim<=MAX_YDIM; ydim*=2) { count[0] = ydim*sizeof(double); if(rank == 0) { peer = 1; for(i=0; i<ITERATIONS+SKIP; i++) { if(i == SKIP) t_start = OSP_Time_seconds(); OSP_PutS(peer, stride_level, count, (void *) buffer[rank], &src_stride, (void *) buffer[peer], &trg_stride); } t_stop = OSP_Time_seconds(); OSP_Flush(peer); char temp[10]; sprintf(temp,"%dX%d", xdim, ydim); printf("%30s %20.2f", temp, ((t_stop-t_start)*1000000)/ITERATIONS); fflush(stdout); OSP_Barrier_group(OSP_GROUP_WORLD); OSP_Barrier_group(OSP_GROUP_WORLD); for(i=0; i<ITERATIONS+SKIP; i++) { if(i == SKIP) t_start = OSP_Time_seconds(); OSP_PutS(peer, stride_level, count, (void *) buffer[rank], &src_stride, (void *) buffer[peer], &trg_stride); OSP_Flush(peer); } t_stop = OSP_Time_seconds(); printf("%20.2f \n", ((t_stop-t_start)*1000000)/ITERATIONS); fflush(stdout); OSP_Barrier_group(OSP_GROUP_WORLD); OSP_Barrier_group(OSP_GROUP_WORLD); } else { peer = 0; expected = (1.0 + (double) peer); OSP_Barrier_group(OSP_GROUP_WORLD); for(i=0; i<xdim; i++) { for(j=0; j<ydim; j++) { actual = *(buffer[rank] + i*MAX_YDIM + j); if(actual != expected) { printf("Data validation failed at X: %d Y: %d Expected : %f Actual : %f \n", i, j, expected, actual); fflush(stdout); return -1; } } } for(i=0; i< bufsize/sizeof(double); i++) { *(buffer[rank] + i) = 1.0 + rank; } OSP_Barrier_group(OSP_GROUP_WORLD); OSP_Barrier_group(OSP_GROUP_WORLD); for(i=0; i<xdim; i++) { for(j=0; j<ydim; j++) { actual = *(buffer[rank] + i*MAX_YDIM + j); if(actual != expected) { printf("Data validation failed at X: %d Y: %d Expected : %f Actual : %f \n", i, j, expected, actual); fflush(stdout); return -1; } } } for(i=0; i< bufsize/sizeof(double); i++) { *(buffer[rank] + i) = 1.0 + rank; } OSP_Barrier_group(OSP_GROUP_WORLD); } } } OSP_Release_segments(OSP_GROUP_WORLD, (void *) buffer[rank]); OSP_Free_segment((void *) buffer[rank]); OSP_Finalize(); return 0; }
/********************************************************************* * @fn AlgorithmTask * This task is responsible for running the sensor algorithms * on the incoming sensor data (could be raw or filtered) and * processing output results * * @param none * * @return none * **********************************************************************/ ASF_TASK void AlgorithmTask (ASF_TASK_ARG) { MessageBuffer *rcvMsg = NULLP; OSP_STATUS_t OSP_Status; int alg_count; OSP_GetLibraryVersion(&version); D1_printf("OSP Version: %s\r\n", version->VersionString); /* Initialize the mutex */ mutex_id = osMutexCreate(osMutex(mutexCritSection)); OSP_Status = OSP_Initialize(&gSystemDesc); ASF_assert_msg(OSP_STATUS_OK == OSP_Status, "OSP_Initialize Failed"); OSP_SetCalibrationConfig( 0x1); // disable rotational cal. D0_printf("--Alg Task %i\r\n", __LINE__); // Register the input sensors OSP_Status = OSP_RegisterInputSensor(&_AccSensDesc, &_AccHandle); ASF_assert_msg(OSP_STATUS_OK == OSP_Status, "OSP_RegisterInputSensor (accel) Failed"); OSP_Status = OSP_RegisterInputSensor(&_MagSensDesc, &_MagHandle); ASF_assert_msg(OSP_STATUS_OK == OSP_Status, "OSP_RegisterInputSensor (mag) Failed"); OSP_Status = OSP_RegisterInputSensor(&_GyroSensDesc, &_GyroHandle); ASF_assert_msg(OSP_STATUS_OK == OSP_Status, "OSP_RegisterInputSensor (gyro) Failed"); #if 0 SENSOR_SUBSCRIBE(SENSOR_STEP_COUNTER); SENSOR_SUBSCRIBE(SENSOR_STEP_DETECTOR); SENSOR_SUBSCRIBE(SENSOR_SIGNIFICANT_MOTION); SENSOR_SUBSCRIBE(SENSOR_GYROSCOPE_UNCALIBRATED); SENSOR_SUBSCRIBE(SENSOR_MAGNETIC_FIELD_UNCALIBRATED); SENSOR_SUBSCRIBE(SENSOR_GYROSCOPE); SENSOR_SUBSCRIBE(SENSOR_ACCELEROMETER); SENSOR_SUBSCRIBE(SENSOR_MAGNETIC_FIELD); SENSOR_SUBSCRIBE(SENSOR_ORIENTATION); SENSOR_SUBSCRIBE(SENSOR_GRAVITY); SENSOR_SUBSCRIBE(SENSOR_LINEAR_ACCELERATION); SENSOR_SUBSCRIBE(SENSOR_ROTATION_VECTOR); SENSOR_SUBSCRIBE(SENSOR_GAME_ROTATION_VECTOR); SENSOR_SUBSCRIBE(SENSOR_GEOMAGNETIC_ROTATION_VECTOR); // Subscribing private sensor results PRIVATE_SENSOR_SUBSCRIBE(AP_PSENSOR_ACCELEROMETER_UNCALIBRATED); #endif D0_printf("%s: --Alg Task init done\r\n", __func__); while (1) { ASFReceiveMessage(ALGORITHM_TASK_ID, &rcvMsg); if (!(mycount % 64)) { LED_Toggle(LED_GREEN); } switch (rcvMsg->msgId) { case MSG_MAG_DATA: // SendBgTrigger(); case MSG_ACC_DATA: case MSG_GYRO_DATA: mycount++; HandleSensorData(rcvMsg); //keep doing foreground computation until its finished /* Bump clock speed while processing? HY-DBG */ alg_count = 0; do { OSP_Status = OSP_DoForegroundProcessing(); ASF_assert(OSP_Status != OSP_STATUS_UNSPECIFIED_ERROR); alg_count++; if (alg_count > 5) { D0_printf("%s:%i Taking too long\r\n", __func__, __LINE__); break; } } while(OSP_Status != OSP_STATUS_IDLE); /* DBG: * Run background here as the backgound taks doesn't seem to run enough */ while(OSP_DoBackgroundProcessing() != OSP_STATUS_IDLE); break; case MSG_PRESS_DATA: PressureDataResultCallback(&rcvMsg->msg.msgPressData); break; default: /* Unhandled messages */ D1_printf("Alg-FG:!!!UNHANDLED MESSAGE:%d!!!\r\n", rcvMsg->msgId); break; } ASFDeleteMessage( ALGORITHM_TASK_ID, &rcvMsg ); #ifdef DEBUG_TEST_SENSOR_SUBSCRIPTION // Testing subscribe and unsubscribe sensors DebugTestSensorSubscription(); #endif } }
/**************************************************************************************************** * @fn AlgorithmTask * This task is responsible for running the sensor algorithms on the incoming sensor * data (could be raw or filtered) and processing output results * * @param none * * @return none * ***************************************************************************************************/ ASF_TASK void AlgorithmTask ( ASF_TASK_ARG ) { MessageBuffer *rcvMsg = NULLP; osp_status_t OSP_Status; OSP_GetVersion(&version); D1_printf("OSP Version: %s\r\n", version->VersionString); OSP_Status = OSP_Initialize(&gSystemDesc); ASF_assert_msg(OSP_STATUS_OK == OSP_Status, "SensorManager: OSP_Initialize Failed"); // Register the input sensors OSP_Status = OSP_RegisterInputSensor(&_AccSensDesc, &_AccHandle); ASF_assert_msg(OSP_STATUS_OK == OSP_Status, "SensorManager: OSP_RegisterSensor (accel) Failed"); OSP_Status = OSP_RegisterInputSensor(&_MagSensDesc, &_MagHandle); ASF_assert_msg(OSP_STATUS_OK == OSP_Status, "SensorManager: OSP_RegisterSensor (mag) Failed"); OSP_Status = OSP_RegisterInputSensor(&_GyroSensDesc, &_GyroHandle); ASF_assert_msg(OSP_STATUS_OK == OSP_Status, "SensorManager: OSP_RegisterSensor (gyro) Failed"); // Register output sensors/results OSP_Status = OSP_SubscribeOutputSensor(&stepCounterRequest, &_stepCounterHandle); ASF_assert_msg(OSP_STATUS_OK == OSP_Status, "SensorManager: OSP_SubscribeResult (SENSOR_STEP_COUNTER) Failed"); OSP_Status = OSP_SubscribeOutputSensor(&sigMotionRequest, &_sigMotionHandle); ASF_assert_msg(OSP_STATUS_OK == OSP_Status, "SensorManager: OSP_SubscribeResult (SENSOR_CONTEXT_DEVICE_MOTION) Failed"); OSP_Status = OSP_SubscribeOutputSensor(&UnCalAccelRequest, &_unCalAccelHandle); ASF_assert_msg(OSP_STATUS_OK == OSP_Status, "SensorManager: OSP_SubscribeResult (SENSOR_ACCELEROMETER) Failed"); OSP_Status = OSP_SubscribeOutputSensor(&UnCalMagRequest, &_unCalMagHandle); ASF_assert_msg(OSP_STATUS_OK == OSP_Status, "SensorManager: OSP_SubscribeResult (SENSOR_MAGNETIC_FIELD) Failed"); OSP_Status = OSP_SubscribeOutputSensor(&UnCalGyroRequest, &_unCalGyroHandle); ASF_assert_msg(OSP_STATUS_OK == OSP_Status, "SensorManager: OSP_SubscribeResult (SENSOR_GYROSCOPE) Failed"); while (1) { ASFReceiveMessage( ALGORITHM_TASK_ID, &rcvMsg ); switch (rcvMsg->msgId) { case MSG_MAG_DATA: SendBgTrigger(); case MSG_ACC_DATA: case MSG_GYRO_DATA: HandleSensorData(rcvMsg); do { OSP_Status = OSP_DoForegroundProcessing(); ASF_assert(OSP_Status != OSP_STATUS_ERROR); } while(OSP_Status != OSP_STATUS_IDLE) ; //keep doing foreground computation until its finished break; default: /* Unhandled messages */ D1_printf("Alg-FG:!!!UNHANDLED MESSAGE:%d!!!\r\n", rcvMsg->msgId); break; } } }
int main() { size_t i, rank, nranks, msgsize, peer; long bufsize; double **buffer; double scaling; double t_start, t_stop, t_latency; OSP_Initialize(OSP_THREAD_SINGLE); rank = OSP_Process_id(OSP_GROUP_WORLD); nranks = OSP_Process_total(OSP_GROUP_WORLD); bufsize = MAX_MSG_SIZE * (ITERATIONS + SKIP); buffer = (double **) malloc(sizeof(double *) * nranks); OSP_Alloc_segment((void **) &(buffer[rank]), bufsize); OSP_Exchange_segments(OSP_GROUP_WORLD, (void **) buffer); if (rank == 0) { printf("OSP_PutAcc Latency in usec \n"); printf("%20s %22s %22s\n", "Message Size", "Local Completion", "Remote Completion"); fflush(stdout); } for (i = 0; i < (((ITERATIONS + SKIP) * MAX_MSG_SIZE) / sizeof(double)); i++) { *(buffer[rank] + i) = 1.0 + rank; } scaling = 2.0; OSP_Barrier_group(OSP_GROUP_WORLD); for (msgsize = sizeof(double); msgsize < MAX_MSG_SIZE; msgsize *= 2) { if (rank == 0) { peer = 1; /** Local Completion **/ for (i = 0; i < ITERATIONS + SKIP; i++) { if (i == SKIP) t_start = OSP_Time_seconds(); OSP_PutAcc(peer, (void *) ((size_t) buffer[rank] + (size_t)(i * msgsize)), (void *) ((size_t) buffer[peer] + (size_t)(i * msgsize)), msgsize, OSP_DOUBLE, (void *) &scaling); } t_stop = OSP_Time_seconds(); OSP_Flush(1); printf("%20d %20.2f ", msgsize, ((t_stop - t_start) * 1000000) / ITERATIONS); fflush(stdout); OSP_Barrier_group(OSP_GROUP_WORLD); OSP_Barrier_group(OSP_GROUP_WORLD); for (i = 0; i < ITERATIONS + SKIP; i++) { if (i == SKIP) t_start = OSP_Time_seconds(); OSP_PutAcc(1, (void *) ((size_t) buffer[0] + (size_t)(i * msgsize)), (void *) ((size_t) buffer[1] + (size_t)(i * msgsize)), msgsize, OSP_DOUBLE, (void *) &scaling); OSP_Flush(1); } t_stop = OSP_Time_seconds(); printf("%20.2f \n", ((t_stop - t_start) * 1000000) / ITERATIONS); fflush(stdout); OSP_Barrier_group(OSP_GROUP_WORLD); OSP_Barrier_group(OSP_GROUP_WORLD); } else { peer = 0; OSP_Barrier_group(OSP_GROUP_WORLD); /** Data Validation **/ for (i = 0; i < (((ITERATIONS + SKIP) * msgsize) / sizeof(double)); i++) { if (*(buffer[rank] + i) != ((1.0 + rank) + scaling * (1.0 + peer))) { printf("Data validation failed At displacement : %d Expected : %f Actual : %f \n", i, ((1.0 + rank) + scaling * (1.0 + peer)), *(buffer[rank] + i)); fflush(stdout); return -1; } } for (i = 0; i < (((ITERATIONS + SKIP) * MAX_MSG_SIZE) / sizeof(double)); i++) { *(buffer[rank] + i) = 1.0 + rank; } OSP_Barrier_group(OSP_GROUP_WORLD); OSP_Barrier_group(OSP_GROUP_WORLD); /** Data Validation **/ for (i = 0; i < (((ITERATIONS + SKIP) * msgsize) / sizeof(double)); i++) { if (*(buffer[rank] + i) != ((1.0 + rank) + scaling * (1.0 + peer))) { printf("Data validation failed At displacement : %d Expected : %f Actual : %f \n", i, ((1.0 + rank) + scaling * (1.0 + peer)), *(buffer[rank] + i)); fflush(stdout); return -1; } } for (i = 0; i < (((ITERATIONS + SKIP) * MAX_MSG_SIZE) / sizeof(double)); i++) { *(buffer[rank] + i) = 1.0 + rank; } OSP_Barrier_group(OSP_GROUP_WORLD); } } OSP_Release_segments(OSP_GROUP_WORLD, buffer[rank]); OSP_Finalize(); return 0; }
int main() { int i, j, rank, nranks, msgsize, dest; int dim; long bufsize; double **buffer; int iterations; double t_start, t_stop, t_total, d_total, bw; int count[2], src_stride, trg_stride, stride_level; OSP_handle_t osp_handle; OSP_Initialize(OSP_THREAD_SINGLE); rank = OSP_Process_id(OSP_GROUP_WORLD); nranks = OSP_Process_total(OSP_GROUP_WORLD); OSP_Barrier_group(OSP_GROUP_WORLD); bufsize = MAX_DIM * MAX_DIM * sizeof(double); buffer = (double **) malloc(sizeof(double *) * nranks); OSP_Alloc_segment((void **) &(buffer[rank]), bufsize); OSP_Exchange_segments(OSP_GROUP_WORLD, (void **) buffer); for (i = 0; i < bufsize / sizeof(double); i++) { *(buffer[rank] + i) = 1.0 + rank; } OSP_Allocate_handle(&osp_handle); OSP_Barrier_group(OSP_GROUP_WORLD); if (rank == 0) { printf("OSP_GetS Bandwidth in MBPS \n"); printf("%30s %30s %22s \n", "MsgSize", "Dimensions(array of doubles)", "Latency"); fflush(stdout); dest = 1; src_stride = MAX_DIM * sizeof(double); trg_stride = MAX_DIM * sizeof(double); stride_level = 1; for (dim = 1; dim <= MAX_DIM; dim *= 2) { count[0] = dim*sizeof(double); count[1] = dim; iterations = (MAX_DIM * MAX_DIM)/(dim*dim); t_start = OSP_Time_seconds(); for (i = 0; i < iterations; i++) { OSP_NbGetS(1, stride_level, count, (void *) buffer[dest], &src_stride, (void *) buffer[rank], &trg_stride, osp_handle); } OSP_Wait_handle(osp_handle); t_stop = OSP_Time_seconds(); char temp[10]; sprintf(temp, "%dX%d", dim, dim); t_total = t_stop - t_start; d_total = (dim*dim*sizeof(double)*iterations)/(1024*1024); bw = d_total/t_total; printf("%30d %30s %20.2f \n", dim*dim*sizeof(double), temp, bw); fflush(stdout); for (i = 0; i < dim; i++) { for (j = 0; j < dim; j++) { if (*(buffer[rank] + i * MAX_DIM + j) != (1.0 + dest)) { printf("Data validation failed at X: %d Y: %d Expected : %f Actual : %f \n", i, j, (1.0 + dest), *(buffer[rank] + i * MAX_DIM + j)); fflush(stdout); return -1; } } } for (i = 0; i < bufsize / sizeof(double); i++) { *(buffer[rank] + i) = 1.0 + rank; } } } OSP_Barrier_group(OSP_GROUP_WORLD); OSP_Release_segments(OSP_GROUP_WORLD, (void *) buffer[rank]); OSP_Free_segment((void *) buffer[rank]); OSP_Finalize(); return 0; }
int main() { size_t i, rank, nranks, msgsize, dest; long bufsize; double **buffer; double t_start, t_stop, t_latency; OSP_Initialize(OSP_THREAD_SINGLE); rank = OSP_Process_id(OSP_GROUP_WORLD); nranks = OSP_Process_total(OSP_GROUP_WORLD); bufsize = MAX_MSG_SIZE * (ITERATIONS + SKIP); buffer = (double **) malloc(sizeof(double *) * nranks); OSP_Alloc_segment((void **) &(buffer[rank]), bufsize); OSP_Exchange_segments(OSP_GROUP_WORLD, (void **) buffer); for (i = 0; i < bufsize / sizeof(double); i++) { *(buffer[rank] + i) = 1.0 + rank; } OSP_Barrier_group(OSP_GROUP_WORLD); if (rank == 0) { printf("OSP_Get Latency in usec \n"); printf("%20s %22s \n", "Message Size", "Latency"); fflush(stdout); dest = 1; for (msgsize = sizeof(double); msgsize <= MAX_MSG_SIZE; msgsize *= 2) { for (i = 0; i < ITERATIONS + SKIP; i++) { if (i == SKIP) t_start = OSP_Time_seconds(); OSP_Get(1, (void *) ((size_t) buffer[dest] + (size_t)(i * msgsize)), (void *) ((size_t) buffer[rank] + (size_t)(i * msgsize)), msgsize); } t_stop = OSP_Time_seconds(); printf("%20d %20.2f \n", msgsize, ((t_stop - t_start) * 1000000) / ITERATIONS); fflush(stdout); for (i = 0; i < (((ITERATIONS + SKIP) * msgsize) / sizeof(double)); i++) { if (*(buffer[rank] + i) != (1.0 + dest)) { printf("Data validation failed At displacement : %d Expected : %f Actual : %f \n", i, (1.0 + dest), *(buffer[rank] + i)); fflush(stdout); return -1; } } for (i = 0; i < bufsize / sizeof(double); i++) { *(buffer[rank] + i) = 1.0 + rank; } } } OSP_Barrier_group(OSP_GROUP_WORLD); OSP_Release_segments(OSP_GROUP_WORLD, buffer[rank]); OSP_Finalize(); return 0; }
int main() { int i, j, k, rank, nranks, msgsize; int dim; long bufsize; double **buffer; unsigned long long t_start, t_stop, t_latency, t_overlap; unsigned long long wait_start, wait_stop; int count[2], src_stride, trg_stride, stride_level, peer; double A[1024][1024], B[1024][1024], C[1024][1024]; int m1,m2,m3; double expected, actual; OSP_handle_t osp_handle; OSP_Initialize(OSP_THREAD_SINGLE); rank = OSP_Process_id(OSP_GROUP_WORLD); nranks = OSP_Process_total(OSP_GROUP_WORLD); buffer = (double **) malloc (sizeof(double *) * nranks); OSP_Allocate_handle(&osp_handle); OSP_Barrier_group(OSP_GROUP_WORLD); bufsize = MAX_DIM * MAX_DIM * sizeof(double); OSP_Alloc_segment((void **) &(buffer[rank]), bufsize); OSP_Exchange_segments(OSP_GROUP_WORLD, (void **) buffer); for(i=0; i< bufsize/sizeof(double); i++) { *(buffer[rank] + i) = 1.0 + rank; } if(rank == 0) { printf("OSP_PutS Overlap - NbPutS + DGEMM + Wait. Time in cycles\n"); printf("%30s %30s %22s %22s\n", "Msg Size", "Dimensions(array of doubles)", "Base Latency", "Overlaped Latency"); fflush(stdout); src_stride = MAX_DIM*sizeof(double); trg_stride = MAX_DIM*sizeof(double); stride_level = 1; for(dim=1; dim<=MAX_DIM; dim*=2) { count[0] = dim*sizeof(double); count[1] = 512; peer = 1; for(i=0; i<ITERATIONS+SKIP; i++) { if(i == SKIP) t_start = OSP_Time_cycles(); for(k=0; k<WINDOW; k++) { OSP_NbPutS(peer, stride_level, count, (void *) buffer[rank], &src_stride, (void *) buffer[peer], &trg_stride, osp_handle); } OSP_Wait_handle(osp_handle); } t_stop = OSP_Time_cycles(); OSP_Flush(peer); t_latency = (t_stop-t_start)/ITERATIONS; char temp[10]; sprintf(temp,"%dX%d", count[1], dim); printf("%30d %30s %20lld", count[1]*count[0], temp, t_latency); fflush(stdout); t_start = OSP_Time_cycles(); for(i=0; i<ITERATIONS; i++) { for(k=0; k<WINDOW; k++) { OSP_NbPutS(peer, stride_level, count, (void *) buffer[rank], &src_stride, (void *) buffer[peer], &trg_stride, osp_handle); } wait_start = OSP_Time_cycles(); for(m1=0; m1<1024; m1++) { for(m2=0; m2<1024; m2++) { for(m3=0; m3<1024; m3++) { C[m1][m2] += A[m1][m3] * B[m3][m2]; wait_stop = OSP_Time_cycles(); if((wait_stop - wait_start) > t_latency) break; } if((wait_stop - wait_start) > t_latency) break; } if((wait_stop - wait_start) > t_latency) break; } OSP_Wait_handle(osp_handle); } t_stop = OSP_Time_cycles(); OSP_Flush(peer); t_overlap = (t_stop - t_start)/ITERATIONS; printf("%20lld \n", t_overlap); } } OSP_Barrier_group(OSP_GROUP_WORLD); OSP_Release_handle(osp_handle); OSP_Release_segments(OSP_GROUP_WORLD, (void *) buffer[rank]); OSP_Free_segment((void *) buffer[rank]); OSP_Finalize(); return 0; }