int RCCE_APP(int argc, char **argv){ int YOU, ME, nrounds = 1024*1024, actualrounds, size, N=32, round, pair, index; int bigsize, subindex, roundsize; double timer; char buffer[1024*1024*4]; RCCE_init(&argc, &argv); // RCCE_debug_set(RCCE_DEBUG_ALL); ME = RCCE_ue(); YOU = !ME; if (argc>1) nrounds = atoi(*++argv); if (nrounds<1) { if (!ME) printf("Pingpong needs at least 1 round; try again\n"); return(1); } if (RCCE_num_ues() != 2) { if (!ME) printf("Pingpong needs at two UEs; try again\n"); return(1); } bigsize = 32; for (index=0; index<17; index++) { size = bigsize; for (subindex=0; subindex<4; subindex++) { roundsize = max(32,size - size%32); // synchronize before starting the timer RCCE_barrier(&RCCE_COMM_WORLD); timer = RCCE_wtime(); actualrounds = max(10,(nrounds*32)/roundsize); for (round=0; round <actualrounds; round++) { if (ME) { RCCE_send(buffer, roundsize, YOU); RCCE_recv(buffer, roundsize, YOU); } else { RCCE_recv(buffer, roundsize, YOU); RCCE_send(buffer, roundsize, YOU); } } timer = RCCE_wtime()-timer; if (ME) printf("%d %1.9lf\n", roundsize, timer/actualrounds); size *= 1.18920712; } bigsize *= 2; } RCCE_finalize(); return(0); }
/* Store the current RCCE wall-clock time as the start time of timer `np`. */
void timer_start(int np)
{
    int slot = np;

    start_time(slot) = RCCE_wtime();
}
/*
 * Store the current RCCE wall-clock time as the start time of the timer
 * whose index is read through `np`.
 */
void timer_start(int *np)
{
    int slot = *np;

    start(slot) = RCCE_wtime();
}
/*
 * Seed rand() from the RCCE wall clock.
 *
 * The seed is the fractional part of RCCE_wtime() scaled by 1000000, plus
 * 13 * (RCCE_ue() + 1) so that the seed also depends on the calling UE.
 */
void srand_core()
{
    double now = RCCE_wtime();
    unsigned int whole_part = (unsigned int) now;
    unsigned int scaled_fraction =
        (unsigned int) ((now - whole_part) * 1000000);
    unsigned int seed = scaled_fraction + (13 * (RCCE_ue() + 1));

    srand(seed);
}
/*
 * Stop the timer whose index is read through `np`: the time since the
 * recorded start of that timer is added to its accumulated elapsed time.
 */
void timer_stop(int *np)
{
    int slot = *np;
    double stop_time = RCCE_wtime();
    double delta = stop_time - start(slot);

    elapsed(slot) += delta;
}
int RCCE_send(char *privbuf, size_t size, int dest) { #ifdef MEASURE_TIME double send_start = RCCE_wtime(); #endif if (dest<0 || dest >= RCCE_NP) { return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID)); } errval_t err = send_message(privbuf, size, RC_COREID[dest]); assert(err_is_ok(err)); #ifdef MEASURE_TIME measure_rcce_time += RCCE_wtime() - send_start; #endif #ifdef MEASURE_DATA measure_rcce_data[rcce_curphase][dest] += size; #endif return (RCCE_SUCCESS); }
int RCCE_finalize(void) { #ifdef MEASURE_TIME double measure_end = RCCE_wtime(); printf("%d: Time spent in RCCE communication %.5g seconds. " "%.5g seconds total program run-time.\n", RCCE_ue(), measure_rcce_time, measure_end - measure_start); #endif #ifdef MEASURE_DATA for(int phase = 0; phase < MAX_PHASES; phase++) { printf("%d: Phase %d: ", RCCE_ue(), phase); for(int i = 0; i < RCCE_NP; i++) { printf("%lu ", measure_rcce_data[phase][i]); } printf("\n"); } #endif return (RCCE_SUCCESS); }
int RCCE_APP(int argc, char **argv) { int i; int num_ranks; int remote_rank, my_rank; int numrounds = NUMROUNDS; int maxlen = DEFAULTLEN; int length; int round; double timer; RCCE_SEND_REQUEST send_request; RCCE_RECV_REQUEST recv_request; RCCE_init(&argc, &argv); my_rank = RCCE_ue(); num_ranks = RCCE_num_ues(); if(argc > 1) numrounds = atoi(argv[1]); if(numrounds < 1) { if(my_rank == 0) fprintf(stderr, "Pingping needs at least 1 round; try again\n"); exit(-1); } if(argc > 2) maxlen = atoi(argv[2]); if(maxlen < 1) { if(my_rank == 0) fprintf(stderr, "Illegal message size: %s; try again\n", argv[2]); exit(-1); } else if(maxlen > MAXBUFSIZE) { if(my_rank == 0) fprintf(stderr, "Message size %d is too big; try again\n", maxlen); exit(-1); } if(num_ranks != 2) { if(my_rank == 0) fprintf(stderr, "Pingping needs exactly two UEs; try again\n"); exit(-1); } remote_rank = (my_rank + 1) % 2; if(my_rank == 0) printf("#bytes\t\tusec\t\tMB/sec\n"); for(length=1; length <= maxlen; length*=2) { #ifdef _CACHE_WARM_UP_ for(i=0; i < length; i++) { /* cache warm-up: */ dummy += send_buffer[i]; dummy += recv_buffer[i]; } #endif /* synchronize before starting PING-PING: */ RCCE_barrier(&RCCE_COMM_WORLD); for(round=0; round < numrounds+1; round++) { #ifdef _ERROR_CHECK_ for(i=0; i < length; i++) { send_buffer[i] = (i+length+round) % 127; } #endif /* send PING: */ RCCE_isend(send_buffer, length, remote_rank, &send_request); /* recv PING: */ RCCE_irecv(recv_buffer, length, remote_rank, &recv_request); /* wait for completion: */ RCCE_isend_wait(&send_request); RCCE_irecv_wait(&recv_request); /* start timer: */ if(round==0) timer = RCCE_wtime(); #ifdef _ERROR_CHECK_ for(i=0; i < length; i++) { if(recv_buffer[i] != (i+length+round) % 127) { fprintf(stderr, "ERROR: %d VS %d\n", recv_buffer[i], (i+length+round) % 127); exit(-1); } } #endif } /* stop timer: */ timer = RCCE_wtime() - timer; if(my_rank == 0) printf("%d\t\t%1.2lf\t\t%1.2lf\n", length, timer/(numrounds)*1000000, (length / 
(timer/(numrounds))) / (1024*1024) ); } RCCE_finalize(); return 0; }
int RCCE_APP(int argc, char **argv){ float a[NXNY]; int i, offset, iter=3; int fdiv, vlevel; int ID, ID_right, ID_left; int NTILES1; double time; RCCE_REQUEST req; RCCE_init(&argc, &argv); // RCCE_debug_set(RCCE_DEBUG_ALL); NTILES1 = RCCE_num_ues()-1; ID = RCCE_ue(); ID_right = (ID+1)%RCCE_num_ues(); ID_left = (ID-1+RCCE_num_ues())%RCCE_num_ues(); // set the relevant areas of the board to the default frequency and voltage RCCE_set_frequency_divider(8, &fdiv); if (ID==0)print_dividers(); // return(0); // RCCE_iset_power(3, &req, &fdiv, &vlevel); // if (ID==RCCE_power_domain_master()) printf("UE %d computed vlevel %d\n", ID,vlevel); // RCCE_wait_power(&req); // RCCE_set_frequency_divider(3, &fdiv); if (NX%8) { printf("Grid width should be multiple of 8: %d\n", NX); exit(1); } if (argc>1) iter=atoi(*++argv); if (!ID) printf("Core %d Executing %d iterations\n", ID, iter); /* initialize array a on all tiles; this stuffs a into private caches */ for (offset=0, i=0; i<NXNY; i++) a[i+offset] = 0.0; if (ID == 0) for (offset=0, i=0; i<NX; i++) a[i+offset] = 1.0; if (ID == NTILES1) for (offset=NXNY1,i=0; i<NX; i++) a[i+offset] = 2.0; /* main loop */ if (ID==0) time = RCCE_wtime(); while ((iter--)>0){ RCCE_iset_power(3, &req, &fdiv, &vlevel); if (ID==RCCE_power_domain_master()) printf("asked for divider 3, received %d, voltage level %d\n", fdiv, vlevel); fflush(NULL); if (!(iter%100)) printf("Iteration %d\n", iter); /* start with copying fringe data to neighboring tiles; we need to group semantic send/recv pairs together to avoid deadlock */ if (ID_right!=0) RCCE_send((char*)(&a[NXNY2]), NX*sizeof(float), ID_right); if (ID != 0) RCCE_recv((char*)(&a[0]), NX*sizeof(float), ID_left); RCCE_wait_power(&req); if (ID!=0) RCCE_send((char *)(&a[NX]), NX*sizeof(float), ID_left); if (ID_right!=0) RCCE_recv((char *)(&a[NXNY1]), NX*sizeof(float), ID_right); RCCE_iset_power(3, &req, &fdiv, &vlevel); RCCE_set_frequency_divider(3, &fdiv); if (ID==RCCE_power_domain_master()) printf("asked for 
divider 3, received %d, voltage level %d\n", fdiv, vlevel); fflush(NULL); /* apply the stencil operation */ for (i=0; i<NXNY2; i++) { a[i+O3] += W1*a[i+O1] + W2*a[i+O2] + W3*a[i+O3] + W4*a[i+O4] + W5*a[i+O5]; } RCCE_wait_power(&req); } // /* print result strip by strip; this would not be done on RC */ // for (int id=0; id<=NTILES1; id++) { // RCCE_barrier(&RCCE_COMM_WORLD); // if (ID==id) { // int start = NX; int end = NXNY1; // if (ID==0) start = 0; // if (ID == NTILES1) end = NXNY; // for (offset=0, i=start; i<end; i++) { // if (!(i%NX)) printf("\n"); //// comment out next line and uncomment subsequent three to print error // printf("%1.5f ",a[i+offset]); fflush(stdout); //// int jj=i/NX+(ID*(NY-1)); //// double aexact=1.0+(double)jj/((NTILES1+1)*(NY-1)); //// printf("%f ",a[i+offset]-aexact); // } // } // } // RCCE_barrier(&RCCE_COMM_WORLD); // if (ID==0) { // printf("\n"); // time = RCCE_wtime()-time; // printf("Total time: %lf\n", time); // } //reset the relevant areas of the board to the default frequency and voltage // RCCE_set_frequency_divider(8, &fdiv); // RCCE_iset_power(2, &req, &fdiv, &vlevel); // if (ID==RCCE_power_domain_master()) printf("UE %d computed vlevel %d\n", ID,vlevel); // RCCE_wait_power(&req); // RCCE_set_frequency_divider(3, &fdiv); RCCE_barrier(&RCCE_COMM_WORLD); if (ID==0)print_dividers(); RCCE_finalize(); return(0); }
/*
 * MPI-style name for the RCCE wall clock; returns RCCE_wtime().
 *
 * Original author's note: somehow, this does not work; MPI_Wtime must be
 * replaced with RCCE_wtime directly.
 */
double MPI_Wtime(void)
{
    double now = RCCE_wtime();

    return now;
}
int RCCE_init(int *argc, char ***argv) { int ue; void *nothing = NULL; assert(*argc >= 3); setup_routes(*argc, *argv); // save pointer to executable name for later insertion into the argument list char *executable_name = (*argv)[0]; RCCE_NP = atoi(*(++(*argv))); RC_REFCLOCKGHZ = atof(*(++(*argv))); if(RC_REFCLOCKGHZ == 0) { printf("Barrelfish RCCE extension: Computing reference clock GHz automatically...\n"); uint64_t tscperms; errval_t err = sys_debug_get_tsc_per_ms(&tscperms); assert(err_is_ok(err)); RC_REFCLOCKGHZ = ((double)tscperms) / 1000000.0; printf("Reference clock computed to be %.2g\n", RC_REFCLOCKGHZ); } // put the participating core ids (unsorted) into an array for (ue=0; ue<RCCE_NP; ue++) { RC_COREID[ue] = atoi(*(++(*argv))); } // make sure executable name is as expected (*argv)[0] = executable_name; RC_MY_COREID = MYCOREID(); // adjust apparent number of command line arguments, so it will appear to main // program that number of UEs, clock frequency, and core ID list were not on // command line *argc -= RCCE_NP+2; // sort array of participating phyical core IDs to determine their ranks qsort((char *)RC_COREID, RCCE_NP, sizeof(int), id_compare); // determine rank of calling core for (ue=0; ue<RCCE_NP; ue++) { if (RC_COREID[ue] == RC_MY_COREID) RCCE_IAM = ue; } // leave in one reassuring debug print printf("My rank is %d, physical core ID is %d\n", RCCE_IAM, RC_MY_COREID); if (RCCE_IAM<0) { return(RCCE_ERROR_CORE_NOT_IN_HOSTFILE); } // create global communicator (equivalent of MPI_COMM_WORLD); this will also allocate // the two synchronization flags associated with the global barrier RCCE_comm_split(RCCE_global_color, nothing, &RCCE_COMM_WORLD); #ifdef MEASURE_TIME measure_start = RCCE_wtime(); measure_rcce_time = 0.0; #endif #ifdef MEASURE_DATA memset(measure_rcce_data, 0, sizeof(measure_rcce_data)); #endif return (RCCE_SUCCESS); }
int RCCE_recv(char *privbuf, size_t size, int source) { errval_t err; #ifdef MEASURE_TIME double recv_start = RCCE_wtime(); #endif #ifdef RCCE_PERF_MEASURE dispatcher_handle_t handle = curdispatcher(); struct dispatcher_shared_generic* d = get_dispatcher_shared_generic(handle); #endif if (source<0 || source >= RCCE_NP) { return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID)); } int core_id = RC_COREID[source]; struct msg_buf *mb = &msgbuf[core_id]; #ifdef BULK_TRANSFER_ENABLED mb->bulk_ready = true; mb->length = size; mb->current = 0; mb->msg = privbuf; #endif dprintf("%d: R(%lu,%d,%p,%d,%p)\n", my_core_id, size, source, mb, mb->pending, privbuf); #ifdef BULK_TRANSFER_ENABLED err = barray[core_id]->tx_vtbl.bulk_recv_ready(barray[core_id], NOP_CONT, my_core_id, size); assert(err_is_ok(err)); #endif PERF(30); while(!mb->pending) { messages_wait_and_handle_next(); } PERF(31); dprintf("%d: msg arrived\n", my_core_id); /* if(size <= DEFAULT_UMP_BUFLEN) { */ #ifndef BULK_TRANSFER_ENABLED assert(size == mb->length); memcpy(privbuf, mb->msg, size); /* } else { */ #else assert(mb->bulk); #endif /* } */ mb->pending = false; #ifndef BULK_TRANSFER_ENABLED assert(!mb->bulk); free(mb->msg); PERF(32); err = barray[core_id]->tx_vtbl.message_reply(barray[core_id], NOP_CONT, my_core_id); PERF(33); assert(err_is_ok(err)); #else assert(mb->bulk); #endif #ifdef MEASURE_TIME measure_rcce_time += RCCE_wtime() - recv_start; #endif return (RCCE_SUCCESS); }
int RCCE_APP(int argc, char **argv) { /* statically allocated space sits in off-chip private memory */ float a[NXNY], *buff; int i, offset, iter=10, tile; int MY_ID; int NTILES1; double time; RCCE_FLAG flag0, flag1; RCCE_init(&argc, &argv); NTILES1 = RCCE_num_ues()-1; MY_ID = RCCE_ue(); if (NX%8) { printf("Grid width should be multiple of 8: %d\n", NX); exit(1); } if (argc>1) iter=atoi(*++argv); if (MY_ID==0) printf("Executing %d iterations\n", iter); /* allocate space on the comm buffer */ buff = (float *) RCCE_malloc(sizeof(float)*2*NX); /* Allocate flags to coordinate comm. */ if (RCCE_flag_alloc(&flag0)) return(1); if (RCCE_flag_alloc(&flag1)) return(1); /* initialize array a on all tiles; this stuffs a into private caches */ for (offset=0, i=0; i<NXNY; i++) a[i+offset] = 0.0; if (MY_ID == 0) for (offset=0, i=0; i<NX; i++) a[i+offset] = 1.0; if (MY_ID == NTILES1) for (offset=NXNY1,i=0; i<NX; i++) a[i+offset] = 2.0; /* put in a barrier so everybody can be sure to have initialized */ RCCE_barrier(&RCCE_COMM_WORLD); /* main loop */ if (MY_ID==0) time = RCCE_wtime(); while ((iter--)>0){ /* start with copying fringe data to neighboring tiles */ if (MY_ID!=NTILES1) { /* Initialize neighbor flag to zero */ RCCE_flag_write(&flag0, RCCE_FLAG_UNSET, MY_ID+1); /* copy private data to shared comm buffer of neighbor */ RCCE_put((t_vcharp)(&buff[0]), (t_vcharp)(&a[NXNY2]), NX*sizeof(float), MY_ID+1); RCCE_flag_write(&flag0, RCCE_FLAG_SET, MY_ID+1); } if (MY_ID != 0) { /* Initialize neighbor flag to zero */ RCCE_flag_write(&flag1, 0, MY_ID-1); /* copy private data to shared comm buffer of neighbor */ RCCE_put((t_vcharp)(&buff[NX]), (t_vcharp)(&a[NX]), NX*sizeof(float), MY_ID-1); RCCE_flag_write(&flag1, RCCE_FLAG_SET, MY_ID-1); } /* Make sure the data has been recvd and copy data out of buffer(s) */ if (MY_ID!=NTILES1) { RCCE_wait_until(flag1, RCCE_FLAG_SET); RCCE_get((t_vcharp)(&a[NXNY1]), (t_vcharp)(&buff[NX]), NX*sizeof(float),MY_ID); } if (MY_ID!=0) { 
RCCE_wait_until(flag0, RCCE_FLAG_SET); RCCE_get((t_vcharp)(&a[0]), (t_vcharp)(&buff[0]), NX*sizeof(float),MY_ID); } /* apply the stencil operation */ for (i=0; i<NXNY2; i++) { a[i+O3] += W1*a[i+O1] + W2*a[i+O2] + W3*a[i+O3] + W4*a[i+O4] + W5*a[i+O5]; } } RCCE_barrier(&RCCE_COMM_WORLD); if (MY_ID==0) { time = RCCE_wtime()-time; } /* print result strip by strip; this would not be done on RC */ for (int id=0; id<=NTILES1; id++) { RCCE_barrier(&RCCE_COMM_WORLD); if (MY_ID==id) { int start = NX; int end = NXNY1; if (MY_ID==0) start = 0; if (MY_ID == NTILES1) end = NXNY; for (offset=0, i=start; i<end; i++) { if (!(i%NX)) printf("\n"); // comment out next line and uncomment subsequent three to print error printf("%f ",a[i+offset]); // int jj=i/NX+(MY_ID*(NY-1)); // double aexact=1.0+(double)jj/((NTILES1+1)*(NY-1)); // printf("%f ",a[i+offset]-aexact); } } } RCCE_barrier(&RCCE_COMM_WORLD); if (MY_ID==0) { printf("\nTotal time: %lf\n", time); } RCCE_finalize(); return(0); }