//-------------------------------------------------------------------------------------- // FUNCTION: RCCE_flag_alloc //-------------------------------------------------------------------------------------- // allocate space for one flag. Since multiple fit on a single cache line, we only // need to allocate new MPB space when all the existing lines are completely filled. A // flag line is a data structure that contains an array ("flag") of size RCCE_LINE_SIZE // characters. Each element in "flag" corresponds to a flag being in use (value is 1) // or not (value is 0). The actual value of the flag is stored in the MPB line pointed // to be the field "line_address," at the corresponding bit/byte location as in field // "flag." //-------------------------------------------------------------------------------------- int RCCE_flag_alloc(RCCE_FLAG *flag) { RCCE_FLAG_LINE *flagp; t_vcharp flag_addr; int c, loc; // find the head of the data structure that administers the flag variables flagp = &RCCE_flags; while (flagp->members == RCCE_FLAGS_PER_LINE && flagp->next) { flagp = flagp->next; } // if this is a new flag line, need to allocate MPB for it if (!flagp->line_address) flagp->line_address = RCCE_malloc(RCCE_LINE_SIZE); if (!flagp->line_address) return(RCCE_error_return(RCCE_debug_synch, RCCE_ERROR_FLAG_NOT_ALLOCATED)); if (flagp->members < RCCE_FLAGS_PER_LINE) { // there is space in this line for a new flag; find first open slot for (loc=0; loc<RCCE_FLAGS_PER_LINE; loc++) { flag_addr = flagp->line_address + loc/RCCE_FLAGS_PER_BYTE; if (!((int)(flagp->flag[loc]))) { flagp->flag[loc] = (char) ((unsigned int) 1); flagp->members++; flag->location = loc; flag->line_address = flagp->line_address; flag->flag_addr = flag_addr; return(RCCE_SUCCESS); } } } else { // must create new flag line if last one was full flagp->next = (RCCE_FLAG_LINE *) malloc(sizeof(RCCE_FLAG_LINE)); if (!(flagp->next)) return(RCCE_error_return(RCCE_debug_synch, RCCE_ERROR_FLAG_NOT_ALLOCATED)); flagp = flagp->next; flagp->line_address = RCCE_malloc(RCCE_LINE_SIZE); if (!(flagp->line_address)) return(RCCE_error_return(RCCE_debug_synch, RCCE_ERROR_FLAG_NOT_ALLOCATED)); // initialize the flag line flagp->members=1; flagp->next = NULL; for (c=1; c<RCCE_LINE_SIZE; c++) flagp->flag[c] = (char)((unsigned int) 0); // set first flag field to indicate the corresponding flag is now in use flagp->flag[0] = (char)((unsigned int) 1); flag->location = 0; flag->line_address = flagp->line_address; flag->flag_addr = flag->line_address; } return(RCCE_SUCCESS); }
int RCCE_APP(int argc, char **argv){ int ID, ID_nb, ID_donor, nrounds, error, strlength; RCCE_FLAG flag_sent, flag_ack; double *cbuffer, *buffer, sum; char msg[RCCE_MAX_ERROR_STRING]; RCCE_init(&argc, &argv); ID = RCCE_ue(); ID_nb = (ID+1)%RCCE_num_ues(); ID_donor = (ID-1+RCCE_num_ues())%RCCE_num_ues(); if (argc != 2) { if (ID==0) printf("Executable requires one parameter (number of rounds): %d\n",argc-1); return(1); } nrounds = atoi(*++argv); if (nrounds < 0) { if (ID==0) printf("Number of rounds should be non-negative: %d\n", nrounds); return(1); } /* allocate private memory and comm buffer space */ buffer = (double *) malloc(BUFSIZE*sizeof(double)); if (!buffer) printf("Mark 01: Failed to allocate private buffer on proc %d\n", ID); cbuffer = (double *) RCCE_malloc(BUFSIZE*sizeof(double)); if (!buffer) printf("Mark 02:RCCE failed to allocate %d doubles on proc %d\n", BUFSIZE, ID); /* initialize buffer with UE-specific data */ for (int i=0; i<BUFSIZE; i++) buffer[i] = (double)(ID+1+i); sum = 0.0; for (int i=0; i<BUFSIZE; i++) sum += buffer[i]; printf("Initial sum on UE %03d equals %f\n", ID, sum); /* create and initialize flag variables */ if (error=RCCE_flag_alloc(&flag_sent)) printf("Mark 03a: Could not allocate flag_sent on %d, error=%d\n", ID, error); if (error=RCCE_flag_alloc(&flag_ack)) printf("Mark 03b: Could not allocate flag_ack on %d, error=%d\n", ID, error); if(error=RCCE_flag_write(&flag_sent, RCCE_FLAG_UNSET, ID)) printf("Mark 04: Could not initialize flag_sent on %d, error=%d\n", ID, error); if(error=RCCE_flag_write(&flag_ack, RCCE_FLAG_SET, ID_donor)) printf("Mark 05: Could not initialize flag_ack on %d, error=%d\n", ID_donor, error); for (int round=0; round<nrounds; round++) { int size = BUFSIZE*sizeof(double); RCCE_wait_until(flag_ack, RCCE_FLAG_SET); RCCE_flag_write(&flag_ack, RCCE_FLAG_UNSET, ID); RCCE_put((t_vcharp)cbuffer, (t_vcharp)buffer, size, ID_nb); RCCE_flag_write(&flag_sent, RCCE_FLAG_SET, ID_nb); RCCE_wait_until(flag_sent, RCCE_FLAG_SET); RCCE_flag_write(&flag_sent, RCCE_FLAG_UNSET, ID); RCCE_get((t_vcharp)buffer, (t_vcharp)cbuffer, size, ID); RCCE_flag_write(&flag_ack, RCCE_FLAG_SET, ID_donor); } /* compute local sum */ sum = 0.0; for (int i=0; i<BUFSIZE; i++) sum += buffer[i]; printf("Final sum on UE %03d equals %f\n", ID, sum); RCCE_finalize(); return(0); }
int RCCE_APP(int argc, char **argv) { /* statically allocated space sits in off-chip private memory */ float a[NXNY], *buff; int i, offset, iter=10, tile; int MY_ID; int NTILES1; double time; RCCE_FLAG flag0, flag1; RCCE_init(&argc, &argv); NTILES1 = RCCE_num_ues()-1; MY_ID = RCCE_ue(); if (NX%8) { printf("Grid width should be multiple of 8: %d\n", NX); exit(1); } if (argc>1) iter=atoi(*++argv); if (MY_ID==0) printf("Executing %d iterations\n", iter); /* allocate space on the comm buffer */ buff = (float *) RCCE_malloc(sizeof(float)*2*NX); /* Allocate flags to coordinate comm. */ if (RCCE_flag_alloc(&flag0)) return(1); if (RCCE_flag_alloc(&flag1)) return(1); /* initialize array a on all tiles; this stuffs a into private caches */ for (offset=0, i=0; i<NXNY; i++) a[i+offset] = 0.0; if (MY_ID == 0) for (offset=0, i=0; i<NX; i++) a[i+offset] = 1.0; if (MY_ID == NTILES1) for (offset=NXNY1,i=0; i<NX; i++) a[i+offset] = 2.0; /* put in a barrier so everybody can be sure to have initialized */ RCCE_barrier(&RCCE_COMM_WORLD); /* main loop */ if (MY_ID==0) time = RCCE_wtime(); while ((iter--)>0){ /* start with copying fringe data to neighboring tiles */ if (MY_ID!=NTILES1) { /* Initialize neighbor flag to zero */ RCCE_flag_write(&flag0, RCCE_FLAG_UNSET, MY_ID+1); /* copy private data to shared comm buffer of neighbor */ RCCE_put((t_vcharp)(&buff[0]), (t_vcharp)(&a[NXNY2]), NX*sizeof(float), MY_ID+1); RCCE_flag_write(&flag0, RCCE_FLAG_SET, MY_ID+1); } if (MY_ID != 0) { /* Initialize neighbor flag to zero */ RCCE_flag_write(&flag1, 0, MY_ID-1); /* copy private data to shared comm buffer of neighbor */ RCCE_put((t_vcharp)(&buff[NX]), (t_vcharp)(&a[NX]), NX*sizeof(float), MY_ID-1); RCCE_flag_write(&flag1, RCCE_FLAG_SET, MY_ID-1); } /* Make sure the data has been recvd and copy data out of buffer(s) */ if (MY_ID!=NTILES1) { RCCE_wait_until(flag1, RCCE_FLAG_SET); RCCE_get((t_vcharp)(&a[NXNY1]), (t_vcharp)(&buff[NX]), NX*sizeof(float),MY_ID); } if (MY_ID!=0) { RCCE_wait_until(flag0, RCCE_FLAG_SET); RCCE_get((t_vcharp)(&a[0]), (t_vcharp)(&buff[0]), NX*sizeof(float),MY_ID); } /* apply the stencil operation */ for (i=0; i<NXNY2; i++) { a[i+O3] += W1*a[i+O1] + W2*a[i+O2] + W3*a[i+O3] + W4*a[i+O4] + W5*a[i+O5]; } } RCCE_barrier(&RCCE_COMM_WORLD); if (MY_ID==0) { time = RCCE_wtime()-time; } /* print result strip by strip; this would not be done on RC */ for (int id=0; id<=NTILES1; id++) { RCCE_barrier(&RCCE_COMM_WORLD); if (MY_ID==id) { int start = NX; int end = NXNY1; if (MY_ID==0) start = 0; if (MY_ID == NTILES1) end = NXNY; for (offset=0, i=start; i<end; i++) { if (!(i%NX)) printf("\n"); // comment out next line and uncomment subsequent three to print error printf("%f ",a[i+offset]); // int jj=i/NX+(MY_ID*(NY-1)); // double aexact=1.0+(double)jj/((NTILES1+1)*(NY-1)); // printf("%f ",a[i+offset]-aexact); } } } RCCE_barrier(&RCCE_COMM_WORLD); if (MY_ID==0) { printf("\nTotal time: %lf\n", time); } RCCE_finalize(); return(0); }