//-------------------------------------------------------------------------------------- // FUNCTION: RCCE_barrier //-------------------------------------------------------------------------------------- // very simple, linear barrier //-------------------------------------------------------------------------------------- int RCCE_barrier(RCCE_COMM *comm) { int counter, i, error; int ROOT = 0; volatile unsigned char cyclechar[RCCE_LINE_SIZE]; volatile unsigned char valchar[RCCE_LINE_SIZE]; volatile int *cycle; volatile int *val; counter = 0; cycle = (volatile int *)cyclechar; val = (volatile int *)valchar; if (RCCE_debug_synch) fprintf(STDERR,"UE %d has checked into barrier\n", RCCE_IAM); // flip local barrier variable if (error = RCCE_get(cyclechar, (t_vcharp)(comm->gather), RCCE_LINE_SIZE, RCCE_IAM)) return(RCCE_error_return(RCCE_debug_synch,error)); *cycle = !(*cycle); if (error = RCCE_put((t_vcharp)(comm->gather), cyclechar, RCCE_LINE_SIZE, RCCE_IAM)) return(RCCE_error_return(RCCE_debug_synch,error)); if (RCCE_IAM==comm->member[ROOT]) { // read "remote" gather flags; once all equal "cycle" (i.e counter==comm->size), // we know all UEs have reached the barrier while (counter != comm->size) { // skip the first member (#0), because that is the ROOT for (counter=i=1; i<comm->size; i++) { /* copy flag values out of comm buffer */ if (error = RCCE_get(valchar, (t_vcharp)(comm->gather), RCCE_LINE_SIZE, comm->member[i])) return(RCCE_error_return(RCCE_debug_synch,error)); if (*val == *cycle) counter++; } } // set release flags for (i=1; i<comm->size; i++) { if (error = RCCE_flag_write(&(comm->release), *cycle, comm->member[i])) return(RCCE_error_return(RCCE_debug_synch,error)); } } else { if (error = RCCE_wait_until(comm->release, *cycle)) return(RCCE_error_return(RCCE_debug_synch,error)); } if (RCCE_debug_synch) fprintf(STDERR,"UE %d has cleared barrier\n", RCCE_IAM); return(RCCE_SUCCESS); }
//-------------------------------------------------------------------------------------- // FUNCTION: RCCE_barrier //-------------------------------------------------------------------------------------- // very simple, linear barrier //-------------------------------------------------------------------------------------- int RCCE_barrier(RCCE_COMM *comm) { int counter, i, error; int ROOT = 0; t_vchar cyclechar[RCCE_LINE_SIZE]; t_vchar valchar [RCCE_LINE_SIZE]; t_vcharp gatherp, releasep; RCCE_FLAG_STATUS cycle; counter = 0; gatherp = comm->gather.line_address; if (RCCE_debug_synch) fprintf(STDERR,"UE %d has checked into barrier\n", RCCE_IAM); // flip local barrier variable if (error = RCCE_get(cyclechar, gatherp, RCCE_LINE_SIZE, RCCE_IAM)) return(RCCE_error_return(RCCE_debug_synch,error)); cycle = RCCE_flip_bit_value(cyclechar, comm->gather.location); if (error = RCCE_put(comm->gather.line_address, cyclechar, RCCE_LINE_SIZE, RCCE_IAM)) return(RCCE_error_return(RCCE_debug_synch,error)); if (RCCE_IAM==comm->member[ROOT]) { // read "remote" gather flags; once all equal "cycle" (i.e counter==comm->size), // we know all UEs have reached the barrier while (counter != comm->size) { // skip the first member (#0), because that is the ROOT for (counter=i=1; i<comm->size; i++) { // copy flag values out of comm buffer if (error = RCCE_get(valchar, comm->gather.line_address, RCCE_LINE_SIZE, comm->member[i])) return(RCCE_error_return(RCCE_debug_synch,error)); if (RCCE_bit_value(valchar, comm->gather.location) == cycle) counter++; } } // set release flags for (i=1; i<comm->size; i++) if (error = RCCE_flag_write(&(comm->release), cycle, comm->member[i])) return(RCCE_error_return(RCCE_debug_synch,error)); } else { if (error = RCCE_wait_until(comm->release, cycle)) return(RCCE_error_return(RCCE_debug_synch,error)); } if (RCCE_debug_synch) fprintf(STDERR,"UE %d has cleared barrier\n", RCCE_IAM); return(RCCE_SUCCESS); }
int RCCE_APP(int argc, char **argv){ int ID, ID_nb, ID_donor, nrounds, error, strlength; RCCE_FLAG flag_sent, flag_ack; double *cbuffer, *buffer, sum; char msg[RCCE_MAX_ERROR_STRING]; RCCE_init(&argc, &argv); ID = RCCE_ue(); ID_nb = (ID+1)%RCCE_num_ues(); ID_donor = (ID-1+RCCE_num_ues())%RCCE_num_ues(); if (argc != 2) { if (ID==0) printf("Executable requires one parameter (number of rounds): %d\n",argc-1); return(1); } nrounds = atoi(*++argv); if (nrounds < 0) { if (ID==0) printf("Number of rounds should be non-negative: %d\n", nrounds); return(1); } /* allocate private memory and comm buffer space */ buffer = (double *) malloc(BUFSIZE*sizeof(double)); if (!buffer) printf("Mark 01: Failed to allocate private buffer on proc %d\n", ID); cbuffer = (double *) RCCE_malloc(BUFSIZE*sizeof(double)); if (!buffer) printf("Mark 02:RCCE failed to allocate %d doubles on proc %d\n", BUFSIZE, ID); /* initialize buffer with UE-specific data */ for (int i=0; i<BUFSIZE; i++) buffer[i] = (double)(ID+1+i); sum = 0.0; for (int i=0; i<BUFSIZE; i++) sum += buffer[i]; printf("Initial sum on UE %03d equals %f\n", ID, sum); /* create and initialize flag variables */ if (error=RCCE_flag_alloc(&flag_sent)) printf("Mark 03a: Could not allocate flag_sent on %d, error=%d\n", ID, error); if (error=RCCE_flag_alloc(&flag_ack)) printf("Mark 03b: Could not allocate flag_ack on %d, error=%d\n", ID, error); if(error=RCCE_flag_write(&flag_sent, RCCE_FLAG_UNSET, ID)) printf("Mark 04: Could not initialize flag_sent on %d, error=%d\n", ID, error); if(error=RCCE_flag_write(&flag_ack, RCCE_FLAG_SET, ID_donor)) printf("Mark 05: Could not initialize flag_ack on %d, error=%d\n", ID_donor, error); for (int round=0; round<nrounds; round++) { int size = BUFSIZE*sizeof(double); RCCE_wait_until(flag_ack, RCCE_FLAG_SET); RCCE_flag_write(&flag_ack, RCCE_FLAG_UNSET, ID); RCCE_put((t_vcharp)cbuffer, (t_vcharp)buffer, size, ID_nb); RCCE_flag_write(&flag_sent, RCCE_FLAG_SET, ID_nb); RCCE_wait_until(flag_sent, 
RCCE_FLAG_SET); RCCE_flag_write(&flag_sent, RCCE_FLAG_UNSET, ID); RCCE_get((t_vcharp)buffer, (t_vcharp)cbuffer, size, ID); RCCE_flag_write(&flag_ack, RCCE_FLAG_SET, ID_donor); } /* compute local sum */ sum = 0.0; for (int i=0; i<BUFSIZE; i++) sum += buffer[i]; printf("Final sum on UE %03d equals %f\n", ID, sum); RCCE_finalize(); return(0); }
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_send_general
//--------------------------------------------------------------------------------------
// Synchronized send function (gory and non-gory mode)
//
// The message is moved through the comm buffer "combuf" in three stages:
//   1. as many full pieces of "chunk" bytes as fit into "size" (optionally split into
//      two sub-chunks each when "pipe" is set),
//   2. the whole cache lines of what is left over,
//   3. a final partial cache line, padded to RCCE_LINE_SIZE via a local line buffer.
// Every piece is paired with a handshake on the "ready" and "sent" flags. The order
// of copy and handshake depends on the build configuration:
//   - USE_REMOTE_PUT_LOCAL_GET: wait for "ready", clear it locally, put the data into
//     the comm buffer of "dest", then set "sent" on "dest";
//   - otherwise (local put / remote get): put the data into the own comm buffer, set
//     "sent" on "dest", then wait for "ready" and clear it locally. With "mcast" set,
//     the flag handshake is replaced by two RCCE_TNS_barrier() calls on
//     RCCE_COMM_WORLD.
// With USE_TAGGED_FLAGS, the first "sent" notification of a message (wsize == 0) is
// written with RCCE_flag_write_tagged() and carries tag/len, unless a probe flag was
// supplied, in which case tag/len were already written together with the probe flag.
//
// Returns RCCE_SUCCESS, or (USE_REMOTE_PUT_LOCAL_GET with mcast set) the result of
// RCCE_error_return(1, RCCE_ERROR_NO_MULTICAST_SUPPORT). Return values of the
// individual RCCE_put / flag calls are not checked here.
// NOTE(review): "chunk" is used as a divisor; presumably callers guarantee chunk > 0.
//--------------------------------------------------------------------------------------
static int RCCE_send_general(
  char *privbuf,    // source buffer in local private memory (send buffer)
  t_vcharp combuf,  // intermediate buffer in MPB
  size_t chunk,     // size of MPB available for this message (bytes)
  RCCE_FLAG *ready, // flag indicating whether receiver is ready
  RCCE_FLAG *sent,  // flag indicating whether message has been sent by source
  size_t size,      // size of message (bytes)
  int dest,         // UE that will receive the message
  int copy,         // set to 0 for synchronization only (no copying/sending)
  int pipe,         // use pipelining?
  int mcast,        // multicast?
  void* tag,        // additional tag?
  int len,          // length of additional tag
  RCCE_FLAG *probe  // flag for probing for incoming messages
  ) {

  char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size
  size_t wsize,    // offset within send buffer when putting in "chunk" bytes
       remainder,  // bytes remaining to be sent
       nbytes;     // number of bytes to be sent in single RCCE_put call
  char *bufptr;    // running pointer inside privbuf for current location

#ifdef USE_REMOTE_PUT_LOCAL_GET
  // multicast is rejected in the remote-put configuration
  if(mcast) return(RCCE_error_return(1, RCCE_ERROR_NO_MULTICAST_SUPPORT));
#endif

  // announce the message on the destination's probe flag, if one was supplied
  if(probe)
#ifdef USE_TAGGED_FLAGS
    RCCE_flag_write_tagged(probe, RCCE_FLAG_SET, dest, tag, len);
#else
    RCCE_flag_write(probe, RCCE_FLAG_SET, dest);
#endif

#ifdef USE_SYNCH_FOR_ZERO_BYTE
  // synchronize even in case of zero byte messages:
  // only the flag handshake is performed, no data is copied
  if(size == 0) {
#ifdef USE_REMOTE_PUT_LOCAL_GET
    RCCE_wait_until(*ready, RCCE_FLAG_SET);
    RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
#ifdef USE_TAGGED_FLAGS
    if(!probe)
      RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
    else
#endif
    RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
#else // LOCAL PUT / REMOTE GET: (standard)
#ifdef USE_TAGGED_FLAGS
    if(!probe)
      RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
    else
#endif
    RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
    RCCE_wait_until(*ready, RCCE_FLAG_SET);
    RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
#endif // !USE_REMOTE_PUT_LOCAL_GET
    return(RCCE_SUCCESS);
  }
#endif // USE_SYNCH_FOR_ZERO_BYTE

  if(!pipe) {
    // send data in units of available chunk size of comm buffer
    // ((size/chunk)*chunk is the number of bytes covered by full chunks)
    for (wsize=0; wsize< (size/chunk)*chunk; wsize+=chunk) {
      bufptr = privbuf + wsize;
      nbytes = chunk;
#ifdef USE_REMOTE_PUT_LOCAL_GET
      // wait for the destination to be ready, and consume the notification
      RCCE_wait_until(*ready, RCCE_FLAG_SET);
      RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
      // copy private data to remote comm buffer
      if(copy) RCCE_put(combuf, (t_vcharp) bufptr, nbytes, dest);
#ifdef USE_TAGGED_FLAGS
      // only the very first notification of a message carries the tag
      if( (wsize == 0) && (!probe) )
        RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
      else
#endif
      RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
#else // LOCAL PUT / REMOTE GET: (standard)
      // copy private data to own comm buffer
      if(copy) RCCE_put(combuf, (t_vcharp) bufptr, nbytes, RCCE_IAM);
      if(!mcast) {
#ifdef USE_TAGGED_FLAGS
        // only the very first notification of a message carries the tag
        if( (wsize == 0) && (!probe) )
          RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
        else
#endif
        RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
        // wait for the destination to be ready to receive a message
        RCCE_wait_until(*ready, RCCE_FLAG_SET);
        RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
      }
      else {
        // multicast: two barriers take the place of the sent/ready handshake
        RCCE_TNS_barrier(&RCCE_COMM_WORLD);
        RCCE_TNS_barrier(&RCCE_COMM_WORLD);
      }
#endif // !USE_REMOTE_PUT_LOCAL_GET
    } // for
  }
  else // if(!pipe) -> if(pipe)
  {
    // pipelined version of send/recv:
    // each chunk is sent as two sub-chunks, each with its own handshake
    size_t subchunk1, subchunk2;
    for(wsize = 0; wsize < (size/chunk)*chunk; wsize+=chunk) {
      if(wsize == 0) {
        // align sub-chunks to cache line granularity:
        // (computed once, in the first iteration, and reused afterwards)
        subchunk1 = ( (chunk / 2) / RCCE_LINE_SIZE ) * RCCE_LINE_SIZE;
        subchunk2 = chunk - subchunk1;
      }
      bufptr = privbuf + wsize;
      nbytes = subchunk1;
#ifdef USE_REMOTE_PUT_LOCAL_GET
      RCCE_wait_until(*ready, RCCE_FLAG_SET);
      RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
      // copy private data chunk 1 to remote comm buffer
      if(copy) RCCE_put(combuf, (t_vcharp) bufptr, nbytes, dest);
#ifdef USE_TAGGED_FLAGS
      if( (wsize == 0) && (!probe) )
        RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
      else
#endif
      RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
#else // LOCAL PUT / REMOTE GET: (standard)
      // copy private data chunk 1 to own comm buffer
      if(copy) RCCE_put(combuf, (t_vcharp) bufptr, nbytes, RCCE_IAM);
#ifdef USE_TAGGED_FLAGS
      if( (wsize == 0) && (!probe) )
        RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
      else
#endif
      RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
      RCCE_wait_until(*ready, RCCE_FLAG_SET);
      RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
#endif // !USE_REMOTE_PUT_LOCAL_GET
      // second sub-chunk goes into the comm buffer right behind the first one;
      // its notification is never tagged
      bufptr = privbuf + wsize + subchunk1;
      nbytes = subchunk2;
#ifdef USE_REMOTE_PUT_LOCAL_GET
      RCCE_wait_until(*ready, RCCE_FLAG_SET);
      RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
      // copy private data chunk 2 to remote comm buffer
      if(copy) RCCE_put(combuf + subchunk1, (t_vcharp) bufptr, nbytes, dest);
      RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
#else // LOCAL PUT / REMOTE GET: (standard)
      // copy private data chunk 2 to own comm buffer
      if(copy) RCCE_put(combuf + subchunk1, (t_vcharp) bufptr, nbytes, RCCE_IAM);
      RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
      RCCE_wait_until(*ready, RCCE_FLAG_SET);
      RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
#endif // !USE_REMOTE_PUT_LOCAL_GET
    } //for
  } // if(pipe)

  remainder = size%chunk;
  // if nothing is left over, we are done
  if (!remainder) return(RCCE_SUCCESS);

  // send remainder of data--whole cache lines
  // (wsize is still 0 at this point only if no full chunk was sent above, so the
  // "wsize == 0" tests below still identify the first notification of the message)
  bufptr = privbuf + (size/chunk)*chunk;
  nbytes = remainder - remainder%RCCE_LINE_SIZE;
  if (nbytes) {
#ifdef USE_REMOTE_PUT_LOCAL_GET
    RCCE_wait_until(*ready, RCCE_FLAG_SET);
    RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
    // copy private data to remote comm buffer
    if(copy) RCCE_put(combuf, (t_vcharp) bufptr, nbytes, dest);
#ifdef USE_TAGGED_FLAGS
    if( (wsize == 0) && (!probe) )
      RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
    else
#endif
    RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
#else // LOCAL PUT / REMOTE GET: (standard)
    // copy private data to own comm buffer
    if(copy) RCCE_put(combuf, (t_vcharp)bufptr, nbytes, RCCE_IAM);
    if(!mcast) {
#ifdef USE_TAGGED_FLAGS
      if( (wsize == 0) && (!probe) )
        RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
      else
#endif
      RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
      // wait for the destination to be ready to receive a message
      RCCE_wait_until(*ready, RCCE_FLAG_SET);
      RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
    }
    else {
      // multicast: two barriers take the place of the sent/ready handshake
      RCCE_TNS_barrier(&RCCE_COMM_WORLD);
      RCCE_TNS_barrier(&RCCE_COMM_WORLD);
    }
#endif // !USE_REMOTE_PUT_LOCAL_GET
  } // if(nbytes)

  remainder = remainder%RCCE_LINE_SIZE;
  if (!remainder) return(RCCE_SUCCESS);

  // remainder is less than a cache line. This must be copied into appropriately sized
  // intermediate space before it can be sent to the receiver
  // NOTE(review): only the first "remainder" bytes of padline are written before a
  // full RCCE_LINE_SIZE is put, so the tail of the transferred line is whatever the
  // uninitialized local array held -- confirm the receiver ignores those bytes
  bufptr = privbuf + (size/chunk)*chunk + nbytes;
  nbytes = RCCE_LINE_SIZE;
  if(copy) {
#ifdef COPPERRIDGE
    memcpy_scc(padline,bufptr,remainder);
#else
    memcpy(padline,bufptr,remainder);
#endif
  }
#ifdef USE_REMOTE_PUT_LOCAL_GET
  RCCE_wait_until(*ready, RCCE_FLAG_SET);
  RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
  // copy private data to remote comm buffer
  if(copy) RCCE_put(combuf, (t_vcharp) padline, nbytes, dest);
#ifdef USE_TAGGED_FLAGS
  if( (wsize == 0) && (!probe) )
    RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
  else
#endif
  RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
#else // LOCAL PUT / REMOTE GET: (standard)
  // copy private data to own comm buffer
  if(copy) RCCE_put(combuf, (t_vcharp)padline, nbytes, RCCE_IAM);
  if(!mcast) {
#ifdef USE_TAGGED_FLAGS
    if( (wsize == 0) && (!probe) )
      RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
    else
#endif
    RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
    // wait for the destination to be ready to receive a message
    RCCE_wait_until(*ready, RCCE_FLAG_SET);
    RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
  }
  else {
    // multicast: two barriers take the place of the sent/ready handshake
    RCCE_TNS_barrier(&RCCE_COMM_WORLD);
    RCCE_TNS_barrier(&RCCE_COMM_WORLD);
  }
#endif // !USE_REMOTE_PUT_LOCAL_GET
  return(RCCE_SUCCESS);
}
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_push_send_request
//--------------------------------------------------------------------------------------
// Advances a non-blocking send as far as possible without waiting.
//
// The message described by "request" is moved through the comm buffer in the same
// stages as a blocking send: full chunks, then the whole cache lines of the rest,
// then a final partial line padded to RCCE_LINE_SIZE. Instead of waiting for the
// receiver's "ready" flag, the flag is only tested; if it is not set, the current
// position is recorded in request->label (the remaining progress state lives in
// request->wsize, bufptr, nbytes and remainder) and RCCE_PENDING is returned. The
// next call jumps straight back to the recorded test via the labelN targets.
//
// Handshake order depends on the build configuration:
//   - USE_REMOTE_PUT_LOCAL_GET: test "ready", clear it locally, put the data into the
//     comm buffer of request->dest, then set "sent" on request->dest;
//   - otherwise (local put / remote get): put the data into the own comm buffer, set
//     "sent" on request->dest, then test "ready" and clear it locally.
//
// request: send request to progress; updated in place
// returns: RCCE_SUCCESS once the request is finished (request->finished is set),
//          RCCE_PENDING if the receiver was not ready. Return values of the
//          individual RCCE_put / flag calls are not checked here.
//--------------------------------------------------------------------------------------
static int RCCE_push_send_request(RCCE_SEND_REQUEST *request) {

  char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size
  int test; // flag for calling RCCE_test_flag()

  if(request->finished) return(RCCE_SUCCESS);

  // resume where the previous call had to give up
#ifdef USE_SYNCH_FOR_ZERO_BYTE
  // label1 only exists when the zero-byte synchronization below is compiled in, so
  // the jump to it has to be guarded by the same macro to keep the function buildable
  if(request->label == 1) goto label1;
#endif
  if(request->label == 2) goto label2;
  if(request->label == 3) goto label3;
  if(request->label == 4) goto label4;

  // first call for this request: announce the message on the destination's probe
  // flag, if one was supplied
  if(request->probe)
#ifdef USE_TAGGED_FLAGS
    RCCE_flag_write_tagged(request->probe, RCCE_FLAG_SET, request->dest, request->tag, request->len);
#else
    RCCE_flag_write(request->probe, RCCE_FLAG_SET, request->dest);
#endif

#ifdef USE_SYNCH_FOR_ZERO_BYTE
  // synchronize even in case of zero byte messages:
  // only the flag handshake is performed, no data is copied
  if(request->size == 0) {
#ifdef USE_REMOTE_PUT_LOCAL_GET
  label1:
    RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
    if(!test) {
      request->label = 1;
      return(RCCE_PENDING);
    }
    RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
#ifdef USE_TAGGED_FLAGS
    if(!request->probe)
      RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
    else
#endif
    RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
#else // LOCAL PUT / REMOTE GET: (standard)
#ifdef USE_TAGGED_FLAGS
    if(!request->probe)
      RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
    else
#endif
    RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
  label1:
    RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
    if(!test) {
      request->label = 1;
      return(RCCE_PENDING);
    }
    RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
#endif // !USE_REMOTE_PUT_LOCAL_GET
    request->finished = 1;
    return(RCCE_SUCCESS);
  }
#endif // USE_SYNCH_FOR_ZERO_BYTE

  // send data in units of available chunk size of comm buffer
  // (the loop counter lives in the request, so a resumed call entering at label2
  // continues with the chunk that was in flight)
  for (; request->wsize < (request->size / request->chunk) * request->chunk; request->wsize += request->chunk) {
    request->bufptr = request->privbuf + request->wsize;
    request->nbytes = request->chunk;
#ifdef USE_REMOTE_PUT_LOCAL_GET
    // wait for the destination to be ready to receive a message
  label2:
    RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
    if(!test) {
      request->label = 2;
      return(RCCE_PENDING);
    }
    RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
    // copy private data to remote comm buffer
    if(request->copy) RCCE_put(request->combuf, (t_vcharp) request->bufptr, request->nbytes, request->dest);
#ifdef USE_TAGGED_FLAGS
    // only the very first notification of a message carries the tag
    if( (request->wsize == 0) && (!request->probe) )
      RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
    else
#endif
    RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
#else // LOCAL PUT / REMOTE GET: (standard)
    // copy private data to own comm buffer
    if(request->copy) RCCE_put(request->combuf, (t_vcharp) request->bufptr, request->nbytes, RCCE_IAM);
#ifdef USE_TAGGED_FLAGS
    // only the very first notification of a message carries the tag
    if( (request->wsize == 0) && (!request->probe) )
      RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
    else
#endif
    RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
    // wait for the destination to be ready to receive a message
  label2:
    RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
    if(!test) {
      request->label = 2;
      return(RCCE_PENDING);
    }
    RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
#endif // !USE_REMOTE_PUT_LOCAL_GET
  } // for

  request->remainder = request->size % request->chunk;
  // if nothing is left over, we are done
  if (!request->remainder) {
    request->finished = 1;
    return(RCCE_SUCCESS);
  }

  // send remainder of data--whole cache lines
  // (request->wsize is still 0 here only if no full chunk was sent above, so the
  // "wsize == 0" tests below still identify the first notification of the message)
  request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk;
  request->nbytes = request->remainder - request->remainder % RCCE_LINE_SIZE;
  if (request->nbytes) {
#ifdef USE_REMOTE_PUT_LOCAL_GET
    // wait for the destination to be ready to receive a message
  label3:
    RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
    if(!test) {
      request->label = 3;
      return(RCCE_PENDING);
    }
    RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
    // copy private data to remote comm buffer
    if(request->copy) RCCE_put(request->combuf, (t_vcharp) request->bufptr, request->nbytes, request->dest);
#ifdef USE_TAGGED_FLAGS
    if( (request->wsize == 0) && (!request->probe) )
      RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
    else
#endif
    RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
#else // LOCAL PUT / REMOTE GET: (standard)
    // copy private data to own comm buffer
    if(request->copy) RCCE_put(request->combuf, (t_vcharp)request->bufptr, request->nbytes, RCCE_IAM);
#ifdef USE_TAGGED_FLAGS
    if( (request->wsize == 0) && (!request->probe) )
      RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
    else
#endif
    RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
    // wait for the destination to be ready to receive a message
  label3:
    RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
    if(!test) {
      request->label = 3;
      return(RCCE_PENDING);
    }
    RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
#endif // !USE_REMOTE_PUT_LOCAL_GET
  } // if(request->nbytes)

  // recomputed from scratch because a resumed call that entered at label3 skipped
  // the computation above
  request->remainder = request->size % request->chunk;
  request->remainder = request->remainder%RCCE_LINE_SIZE;
  // if nothing is left over, we are done
  if (!request->remainder) {
    request->finished = 1;
    return(RCCE_SUCCESS);
  }

  // remainder is less than a cache line. This must be copied into appropriately sized
  // intermediate space before it can be sent to the receiver
  // NOTE(review): only the first request->remainder bytes of padline are written
  // before a full RCCE_LINE_SIZE is put, so the tail of the transferred line is
  // whatever the uninitialized local array held -- confirm the receiver ignores it
  request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk + request->nbytes;
  request->nbytes = RCCE_LINE_SIZE;
#ifdef USE_REMOTE_PUT_LOCAL_GET
  // wait for the destination to be ready to receive a message
 label4:
  RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
  if(!test) {
    request->label = 4;
    return(RCCE_PENDING);
  }
  RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
  // copy private data to remote comm buffer
  // (padline is filled after label4, so it is valid on a resumed call as well)
  if(request->copy) {
#ifdef COPPERRIDGE
    memcpy_scc(padline,request->bufptr,request->remainder);
#else
    memcpy(padline,request->bufptr,request->remainder);
#endif
    RCCE_put(request->combuf, (t_vcharp) padline, request->nbytes, request->dest);
  }
#ifdef USE_TAGGED_FLAGS
#ifdef USE_PROBE_FLAGS_SHORTCUT
  // a request without a private buffer finishes here without setting "sent"
  if(request->privbuf == NULL) {
    request->finished = 1;
    return(RCCE_SUCCESS);
  }
#endif
  if( (request->wsize == 0) && (!request->probe) )
    RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
  else
#endif
  RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
#else // LOCAL PUT / REMOTE GET: (standard)
  // copy private data to own comm buffer
  // (done before label4: a resumed call only has the flag handshake left to do)
  if(request->copy) {
#ifdef COPPERRIDGE
    memcpy_scc(padline,request->bufptr,request->remainder);
#else
    memcpy(padline,request->bufptr,request->remainder);
#endif
    RCCE_put(request->combuf, (t_vcharp)padline, request->nbytes, RCCE_IAM);
  }
#ifdef USE_TAGGED_FLAGS
  if( (request->wsize == 0) && (!request->probe) )
    RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
  else
#endif
  RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
  // wait for the destination to be ready to receive a message
 label4:
  RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
  if(!test) {
    request->label = 4;
    return(RCCE_PENDING);
  }
  RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
#endif // !USE_REMOTE_PUT_LOCAL_GET
  request->finished = 1;
  return(RCCE_SUCCESS);
}
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_nb_barrier
//--------------------------------------------------------------------------------------
// non-blocking version of the linear barrier
//
// On the first call for a barrier episode (comm->label == 0) the caller toggles its
// gather value. The first member of the communicator then makes ONE sweep over the
// gather values of the other members per call; every other member makes ONE test of
// its release flag per call. If the barrier cannot be completed yet, the position is
// recorded in comm->label (1: root still gathering, 2: non-root still waiting for
// release), RCCE_PENDING is returned, and the next call resumes at the matching
// labelN. The toggled value is kept in comm->cycle and the arrival counter in
// comm->count so that they survive between calls.
//
// Build configurations:
//   - USE_FAT_BARRIER: each UE uses its own gather line (comm->gather[RCCE_IAM]) and
//     additionally puts it into the comm buffer of the root member, so the root reads
//     all gather lines from its own comm buffer;
//   - otherwise: a single gather line (comm->gather[0]) per UE, which the root reads
//     from each member's comm buffer.
//   - USE_FLAG_EXPERIMENTAL selects RCCE_get_flag/RCCE_put_flag and a char-sized
//     barrier value instead of RCCE_get/RCCE_put and an int-sized one.
//
// comm:    communicator whose members take part in the barrier; label, count and
//          cycle are updated in place
// returns: RCCE_SUCCESS when the barrier has been passed (comm->label is reset to 0),
//          RCCE_PENDING if it could not be completed yet, or the result of
//          RCCE_error_return() for a failing RCCE call (comm->label is left as is
//          in that case)
//--------------------------------------------------------------------------------------
int RCCE_nb_barrier(RCCE_COMM *comm) {

  // line-sized, line-aligned scratch copies of the own and of a peer's gather line
  volatile unsigned char cyclechar[RCCE_LINE_SIZE] __attribute__ ((aligned (RCCE_LINE_SIZE)));
  volatile unsigned char valchar[RCCE_LINE_SIZE] __attribute__ ((aligned (RCCE_LINE_SIZE)));
  int i, error;
  int ROOT = 0;
  // cycle/val view the start of the scratch lines as the barrier value
#ifdef USE_FLAG_EXPERIMENTAL
  volatile char *cycle;
  volatile char *val;
  cycle = (volatile char *)cyclechar;
  val = (volatile char *)valchar;
#else
  volatile int *cycle;
  volatile int *val;
  cycle = (volatile int *)cyclechar;
  val = (volatile int *)valchar;
#endif

  // resume a barrier that an earlier call left pending
  if(comm->label == 1) goto label1;
  if(comm->label == 2) goto label2;

  comm->count = 0;

  if (RCCE_debug_synch) fprintf(STDERR,"UE %d has checked into barrier\n", RCCE_IAM);

#ifdef USE_FAT_BARRIER

  // flip local barrier variable
#ifndef USE_FLAG_EXPERIMENTAL
  if ((error = RCCE_get(cyclechar, (t_vcharp)(comm->gather[RCCE_IAM]), RCCE_LINE_SIZE, RCCE_IAM)))
#else
  if ((error = RCCE_get_flag(cyclechar, (t_vcharp)(comm->gather[RCCE_IAM]), RCCE_LINE_SIZE, RCCE_IAM)))
#endif
    return(RCCE_error_return(RCCE_debug_synch,error));
  *cycle = !(*cycle);
#ifndef USE_FLAG_EXPERIMENTAL
  if ((error = RCCE_put((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM)))
#else
  if ((error = RCCE_put_flag((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM)))
#endif
    return(RCCE_error_return(RCCE_debug_synch,error));
  // also publish the flipped value in the comm buffer of the root member
  if ((error = RCCE_put((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, comm->member[ROOT])))
    return(RCCE_error_return(RCCE_debug_synch,error));

  if (RCCE_IAM==comm->member[ROOT]) {
    // read "remote" gather flags; once all equal "cycle" (i.e counter==comm->size),
    // we know all UEs have reached the barrier
    // remember the target value: the local "cycle" is lost when returning RCCE_PENDING
    comm->cycle = *cycle;
  label1:
    while (comm->count != comm->size) {
      // skip the first member (#0), because that is the ROOT
      for (comm->count=i=1; i<comm->size; i++) {
        /* copy flag values out of comm buffer */
        // NOTE(review): the gather line is selected by the member index i while the
        // writer above selects it by RCCE_IAM -- these presumably only agree when
        // comm->member[i] == i; confirm for communicators other than the world
#ifndef USE_FLAG_EXPERIMENTAL
        if ((error = RCCE_get(valchar, (t_vcharp)(comm->gather[i]), RCCE_LINE_SIZE, RCCE_IAM)))
#else
        if ((error = RCCE_get_flag(valchar, (t_vcharp)(comm->gather[i]), RCCE_LINE_SIZE, RCCE_IAM)))
#endif
          return(RCCE_error_return(RCCE_debug_synch,error));
        if (*val == comm->cycle) comm->count++;
      }
      // one sweep per call: if somebody is still missing, give control back
      if(comm->count != comm->size) {
        comm->label = 1;
        return(RCCE_PENDING);
      }
    }
    // set release flags
    for (i=1; i<comm->size; i++) {
      if ((error = RCCE_flag_write(&(comm->release), comm->cycle, comm->member[i])))
        return(RCCE_error_return(RCCE_debug_synch,error));
    }
  }
  else {
    int test;
    // remember the target value: the local "cycle" is lost when returning RCCE_PENDING
    comm->cycle = *cycle;
  label2:
    // one test per call: if not released yet, give control back
    RCCE_test_flag(comm->release, comm->cycle, &test);
    if(!test) {
      comm->label = 2;
      return(RCCE_PENDING);
    }
  }

  // barrier passed: the next call starts a new episode
  comm->label = 0;

#else // !USE_FAT_BARRIER

  // flip local barrier variable
#ifndef USE_FLAG_EXPERIMENTAL
  if ((error = RCCE_get(cyclechar, (t_vcharp)(comm->gather[0]), RCCE_LINE_SIZE, RCCE_IAM)))
#else
  if ((error = RCCE_get_flag(cyclechar, (t_vcharp)(comm->gather[0]), RCCE_LINE_SIZE, RCCE_IAM)))
#endif
    return(RCCE_error_return(RCCE_debug_synch,error));
  *cycle = !(*cycle);
#ifndef USE_FLAG_EXPERIMENTAL
  if ((error = RCCE_put((t_vcharp)(comm->gather[0]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM)))
#else
  if ((error = RCCE_put_flag((t_vcharp)(comm->gather[0]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM)))
#endif
    return(RCCE_error_return(RCCE_debug_synch,error));

  if (RCCE_IAM==comm->member[ROOT]) {
    // read "remote" gather flags; once all equal "cycle" (i.e counter==comm->size),
    // we know all UEs have reached the barrier
    // remember the target value: the local "cycle" is lost when returning RCCE_PENDING
    comm->cycle = *cycle;
  label1:
    while (comm->count != comm->size) {
      // skip the first member (#0), because that is the ROOT
      for (comm->count=i=1; i<comm->size; i++) {
        /* copy flag values out of comm buffer */
#ifndef USE_FLAG_EXPERIMENTAL
        if ((error = RCCE_get(valchar, (t_vcharp)(comm->gather[0]), RCCE_LINE_SIZE, comm->member[i])))
#else
        if ((error = RCCE_get_flag(valchar, (t_vcharp)(comm->gather[0]), RCCE_LINE_SIZE, comm->member[i])))
#endif
          return(RCCE_error_return(RCCE_debug_synch,error));
        if (*val == comm->cycle) comm->count++;
      }
      // one sweep per call: if somebody is still missing, give control back
      if(comm->count != comm->size) {
        comm->label = 1;
        return(RCCE_PENDING);
      }
    }
    // set release flags
    for (i=1; i<comm->size; i++) {
      if ((error = RCCE_flag_write(&(comm->release), comm->cycle, comm->member[i])))
        return(RCCE_error_return(RCCE_debug_synch,error));
    }
  }
  else {
    int test;
    // remember the target value: the local "cycle" is lost when returning RCCE_PENDING
    comm->cycle = *cycle;
  label2:
    // one test per call: if not released yet, give control back
    RCCE_test_flag(comm->release, comm->cycle, &test);
    if(!test) {
      comm->label = 2;
      return(RCCE_PENDING);
    }
  }

  // barrier passed: the next call starts a new episode
  comm->label = 0;

#endif // !USE_FAT_BARRIER

  if (RCCE_debug_synch) fprintf(STDERR,"UE %d has cleared barrier\n", RCCE_IAM);
  return(RCCE_SUCCESS);
}
int RCCE_APP(int argc, char **argv) { /* statically allocated space sits in off-chip private memory */ float a[NXNY], *buff; int i, offset, iter=10, tile; int MY_ID; int NTILES1; double time; RCCE_FLAG flag0, flag1; RCCE_init(&argc, &argv); NTILES1 = RCCE_num_ues()-1; MY_ID = RCCE_ue(); if (NX%8) { printf("Grid width should be multiple of 8: %d\n", NX); exit(1); } if (argc>1) iter=atoi(*++argv); if (MY_ID==0) printf("Executing %d iterations\n", iter); /* allocate space on the comm buffer */ buff = (float *) RCCE_malloc(sizeof(float)*2*NX); /* Allocate flags to coordinate comm. */ if (RCCE_flag_alloc(&flag0)) return(1); if (RCCE_flag_alloc(&flag1)) return(1); /* initialize array a on all tiles; this stuffs a into private caches */ for (offset=0, i=0; i<NXNY; i++) a[i+offset] = 0.0; if (MY_ID == 0) for (offset=0, i=0; i<NX; i++) a[i+offset] = 1.0; if (MY_ID == NTILES1) for (offset=NXNY1,i=0; i<NX; i++) a[i+offset] = 2.0; /* put in a barrier so everybody can be sure to have initialized */ RCCE_barrier(&RCCE_COMM_WORLD); /* main loop */ if (MY_ID==0) time = RCCE_wtime(); while ((iter--)>0){ /* start with copying fringe data to neighboring tiles */ if (MY_ID!=NTILES1) { /* Initialize neighbor flag to zero */ RCCE_flag_write(&flag0, RCCE_FLAG_UNSET, MY_ID+1); /* copy private data to shared comm buffer of neighbor */ RCCE_put((t_vcharp)(&buff[0]), (t_vcharp)(&a[NXNY2]), NX*sizeof(float), MY_ID+1); RCCE_flag_write(&flag0, RCCE_FLAG_SET, MY_ID+1); } if (MY_ID != 0) { /* Initialize neighbor flag to zero */ RCCE_flag_write(&flag1, 0, MY_ID-1); /* copy private data to shared comm buffer of neighbor */ RCCE_put((t_vcharp)(&buff[NX]), (t_vcharp)(&a[NX]), NX*sizeof(float), MY_ID-1); RCCE_flag_write(&flag1, RCCE_FLAG_SET, MY_ID-1); } /* Make sure the data has been recvd and copy data out of buffer(s) */ if (MY_ID!=NTILES1) { RCCE_wait_until(flag1, RCCE_FLAG_SET); RCCE_get((t_vcharp)(&a[NXNY1]), (t_vcharp)(&buff[NX]), NX*sizeof(float),MY_ID); } if (MY_ID!=0) { 
RCCE_wait_until(flag0, RCCE_FLAG_SET); RCCE_get((t_vcharp)(&a[0]), (t_vcharp)(&buff[0]), NX*sizeof(float),MY_ID); } /* apply the stencil operation */ for (i=0; i<NXNY2; i++) { a[i+O3] += W1*a[i+O1] + W2*a[i+O2] + W3*a[i+O3] + W4*a[i+O4] + W5*a[i+O5]; } } RCCE_barrier(&RCCE_COMM_WORLD); if (MY_ID==0) { time = RCCE_wtime()-time; } /* print result strip by strip; this would not be done on RC */ for (int id=0; id<=NTILES1; id++) { RCCE_barrier(&RCCE_COMM_WORLD); if (MY_ID==id) { int start = NX; int end = NXNY1; if (MY_ID==0) start = 0; if (MY_ID == NTILES1) end = NXNY; for (offset=0, i=start; i<end; i++) { if (!(i%NX)) printf("\n"); // comment out next line and uncomment subsequent three to print error printf("%f ",a[i+offset]); // int jj=i/NX+(MY_ID*(NY-1)); // double aexact=1.0+(double)jj/((NTILES1+1)*(NY-1)); // printf("%f ",a[i+offset]-aexact); } } } RCCE_barrier(&RCCE_COMM_WORLD); if (MY_ID==0) { printf("\nTotal time: %lf\n", time); } RCCE_finalize(); return(0); }