/** * @brief Check if any chstat process is listening. Bailout if not. */ static void ipc_require_listener(void) { /* While we are in the mutual exclusive section, we don't listen to signals, because it would be hard to reset the state of the mutex */ sigset_t blocked_signals, curr_set; (void) sigfillset(&blocked_signals); (void) sigprocmask(SIG_BLOCK, &blocked_signals, &curr_set); /* Mutual exclusive section so no other process removes shm segment meanwhile */ if(semdown(mtx) == -1) { bail_out(EXIT_FAILURE, "Error downing mutex"); } /* Do we have any listeners? */ if((shared->flag & READER_F) != 1) { (void) semup(mtx); // Because ipc_shutdown() tries to down the mutex bail_out(EXIT_SUCCESS, "No chstat process listening."); } /* Release Mutex */ if(semup(mtx) == -1) { bail_out(EXIT_FAILURE, "Error downing mutex"); } /* Reset to original procmask*/ (void) sigprocmask(SIG_SETMASK, &curr_set, NULL); }
/*
 * Seed the C random generator from the current time, then allocate and fill
 * with rand_double() values:
 *   - options.n rows of options.m doubles in `primary`,
 *   - NUM_VEC vectors in `vectors`, where even-indexed vectors have
 *     options.m entries and odd-indexed ones have options.n entries.
 * Any failed allocation ends in bail_out().
 */
void init_rand(double **primary, double **vectors){
    debug("init\n");
    srand((unsigned int)time((time_t*) NULL));

    int row = 0;
    while (row < options.n) {
        double *cells = malloc(options.m * sizeof(double));
        primary[row] = cells;
        if (cells == NULL) {
            bail_out(EXIT_FAILURE, "malloc primary[%d]", row);
        }
        for (int col = 0; col < options.m; col++) {
            cells[col] = rand_double();
        }
        row++;
    }

    for (int v = 0; v < NUM_VEC; v++) {
        /* vector length alternates between the two matrix dimensions */
        int vec_len = (v % 2 == 0) ? options.m : options.n;
        double *entries = malloc(vec_len * sizeof(double));

        vectors[v] = entries;
        if (entries == NULL) {
            bail_out(EXIT_FAILURE, "malloc vectors[%d]\n", v);
        }
        for (int e = 0; e < vec_len; e++) {
            entries[e] = rand_double();
        }
    }

    debug("in init: %f", vectors[0][0]);
}
/** * @brief Init list with all possible solutions * @return List with possible solutions */ struct sol_node *init_solver(void) { struct sol_node *sol, *it; int i, j, k, s; /* Allocate list head */ sol = (struct sol_node *) calloc(1, sizeof(struct sol_node)); if (sol == NULL) bail_out(EXIT_FAILURE, "calloc"); /* Allocate columns of the solution set */ for (i = 1, it = sol; i < CMAX; i++, it = it->next) { it->next = (struct sol_node *) calloc(1, sizeof(struct sol_node)); if (it->next == NULL) bail_out(EXIT_FAILURE, "calloc"); } /* Fill solution set with data */ s = CMAX / COLORS; for (j = 0; j < SLOTS; j++, s = s / COLORS) { for (it = sol, i = 0, k = -1; it != NULL; i++, it = it->next) { /* Flip k to next number/color */ if (i % s == 0) k = (k + 1) % COLORS; it->slots[j] = k; } } return sol; }
int main(int argc, char** argv) { int ret; int wait = 0; int opt; startup_info_t info; struct rt_task rt; FILE *file; while ((opt = getopt(argc, argv, OPTSTR)) != -1) { switch (opt) { case 'w': wait = 1; break; case ':': usage("Argument missing."); break; case '?': default: usage("Bad argument."); break; } } signal(SIGUSR1, SIG_IGN); if (argc - optind < 2) usage("Arguments missing."); if ((file = fopen(argv[optind + 0], "r")) == NULL) { fprintf(stderr, "Cannot open %s\n", argv[1]); return -1; } memset(&rt, 0, sizeof(struct rt_task)); if (parse_hime_ts_file(file, &rt) < 0) bail_out("Could not parse file\n"); if (sporadic_task_ns_semi(&rt) < 0) bail_out("could not setup rt task params"); fclose(file); info.exec_path = argv[optind + 1]; info.argv = argv + optind + 1; info.wait = wait; ret = create_rt_task_semi(launch, &info, &rt); if (ret < 0) bail_out("could not create rt child process"); return 0; }
/** * @brief Main child function * @details Exits after execution * @param id Number of the created child (starting with 1) * @param opts Parameters from command line, PRE: != null * @param ipipe Input pipe for a child process * PRE: != null, size = 2 * int * @param opipe Output pipe for a child process * PRE: != null, size = 2 * int */ void child_main(int id, struct options *opts, int *ipipe, int *opipe) { char buff[SECRET_MAX]; FILE *stream; /* Open input stream */ stream = fdopen(ipipe[0], "r"); if (stream == NULL) bail_out(EXIT_FAILURE, "fdopen"); /* Read from input pipe */ if (close(ipipe[1]) == -1) bail_out(EXIT_FAILURE, "close"); if (quit == 0 && fread(buff, 1, opts->slen, stream) == -1) { bail_out(EXIT_FAILURE, "read"); } if (close(ipipe[0]) == -1) bail_out(EXIT_FAILURE, "close"); /* Randomly alter string, verbose output */ if (quit == 0 && opts->verbose) { (void) printf("child%02d: erhalten: %s\n", id, buff); } alter_string(id, buff, opts->slen - 1); if (quit == 0 && opts->verbose) { (void) printf("child%02d: weiter : %s\n", id, buff); } /* Handle output */ if (id < opts->num_childs) { if (close(opipe[0]) == -1) bail_out(EXIT_FAILURE, "close"); /* Get file descriptor for writing */ stream = fdopen(opipe[1], "w"); if (stream == NULL) bail_out(EXIT_FAILURE, "fdopen"); if (quit == 0 && fwrite(buff, 1, opts->slen, stream) == -1) { bail_out(EXIT_FAILURE, "write"); } if (fflush(stream)) bail_out(EXIT_FAILURE, "fflush"); if (close(opipe[1]) == -1) bail_out(EXIT_FAILURE, "close"); } else { (void) printf("child%02d: Ende : %s\n", id, buff); } /* Return failure on exit */ if (quit == 1) exit(EXIT_FAILURE); exit(EXIT_SUCCESS); }
/** * @brief Parse arguments * @details Check if arguments are correct and if a verbose * option has been specified * Global variables: pname * @param argc Number of arguments * @param argv Argument array * @param opts Option structure, PRE: opts != null * @param secret String to store secret in, PRE: != null * @return True, if the verbose flag has been found */ static void parse_args(int argc, char **argv, struct options *opts, char *secret) { int opt; char *endptr; size_t seclen; /* Store program name */ pname = ""; if (argc > 0) pname = argv[0]; /* Parse options */ opt = getopt(argc, argv, "v"); while (opt != -1) { switch (opt) { case 'v': opts->verbose = true; break; default: usage(); break; } opt = getopt(argc, argv, "v"); } /* Parse remaining arguments */ if (argc - optind != 2) usage(); /* Get number of child processes */ errno = 0; opts->num_childs = strtol(argv[optind], &endptr, 10); if (errno != 0 || argv[1] == endptr) { bail_out(EXIT_FAILURE, "Could not read number " " of child processes"); } /* Store secret string */ seclen = strlen(argv[optind + 1]); if (seclen >= SECRET_MAX) { bail_out(EXIT_FAILURE, "Secret string too long!" " Max: %d", SECRET_MAX); } (void) memset(secret, 0, SECRET_MAX); (void) memcpy(secret, argv[optind + 1], seclen); opts->slen = seclen + 1; }
/*
 * Sets img[im].filename to img_filename_root with "_<unit name>" inserted
 * before the filename extension (or appended when there is no extension).
 * The unit name is selected by img[im].imgunits[unit_index].
 *
 * An extension is only recognised when its '.' comes after the last '/',
 * the same rule removeFilenameExtension() applies, so a dot in a directory
 * name is left alone. The new name is assembled in a buffer sized for it
 * and then handed to copyInparStr().
 *
 * On an invalid unit index or unit number the function reports through
 * bail_out() (unless silent) and exits with status 1. `par` is not used.
 */
void insertUnitStrInFilename(char *img_filename_root, configInfo *par, imageInfo *img, const int im, const int unit_index){
  static const char *unit_names[] = {"Kelvin", "Jansky-per-px", "SI", "LSun-per-px", "Tau", "#Rays"};
  const int num_unit_names = (int)(sizeof(unit_names)/sizeof(*unit_names));
  char message[STR_LEN_0];
  char *base_name, *new_filename;
  const char *ext;
  size_t new_len;
  int unit;

  /* Check if unit index falls outside range of possible unit names */
  if(unit_index < 0 || unit_index > num_unit_names - 1){
    snprintf(message, STR_LEN_0, "Image unit index '%d' does not have a corresponding unit name", unit_index);
    if(!silent) bail_out(message);
    exit(1);
  }

  /* The value stored for this image is what indexes unit_names, so it has
     to be range-checked as well. */
  unit = img[im].imgunits[unit_index];
  if(unit < 0 || unit > num_unit_names - 1){
    if(!silent) bail_out("Image unit number invalid");
    exit(1);
  }

  /* Copy of the root name with the extension (if any) cut off */
  base_name = removeFilenameExtension(img_filename_root, '.', '/');
  if(base_name == NULL){
    if(!silent) bail_out("Image filename root is missing");
    exit(1);
  }

  /* Whatever was cut off is the extension; an empty string if nothing was. */
  ext = img_filename_root + strlen(base_name);

  /* base + '_' + unit name + extension + terminating NUL */
  new_len = strlen(base_name) + 1 + strlen(unit_names[unit]) + strlen(ext) + 1;
  new_filename = malloc(new_len);
  if(!new_filename){
    if(!silent) bail_out("Error allocating memory for image filename");
    exit(1);
  }
  snprintf(new_filename, new_len, "%s_%s%s", base_name, unit_names[unit], ext);

  /* Update image filename from the assembled name */
  copyInparStr(new_filename, &(img[im].filename));

  free(new_filename);
  free(base_name);
}
char *removeFilenameExtension(char* inStr, char extensionChar, char pathSeparator) { char *outStr, *lastDotInFilename, *lastPathSeparatorInFilename; if (inStr == NULL) return NULL; outStr = malloc(strlen(inStr) + 1); if(!outStr){ if(!silent) bail_out("Error allocating memory for filename extension removal"); exit(0); } strcpy(outStr, inStr); /* Find last occurrences of extension character and path separator character */ lastDotInFilename = strrchr(outStr, extensionChar); lastPathSeparatorInFilename = (pathSeparator == 0) ? NULL : strrchr(outStr, pathSeparator); /* Truncate filename at occurrence of last extension character assuming it comes after the last path separator character */ if (lastDotInFilename != NULL) { if (lastPathSeparatorInFilename != NULL) { if (lastPathSeparatorInFilename < lastDotInFilename) { *lastDotInFilename = '\0'; } } else { *lastDotInFilename = '\0'; } } return outStr; }
static void create_game(struct ClientList *el_cur) { if (el_cur->game_count >= dict_size) { /* No new game possible. */ el_cur->status_id = EndGame; } else { /* Assign secret word. */ DEBUG("secret word before: %s\n", strings[el_cur->game_count]); strncpy(el_cur->secret_word, strings[el_cur->game_count], MAX_DATA); el_cur->errors = 0; el_cur->status_id = Running; el_cur->game_count = el_cur->game_count + 1; if (memset(el_cur->client_word, '_', MAX_DATA) != el_cur->client_word) { bail_out(EXIT_FAILURE, "memset(3) failed\n"); } int i = 0; while(el_cur->secret_word[i] != '\0') { if (el_cur->secret_word[i] == ' ') { el_cur->client_word[i] = ' '; } i++; } el_cur->client_word[i] = '\0'; DEBUG("Spiel erstellt: %s Secret Word: %s\n", shared->s_word, el_cur->secret_word); } }
void SkypeSend(char *szMsg) { COPYDATASTRUCT CopyData; int count=0; if (!hSkypeWnd) { LOG(("SkypeSend: DAMN! No Skype window handle! :(")); return; } if (strcmp(szMsg, "PING")) {LOG(("> %s", szMsg));} CopyData.dwData=0; CopyData.lpData=szMsg; CopyData.cbData=strlen(szMsg)+1; while (!SendMessageTimeout(hSkypeWnd, WM_COPYDATA, (WPARAM)hWnd, (LPARAM)&CopyData, SMTO_ABORTIFHUNG, 3000, NULL)) { count++; LOG(("SkypeSend: failed, try #%d", count)); if (count==5) { OUTPUT("Sending message to Skype failed too often."); OUTPUT("Skype may have died unexpectedly, I will try to restart it."); ConnectToSkypeAPI((void *)TRUE); OUTPUT("Restart complete. Trying to deliver message one more time."); if (!SendMessageTimeout(hSkypeWnd, WM_COPYDATA, (WPARAM)hWnd, (LPARAM)&CopyData, SMTO_ABORTIFHUNG, 3000, NULL)) { OUTPUT("It still failed. Skype seems to be completely f*cked up. I've given up. Bye.."); bail_out(1); break; } else { OUTPUT("Now it worked! :)"); break; } } Sleep(1000); } }
/*
 * Runs options.iter update sweeps over the options.n x options.m grid.
 *
 * Each sweep calls update() for every cell, reading from one grid and writing
 * to the other, and then the two grids swap roles. A scratch grid is
 * allocated here for that purpose. After an odd number of sweeps the latest
 * result is in the scratch rows, so it is copied back into the caller's
 * `primary` rows before the scratch grid is freed.
 *
 * The wall-clock time of the sweep loop is printed to stderr in seconds.
 */
void iterate(double **primary, double **vectors)
{
    double start, finish;
    double **secondary = malloc(options.n * sizeof(double*));

    if (secondary == NULL){
        bail_out(EXIT_FAILURE, "malloc secondary");
    }
    for (int i = 0; i < options.n; i++){
        secondary[i] = calloc(options.m, sizeof(double));
        if (secondary[i] == NULL){
            bail_out(EXIT_FAILURE, "malloc secondary[%d]", i);
        }
    }

    start = omp_get_wtime();
    for (int i = 0; i < options.iter; i++){
        for (int j = 0; j < options.n; j++){
            for (int k = 0; k < options.m; k++){
                update(primary, secondary, j, k, vectors);
            }
        }
        /* swap roles: this sweep's output is the next sweep's input */
        double **temp = primary;
        primary = secondary;
        secondary = temp;
    }
    finish = omp_get_wtime();

    /* omp_get_wtime() reports seconds */
    double elapsed_sec = finish - start;
    fprintf(stderr,"loop time = %f\n", elapsed_sec);

    if (options.iter % 2 == 1){
        /* Local `primary` currently names the scratch rows; swap back so that
           `primary` is the caller's grid again and copy the result into it. */
        double **temp = primary;
        primary = secondary;
        secondary = temp;
        for (int i = 0; i < options.n; i++){
            memcpy(primary[i], secondary[i], options.m * sizeof(double));
        }
    }

    for (int i = 0; i < options.n; i++){
        free(secondary[i]);
    }
    free(secondary);
}
// Declares how many tests are expected to run.
//
// Calling it a second time is reported through bail_out(). Otherwise the
// plan is marked as made, emitted via output_plan() and remembered in
// `expected`.
void plan(unsigned tests) throw(fatal_exception)
{
    if (is_planned)
    {
        bail_out("Can't plan again!");
    }

    is_planned = true;

    output_plan(tests);
    expected = tests;
}
/*
 * Writes image i for the unit selected by unit_index via write4Dfits().
 *
 * The unit number stored in img[i].imgunits[unit_index] must lie in 0..5;
 * otherwise the error is reported through bail_out() (unless silent) and the
 * program exits with status 1.
 */
void writeFits(const int i, const int unit_index, configInfo *par, imageInfo *img){
  const int unitI = img[i].imgunits[unit_index];

  /* negative unit numbers are just as invalid as too large ones */
  if(unitI<0 || unitI>5){
    if(!silent) bail_out("Image unit number invalid");
    exit(1);
  }

  write4Dfits(i, unit_index, par, img);
}
/*
 * Runs the command given in argv[1..] in the background and then serves
 * single-character commands read from stdin:
 *
 *   'K'   send SIGINT to the child's process group, then echo 'K' to stdout
 *   '\n'  ignored (for interactive testing)
 *   EOF   leave the loop and return 0
 *
 * The command is started by a grandchild: the child creates its own process
 * group, forks the grandchild that exec's the command, and exits. The parent
 * waits for that child before accepting commands. An alarm is armed for ten
 * minutes; what happens then is up to alarm_handler.
 */
int main(int argc, char* argv[])
{
    pid_t child;
    int ret;
    char cmd;
    int child_exit;

    if (argc < 2) {
        fprintf(stderr, "Must specify command to run in background\n");
        exit(-1);
    }

    TRY(child=fork());
    if (child == 0) { /* child */
        pid_t gchild;

        TRY(setpgid(getpid(), getpid())); /* create process group */
        TRY(gchild=fork());
        if (gchild == 0) { /* grandchild */
            TRY(execvp(argv[1],&argv[1]));
        }
        exit(0);
    }

    /* parent */
    signal(SIGALRM, alarm_handler);
    alarm(10*60); /* suicide in case nothing happens */

    TRY(wait(&child_exit));
    if (!WIFEXITED(child_exit) || WEXITSTATUS(child_exit)!=0) {
        fprintf(stderr, "child did not exit normally (status=%d)\n", child_exit);
        exit(-1);
    }

    for (;;) {
        TRY(ret=read(STDIN_FILENO, &cmd, 1));
        if (ret == 0) break; /* eof -> exit */

        switch (cmd) {
        case 'K':
            ret = kill(-child, SIGINT); /* child process _group_ */
            /* an already vanished process group (ESRCH) is not an error */
            if (ret < 0 && errno != ESRCH) {
                bail_out("kill failed");
            }
            /* echo ack; a failed write must not pass silently, the reader
               would wait for an acknowledgement that never arrives */
            TRY(write(STDOUT_FILENO, &cmd, 1));
            break;
        case '\n':
            break; /* ignore (for interactive testing) */
        default:
            fprintf(stderr, "Unknown command '%c'\n", cmd);
            exit(-1);
        }
    }

    return 0;
}
/**
 * @brief Initialize signal handling
 * @details Resets quit and installs the signal handler routine for SIGTERM
 *          and SIGINT. All signals are blocked while the handler runs.
 *          Global variables: quit
 */
static void sig_init()
{
    const int signals[] = {SIGTERM, SIGINT};
    struct sigaction sa;
    size_t i;

    /* Initialize sigaction */
    quit = 0;
    sa.sa_handler = handler;
    sa.sa_flags = 0;
    if (sigfillset(&sa.sa_mask) == -1) bail_out(EXIT_FAILURE, "sigfillset");

    /* Assign sigactions */
    for (i = 0; i < sizeof(signals) / sizeof(signals[0]); i++) {
        if (sigaction(signals[i], &sa, NULL) == -1)
            bail_out(EXIT_FAILURE, "sigaction");
    }
}
/*
 * Window procedure of the window that talks to Skype.
 *
 * WM_COPYDATA from the Skype window: the message text is copied, the sender
 *   is released with ReplyMessage(), "PONG" only sets WatchDog, and
 *   "USERSTATUS LOGGEDOUT" leads to bail_out(1). Any other message is sent to
 *   the connected client socket as a length prefix followed by the text
 *   (without terminating NUL). With USE_AUTHENTICATION nothing is forwarded
 *   while a password is set and the client is not authenticated.
 * WM_DESTROY: posts the quit message.
 * ControlAPIAttach: records the attach status, takes over the Skype window
 *   handle on success and signals SkypeReady unless the status is
 *   "API available".
 * Everything else goes to DefWindowProc().
 *
 * Returns 1 for every message handled here.
 */
LONG APIENTRY WndProc(HWND hWnd, UINT message, UINT wParam, LONG lParam)
{
	PCOPYDATASTRUCT CopyData;
	char *szSkypeMsg=NULL;

	switch (message)
	{
		case WM_COPYDATA:
			if(hSkypeWnd==(HWND)wParam)
			{
				CopyData=(PCOPYDATASTRUCT)lParam;
				szSkypeMsg=strdup(CopyData->lpData);
				ReplyMessage(1);

				/* Without a copy there is nothing to compare or forward */
				if (!szSkypeMsg)
				{
					OUTPUT("Out of memory, dropping message from Skype.");
					break;
				}

				/* Hide PING-PONG */
				if (!strcmp(szSkypeMsg, "PONG"))
				{
					WatchDog=1;
					break;
				}
				LOG(("< %s", szSkypeMsg));

				if (!strcmp(szSkypeMsg, "USERSTATUS LOGGEDOUT"))
				{
					OUTPUT("Skype shut down gracefully. I'll leave too, bye.. :)");
					bail_out(1);
				}
#ifdef USE_AUTHENTICATION
				if (password && !Authenticated) break;
#endif
				if (AcceptSocket!=INVALID_SOCKET)
				{
					unsigned int length=strlen(szSkypeMsg);

					if (send(AcceptSocket, (char *)&length, sizeof(length), 0)==SOCKET_ERROR ||
						send(AcceptSocket, szSkypeMsg, length, 0)==SOCKET_ERROR)
						OUTPUT("Cannot send to client :(");
				}
			}
			break;

		case WM_DESTROY:
			PostQuitMessage(0);
			break;

		default:
			if(message==ControlAPIAttach)
			{
				/* Skype responds with Attach to the discover-message */
				AttachStatus=lParam;
				if (AttachStatus==SKYPECONTROLAPI_ATTACH_SUCCESS)
					hSkypeWnd=(HWND)wParam; /* Skype gave us the communication window handle */
				if (AttachStatus!=SKYPECONTROLAPI_ATTACH_API_AVAILABLE)
					SetEvent(SkypeReady);
				break;
			}
			return (DefWindowProc(hWnd, message, wParam, lParam));
	}

	/* free(NULL) is a no-op, so no guard is needed */
	free(szSkypeMsg);
	return 1;
}
/*
 * Converts a binary file into a C header containing an unsigned char array.
 *
 *   argv[1]  input file (read in binary mode)
 *   argv[2]  output header file
 *   argv[3]  identifier used for the array and, with "_H" appended, for the
 *            include guard
 *
 * The array initialiser holds twelve bytes per line. With a wrong argument
 * count usage() is called and 0 is returned. Failure to open, read or write
 * a file is reported through bail_out().
 *
 * NOTE(review): for an empty input file the opening part (GPL text, include
 * guard, array declaration) is never written, only the closing lines.
 */
int main(int argc, char** argv)
{
  FILE *fi, *fo;
  int c, i;
  int write_failed;

  self = argv[0];

  if (argc != 4) {
    usage();
    return 0;
  }

  if ((fi = fopen(argv[1], "rb")) == 0)
    bail_out("Cannot open input file ", argv[1]);

  if ((fo = fopen(argv[2], "w")) == 0)
    bail_out("Cannot open output file ", argv[2]);

  /* The first byte is written without a leading comma */
  if ((c = fgetc(fi)) != EOF) {
    fprintf(fo, GPL);
    fprintf(fo, "#ifndef %s_H\n", argv[3]);
    fprintf(fo, "#define %s_H\n\n", argv[3]);
    fprintf(fo, "const unsigned char %s[] = {\n", argv[3]);
    fprintf(fo, " 0x%02x", (unsigned char) c);
  }

  /* i counts the bytes already written on the current output line */
  i = 1;
  while ((c = fgetc(fi)) != EOF) {
    if (i < 12)
      fprintf(fo, ", 0x%02x", (unsigned char) c);
    else {
      fprintf(fo, ",\n 0x%02x", (unsigned char) c);
      i = 0;
    }
    i++;
  }
  fprintf(fo, "\n};\n\n");
  fprintf(fo, "#endif\n");

  /* EOF from fgetc() may also mean a read error */
  if (ferror(fi))
    bail_out("Cannot read input file ", argv[1]);
  fclose(fi);

  /* Buffered output can still fail while being flushed by fclose() */
  write_failed = ferror(fo);
  if (fclose(fo) != 0)
    write_failed = 1;
  if (write_failed)
    bail_out("Cannot write output file ", argv[2]);

  printf("converted %s\n", argv[1]);

  return 0;
}
/*....................................................................*/
/*
 * Verifies the result of an fwrite() call.
 *
 * If fwriteResult differs from expectedNum, a message naming what was being
 * written (`message`) is reported through bail_out() unless silent, and the
 * program exits with status 1. Returns normally when the counts match.
 */
void
checkFwrite(const size_t fwriteResult, const size_t expectedNum, char *message){
  char string[STR_LEN_0];

  if(fwriteResult==expectedNum)
    return;

  if(!silent){
    /* %zu prints size_t without the truncation an (int) cast would cause */
    snprintf(string, STR_LEN_0, "fwrite() failed to write %s. Expected %zu got %zu", message, expectedNum, fwriteResult);
    bail_out(string);
  }
  exit(1);
}
/*....................................................................*/
/*
 * Verifies the result of an fscanf() call.
 *
 * Returns normally when fscanfResult equals expectedNum. Otherwise a message
 * naming what was being read (`message`) is reported through bail_out()
 * unless silent, and the program exits with status 1.
 */
void
checkFscanf(const int fscanfResult, const int expectedNum, char *message){
  char string[STR_LEN_0];

  if(fscanfResult==expectedNum)
    return;

  if(silent)
    exit(1);

  snprintf(string, sizeof(string), "fscanf() failed to read %s - read %d bytes when %d expected.", message, fscanfResult, expectedNum);
  bail_out(string);
  exit(1);
}
/*....................................................................*/
/*
 * Verifies the result of an fgets() call.
 *
 * Returns normally when fgetsResult is not NULL. Otherwise a message naming
 * what was being read (`message`) is reported through bail_out() unless
 * silent, and the program exits with status 1.
 */
void
checkFgets(char *fgetsResult, char *message){
  char string[STR_LEN_0];

  if(fgetsResult!=NULL)
    return;

  if(silent)
    exit(1);

  snprintf(string, sizeof(string), "fgets() failed to read %s", message);
  bail_out(string);
  exit(1);
}
local_shared_block_ptrs shared_2d_array_alloc(int sizex, int sizey, int offsetx, int offsety){ long int alloc_size = sizex * sizey * sizeof(DTYPE); local_shared_block ptr; ptr = upc_alloc(alloc_size); if(ptr == NULL) bail_out("Failing shared allocation of %d bytes", alloc_size); long int line_ptrs_size = sizeof(local_shared_block) * sizey; local_shared_block_ptrs line_ptrs = upc_alloc(line_ptrs_size); if(line_ptrs == NULL) bail_out("Failing shared allocation of %d bytes", line_ptrs_size); for(int y=0; y<sizey; y++){ line_ptrs[y] = ptr + (y * sizex) - offsetx; } line_ptrs -= offsety; return line_ptrs; }
static char *get_strings() { char *string = NULL; char ch; size_t len = 0; while (string == NULL && ch != EOF) { while (EOF != (ch = fgetc(in_stream)) && ch != '\n') { if (ch != ' ' && isalpha((int)ch) == 0) { // fprintf(stderr, "Only [a-z] is a valid input. | \t" // "| Input another or end with CTRL+D: "); continue; } string = (char*) realloc(string, len+2); if (string == NULL) { bail_out(EXIT_FAILURE, "realloc(3) failed"); } string[len++] = toupper(ch); if (len >= MAX_DATA) { bail_out(EXIT_FAILURE, "Input too long\n"); } } if (ferror(in_stream)) { bail_out(EXIT_FAILURE, "Error while reading from stream"); } } if(string) { string[len] = '\0'; } else { printf("\nFinished dictionary...\n"); return string; } DEBUG("Added string: %s | Input another [a-z] or end with CTRL+D: ", string); return string; }
/*
 * Reads all entries delivered by get_strings() into the `strings` array and
 * records their number in dict_size.
 *
 * Empty entries are discarded without consuming an array slot, so `strings`
 * holds exactly dict_size valid pointers. Allocation failure ends in
 * bail_out(). `string` is not declared in this function; it is left NULL
 * when the function returns.
 */
static void read_dict()
{
    int count = 0;

    while ((string = get_strings()) != NULL) {
        if (string[0] == '\0') {
            /* nothing to store; release it instead of leaving a gap */
            free(string);
            continue;
        }

        strings = (char**) realloc(strings, (count+1)*sizeof(*strings));
        if (strings == NULL) {
            bail_out(EXIT_FAILURE, "realloc(3) failed");
        }
        strings[count] = string;
        count++;
    }

    /* Take a note of how many entries we have. */
    dict_size = count;
}
/*
 * Builds a table with one entry per line owned by `thread`, each entry being
 * the address of the first element of the corresponding line of `array`.
 *
 * The table has thread_sizey[thread] entries and is returned shifted back by
 * thread_offsety[thread], so that it is indexed with the same line numbers
 * as `array` (offsety .. offsety + sizey - 1). Allocation failure is
 * reported through bail_out().
 */
private_shared_block_ptrs partially_privatize(local_shared_block_ptrs array, int thread){
  int num_lines = thread_sizey[thread];
  int first_line = thread_offsety[thread];
  long int table_bytes = num_lines * sizeof(local_shared_block);

  private_shared_block_ptrs table = prk_malloc(table_bytes);
  if(table == NULL)
    bail_out("Unable to allocate array2");

  /* fill the table from its real start ... */
  for(int k=0; k<num_lines; k++){
    int line = first_line + k;
    table[k] = (&array[line][0]);
  }

  /* ... and hand it out shifted, so callers index it by global line number */
  return table - first_line;
}
/**
 * Cleanup parent process
 * @brief Close streams and file descriptors, then wait for the end of the
 *        child process, which is triggered by closing the pipe.
 * @details global variables: {writing,reading}_pipe, pipes_saved
 */
static void cleanup_parent()
{
    (void) fclose(reading_pipe);
    (void) fclose(writing_pipe);

    (void) close(pipes_saved[1]);
    (void) close(pipes_saved[2]);

    DEBUG ("WAIT PARENT\n");

    if (wait(NULL) == -1) {
        bail_out("Wait: No child process found");
    }

    DEBUG ("EXIT PARENT\n");
}
/*
 * Decides whether the point (x,y,z) is accepted for the random number `ran`.
 *
 * The densities of the first par->collPart entries returned by density() are
 * summed at the point and at the reference position (minScale on all three
 * axes). The point is accepted (return 1) when `ran` is below
 * (density sum / reference sum)^0.2, otherwise 0 is returned.
 *
 * If the reference sum is not positive, the error is reported through
 * bail_out() (unless silent) and the program exits with status 1.
 */
int
pointEvaluation(inputPars *par,double ran, double x, double y, double z){
  double val[99];
  double referenceSum=0.0, localSum=0.0;
  double weight1, weight2;
  int k;

  /* reference density */
  density(par->minScale,par->minScale,par->minScale,val);
  k=0;
  while(k<par->collPart){
    referenceSum += val[k];
    k++;
  }

  if (referenceSum<=0.){
    if(!silent) bail_out("Error: Sum of reference densities equals 0");
    exit(1);
  }

  /* density at the requested point */
  density(x,y,z,val);
  k=0;
  while(k<par->collPart){
    localSum += val[k];
    k++;
  }

  weight1=pow(localSum/referenceSum,0.2);
  weight2=0.; /* second criterion is currently switched off */

  return (ran < weight1 || ran < weight2) ? 1 : 0;
}
/*
 * Initializes the grid of charges for the given tile.
 *
 * Allocates one double per grid point of the tile (bounds inclusive) and
 * stores the points column by column: point (x,y) lives at index
 * (y - bottom) + (x - left) * n_rows. Points with even x get charge Q, points
 * with odd x get -Q. A failed allocation is announced on stdout and passed
 * to bail_out() as error flag.
 */
double *initializeGrid(bbox_t tile) {
  double   *grid;
  uint64_t x, y, n_columns, n_rows;
  int      error=0, my_ID;

  n_columns = tile.right-tile.left+1;
  n_rows    = tile.top-tile.bottom+1;

  grid = (double*) prk_malloc(n_columns*n_rows*sizeof(double));
  error = (grid == NULL);
  if (error) {
    MPI_Comm_rank(MPI_COMM_WORLD, &my_ID);
    printf("ERROR: Process %d could not allocate space for grid\n", my_ID);
  }
  bail_out(error);

  /* So far supporting only initialization with dipoles */
  for (y=tile.bottom; y<=tile.top; y++) {
    for (x=tile.left; x<=tile.right; x++) {
      uint64_t cell = y-tile.bottom+(x-tile.left)*n_rows;

      if (x%2 == 0) {
        grid[cell] = Q;
      } else {
        grid[cell] = -Q;
      }
    }
  }
  return grid;
}
int main(int argc, char ** argv) { long order; /* order of a the matrix */ int Tile_order=32; /* default tile size for tiling of local transpose */ int iterations; /* number of times to do the transpose */ int tiling; /* boolean: true if tiling is used */ int i, j, it, jt, iter; /* dummies */ double bytes; /* combined size of matrices */ double * RESTRICT A; /* buffer to hold original matrix */ double * RESTRICT B; /* buffer to hold transposed matrix */ double abserr; /* absolute error */ double epsilon=1.e-8; /* error tolerance */ double transpose_time,/* timing parameters */ avgtime; int nthread_input, nthread; int num_error=0; /* flag that signals that requested and obtained numbers of threads are the same */ /********************************************************************* ** read and test input parameters *********************************************************************/ printf("Parallel Research Kernels version %s\n", PRKVERSION); printf("OpenMP Matrix transpose: B = A^T\n"); if (argc != 4 && argc != 5){ printf("Usage: %s <# threads> <# iterations> <matrix order> [tile size]\n", *argv); exit(EXIT_FAILURE); } /* Take number of threads to request from command line */ nthread_input = atoi(*++argv); if ((nthread_input < 1) || (nthread_input > MAX_THREADS)) { printf("ERROR: Invalid number of threads: %d\n", nthread_input); exit(EXIT_FAILURE); } omp_set_num_threads(nthread_input); iterations = atoi(*++argv); if (iterations < 1){ printf("ERROR: iterations must be >= 1 : %d \n",iterations); exit(EXIT_FAILURE); } order = atoi(*++argv); if (order < 0){ printf("ERROR: Matrix Order must be greater than 0 : %d \n", order); exit(EXIT_FAILURE); } if (argc == 5) Tile_order = atoi(*++argv); /* a non-positive tile size means no tiling of the local transpose */ tiling = (Tile_order > 0) && (Tile_order < order); if (!tiling) Tile_order = order; /********************************************************************* ** Allocate space for the input and transpose matrix 
*********************************************************************/ A = (double *)malloc(order*order*sizeof(double)); if (A == NULL){ printf(" ERROR: cannot allocate space for input matrix: %ld\n", order*order*sizeof(double)); exit(EXIT_FAILURE); } B = (double *)malloc(order*order*sizeof(double)); if (B == NULL){ printf(" ERROR: cannot allocate space for output matrix: %ld\n", order*order*sizeof(double)); exit(EXIT_FAILURE); } bytes = 2.0 * sizeof(double) * order * order; #pragma omp parallel private (iter) { #pragma omp master { nthread = omp_get_num_threads(); if (nthread != nthread_input) { num_error = 1; printf("ERROR: number of requested threads %d does not equal ", nthread_input); printf("number of spawned threads %d\n", nthread); } else { printf("Number of threads = %i;\n",nthread_input); printf("Matrix order = %ld\n", order); printf("Number of iterations = %d\n", iterations); if (tiling) { printf("Tile size = %d\n", Tile_order); #ifdef COLLAPSE printf("Using loop collapse\n"); #endif } else printf("Untiled\n"); } } bail_out(num_error); /* Fill the original matrix, set transpose to known garbage value. 
*/ if (tiling) { #ifdef COLLAPSE #pragma omp for private (i,it,jt) collapse(2) #else #pragma omp for private (i,it,jt) #endif for (j=0; j<order; j+=Tile_order) for (i=0; i<order; i+=Tile_order) for (jt=j; jt<MIN(order,j+Tile_order);jt++) for (it=i; it<MIN(order,i+Tile_order); it++){ A(it,jt) = (double) (order*jt + it); B(it,jt) = 0.0; } } else { #pragma omp for private (i) for (j=0;j<order;j++) for (i=0;i<order; i++) { A(i,j) = (double) (order*j + i); B(i,j) = 0.0; } } for (iter = 0; iter<=iterations; iter++){ /* start timer after a warmup iteration */ if (iter == 1) { #pragma omp barrier #pragma omp master { transpose_time = wtime(); } } /* Transpose the matrix */ if (!tiling) { #pragma omp for private (j) for (i=0;i<order; i++) for (j=0;j<order;j++) { B(j,i) += A(i,j); A(i,j) += 1.0; } } else { #ifdef COLLAPSE #pragma omp for private (j,it,jt) collapse(2) #else #pragma omp for private (j,it,jt) #endif for (i=0; i<order; i+=Tile_order) for (j=0; j<order; j+=Tile_order) for (it=i; it<MIN(order,i+Tile_order); it++) for (jt=j; jt<MIN(order,j+Tile_order);jt++) { B(jt,it) += A(it,jt); A(it,jt) += 1.0; } } } /* end of iter loop */ #pragma omp barrier #pragma omp master { transpose_time = wtime() - transpose_time; } } /* end of OpenMP parallel region */ abserr = test_results (order, B, iterations); /********************************************************************* ** Analyze and output results. *********************************************************************/ if (abserr < epsilon) { printf("Solution validates\n"); avgtime = transpose_time/iterations; printf("Rate (MB/s): %lf Avg time (s): %lf\n", 1.0E-06 * bytes/avgtime, avgtime); #ifdef VERBOSE printf("Squared errors: %f \n", abserr); #endif exit(EXIT_SUCCESS); } else { printf("ERROR: Aggregate squared error %lf exceeds threshold %e\n", abserr, epsilon); exit(EXIT_FAILURE); } } /* end of main */
int main(int argc, char **argv) { long int j, iter; /* dummies */ double scalar; /* constant used in Triad operation */ int iterations; /* number of times vector loop gets repeated */ long int length, /* vector length per processor */ total_length, /* total vector length */ offset; /* offset between vectors a and b, and b and c */ double bytes; /* memory IO size */ size_t space; /* memory used for a single vector */ double nstream_time, /* timing parameters */ avgtime = 0.0, maxtime = 0.0, mintime = 366.0*8760.0*3600.0; /* set the minimum time to a large value; one leap year should be enough */ int Num_procs, /* process parameters */ my_ID, /* rank of calling process */ root=0; /* ID of master process */ int error=0; /* error flag for individual process */ /********************************************************************************** * process and test input parameters ***********************************************************************************/ MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&Num_procs); MPI_Comm_rank(MPI_COMM_WORLD,&my_ID); if (my_ID == root) { printf("MPI stream triad: A = B + scalar*C\n"); if (argc != 4) { printf("Usage: %s <# iterations> <vector length> <offset>\n", *argv); error = 1; goto ENDOFTESTS; } iterations = atoi(*++argv); if (iterations < 1) { printf("ERROR: Invalid number of iterations: %d\n", iterations); error = 1; goto ENDOFTESTS; } total_length = atol(*++argv); if (total_length < Num_procs) { printf("ERROR: Invalid vector length: %ld\n", total_length); error = 1; goto ENDOFTESTS; } else length = total_length/Num_procs; offset = atol(*++argv); if (offset < 0) { printf("ERROR: Invalid array offset: %ld\n", offset); error = 1; goto ENDOFTESTS; } #ifdef STATIC_ALLOCATION if ((3*length + 2*offset) > N) { printf("ERROR: vector length/offset %ld/%ld too ", total_length, offset); printf("large; increase MAXLENGTH in Makefile or decrease vector length\n"); error = 1; goto ENDOFTESTS; } #endif ENDOFTESTS: ; } 
bail_out(error); /* broadcast initialization data */ MPI_Bcast(&length,1, MPI_LONG, root, MPI_COMM_WORLD); MPI_Bcast(&offset,1, MPI_LONG, root, MPI_COMM_WORLD); MPI_Bcast(&iterations,1, MPI_INT, root, MPI_COMM_WORLD); #ifndef STATIC_ALLOCATION space = (3*length + 2*offset)*sizeof(double); a = (double *) malloc(space); if (!a && my_ID == root) { printf("ERROR: Could not allocate %ld bytes for vectors\n", (long int)space); error = 1; } bail_out(error); #endif b = a + length + offset; c = b + length + offset; bytes = 3.0 * sizeof(double) * length * Num_procs; if (my_ID == root) { printf("Number of processes = %d\n", Num_procs); printf("Vector length = %ld\n", total_length); printf("Offset = %ld\n", offset); printf("Number of iterations = %d\n", iterations); } #pragma vector always for (j=0; j<length; j++) { a[j] = 0.0; b[j] = 2.0; c[j] = 2.0; } /* --- MAIN LOOP --- repeat Triad iterations times --- */ scalar = SCALAR; for (iter=0; iter<iterations; iter++) { MPI_Barrier(MPI_COMM_WORLD); if (my_ID == root) { nstream_time = wtime(); } #pragma vector always for (j=0; j<length; j++) a[j] = b[j]+scalar*c[j]; if (my_ID == root) { if (iter>0 || iterations==1) { /* skip the first iteration */ nstream_time = wtime() - nstream_time; avgtime = avgtime + nstream_time; mintime = MIN(mintime, nstream_time); maxtime = MAX(maxtime, nstream_time); } } /* insert a dependency between iterations to avoid dead-code elimination */ #pragma vector always for (j=0; j<length; j++) b[j] = a[j]; } /********************************************************************* ** Analyze and output results. *********************************************************************/ if (my_ID == root) { if (checkTRIADresults(iterations, length)) { avgtime = avgtime/(double)(MAX(iterations-1,1)); printf("Rate (MB/s): %lf, Avg time (s): %lf, Min time (s): %lf", 1.0E-06 * bytes/mintime, avgtime, mintime); printf(", Max time (s): %lf\n", maxtime); } else error = 1; } bail_out(error); MPI_Finalize(); }
int main(int argc, char ** argv) { int Num_procs; /* number of ranks */ int Num_procsx, Num_procsy; /* number of ranks in each coord direction */ int my_ID; /* SHMEM rank */ int my_IDx, my_IDy; /* coordinates of rank in rank grid */ int right_nbr; /* global rank of right neighboring tile */ int left_nbr; /* global rank of left neighboring tile */ int top_nbr; /* global rank of top neighboring tile */ int bottom_nbr; /* global rank of bottom neighboring tile */ DTYPE *top_buf_out; /* communication buffer */ DTYPE *top_buf_in[2]; /* " " */ DTYPE *bottom_buf_out; /* " " */ DTYPE *bottom_buf_in[2];/* " " */ DTYPE *right_buf_out; /* " " */ DTYPE *right_buf_in[2]; /* " " */ DTYPE *left_buf_out; /* " " */ DTYPE *left_buf_in[2]; /* " " */ int root = 0; int n, width, height;/* linear global and local grid dimension */ int i, j, ii, jj, kk, it, jt, iter, leftover; /* dummies */ int istart, iend; /* bounds of grid tile assigned to calling rank */ int jstart, jend; /* bounds of grid tile assigned to calling rank */ DTYPE reference_norm; DTYPE f_active_points; /* interior of grid with respect to stencil */ int stencil_size; /* number of points in the stencil */ DTYPE flops; /* floating point ops per iteration */ int iterations; /* number of times to run the algorithm */ double avgtime, /* timing parameters */ *local_stencil_time, *stencil_time; DTYPE * RESTRICT in; /* input grid values */ DTYPE * RESTRICT out; /* output grid values */ long total_length_in; /* total required length to store input array */ long total_length_out;/* total required length to store output array */ int error=0; /* error flag */ DTYPE weight[2*RADIUS+1][2*RADIUS+1]; /* weights of points in the stencil */ int *arguments; /* command line parameters */ int count_case=4; /* number of neighbors of a rank */ long *pSync_bcast; /* work space for collectives */ long *pSync_reduce; /* work space for collectives */ double *pWrk_time; /* work space for collectives */ DTYPE *pWrk_norm; /* work space for 
collectives */ int *iterflag; /* synchronization flags */ int sw; /* double buffering switch */ DTYPE *local_norm, *norm; /* local and global error norms */ /******************************************************************************* ** Initialize the SHMEM environment ********************************************************************************/ prk_shmem_init(); my_ID=prk_shmem_my_pe(); Num_procs=prk_shmem_n_pes(); pSync_bcast = (long *) prk_shmem_malloc(PRK_SHMEM_BCAST_SYNC_SIZE*sizeof(long)); pSync_reduce = (long *) prk_shmem_malloc(PRK_SHMEM_REDUCE_SYNC_SIZE*sizeof(long)); pWrk_time = (double *) prk_shmem_malloc(PRK_SHMEM_REDUCE_MIN_WRKDATA_SIZE*sizeof(double)); pWrk_norm = (DTYPE *) prk_shmem_malloc(PRK_SHMEM_REDUCE_MIN_WRKDATA_SIZE*sizeof(DTYPE)); local_stencil_time = (double *) prk_shmem_malloc(sizeof(double)); stencil_time = (double *) prk_shmem_malloc(sizeof(double)); local_norm = (DTYPE *) prk_shmem_malloc(sizeof(DTYPE)); norm = (DTYPE *) prk_shmem_malloc(sizeof(DTYPE)); iterflag = (int *) prk_shmem_malloc(2*sizeof(int)); if (!(pSync_bcast && pSync_reduce && pWrk_time && pWrk_norm && iterflag && local_stencil_time && stencil_time && local_norm && norm)) { printf("Could not allocate scalar variables on rank %d\n", my_ID); error = 1; } bail_out(error); for(i=0;i<PRK_SHMEM_BCAST_SYNC_SIZE;i++) pSync_bcast[i]=PRK_SHMEM_SYNC_VALUE; for(i=0;i<PRK_SHMEM_REDUCE_SYNC_SIZE;i++) pSync_reduce[i]=PRK_SHMEM_SYNC_VALUE; arguments=(int*)prk_shmem_malloc(2*sizeof(int)); /******************************************************************************* ** process, test, and broadcast input parameters ********************************************************************************/ if (my_ID == root) { #ifndef STAR printf("ERROR: Compact stencil not supported\n"); error = 1; goto ENDOFTESTS; #endif if (argc != 3){ printf("Usage: %s <# iterations> <array dimension> \n", *argv); error = 1; goto ENDOFTESTS; } iterations = atoi(*++argv); arguments[0]=iterations; if 
(iterations < 1){ printf("ERROR: iterations must be >= 1 : %d \n",iterations); error = 1; goto ENDOFTESTS; } n = atoi(*++argv); arguments[1]=n; long nsquare = (long)n * (long)n; if (nsquare < Num_procs){ printf("ERROR: grid size must be at least # ranks: %ld\n", nsquare); error = 1; goto ENDOFTESTS; } if (RADIUS < 0) { printf("ERROR: Stencil radius %d should be non-negative\n", RADIUS); error = 1; goto ENDOFTESTS; } if (2*RADIUS +1 > n) { printf("ERROR: Stencil radius %d exceeds grid size %d\n", RADIUS, n); error = 1; goto ENDOFTESTS; } ENDOFTESTS:; } bail_out(error); /* determine best way to create a 2D grid of ranks (closest to square, for best surface/volume ratio); we do this brute force for now */ for (Num_procsx=(int) (sqrt(Num_procs+1)); Num_procsx>0; Num_procsx--) { if (!(Num_procs%Num_procsx)) { Num_procsy = Num_procs/Num_procsx; break; } } my_IDx = my_ID%Num_procsx; my_IDy = my_ID/Num_procsx; /* compute neighbors; don't worry about dropping off the edges of the grid */ right_nbr = my_ID+1; left_nbr = my_ID-1; top_nbr = my_ID+Num_procsx; bottom_nbr = my_ID-Num_procsx; iterflag[0] = iterflag[1] = 0; if(my_IDx==0) count_case--; if(my_IDx==Num_procsx-1) count_case--; if(my_IDy==0) count_case--; if(my_IDy==Num_procsy-1) count_case--; if (my_ID == root) { printf("Parallel Research Kernels version %s\n", PRKVERSION); printf("SHMEM stencil execution on 2D grid\n"); printf("Number of ranks = %d\n", Num_procs); printf("Grid size = %d\n", n); printf("Radius of stencil = %d\n", RADIUS); printf("Tiles in x/y-direction = %d/%d\n", Num_procsx, Num_procsy); printf("Type of stencil = star\n"); #ifdef DOUBLE printf("Data type = double precision\n"); #else printf("Data type = single precision\n"); #endif #if LOOPGEN printf("Script used to expand stencil loop body\n"); #else printf("Compact representation of stencil loop body\n"); #endif #if SPLITFENCE printf("Split fence = ON\n"); #else printf("Split fence = OFF\n"); #endif printf("Number of iterations = %d\n", iterations); 
} shmem_barrier_all(); shmem_broadcast32(&arguments[0], &arguments[0], 2, root, 0, 0, Num_procs, pSync_bcast); iterations=arguments[0]; n=arguments[1]; shmem_barrier_all(); prk_shmem_free(arguments); /* compute amount of space required for input and solution arrays */ width = n/Num_procsx; leftover = n%Num_procsx; if (my_IDx<leftover) { istart = (width+1) * my_IDx; iend = istart + width + 1; } else { istart = (width+1) * leftover + width * (my_IDx-leftover); iend = istart + width; } width = iend - istart + 1; if (width == 0) { printf("ERROR: rank %d has no work to do\n", my_ID); error = 1; } bail_out(error); height = n/Num_procsy; leftover = n%Num_procsy; if (my_IDy<leftover) { jstart = (height+1) * my_IDy; jend = jstart + height + 1; } else { jstart = (height+1) * leftover + height * (my_IDy-leftover); jend = jstart + height; } height = jend - jstart + 1; if (height == 0) { printf("ERROR: rank %d has no work to do\n", my_ID); error = 1; } bail_out(error); if (width < RADIUS || height < RADIUS) { printf("ERROR: rank %d has work tile smaller then stencil radius\n", my_ID); error = 1; } bail_out(error); total_length_in = (width+2*RADIUS); total_length_in *= (height+2*RADIUS); total_length_in *= sizeof(DTYPE); total_length_out = width; total_length_out *= height; total_length_out *= sizeof(DTYPE); in = (DTYPE *) malloc(total_length_in); out = (DTYPE *) malloc(total_length_out); if (!in || !out) { printf("ERROR: rank %d could not allocate space for input/output array\n", my_ID); error = 1; } bail_out(error); /* fill the stencil weights to reflect a discrete divergence operator */ for (jj=-RADIUS; jj<=RADIUS; jj++) for (ii=-RADIUS; ii<=RADIUS; ii++) WEIGHT(ii,jj) = (DTYPE) 0.0; stencil_size = 4*RADIUS+1; for (ii=1; ii<=RADIUS; ii++) { WEIGHT(0, ii) = WEIGHT( ii,0) = (DTYPE) (1.0/(2.0*ii*RADIUS)); WEIGHT(0,-ii) = WEIGHT(-ii,0) = -(DTYPE) (1.0/(2.0*ii*RADIUS)); } norm[0] = (DTYPE) 0.0; f_active_points = (DTYPE) (n-2*RADIUS)*(DTYPE) (n-2*RADIUS); /* intialize the input and 
output arrays */ for (j=jstart; j<jend; j++) for (i=istart; i<iend; i++) { IN(i,j) = COEFX*i+COEFY*j; OUT(i,j) = (DTYPE)0.0; } /* allocate communication buffers for halo values */ top_buf_out=(DTYPE*)malloc(2*sizeof(DTYPE)*RADIUS*width); if (!top_buf_out) { printf("ERROR: Rank %d could not allocate output comm buffers for y-direction\n", my_ID); error = 1; } bail_out(error); bottom_buf_out = top_buf_out+RADIUS*width; top_buf_in[0]=(DTYPE*)prk_shmem_malloc(4*sizeof(DTYPE)*RADIUS*width); if(!top_buf_in) { printf("ERROR: Rank %d could not allocate input comm buffers for y-direction\n", my_ID); error=1; } bail_out(error); top_buf_in[1] = top_buf_in[0] + RADIUS*width; bottom_buf_in[0] = top_buf_in[1] + RADIUS*width; bottom_buf_in[1] = bottom_buf_in[0] + RADIUS*width; right_buf_out=(DTYPE*)malloc(2*sizeof(DTYPE)*RADIUS*height); if (!right_buf_out) { printf("ERROR: Rank %d could not allocate output comm buffers for x-direction\n", my_ID); error = 1; } bail_out(error); left_buf_out=right_buf_out+RADIUS*height; right_buf_in[0]=(DTYPE*)prk_shmem_malloc(4*sizeof(DTYPE)*RADIUS*height); if(!right_buf_in) { printf("ERROR: Rank %d could not allocate input comm buffers for x-dimension\n", my_ID); error=1; } bail_out(error); right_buf_in[1] = right_buf_in[0] + RADIUS*height; left_buf_in[0] = right_buf_in[1] + RADIUS*height; left_buf_in[1] = left_buf_in[0] + RADIUS*height; /* make sure all symmetric heaps are allocated before being used */ shmem_barrier_all(); for (iter = 0; iter<=iterations; iter++){ /* start timer after a warmup iteration */ if (iter == 1) { shmem_barrier_all(); local_stencil_time[0] = wtime(); } /* sw determines which incoming buffer to select */ sw = iter%2; /* need to fetch ghost point data from neighbors */ if (my_IDy < Num_procsy-1) { for (kk=0,j=jend-RADIUS; j<=jend-1; j++) for (i=istart; i<=iend; i++) { top_buf_out[kk++]= IN(i,j); } shmem_putmem(bottom_buf_in[sw], top_buf_out, RADIUS*width*sizeof(DTYPE), top_nbr); #if SPLITFENCE shmem_fence(); 
shmem_int_inc(&iterflag[sw], top_nbr); #endif } if (my_IDy > 0) { for (kk=0,j=jstart; j<=jstart+RADIUS-1; j++) for (i=istart; i<=iend; i++) { bottom_buf_out[kk++]= IN(i,j); } shmem_putmem(top_buf_in[sw], bottom_buf_out, RADIUS*width*sizeof(DTYPE), bottom_nbr); #if SPLITFENCE shmem_fence(); shmem_int_inc(&iterflag[sw], bottom_nbr); #endif } if(my_IDx < Num_procsx-1) { for(kk=0,j=jstart;j<=jend;j++) for(i=iend-RADIUS;i<=iend-1;i++) { right_buf_out[kk++]=IN(i,j); } shmem_putmem(left_buf_in[sw], right_buf_out, RADIUS*height*sizeof(DTYPE), right_nbr); #if SPLITFENCE shmem_fence(); shmem_int_inc(&iterflag[sw], right_nbr); #endif } if(my_IDx>0) { for(kk=0,j=jstart;j<=jend;j++) for(i=istart;i<=istart+RADIUS-1;i++) { left_buf_out[kk++]=IN(i,j); } shmem_putmem(right_buf_in[sw], left_buf_out, RADIUS*height*sizeof(DTYPE), left_nbr); #if SPLITFENCE shmem_fence(); shmem_int_inc(&iterflag[sw], left_nbr); #endif } #if SPLITFENCE == 0 shmem_fence(); if(my_IDy<Num_procsy-1) shmem_int_inc(&iterflag[sw], top_nbr); if(my_IDy>0) shmem_int_inc(&iterflag[sw], bottom_nbr); if(my_IDx<Num_procsx-1) shmem_int_inc(&iterflag[sw], right_nbr); if(my_IDx>0) shmem_int_inc(&iterflag[sw], left_nbr); #endif shmem_int_wait_until(&iterflag[sw], SHMEM_CMP_EQ, count_case*(iter/2+1)); if (my_IDy < Num_procsy-1) { for (kk=0,j=jend; j<=jend+RADIUS-1; j++) for (i=istart; i<=iend; i++) { IN(i,j) = top_buf_in[sw][kk++]; } } if (my_IDy > 0) { for (kk=0,j=jstart-RADIUS; j<=jstart-1; j++) for (i=istart; i<=iend; i++) { IN(i,j) = bottom_buf_in[sw][kk++]; } } if (my_IDx < Num_procsx-1) { for (kk=0,j=jstart; j<=jend; j++) for (i=iend; i<=iend+RADIUS-1; i++) { IN(i,j) = right_buf_in[sw][kk++]; } } if (my_IDx > 0) { for (kk=0,j=jstart; j<=jend; j++) for (i=istart-RADIUS; i<=istart-1; i++) { IN(i,j) = left_buf_in[sw][kk++]; } } /* Apply the stencil operator */ for (j=MAX(jstart,RADIUS); j<=MIN(n-RADIUS-1,jend); j++) { for (i=MAX(istart,RADIUS); i<=MIN(n-RADIUS-1,iend); i++) { #if LOOPGEN #include "loop_body_star.incl" 
#else for (jj=-RADIUS; jj<=RADIUS; jj++) OUT(i,j) += WEIGHT(0,jj)*IN(i,j+jj); for (ii=-RADIUS; ii<0; ii++) OUT(i,j) += WEIGHT(ii,0)*IN(i+ii,j); for (ii=1; ii<=RADIUS; ii++) OUT(i,j) += WEIGHT(ii,0)*IN(i+ii,j); #endif } } /* add constant to solution to force refresh of neighbor data, if any */ for (j=jstart; j<jend; j++) for (i=istart; i<iend; i++) IN(i,j)+= 1.0; } local_stencil_time[0] = wtime() - local_stencil_time[0]; shmem_barrier_all(); shmem_double_max_to_all(&stencil_time[0], &local_stencil_time[0], 1, 0, 0, Num_procs, pWrk_time, pSync_reduce); /* compute L1 norm in parallel */ local_norm[0] = (DTYPE) 0.0; for (j=MAX(jstart,RADIUS); j<MIN(n-RADIUS,jend); j++) { for (i=MAX(istart,RADIUS); i<MIN(n-RADIUS,iend); i++) { local_norm[0] += (DTYPE)ABS(OUT(i,j)); } } shmem_barrier_all(); #ifdef DOUBLE shmem_double_sum_to_all(&norm[0], &local_norm[0], 1, 0, 0, Num_procs, pWrk_norm, pSync_reduce); #else shmem_float_sum_to_all(&norm[0], &local_norm[0], 1, 0, 0, Num_procs, pWrk_norm, pSync_reduce); #endif /******************************************************************************* ** Analyze and output results. 
********************************************************************************/ /* verify correctness */ if (my_ID == root) { norm[0] /= f_active_points; if (RADIUS > 0) { reference_norm = (DTYPE) (iterations+1) * (COEFX + COEFY); } else { reference_norm = (DTYPE) 0.0; } if (ABS(norm[0]-reference_norm) > EPSILON) { printf("ERROR: L1 norm = "FSTR", Reference L1 norm = "FSTR"\n", norm[0], reference_norm); error = 1; } else { printf("Solution validates\n"); #ifdef VERBOSE printf("Reference L1 norm = "FSTR", L1 norm = "FSTR"\n", reference_norm, norm[0]); #endif } } bail_out(error); if (my_ID == root) { /* flops/stencil: 2 flops (fma) for each point in the stencil, plus one flop for the update of the input of the array */ flops = (DTYPE) (2*stencil_size+1) * f_active_points; avgtime = stencil_time[0]/iterations; printf("Rate (MFlops/s): "FSTR" Avg time (s): %lf\n", 1.0E-06 * flops/avgtime, avgtime); } prk_shmem_free(top_buf_in); prk_shmem_free(right_buf_in); free(top_buf_out); free(right_buf_out); prk_shmem_free(pSync_bcast); prk_shmem_free(pSync_reduce); prk_shmem_free(pWrk_time); prk_shmem_free(pWrk_norm); prk_shmem_finalize(); exit(EXIT_SUCCESS); }