Exemple #1
0
/**
 * @brief Check if any chstat process is listening. Bail out if not.
 * @details The check runs with the mutex held and all signals blocked; the
 *          original signal mask is restored before returning.
 *          Global variables: mtx, shared
 */
static void ipc_require_listener(void)
{
	sigset_t blocked_signals, curr_set;

	/* While we are in the mutual exclusive section we don't listen to
	 * signals, because it would be hard to reset the state of the mutex */
	(void) sigfillset(&blocked_signals);
	(void) sigprocmask(SIG_BLOCK, &blocked_signals, &curr_set);

	/* Mutual exclusive section so no other process removes shm segment meanwhile */
	if (semdown(mtx) == -1) {
		bail_out(EXIT_FAILURE, "Error downing mutex");
	}

	/* Do we have any listeners? Test the bit itself: comparing the masked
	 * value against 1 is only correct when READER_F happens to be bit 0. */
	if ((shared->flag & READER_F) == 0) {
		(void) semup(mtx); /* Because ipc_shutdown() tries to down the mutex */
		bail_out(EXIT_SUCCESS, "No chstat process listening.");
	}

	/* Release mutex */
	if (semup(mtx) == -1) {
		bail_out(EXIT_FAILURE, "Error upping mutex");
	}

	/* Reset to original procmask */
	(void) sigprocmask(SIG_SETMASK, &curr_set, NULL);
}
Exemple #2
0
/*
 * Seed the C PRNG from the current time, then allocate and fill the matrix
 * rows and the auxiliary vectors with values from rand_double().
 *
 * primary: array of options.n row pointers; each row gets options.m doubles.
 * vectors: array of NUM_VEC pointers; even-indexed vectors get options.m
 *          doubles, odd-indexed ones options.n doubles.
 *
 * Allocation failures are reported through bail_out().
 */
void init_rand(double **primary, double **vectors){
  debug("init\n");
  srand((unsigned int)time((time_t*) NULL));

  /* Matrix rows */
  int row = 0;
  while (row < options.n){
    primary[row] = malloc(options.m * sizeof(double));
    if (primary[row] == NULL){
      bail_out(EXIT_FAILURE, "malloc primary[%d]", row);
    }
    for (int col = 0; col < options.m; col++){
      primary[row][col] = rand_double();
    }
    row++;
  }

  /* Auxiliary vectors: length alternates between options.m and options.n */
  for (int v = 0; v < NUM_VEC; v++){
    int vec_len = (v%2 == 0) ? options.m : options.n;

    vectors[v] = malloc(vec_len * sizeof(double));
    if (vectors[v] == NULL){
      bail_out(EXIT_FAILURE, "malloc vectors[%d]\n", v);
    }

    int pos = 0;
    while (pos < vec_len){
      vectors[v][pos] = rand_double();
      pos++;
    }
  }
  debug("in init: %f", vectors[0][0]);
}
Exemple #3
0
/**
 * @brief Init list with all possible solutions
 * @details Builds a singly linked list of CMAX zero-initialised nodes and
 *          then fills slot j of every node so that the stored value advances
 *          (modulo COLORS) every `stride` nodes, where stride starts at
 *          CMAX / COLORS and is divided by COLORS for each further slot.
 * @return Head of the list with possible solutions
 */
struct sol_node *init_solver(void)
{
	struct sol_node *head, *node;
	int count, slot, pos, color, stride;

	/* Allocate list head */
	head = calloc(1, sizeof(*head));
	if (head == NULL)
		bail_out(EXIT_FAILURE, "calloc");

	/* Append the remaining CMAX - 1 nodes */
	node = head;
	for (count = 1; count < CMAX; count++) {
		node->next = calloc(1, sizeof(*node->next));
		if (node->next == NULL)
			bail_out(EXIT_FAILURE, "calloc");
		node = node->next;
	}

	/* Fill solution set with data, one slot position at a time */
	stride = CMAX / COLORS;
	for (slot = 0; slot < SLOTS; slot++) {
		color = -1;
		pos = 0;
		for (node = head; node != NULL; node = node->next) {
			/* Flip to next number/color at every stride boundary */
			if (pos % stride == 0)
				color = (color + 1) % COLORS;
			node->slots[slot] = color;
			pos++;
		}
		stride = stride / COLORS;
	}
	return head;
}
Exemple #4
0
/*
 * Entry point.
 *
 * Parses the options in OPTSTR ('w' sets the wait flag), then expects two
 * positional arguments: a task-parameter file, which is handed to
 * parse_hime_ts_file(), and the program to start. The program path and its
 * argument vector are stored in a startup_info_t and passed, together with
 * the launch callback and the parsed rt_task, to create_rt_task_semi().
 *
 * Returns 0 on success and -1 if the parameter file cannot be opened; other
 * failures are reported through bail_out().
 */
int main(int argc, char** argv)
{
	int ret;
	int wait = 0;
	int opt;
	startup_info_t info;

	struct rt_task rt;
	FILE *file;

	while ((opt = getopt(argc, argv, OPTSTR)) != -1) {
		switch (opt) {
		case 'w':
			wait = 1;
			break;
		case ':':
			usage("Argument missing.");
			break;
		case '?':
		default:
			usage("Bad argument.");
			break;
		}
	}

	signal(SIGUSR1, SIG_IGN);

	if (argc - optind < 2)
		usage("Arguments missing.");

	if ((file = fopen(argv[optind + 0], "r")) == NULL) {
		/* Report the name that was actually opened; argv[1] may be an
		 * option such as -w rather than the file. */
		fprintf(stderr, "Cannot open %s\n", argv[optind + 0]);
		return -1;
	}

	memset(&rt, 0, sizeof(struct rt_task));

	if (parse_hime_ts_file(file, &rt) < 0)
		bail_out("Could not parse file\n");

	if (sporadic_task_ns_semi(&rt) < 0)
		bail_out("could not setup rt task params");

	fclose(file);

	/* argv + optind + 1 starts at the program name, so it doubles as the
	 * child's argument vector. */
	info.exec_path = argv[optind + 1];
	info.argv      = argv + optind + 1;
	info.wait      = wait;

	ret = create_rt_task_semi(launch, &info, &rt);

	if (ret < 0)
		bail_out("could not create rt child process");

	return 0;
}
Exemple #5
0
/**
 * @brief Main child function
 * @details Reads opts->slen bytes from the input pipe, alters the string via
 *          alter_string() and either forwards it to the output pipe (when
 *          id < opts->num_childs) or prints the final result.
 *          Exits after execution: EXIT_FAILURE if the global quit flag is
 *          set, EXIT_SUCCESS otherwise.
 *          Global variables: quit
 * @param id Number of the created child (starting with 1)
 * @param opts Parameters from command line, PRE: != null
 * @param ipipe Input pipe for a child process
 *        PRE: != null, size = 2 * int
 * @param opipe Output pipe for a child process
 *        PRE: != null, size = 2 * int
 */
void child_main(int id, struct options *opts, int *ipipe,
		int *opipe)
{
	/* Zero-filled so the buffer is a valid string even if nothing is read */
	char buff[SECRET_MAX] = { 0 };
	FILE *stream;

	/* Open input stream */
	stream = fdopen(ipipe[0], "r");
	if (stream == NULL)
		bail_out(EXIT_FAILURE, "fdopen");

	/* Read from input pipe */
	if (close(ipipe[1]) == -1)
		bail_out(EXIT_FAILURE, "close");
	if (quit == 0) {
		/* fread() returns an item count and never -1; a failure is
		 * signalled through the stream's error indicator. An error that
		 * coincides with a quit request is left to the exit path. */
		(void) fread(buff, 1, opts->slen, stream);
		if (ferror(stream) && quit == 0)
			bail_out(EXIT_FAILURE, "read");
	}
	/* fclose() also closes ipipe[0], which the stream wraps */
	if (fclose(stream) == EOF)
		bail_out(EXIT_FAILURE, "close");

	/* Randomly alter string, verbose output */
	if (quit == 0 && opts->verbose) {
		(void) printf("child%02d: erhalten: %s\n",
			      id, buff);
	}
	alter_string(id, buff, opts->slen - 1);
	if (quit == 0 && opts->verbose) {
		(void) printf("child%02d: weiter  : %s\n",
			      id, buff);
	}

	/* Handle output */
	if (id < opts->num_childs) {
		if (close(opipe[0]) == -1)
			bail_out(EXIT_FAILURE, "close");
		/* Get stream for writing */
		stream = fdopen(opipe[1], "w");
		if (stream == NULL)
			bail_out(EXIT_FAILURE, "fdopen");
		/* A short item count is the only failure signal of fwrite() */
		if (quit == 0 &&
		    fwrite(buff, 1, opts->slen, stream) != (size_t) opts->slen &&
		    quit == 0) {
			bail_out(EXIT_FAILURE, "write");
		}
		if (fflush(stream))
			bail_out(EXIT_FAILURE, "fflush");
		/* fclose() also closes opipe[1] */
		if (fclose(stream) == EOF)
			bail_out(EXIT_FAILURE, "close");
	} else {
		(void) printf("child%02d: Ende    : %s\n",
			      id, buff);
	}

	/* Return failure on exit */
	if (quit == 1)
		exit(EXIT_FAILURE);
	exit(EXIT_SUCCESS);
}
Exemple #6
0
/**
 * @brief Parse arguments
 * @details Check if arguments are correct and if a verbose
 *          option has been specified. Exactly two positional arguments are
 *          expected after the options: the number of child processes and
 *          the secret string.
 *          Global variables: pname
 * @param argc Number of arguments
 * @param argv Argument array
 * @param opts Option structure, PRE: opts != null.
 *        Fields written: verbose (only when -v is given), num_childs, slen
 *        (length of the secret including the terminating NUL)
 * @param secret String to store secret in, PRE: != null and at least
 *        SECRET_MAX bytes large (the whole buffer is zeroed)
 */
static void parse_args(int argc, char **argv,
		       struct options *opts, char *secret)
{
	int opt;
	char *endptr;
	size_t seclen;

	/* Store program name */
	pname = "";
	if (argc > 0)
		pname = argv[0];

	/* Parse options */
	opt = getopt(argc, argv, "v");
	while (opt != -1) {
		switch (opt) {
		case 'v':
			opts->verbose = true;
			break;
		default:
			usage();
			break;
		}
		opt = getopt(argc, argv, "v");
	}

	/* Parse remaining arguments */
	if (argc - optind != 2)
		usage();

	/* Get number of child processes */
	errno = 0;
	opts->num_childs = strtol(argv[optind], &endptr, 10);
	/* "No digits consumed" must be tested against the string that was
	 * parsed; argv[1] is the option itself when -v is given. */
	if (errno != 0 || argv[optind] == endptr) {
		bail_out(EXIT_FAILURE, "Could not read number "
			 " of child processes");
	}

	/* Store secret string */
	seclen = strlen(argv[optind + 1]);
	if (seclen >= SECRET_MAX) {
		bail_out(EXIT_FAILURE, "Secret string too long!"
			 " Max: %d", SECRET_MAX);
	}
	(void) memset(secret, 0, SECRET_MAX);
	(void) memcpy(secret, argv[optind + 1], seclen);
	opts->slen = seclen + 1;
}
Exemple #7
0
/*
 * Builds "<stem>_<unit name><extension>" from img_filename_root and stores
 * it as the filename of image `im` via copyInparStr().
 *
 * img_filename_root: file name to decorate; the extension is the part from
 *                    the last '.' onwards, provided that dot lies after the
 *                    last '/' (a dot inside a directory name is not an
 *                    extension).
 * par:               unused here.
 * img, im:           image array and the index of the image to update.
 * unit_index:        position in img[im].imgunits of the unit to name.
 *
 * An out-of-range unit_index, or a stored unit value with no entry in the
 * name table, is reported through bail_out() (unless silent) and the
 * program exits.
 */
void insertUnitStrInFilename(char *img_filename_root, configInfo *par, imageInfo *img, const int im, const int unit_index){
  static char* unit_names[] = {"Kelvin", "Jansky-per-px", "SI", "LSun-per-px", "Tau", "#Rays"};
  const int num_unit_names = (int)(sizeof(unit_names)/sizeof(*unit_names));
  char message[STR_LEN_0];
  char *temp_filename, *last_dot, *last_sep;
  const char *unit_name, *ext;
  size_t stem_len, total_len;
  int unit;

  (void)par;

  /* Check if unit index falls outside range of possible unit names */
  if(unit_index < 0 || unit_index >= num_unit_names){
    snprintf(message, sizeof(message), "Image unit index '%d' does not have a corresponding unit name", unit_index);
    if(!silent) bail_out(message);
    exit(0);
  }

  /* The name table is indexed by the stored unit value, not by unit_index,
     so that value needs its own range check. */
  unit = img[im].imgunits[unit_index];
  if(unit < 0 || unit >= num_unit_names){
    snprintf(message, sizeof(message), "Image unit '%d' does not have a corresponding unit name", unit);
    if(!silent) bail_out(message);
    exit(0);
  }
  unit_name = unit_names[unit];

  if(img_filename_root == NULL){
    if(!silent) bail_out("Image filename root is missing");
    exit(0);
  }

  /* Split the root name into stem and extension */
  last_dot = strrchr(img_filename_root, '.');
  last_sep = strrchr(img_filename_root, '/');
  if(last_dot != NULL && (last_sep == NULL || last_sep < last_dot)){
    stem_len = (size_t)(last_dot - img_filename_root);
    ext = last_dot;
  } else {
    /* No extension: keep the whole name as the stem */
    stem_len = strlen(img_filename_root);
    ext = "";
  }

  /* Size the buffer for the final name instead of appending to a copy that
     was only large enough for the root name. */
  total_len = stem_len + 1 + strlen(unit_name) + strlen(ext) + 1;
  temp_filename = malloc(total_len);
  if(temp_filename == NULL){
    if(!silent) bail_out("Error allocating memory for image filename");
    exit(0);
  }
  memcpy(temp_filename, img_filename_root, stem_len);
  snprintf(temp_filename + stem_len, total_len - stem_len, "_%s%s", unit_name, ext);

  /* Update image filename from temporary filename */
  copyInparStr(temp_filename, &(img[im].filename));
  free(temp_filename);
}
Exemple #8
0
/*
 * Returns a newly allocated copy of inStr with its extension removed.
 *
 * The copy is cut at the last occurrence of extensionChar unless a
 * pathSeparator occurs after it, in which case the copy is left unchanged.
 * Passing 0 as pathSeparator disables the separator check.
 *
 * Returns NULL when inStr is NULL. On allocation failure the error is
 * reported through bail_out() (unless silent) and the program exits.
 * The caller owns the returned buffer and must free() it.
 */
char *removeFilenameExtension(char* inStr, char extensionChar, char pathSeparator) {
    char *copy, *extPos, *sepPos;

    if (inStr == NULL)
        return NULL;

    copy = malloc(strlen(inStr) + 1);
    if(copy == NULL){
        if(!silent) bail_out("Error allocating memory for filename extension removal");
        exit(0);
    }
    strcpy(copy, inStr);

    /* Nothing to strip when the extension character does not occur */
    extPos = strrchr(copy, extensionChar);
    if (extPos == NULL)
        return copy;

    /* Only cut when the extension character belongs to the last component */
    sepPos = (pathSeparator != 0) ? strrchr(copy, pathSeparator) : NULL;
    if (sepPos == NULL || sepPos < extPos)
        *extPos = '\0';

    return copy;
}
Exemple #9
0
/*
 * Starts the next game for a client.
 *
 * If the client has already played every dictionary entry
 * (game_count >= dict_size) the status becomes EndGame. Otherwise the next
 * entry of `strings` becomes the secret word, the error counter is reset,
 * the status becomes Running, game_count is advanced, and client_word is
 * set to a mask of the same length with '_' for every character except
 * spaces, which are shown as-is.
 */
static void create_game(struct ClientList *el_cur) 
{
    size_t i;

    if (el_cur->game_count >= dict_size) {
        /* No new game possible. */
        el_cur->status_id = EndGame;
        return;
    }

    /* Assign secret word. */
    DEBUG("secret word before: %s\n", strings[el_cur->game_count]);
    strncpy(el_cur->secret_word, strings[el_cur->game_count], MAX_DATA);
    /* strncpy() leaves the buffer unterminated when the source fills it;
     * terminate explicitly so the scan below stays inside MAX_DATA. */
    el_cur->secret_word[MAX_DATA - 1] = '\0';
    el_cur->errors = 0;
    el_cur->status_id = Running; 
    el_cur->game_count = el_cur->game_count + 1; 

    /* memset() cannot fail, so its return value carries no information. */
    (void) memset(el_cur->client_word, '_', MAX_DATA);

    /* Reveal spaces and terminate the mask at the secret word's length. */
    for (i = 0; el_cur->secret_word[i] != '\0'; i++) {
        if (el_cur->secret_word[i] == ' ') {
            el_cur->client_word[i] = ' ';
        }
    }
    el_cur->client_word[i] = '\0';

    DEBUG("Spiel erstellt: %s Secret Word: %s\n", 
            shared->s_word, el_cur->secret_word);
}
Exemple #10
0
/*
 * Delivers a command string to the Skype window through WM_COPYDATA.
 *
 * The send is retried once per second while SendMessageTimeout() fails.
 * After the fifth failure ConnectToSkypeAPI() is called and one final
 * attempt is made; if that fails as well, bail_out(1) is called.
 * Messages other than "PING" are logged before sending.
 * Does nothing except log when no Skype window handle is known.
 */
void SkypeSend(char *szMsg) {
	COPYDATASTRUCT payload;
	int failures = 0;

	if (!hSkypeWnd) {
		LOG(("SkypeSend: DAMN! No Skype window handle! :("));
		return;
	}

	/* PINGs are kept out of the log */
	if (strcmp(szMsg, "PING") != 0) {
		LOG(("> %s", szMsg));
	}

	payload.dwData = 0;
	payload.lpData = szMsg;
	payload.cbData = strlen(szMsg) + 1;

	for (;;) {
		if (SendMessageTimeout(hSkypeWnd, WM_COPYDATA, (WPARAM)hWnd, (LPARAM)&payload, SMTO_ABORTIFHUNG, 3000, NULL)) {
			break;
		}

		failures++;
		LOG(("SkypeSend: failed, try #%d", failures));

		if (failures == 5) {
			OUTPUT("Sending message to Skype failed too often.");
			OUTPUT("Skype may have died unexpectedly, I will try to restart it.");
			ConnectToSkypeAPI((void *)TRUE);
			OUTPUT("Restart complete. Trying to deliver message one more time.");
			if (SendMessageTimeout(hSkypeWnd, WM_COPYDATA, (WPARAM)hWnd, (LPARAM)&payload, SMTO_ABORTIFHUNG, 3000, NULL)) {
				OUTPUT("Now it worked! :)");
			} else {
				OUTPUT("It still failed. Skype seems to be completely f*cked up. I've given up. Bye..");
				bail_out(1);
			}
			/* Either way this was the last attempt */
			break;
		}

		Sleep(1000);
	}
}
Exemple #11
0
/*
 * Runs options.iter sweeps of update() over an options.n x options.m grid
 * and prints the elapsed loop time to stderr.
 *
 * A scratch matrix of the same shape is allocated here. After every sweep
 * the local primary/secondary pointers are swapped, so each sweep works on
 * the output of the previous one. When the number of sweeps is odd the
 * latest data sits in the scratch rows; it is copied back into the caller's
 * rows before the scratch matrix is freed. The caller's `primary` therefore
 * always ends up holding the final result.
 *
 * NOTE(review): update() presumably reads its first matrix argument and
 * writes the second; confirm against its definition.
 */
void iterate(double **primary, double **vectors)
{
  double start, finish;

  double **secondary = malloc(options.n * sizeof(double*));
  if (secondary == NULL){
    bail_out(EXIT_FAILURE, "malloc secondary");
  }
  for (int i = 0; i < options.n; i++){
    secondary[i] = calloc(options.m, sizeof(double));
    if (secondary[i] == NULL){
      bail_out(EXIT_FAILURE, "malloc secondary[%d]", i);
    }
  }

  start = omp_get_wtime();
  for (int i = 0; i < options.iter; i++){
    for(int j = 0; j < options.n; j++){
      for(int k = 0; k < options.m; k++){
        update(primary, secondary, j, k, vectors);
      }
    }
    /* The sweep's output becomes the next sweep's input */
    double **temp = primary;
    primary = secondary;
    secondary = temp;
  }
  finish = omp_get_wtime();

  double elapsed = finish - start;
  fprintf(stderr,"loop time = %f\n", elapsed);

  if (options.iter % 2 == 1){
    /* Odd sweep count: `primary` currently names the scratch rows. Swap
       back so `primary` is the caller's matrix again, then copy the result
       into it. */
    double **temp = primary;
    primary = secondary;
    secondary = temp;
    for (int i = 0; i < options.n; i++){
      memcpy(primary[i],secondary[i],options.m * sizeof(double));
    }
  }

  for (int i = 0; i < options.n; i++){
    free(secondary[i]);
  }
  free(secondary);
}
Exemple #12
0
// Declares how many tests are expected to run.
//
// May only be called once: a second call is reported through bail_out().
// On the first call the plan is emitted via output_plan() and the count is
// remembered in `expected`.
void plan(unsigned tests) throw(fatal_exception) {
    if (is_planned)
        bail_out("Can't plan again!");

    is_planned = true;
    output_plan(tests);
    expected = tests;
}
Exemple #13
0
/*
 * Writes image `i` for the unit stored at img[i].imgunits[unit_index] by
 * delegating to write4Dfits().
 *
 * Unit values outside 0..5 are rejected: the error is reported through
 * bail_out() (unless silent) and the program exits with status 1.
 */
void writeFits(const int i, const int unit_index, configInfo *par, imageInfo *img){
  int unitI = img[i].imgunits[unit_index];

  /* Negative values are just as invalid as values above the last unit */
  if(unitI<0 || unitI>5){
    if(!silent) bail_out("Image unit number invalid");
    exit(1);
  }
  write4Dfits(i, unit_index, par, img);
}
Exemple #14
0
/*
 * Starts the command given in argv[1..] in the background and then serves
 * one-character commands read from stdin.
 *
 * A child is forked which makes itself leader of a new process group, forks
 * a grandchild that exec's the command, and exits immediately. The parent
 * waits for that child and then reads commands:
 *   'K'   send SIGINT to the child's process group and echo 'K' to stdout
 *   '\n'  ignored (for interactive testing)
 *   other report the unknown command and exit(-1)
 * End of input on stdin ends the program with status 0.
 */
int main(int argc, char* argv[])
{
  pid_t child;
  int ret; 
  char cmd;
  int child_exit;

  if (argc < 2) {
    fprintf(stderr, "Must specify command to run in background\n");
    exit(-1);
  }

  TRY(child=fork());
  if (child == 0) {
    /* Child: become a process group leader, spawn the command, leave */
    pid_t gchild;
    TRY(setpgid(getpid(), getpid()));

    TRY(gchild=fork());
    if (gchild == 0) {
      /* Grandchild: becomes the requested command */
      TRY(execvp(argv[1],&argv[1]));      
    }
    exit(0);
  }

  /* Parent: arm a ten minute alarm in case nothing happens */
  signal(SIGALRM, alarm_handler);
  alarm(10*60);

  TRY(wait(&child_exit));
  if (!(WIFEXITED(child_exit) && WEXITSTATUS(child_exit)==0)) {
    fprintf(stderr, "child did not exit normally (status=%d)\n", child_exit);
    exit(-1);
  }

  while (1) {
    TRY(ret=read(STDIN_FILENO, &cmd, 1));
    if (ret == 0)
      break; /* eof -> exit */

    if (cmd == '\n')
      continue;

    if (cmd != 'K') {
      fprintf(stderr, "Unknown command '%c'\n", cmd);
      exit(-1);
    }

    /* Negative pid: signal the child's whole process group */
    ret = kill(-child, SIGINT);
    if (ret < 0 && errno != ESRCH) {
      bail_out("kill failed");
    }
    write(STDOUT_FILENO, &cmd, 1); /* echo ack */
  }

  return 0;
}
Exemple #15
0
/**
 * @brief Initialize signal handling
 * @details Resets the quit flag and installs `handler` for SIGTERM and
 *          SIGINT. All signals are blocked while the handler runs.
 *          Global variables: quit
 */
static void sig_init()
{
	const int signals[] = {SIGTERM, SIGINT};
	struct sigaction sa;
	size_t i;

	/* Initialize sigaction */
	quit = 0;
	sa.sa_handler = handler;
	sa.sa_flags = 0;
	if (sigfillset(&sa.sa_mask) == -1)
		bail_out(EXIT_FAILURE, "sigfillset");

	/* Assign sigactions */
	for (i = 0; i < sizeof(signals) / sizeof(signals[0]); i++) {
		if (sigaction(signals[i], &sa, NULL) == -1)
			bail_out(EXIT_FAILURE, "sigaction");
	}
}
Exemple #16
0
/*
 * Window procedure for the window that exchanges messages with Skype.
 *
 * WM_COPYDATA coming from the known Skype window carries one text message:
 *   - it is copied and acknowledged with ReplyMessage(1);
 *   - "PONG" only sets the WatchDog flag and is not logged or forwarded;
 *   - "USERSTATUS LOGGEDOUT" triggers bail_out(1);
 *   - otherwise, if a client socket is connected, the message is sent to it
 *     as a length field followed by the text (without terminator).
 * WM_DESTROY posts the quit message.
 * The registered ControlAPIAttach message records the attach status, stores
 * the Skype window handle on success, and signals SkypeReady for every
 * status other than SKYPECONTROLAPI_ATTACH_API_AVAILABLE.
 * Everything else goes to DefWindowProc().
 *
 * Returns 1 for the messages handled here.
 */
LONG APIENTRY WndProc(HWND hWnd, UINT message, UINT wParam, LONG lParam) 
{ 
	PCOPYDATASTRUCT CopyData;
	char *szSkypeMsg=NULL;

	switch (message)
	{
		case WM_COPYDATA:
			if(hSkypeWnd==(HWND)wParam) {
				CopyData=(PCOPYDATASTRUCT)lParam;
				szSkypeMsg=strdup(CopyData->lpData);
				ReplyMessage(1);
				if (!szSkypeMsg) {
					/* strdup() ran out of memory; the message cannot be
					   inspected or forwarded */
					OUTPUT("Out of memory, dropping message from Skype :(");
					break;
				}
				if (!strcmp(szSkypeMsg, "PONG")) {
					WatchDog=1;
					break;
				} // Hide PING-PONG
				LOG(("< %s", szSkypeMsg));
				if (!strcmp(szSkypeMsg, "USERSTATUS LOGGEDOUT")) {
					OUTPUT("Skype shut down gracefully. I'll leave too, bye.. :)");
					bail_out(1);
				}
#ifdef USE_AUTHENTICATION
				if (password && !Authenticated) break;
#endif
				if (AcceptSocket!=INVALID_SOCKET) {
					unsigned int length=strlen(szSkypeMsg);

					if (send(AcceptSocket, (char *)&length, sizeof(length), 0)==SOCKET_ERROR ||
						send(AcceptSocket, szSkypeMsg, length, 0)==SOCKET_ERROR)
						OUTPUT("Cannot send to client :(");
				}
			}
			break;

		case WM_DESTROY:
			PostQuitMessage(0);
			break;

		default:
			if(message==ControlAPIAttach) {
				// Skype responds with Attach to the discover-message
				AttachStatus=lParam;
				if (AttachStatus==SKYPECONTROLAPI_ATTACH_SUCCESS)
					hSkypeWnd=(HWND)wParam;	   // Skype gave us the communication window handle
				if (AttachStatus!=SKYPECONTROLAPI_ATTACH_API_AVAILABLE)
					SetEvent(SkypeReady);
				break;
			}
			return (DefWindowProc(hWnd, message, wParam, lParam));
	}
	if (szSkypeMsg) free(szSkypeMsg);
	return 1;
} 
Exemple #17
0
/*
 * Converts a binary file into a C header that defines a byte array.
 *
 * Usage: <program> <input file> <output file> <array name>
 *
 * The header consists of the GPL text, an include guard named
 * <array name>_H and `const unsigned char <array name>[]` initialised with
 * the input bytes, twelve per line. An empty input file is rejected because
 * it would produce an array without initialiser.
 *
 * Returns 0 on success and after printing the usage text; errors are
 * reported through bail_out().
 */
int main(int argc, char** argv) {
	FILE *fi, *fo;
	int c, i;

	self = argv[0];

	if (argc != 4) {
		usage();
		return 0;
	}

	if ((fi = fopen(argv[1], "rb")) == 0)
		bail_out("Cannot open input file ", argv[1]);

	/* Look at the first byte before touching the output file, so an empty
	   input does not truncate an existing header. */
	if ((c = fgetc(fi)) == EOF)
		bail_out("Input file is empty or unreadable ", argv[1]);

	if ((fo = fopen(argv[2], "w")) == 0)
		bail_out("Cannot open output file ", argv[2]);

	/* GPL is plain text, not a format string */
	fputs(GPL, fo);
	fprintf(fo, "#ifndef %s_H\n", argv[3]);
	fprintf(fo, "#define %s_H\n\n", argv[3]);
	fprintf(fo, "const unsigned char %s[] = {\n", argv[3]);
	fprintf(fo, c < 16 ? "   0x%02x" : "    0x%02x", (unsigned char) c);

	/* i counts the bytes already written on the current line */
	i = 1;
	while ((c = fgetc(fi)) != EOF) {
		if (i < 12)
			fprintf(fo, ", 0x%02x", (unsigned char) c);
		else {
			fprintf(fo, ",\n   0x%02x", (unsigned char) c);
			i = 0;
		}
		i++;
	}
	if (ferror(fi))
		bail_out("Error reading input file ", argv[1]);

	fprintf(fo, "\n};\n\n");
	fprintf(fo, "#endif\n");

	fclose(fi);
	/* fclose() flushes; a failure here means the header is incomplete */
	if (fclose(fo) == EOF)
		bail_out("Error writing output file ", argv[2]);

	printf("converted %s\n", argv[1]);

	return 0;
}
Exemple #18
0
/*....................................................................*/
/*
 * Verifies the result of an fwrite() call.
 *
 * fwriteResult: value returned by fwrite().
 * expectedNum:  number of items that should have been written.
 * message:      description of the data, used in the error text.
 *
 * On a mismatch the error is reported through bail_out() (unless silent)
 * and the program exits with status 1.
 */
void checkFwrite(const size_t fwriteResult, const size_t expectedNum, char *message){
  char string[STR_LEN_0];

  if(fwriteResult!=expectedNum){
    if(!silent){
      /* %zu prints size_t without truncating large counts to int */
      snprintf(string, STR_LEN_0, "fwrite() failed to write %s. Expected %zu got %zu", message, expectedNum, fwriteResult);
      bail_out(string);
    }
    exit(1);
  }
}
Exemple #19
0
/*....................................................................*/
/*
 * Verifies the result of an fscanf() call.
 *
 * fscanfResult: value returned by fscanf(), i.e. the number of input items
 *               assigned (or EOF).
 * expectedNum:  number of items that should have been assigned.
 * message:      description of the data, used in the error text.
 *
 * On a mismatch the error is reported through bail_out() (unless silent)
 * and the program exits with status 1.
 */
void checkFscanf(const int fscanfResult, const int expectedNum, char *message){
  char string[STR_LEN_0];

  if(fscanfResult!=expectedNum){
    if(!silent){
      /* fscanf() counts converted items, not bytes */
      snprintf(string, STR_LEN_0, "fscanf() failed to read %s - read %d items when %d expected.", message, fscanfResult, expectedNum);
      bail_out(string);
    }
    exit(1);
  }
}
Exemple #20
0
/*....................................................................*/
/*
 * Verifies the result of an fgets() call.
 *
 * fgetsResult: value returned by fgets(); NULL is treated as failure.
 * message:     description of the data, used in the error text.
 *
 * On failure the error is reported through bail_out() (unless silent) and
 * the program exits with status 1.
 */
void checkFgets(char *fgetsResult, char *message){
  char errText[STR_LEN_0];

  if(fgetsResult!=NULL)
    return;

  if(!silent){
    snprintf(errText, STR_LEN_0, "fgets() failed to read %s", message);
    bail_out(errText);
  }
  exit(1);
}
Exemple #21
0
/*
 * Allocates a sizex * sizey block of DTYPE with upc_alloc() plus a table of
 * sizey row pointers into it.
 *
 * Row pointer y is set to the start of row y minus offsetx, and the table
 * pointer itself is moved back by offsety before it is returned, so the
 * result is meant to be indexed as result[offsety + y][offsetx + x].
 *
 * Allocation failures are reported through bail_out().
 */
local_shared_block_ptrs shared_2d_array_alloc(int sizex, int sizey, int offsetx, int offsety){
  /* Widen before multiplying so the product is not computed in int */
  long int alloc_size = (long int)sizex * sizey * sizeof(DTYPE);
  local_shared_block ptr;

  ptr = upc_alloc(alloc_size);
  if(ptr == NULL)
    bail_out("Failing shared allocation of %ld bytes", alloc_size);

  long int line_ptrs_size = sizeof(local_shared_block) * sizey;
  local_shared_block_ptrs line_ptrs = upc_alloc(line_ptrs_size);
  if(line_ptrs == NULL)
    bail_out("Failing shared allocation of %ld bytes", line_ptrs_size);

  for(int y=0; y<sizey; y++){
    line_ptrs[y] = ptr + (y * sizex) - offsetx;
  }

  line_ptrs -= offsety;

  return line_ptrs;
}
Exemple #22
0
/*
 * Reads the next non-empty entry from in_stream.
 *
 * Characters are consumed up to a newline or end of input. Only letters and
 * spaces are kept; letters are converted to upper case and every other
 * character is dropped. Lines that contribute no characters are skipped.
 *
 * Returns a newly allocated, NUL-terminated string owned by the caller, or
 * NULL once the input is exhausted. Read errors, allocation failures and
 * entries reaching MAX_DATA characters are reported through bail_out().
 */
static char *get_strings() 
{
    char *string = NULL;
    /* int, not char: fgetc() returns EOF outside the range of valid bytes,
     * and the loop condition reads ch before the first fgetc() call. */
    int ch = 0;
    size_t len = 0;

    while (string == NULL && ch != EOF) {
        while (EOF != (ch = fgetc(in_stream)) && ch != '\n') {
            if (ch != ' ' && isalpha(ch) == 0) {
                continue;
            }
            /* Room for this character plus the terminator */
            char *grown = realloc(string, len+2);
            if (grown == NULL) {
                bail_out(EXIT_FAILURE, "realloc(3) failed");
            }
            string = grown;
            string[len++] = (char) toupper(ch);

            if (len >= MAX_DATA) {
                bail_out(EXIT_FAILURE, "Input too long\n");
            }
        }
        if (ferror(in_stream)) {
            bail_out(EXIT_FAILURE, "Error while reading from stream");
        }
    }

    if (string == NULL) {
        printf("\nFinished dictionary...\n");
        return string;
    }
    string[len] = '\0';

    DEBUG("Added string: %s | Input another [a-z] or end with CTRL+D: ", string); 
    return string;
}
Exemple #23
0
/*
 * Reads all dictionary entries via get_strings() into the global `strings`
 * array and records the number of stored entries in dict_size.
 *
 * Empty entries are discarded. Allocation failures are reported through
 * bail_out().
 * Global variables: string, strings, dict_size
 */
static void read_dict()
{
    int count = 0;

    while ((string = get_strings()) != NULL) {
        if (string[0] == '\0') {
            /* Skip without consuming a slot, so the array stays dense and
             * dict_size only counts entries that were stored. */
            free(string);
            continue;
        }
        char **grown = realloc(strings, (count+1)*sizeof(*strings));
        if (grown == NULL) {
            bail_out(EXIT_FAILURE, "realloc(3) failed");
        }
        strings = grown;
        strings[count++] = string;
    }

    /* Take a note of how many entries we have yet. */
    dict_size = count;
}
Exemple #24
0
/*
 * Builds a privately allocated table of row pointers for the rows that
 * belong to `thread`.
 *
 * The table has thread_sizey[thread] entries and is shifted back by
 * thread_offsety[thread], so it is indexed with the same row numbers as
 * `array`: entry y holds the address of array[y][0] for every y in
 * [offset, offset + size).
 *
 * Allocation failure is reported through bail_out().
 */
private_shared_block_ptrs partially_privatize(local_shared_block_ptrs array, int thread){
  int num_rows  = thread_sizey[thread];
  int first_row = thread_offsety[thread];
  int end_row   = first_row + num_rows;

  long int alloc_size = num_rows * sizeof(local_shared_block);
  private_shared_block_ptrs ptr = prk_malloc(alloc_size);
  if(ptr == NULL)
    bail_out("Unable to allocate array2");

  /* Bias the table so it can be indexed with the original row numbers */
  ptr -= first_row;

  int y = first_row;
  while(y < end_row){
    ptr[y] = (&array[y][0]);
    y++;
  }

  return ptr;
}
Exemple #25
0
/**
 * Cleanup parent process
 * @brief Close fds and files and wait for the end of the child process,
 *        which is triggered by closing the pipe.
 * @details A failing wait() is reported through bail_out().
 *          global variables: {writing,reading}_pipe, pipes_saved
 */
static void cleanup_parent()
{
	/* Streams first, then the two saved descriptors */
	(void) fclose(reading_pipe);
	(void) fclose(writing_pipe);
	(void) close(pipes_saved[1]);
	(void) close(pipes_saved[2]);

	DEBUG ("WAIT PARENT\n");

	if (wait(NULL) == -1) {
		bail_out("Wait: No child process found");
	}

	DEBUG ("EXIT PARENT\n");
}
Exemple #26
0
/*
 * Decides whether a candidate point at (x,y,z) is accepted.
 *
 * The densities returned by density() are summed over the first
 * par->collPart entries, once at the reference position
 * (minScale,minScale,minScale) and once at the candidate position. The
 * acceptance weight is (candidate sum / reference sum)^0.2; a second weight
 * is fixed at 0.
 *
 * Returns 1 when `ran` is below either weight, 0 otherwise. A non-positive
 * reference sum is reported through bail_out() (unless silent) and the
 * program exits with status 1.
 */
int
pointEvaluation(inputPars *par,double ran, double x, double y, double z){
  double val[99];
  double referenceSum=0.0, localSum=0.0;
  double weight1, weight2;
  int k;

  /* Reference density */
  density(par->minScale,par->minScale,par->minScale,val);
  k=0;
  while(k<par->collPart){
    referenceSum += val[k];
    k++;
  }
  if (referenceSum<=0.){
    if(!silent) bail_out("Error: Sum of reference densities equals 0");
    exit(1);
  }

  /* Density at the candidate position */
  density(x,y,z,val);
  k=0;
  while(k<par->collPart){
    localSum += val[k];
    k++;
  }

  weight1=pow(localSum/referenceSum,0.2);
  weight2=0.;

  return (ran < weight1 || ran < weight2) ? 1 : 0;
}
Exemple #27
0
/*
 * Allocates and initializes the grid of charges for one tile.
 *
 * The grid covers the inclusive ranges [tile.left, tile.right] and
 * [tile.bottom, tile.top]. Storage is laid out so that the entries sharing
 * one x value are contiguous; every entry is Q when x is even and -Q when x
 * is odd.
 *
 * An allocation failure is printed together with the MPI rank and passed on
 * to bail_out() as a non-zero error flag. Returns the allocated grid.
 */
double *initializeGrid(bbox_t tile) {
  double   *grid;
  uint64_t col, row, width, height;
  int      failed=0, rank;

  width  = tile.right-tile.left+1;
  height = tile.top-tile.bottom+1;

  grid = (double*) prk_malloc(width*height*sizeof(double));
  if (!grid) {
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    printf("ERROR: Process %d could not allocate space for grid\n", rank);
    failed = 1;
  }
  bail_out(failed);

  /* So far supporting only initialization with dipoles */
  for (row=tile.bottom; row<=tile.top; row++) {
    for (col=tile.left; col<=tile.right; col++) {
      if (col%2 == 0)
        grid[row-tile.bottom+(col-tile.left)*height] = Q;
      else
        grid[row-tile.bottom+(col-tile.left)*height] = -Q;
    }
  }
  return grid;
}
Exemple #28
0
/*
 * OpenMP matrix transpose kernel: B = A^T.
 *
 * Usage: <program> <# threads> <# iterations> <matrix order> [tile size]
 *
 * A is filled with order*j + i at position (i,j) and B with zeros. The
 * transpose is then applied iterations + 1 times (iteration 0 is a warm-up
 * that is not timed); each pass adds A^T to B and increments A by one. The
 * result is checked with test_results() and, when it validates, the data
 * rate and average time per iteration are printed.
 *
 * A tile size that is not positive, or not smaller than the matrix order,
 * disables tiling.
 *
 * Exits with EXIT_SUCCESS when the solution validates and EXIT_FAILURE on
 * invalid input, allocation failure or a validation error.
 */
int main(int argc, char ** argv) {

  long   order;         /* order of a the matrix                           */
  int    Tile_order=32; /* default tile size for tiling of local transpose */
  int    iterations;    /* number of times to do the transpose             */
  int    tiling;        /* boolean: true if tiling is used                 */
  int    i, j, it, jt, iter;  /* dummies                                   */
  double bytes;         /* combined size of matrices                       */
  double * RESTRICT A;  /* buffer to hold original matrix                  */
  double * RESTRICT B;  /* buffer to hold transposed matrix                */
  double abserr;        /* absolute error                                  */
  double epsilon=1.e-8; /* error tolerance                                 */
  double transpose_time,/* timing parameters                               */
         avgtime;
  int    nthread_input, 
         nthread;
  int    num_error=0;     /* flag that signals that requested and 
                             obtained numbers of threads are the same      */

  /*********************************************************************
  ** read and test input parameters
  *********************************************************************/

  printf("Parallel Research Kernels version %s\n", PRKVERSION);
  printf("OpenMP Matrix transpose: B = A^T\n");

  if (argc != 4 && argc != 5){
    printf("Usage: %s <# threads> <# iterations> <matrix order> [tile size]\n",
           *argv);
    exit(EXIT_FAILURE);
  }

  /* Take number of threads to request from command line */
  nthread_input = atoi(*++argv); 

  if ((nthread_input < 1) || (nthread_input > MAX_THREADS)) {
    printf("ERROR: Invalid number of threads: %d\n", nthread_input);
    exit(EXIT_FAILURE);
  }

  omp_set_num_threads(nthread_input);

  iterations  = atoi(*++argv); 
  if (iterations < 1){
    printf("ERROR: iterations must be >= 1 : %d \n",iterations);
    exit(EXIT_FAILURE);
  }

  /* order is a long, so parse it as one; zero is rejected as well, as the
     error text already states */
  order = atol(*++argv); 
  if (order <= 0){
    printf("ERROR: Matrix Order must be greater than 0 : %ld \n", order);
    exit(EXIT_FAILURE);
  }

  if (argc == 5) Tile_order = atoi(*++argv);
  /* a non-positive tile size means no tiling of the local transpose */
  tiling = (Tile_order > 0) && (Tile_order < order);
  if (!tiling) Tile_order = order;

  /*********************************************************************
  ** Allocate space for the input and transpose matrix
  *********************************************************************/

  A   = (double *)malloc(order*order*sizeof(double));
  if (A == NULL){
    printf(" ERROR: cannot allocate space for input matrix: %zu\n", 
           order*order*sizeof(double));
    exit(EXIT_FAILURE);
  }
  B  = (double *)malloc(order*order*sizeof(double));
  if (B == NULL){
    printf(" ERROR: cannot allocate space for output matrix: %zu\n", 
           order*order*sizeof(double));
    exit(EXIT_FAILURE);
  }

  bytes = 2.0 * sizeof(double) * order * order;

  #pragma omp parallel private (iter)
  {  

  #pragma omp master
  {
  nthread = omp_get_num_threads();
  if (nthread != nthread_input) {
    num_error = 1;
    printf("ERROR: number of requested threads %d does not equal ",
           nthread_input);
    printf("number of spawned threads %d\n", nthread);
  } 
  else {
    printf("Number of threads     = %i;\n",nthread_input);
    printf("Matrix order          = %ld\n", order);
    printf("Number of iterations  = %d\n", iterations);
    if (tiling) {
      printf("Tile size             = %d\n", Tile_order);
#ifdef COLLAPSE
      printf("Using loop collapse\n");
#endif
    }
    else                   
      printf("Untiled\n");
  }
  }
  bail_out(num_error);

  /*  Fill the original matrix, set transpose to known garbage value. */

  if (tiling) {
#ifdef COLLAPSE
    #pragma omp for private (i,it,jt) collapse(2)
#else
    #pragma omp for private (i,it,jt)
#endif
    for (j=0; j<order; j+=Tile_order) 
      for (i=0; i<order; i+=Tile_order) 
        for (jt=j; jt<MIN(order,j+Tile_order);jt++)
          for (it=i; it<MIN(order,i+Tile_order); it++){
            A(it,jt) = (double) (order*jt + it);
            B(it,jt) = 0.0;
          }
  }
  else {
    #pragma omp for private (i)
    for (j=0;j<order;j++) 
      for (i=0;i<order; i++) {
        A(i,j) = (double) (order*j + i);
        B(i,j) = 0.0;
      }
  }

  for (iter = 0; iter<=iterations; iter++){

    /* start timer after a warmup iteration                                        */
    if (iter == 1) { 
      #pragma omp barrier
      #pragma omp master
      {
        transpose_time = wtime();
      }
    }

    /* Transpose the  matrix                                                       */
    if (!tiling) {
      #pragma omp for private (j)
      for (i=0;i<order; i++) 
        for (j=0;j<order;j++) { 
          B(j,i) += A(i,j);
          A(i,j) += 1.0;
        }
    }
    else {
#ifdef COLLAPSE
      #pragma omp for private (j,it,jt) collapse(2)
#else
      #pragma omp for private (j,it,jt)
#endif
      for (i=0; i<order; i+=Tile_order) 
        for (j=0; j<order; j+=Tile_order) 
          for (it=i; it<MIN(order,i+Tile_order); it++) 
            for (jt=j; jt<MIN(order,j+Tile_order);jt++) {
              B(jt,it) += A(it,jt);
              A(it,jt) += 1.0;
            } 
    }	

  }  /* end of iter loop  */

  #pragma omp barrier
  #pragma omp master
  {
    transpose_time = wtime() - transpose_time;
  }

  } /* end of OpenMP parallel region */

  abserr =  test_results (order, B, iterations);

  /*********************************************************************
  ** Analyze and output results.
  *********************************************************************/

  if (abserr < epsilon) {
    printf("Solution validates\n");
    avgtime = transpose_time/iterations;
    printf("Rate (MB/s): %lf Avg time (s): %lf\n",
           1.0E-06 * bytes/avgtime, avgtime);
#ifdef VERBOSE
    printf("Squared errors: %f \n", abserr);
#endif
    exit(EXIT_SUCCESS);
  }
  else {
    printf("ERROR: Aggregate squared error %lf exceeds threshold %e\n",
           abserr, epsilon);
    exit(EXIT_FAILURE);
  }

}  /* end of main */
Exemple #29
0
int main(int argc, char **argv)
{
    long int j, iter;       /* dummies                                     */
    double   scalar;        /* constant used in Triad operation            */
    int      iterations;    /* number of times vector loop gets repeated   */
    long int length,        /* vector length per processor                 */
         total_length,  /* total vector length                         */
         offset;        /* offset between vectors a and b, and b and c */
    double   bytes;         /* memory IO size                              */
    size_t   space;         /* memory used for a single vector             */
    double   nstream_time,  /* timing parameters                           */
             avgtime = 0.0,
             maxtime = 0.0,
             mintime = 366.0*8760.0*3600.0; /* set the minimum time to a
                             large value; one leap year should be enough */
    int      Num_procs,     /* process parameters                          */
             my_ID,         /* rank of calling process                     */
             root=0;        /* ID of master process                        */
    int      error=0;       /* error flag for individual process           */

    /**********************************************************************************
    * process and test input parameters
    ***********************************************************************************/

    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&Num_procs);
    MPI_Comm_rank(MPI_COMM_WORLD,&my_ID);

    if (my_ID == root) {
        printf("MPI stream triad: A = B + scalar*C\n");
        if (argc != 4) {
            printf("Usage:  %s <# iterations> <vector length> <offset>\n", *argv);
            error = 1;
            goto ENDOFTESTS;
        }

        iterations   = atoi(*++argv);
        if (iterations < 1) {
            printf("ERROR: Invalid number of iterations: %d\n", iterations);
            error = 1;
            goto ENDOFTESTS;
        }

        total_length = atol(*++argv);
        if (total_length < Num_procs) {
            printf("ERROR: Invalid vector length: %ld\n", total_length);
            error = 1;
            goto ENDOFTESTS;
        }
        else length = total_length/Num_procs;

        offset       = atol(*++argv);
        if (offset < 0) {
            printf("ERROR: Invalid array offset: %ld\n", offset);
            error = 1;
            goto ENDOFTESTS;
        }
#ifdef STATIC_ALLOCATION
        if ((3*length + 2*offset) > N) {
            printf("ERROR: vector length/offset %ld/%ld too ", total_length, offset);
            printf("large; increase MAXLENGTH in Makefile or decrease vector length\n");
            error = 1;
            goto ENDOFTESTS;
        }
#endif
ENDOFTESTS:
        ;
    }
    bail_out(error);

    /* broadcast initialization data */
    MPI_Bcast(&length,1, MPI_LONG, root, MPI_COMM_WORLD);
    MPI_Bcast(&offset,1, MPI_LONG, root, MPI_COMM_WORLD);
    MPI_Bcast(&iterations,1, MPI_INT, root, MPI_COMM_WORLD);

#ifndef STATIC_ALLOCATION
    space = (3*length + 2*offset)*sizeof(double);
    a = (double *) malloc(space);
    if (!a && my_ID == root) {
        printf("ERROR: Could not allocate %ld bytes for vectors\n", (long int)space);
        error = 1;
    }
    bail_out(error);
#endif
    b = a + length + offset;
    c = b + length + offset;

    bytes   = 3.0 * sizeof(double) * length * Num_procs;

    if (my_ID == root) {
        printf("Number of processes  = %d\n", Num_procs);
        printf("Vector length        = %ld\n", total_length);
        printf("Offset               = %ld\n", offset);
        printf("Number of iterations = %d\n", iterations);
    }

#pragma vector always
    for (j=0; j<length; j++) {
        a[j] = 0.0;
        b[j] = 2.0;
        c[j] = 2.0;
    }

    /* --- MAIN LOOP --- repeat Triad iterations times --- */

    scalar = SCALAR;

    for (iter=0; iter<iterations; iter++) {

        MPI_Barrier(MPI_COMM_WORLD);
        if (my_ID == root) {
            nstream_time = wtime();
        }

#pragma vector always
        for (j=0; j<length; j++) a[j] = b[j]+scalar*c[j];

        if (my_ID == root) {
            if (iter>0 || iterations==1) { /* skip the first iteration */
                nstream_time = wtime() - nstream_time;
                avgtime = avgtime + nstream_time;
                mintime = MIN(mintime, nstream_time);
                maxtime = MAX(maxtime, nstream_time);
            }
        }

        /* insert a dependency between iterations to avoid dead-code elimination */
#pragma vector always
        for (j=0; j<length; j++) b[j] = a[j];
    }

    /*********************************************************************
    ** Analyze and output results.
    *********************************************************************/

    if (my_ID == root) {
        if (checkTRIADresults(iterations, length)) {
            avgtime = avgtime/(double)(MAX(iterations-1,1));
            printf("Rate (MB/s): %lf, Avg time (s): %lf, Min time (s): %lf",
                   1.0E-06 * bytes/mintime, avgtime, mintime);
            printf(", Max time (s): %lf\n", maxtime);
        }
        else error = 1;
    }
    bail_out(error);
    MPI_Finalize();
}
Exemple #30
0
int main(int argc, char ** argv) {
 
  int    Num_procs;       /* number of ranks                                     */
  int    Num_procsx, Num_procsy; /* number of ranks in each coord direction      */
  int    my_ID;           /* SHMEM rank                                          */
  int    my_IDx, my_IDy;  /* coordinates of rank in rank grid                    */
  int    right_nbr;       /* global rank of right neighboring tile               */
  int    left_nbr;        /* global rank of left neighboring tile                */
  int    top_nbr;         /* global rank of top neighboring tile                 */
  int    bottom_nbr;      /* global rank of bottom neighboring tile              */
  DTYPE *top_buf_out;     /* communication buffer                                */
  DTYPE *top_buf_in[2];   /*       "         "                                   */
  DTYPE *bottom_buf_out;  /*       "         "                                   */
  DTYPE *bottom_buf_in[2];/*       "         "                                   */
  DTYPE *right_buf_out;   /*       "         "                                   */
  DTYPE *right_buf_in[2]; /*       "         "                                   */
  DTYPE *left_buf_out;    /*       "         "                                   */
  DTYPE *left_buf_in[2];  /*       "         "                                   */
  int    root = 0;
  int    n, width, height;/* linear global and local grid dimension              */
  int    i, j, ii, jj, kk, it, jt, iter, leftover;  /* dummies                   */
  int    istart, iend;    /* bounds of grid tile assigned to calling rank        */
  int    jstart, jend;    /* bounds of grid tile assigned to calling rank        */
  DTYPE  reference_norm;
  DTYPE  f_active_points; /* interior of grid with respect to stencil            */
  int    stencil_size;    /* number of points in the stencil                     */
  DTYPE  flops;           /* floating point ops per iteration                    */
  int    iterations;      /* number of times to run the algorithm                */
  double avgtime,         /* timing parameters                                   */
         *local_stencil_time, *stencil_time; 
  DTYPE  * RESTRICT in;   /* input grid values                                   */
  DTYPE  * RESTRICT out;  /* output grid values                                  */
  long   total_length_in; /* total required length to store input array          */
  long   total_length_out;/* total required length to store output array         */
  int    error=0;         /* error flag                                          */
  DTYPE  weight[2*RADIUS+1][2*RADIUS+1]; /* weights of points in the stencil     */
  int    *arguments;      /* command line parameters                             */
  int    count_case=4;    /* number of neighbors of a rank                       */
  long   *pSync_bcast;    /* work space for collectives                          */
  long   *pSync_reduce;   /* work space for collectives                          */
  double *pWrk_time;      /* work space for collectives                          */
  DTYPE  *pWrk_norm;      /* work space for collectives                          */
  int    *iterflag;       /* synchronization flags                               */
  int    sw;              /* double buffering switch                             */
  DTYPE  *local_norm, *norm; /* local and global error norms                     */

  /*******************************************************************************
  ** Initialize the SHMEM environment
  ********************************************************************************/
  prk_shmem_init();

  my_ID=prk_shmem_my_pe();
  Num_procs=prk_shmem_n_pes();

  pSync_bcast        = (long *)   prk_shmem_malloc(PRK_SHMEM_BCAST_SYNC_SIZE*sizeof(long));
  pSync_reduce       = (long *)   prk_shmem_malloc(PRK_SHMEM_REDUCE_SYNC_SIZE*sizeof(long));
  pWrk_time          = (double *) prk_shmem_malloc(PRK_SHMEM_REDUCE_MIN_WRKDATA_SIZE*sizeof(double));
  pWrk_norm          = (DTYPE *)  prk_shmem_malloc(PRK_SHMEM_REDUCE_MIN_WRKDATA_SIZE*sizeof(DTYPE));
  local_stencil_time = (double *) prk_shmem_malloc(sizeof(double));
  stencil_time       = (double *) prk_shmem_malloc(sizeof(double));
  local_norm         = (DTYPE *)  prk_shmem_malloc(sizeof(DTYPE));
  norm               = (DTYPE *)  prk_shmem_malloc(sizeof(DTYPE));
  iterflag           = (int *)    prk_shmem_malloc(2*sizeof(int));
  if (!(pSync_bcast && pSync_reduce && pWrk_time && pWrk_norm && iterflag &&
	local_stencil_time && stencil_time && local_norm && norm))
  {
    printf("Could not allocate scalar variables on rank %d\n", my_ID);
    error = 1;
  }
  bail_out(error);

  for(i=0;i<PRK_SHMEM_BCAST_SYNC_SIZE;i++)
    pSync_bcast[i]=PRK_SHMEM_SYNC_VALUE;

  for(i=0;i<PRK_SHMEM_REDUCE_SYNC_SIZE;i++)
    pSync_reduce[i]=PRK_SHMEM_SYNC_VALUE;

  arguments=(int*)prk_shmem_malloc(2*sizeof(int));
 
  /*******************************************************************************
  ** process, test, and broadcast input parameters    
  ********************************************************************************/
 
  if (my_ID == root) {
#ifndef STAR
    printf("ERROR: Compact stencil not supported\n");
    error = 1;
    goto ENDOFTESTS;
#endif
      
    if (argc != 3){
      printf("Usage: %s <# iterations> <array dimension> \n", 
             *argv);
      error = 1;
      goto ENDOFTESTS;
    }
 
    iterations  = atoi(*++argv); 
    arguments[0]=iterations;

    if (iterations < 1){
      printf("ERROR: iterations must be >= 1 : %d \n",iterations);
      error = 1;
      goto ENDOFTESTS;  
    }
 
    n  = atoi(*++argv);
    arguments[1]=n;
    long nsquare = (long)n * (long)n;

    if (nsquare < Num_procs){ 
      printf("ERROR: grid size must be at least # ranks: %ld\n", nsquare);
      error = 1;
      goto ENDOFTESTS;
    }
 
    if (RADIUS < 0) {
      printf("ERROR: Stencil radius %d should be non-negative\n", RADIUS);
      error = 1;
      goto ENDOFTESTS;  
    }
 
    if (2*RADIUS +1 > n) {
      printf("ERROR: Stencil radius %d exceeds grid size %d\n", RADIUS, n);
      error = 1;
      goto ENDOFTESTS;  
    }
 
    ENDOFTESTS:;  
  }
  bail_out(error);
 
  /* determine best way to create a 2D grid of ranks (closest to square, for 
     best surface/volume ratio); we do this brute force for now
  */
  for (Num_procsx=(int) (sqrt(Num_procs+1)); Num_procsx>0; Num_procsx--) {
    if (!(Num_procs%Num_procsx)) {
      Num_procsy = Num_procs/Num_procsx;
      break;
    }
  }      
  my_IDx = my_ID%Num_procsx;
  my_IDy = my_ID/Num_procsx;
  /* compute neighbors; don't worry about dropping off the edges of the grid */
  right_nbr  = my_ID+1;
  left_nbr   = my_ID-1;
  top_nbr    = my_ID+Num_procsx;
  bottom_nbr = my_ID-Num_procsx;

  iterflag[0] = iterflag[1] = 0;

  if(my_IDx==0)            count_case--;
  if(my_IDx==Num_procsx-1) count_case--;
  if(my_IDy==0)            count_case--;
  if(my_IDy==Num_procsy-1) count_case--;
 
  if (my_ID == root) {
    printf("Parallel Research Kernels version %s\n", PRKVERSION);
    printf("SHMEM stencil execution on 2D grid\n");
    printf("Number of ranks        = %d\n", Num_procs);
    printf("Grid size              = %d\n", n);
    printf("Radius of stencil      = %d\n", RADIUS);
    printf("Tiles in x/y-direction = %d/%d\n", Num_procsx, Num_procsy);
    printf("Type of stencil        = star\n");
#ifdef DOUBLE
    printf("Data type              = double precision\n");
#else
    printf("Data type              = single precision\n");
#endif
#if LOOPGEN
    printf("Script used to expand stencil loop body\n");
#else
    printf("Compact representation of stencil loop body\n");
#endif
#if SPLITFENCE
    printf("Split fence            = ON\n");
#else
    printf("Split fence            = OFF\n");
#endif
    printf("Number of iterations   = %d\n", iterations);
  }

  shmem_barrier_all();
 
  shmem_broadcast32(&arguments[0], &arguments[0], 2, root, 0, 0, Num_procs, pSync_bcast);

  iterations=arguments[0];
  n=arguments[1];

  shmem_barrier_all();
  prk_shmem_free(arguments);
 
  /* compute amount of space required for input and solution arrays             */
  
  width = n/Num_procsx;
  leftover = n%Num_procsx;
  if (my_IDx<leftover) {
    istart = (width+1) * my_IDx; 
    iend = istart + width + 1;
  }
  else {
    istart = (width+1) * leftover + width * (my_IDx-leftover);
    iend = istart + width;
  }
  
  width = iend - istart + 1;
  if (width == 0) {
    printf("ERROR: rank %d has no work to do\n", my_ID);
    error = 1;
  }
  bail_out(error);
 
  height = n/Num_procsy;
  leftover = n%Num_procsy;
  if (my_IDy<leftover) {
    jstart = (height+1) * my_IDy; 
    jend = jstart + height + 1;
  }
  else {
    jstart = (height+1) * leftover + height * (my_IDy-leftover);
    jend = jstart + height;
  }
  
  height = jend - jstart + 1;
  if (height == 0) {
    printf("ERROR: rank %d has no work to do\n", my_ID);
    error = 1;
  }
  bail_out(error);
 
  if (width < RADIUS || height < RADIUS) {
    printf("ERROR: rank %d has work tile smaller then stencil radius\n",
           my_ID);
    error = 1;
  }
  bail_out(error);
 
  total_length_in = (width+2*RADIUS);
  total_length_in *= (height+2*RADIUS);
  total_length_in *= sizeof(DTYPE);

  total_length_out = width;
  total_length_out *= height;
  total_length_out *= sizeof(DTYPE);
 
  in  = (DTYPE *) malloc(total_length_in);
  out = (DTYPE *) malloc(total_length_out);
  if (!in || !out) {
    printf("ERROR: rank %d could not allocate space for input/output array\n",
            my_ID);
    error = 1;
  }
  bail_out(error);
 
  /* fill the stencil weights to reflect a discrete divergence operator         */
  for (jj=-RADIUS; jj<=RADIUS; jj++) for (ii=-RADIUS; ii<=RADIUS; ii++)
    WEIGHT(ii,jj) = (DTYPE) 0.0;
  stencil_size = 4*RADIUS+1;

  for (ii=1; ii<=RADIUS; ii++) {
    WEIGHT(0, ii) = WEIGHT( ii,0) =  (DTYPE) (1.0/(2.0*ii*RADIUS));
    WEIGHT(0,-ii) = WEIGHT(-ii,0) = -(DTYPE) (1.0/(2.0*ii*RADIUS));
  }
 
  norm[0] = (DTYPE) 0.0;
  f_active_points = (DTYPE) (n-2*RADIUS)*(DTYPE) (n-2*RADIUS);

  /* intialize the input and output arrays                                     */
  for (j=jstart; j<jend; j++) for (i=istart; i<iend; i++) {
    IN(i,j)  = COEFX*i+COEFY*j;
    OUT(i,j) = (DTYPE)0.0;
  }

  /* allocate communication buffers for halo values                            */
  top_buf_out=(DTYPE*)malloc(2*sizeof(DTYPE)*RADIUS*width);
  if (!top_buf_out) {
    printf("ERROR: Rank %d could not allocate output comm buffers for y-direction\n", my_ID);
    error = 1;
  }
  bail_out(error);
  bottom_buf_out = top_buf_out+RADIUS*width;

  top_buf_in[0]=(DTYPE*)prk_shmem_malloc(4*sizeof(DTYPE)*RADIUS*width);
  if(!top_buf_in)
  {
    printf("ERROR: Rank %d could not allocate input comm buffers for y-direction\n", my_ID);
    error=1;
  }
  bail_out(error);
  top_buf_in[1]    = top_buf_in[0]    + RADIUS*width;
  bottom_buf_in[0] = top_buf_in[1]    + RADIUS*width;
  bottom_buf_in[1] = bottom_buf_in[0] + RADIUS*width;
 
  right_buf_out=(DTYPE*)malloc(2*sizeof(DTYPE)*RADIUS*height);
  if (!right_buf_out) {
    printf("ERROR: Rank %d could not allocate output comm buffers for x-direction\n", my_ID);
    error = 1;
  }
  bail_out(error);
  left_buf_out=right_buf_out+RADIUS*height;

  right_buf_in[0]=(DTYPE*)prk_shmem_malloc(4*sizeof(DTYPE)*RADIUS*height);
  if(!right_buf_in)
  {
    printf("ERROR: Rank %d could not allocate input comm buffers for x-dimension\n", my_ID);
    error=1;
  }
  bail_out(error);
  right_buf_in[1] = right_buf_in[0] + RADIUS*height;
  left_buf_in[0]  = right_buf_in[1] + RADIUS*height;
  left_buf_in[1]  = left_buf_in[0]  + RADIUS*height;

  /* make sure all symmetric heaps are allocated before being used  */
  shmem_barrier_all();

  for (iter = 0; iter<=iterations; iter++){

    /* start timer after a warmup iteration */
    if (iter == 1) { 
      shmem_barrier_all();
      local_stencil_time[0] = wtime();
    }
    /* sw determines which incoming buffer to select */
    sw = iter%2;

    /* need to fetch ghost point data from neighbors */

    if (my_IDy < Num_procsy-1) {
      for (kk=0,j=jend-RADIUS; j<=jend-1; j++) for (i=istart; i<=iend; i++) {
          top_buf_out[kk++]= IN(i,j);
      }
      shmem_putmem(bottom_buf_in[sw], top_buf_out, RADIUS*width*sizeof(DTYPE), top_nbr);
#if SPLITFENCE
      shmem_fence();
      shmem_int_inc(&iterflag[sw], top_nbr);
#endif
    }
    if (my_IDy > 0) {
      for (kk=0,j=jstart; j<=jstart+RADIUS-1; j++) for (i=istart; i<=iend; i++) {
          bottom_buf_out[kk++]= IN(i,j);
      }
      shmem_putmem(top_buf_in[sw], bottom_buf_out, RADIUS*width*sizeof(DTYPE), bottom_nbr);
#if SPLITFENCE
      shmem_fence();
      shmem_int_inc(&iterflag[sw], bottom_nbr);
#endif
    }

    if(my_IDx < Num_procsx-1) {
      for(kk=0,j=jstart;j<=jend;j++) for(i=iend-RADIUS;i<=iend-1;i++) {
	right_buf_out[kk++]=IN(i,j);
      }
      shmem_putmem(left_buf_in[sw], right_buf_out, RADIUS*height*sizeof(DTYPE), right_nbr);
#if SPLITFENCE
      shmem_fence();
      shmem_int_inc(&iterflag[sw], right_nbr);
#endif
    }

    if(my_IDx>0) {
      for(kk=0,j=jstart;j<=jend;j++) for(i=istart;i<=istart+RADIUS-1;i++) {
	left_buf_out[kk++]=IN(i,j);
      }
      shmem_putmem(right_buf_in[sw], left_buf_out, RADIUS*height*sizeof(DTYPE), left_nbr);
#if SPLITFENCE
      shmem_fence();
      shmem_int_inc(&iterflag[sw], left_nbr);
#endif
    }

#if SPLITFENCE == 0
    shmem_fence();
    if(my_IDy<Num_procsy-1) shmem_int_inc(&iterflag[sw], top_nbr);
    if(my_IDy>0)            shmem_int_inc(&iterflag[sw], bottom_nbr);
    if(my_IDx<Num_procsx-1) shmem_int_inc(&iterflag[sw], right_nbr);
    if(my_IDx>0)            shmem_int_inc(&iterflag[sw], left_nbr);
#endif

    shmem_int_wait_until(&iterflag[sw], SHMEM_CMP_EQ, count_case*(iter/2+1));

    if (my_IDy < Num_procsy-1) {
      for (kk=0,j=jend; j<=jend+RADIUS-1; j++) for (i=istart; i<=iend; i++) {
          IN(i,j) = top_buf_in[sw][kk++];
      }      
    }
    if (my_IDy > 0) {
      for (kk=0,j=jstart-RADIUS; j<=jstart-1; j++) for (i=istart; i<=iend; i++) {
          IN(i,j) = bottom_buf_in[sw][kk++];
      }      
    }

    if (my_IDx < Num_procsx-1) {
      for (kk=0,j=jstart; j<=jend; j++) for (i=iend; i<=iend+RADIUS-1; i++) {
          IN(i,j) = right_buf_in[sw][kk++];
      }      
    }
    if (my_IDx > 0) {
      for (kk=0,j=jstart; j<=jend; j++) for (i=istart-RADIUS; i<=istart-1; i++) {
          IN(i,j) = left_buf_in[sw][kk++];
      }      
    }
 
    /* Apply the stencil operator */
    for (j=MAX(jstart,RADIUS); j<=MIN(n-RADIUS-1,jend); j++) {
      for (i=MAX(istart,RADIUS); i<=MIN(n-RADIUS-1,iend); i++) {
        #if LOOPGEN
          #include "loop_body_star.incl"
        #else
          for (jj=-RADIUS; jj<=RADIUS; jj++) OUT(i,j) += WEIGHT(0,jj)*IN(i,j+jj);
          for (ii=-RADIUS; ii<0; ii++)       OUT(i,j) += WEIGHT(ii,0)*IN(i+ii,j);
          for (ii=1; ii<=RADIUS; ii++)       OUT(i,j) += WEIGHT(ii,0)*IN(i+ii,j);
        #endif
      }
    }
 
    /* add constant to solution to force refresh of neighbor data, if any */
    for (j=jstart; j<jend; j++) for (i=istart; i<iend; i++) IN(i,j)+= 1.0;
 
  }
 
  local_stencil_time[0] = wtime() - local_stencil_time[0];

  shmem_barrier_all();

  shmem_double_max_to_all(&stencil_time[0], &local_stencil_time[0], 1, 0, 0,
                          Num_procs, pWrk_time, pSync_reduce);
  
  /* compute L1 norm in parallel                                                */
  local_norm[0] = (DTYPE) 0.0;
  for (j=MAX(jstart,RADIUS); j<MIN(n-RADIUS,jend); j++) {
    for (i=MAX(istart,RADIUS); i<MIN(n-RADIUS,iend); i++) {
      local_norm[0] += (DTYPE)ABS(OUT(i,j));
    }
  }

  shmem_barrier_all();
 
#ifdef DOUBLE
  shmem_double_sum_to_all(&norm[0], &local_norm[0], 1, 0, 0, Num_procs, pWrk_norm, pSync_reduce);
#else
  shmem_float_sum_to_all(&norm[0], &local_norm[0], 1, 0, 0, Num_procs, pWrk_norm, pSync_reduce);
#endif
 
  /*******************************************************************************
  ** Analyze and output results.
  ********************************************************************************/
 
/* verify correctness                                                            */
  if (my_ID == root) {
    norm[0] /= f_active_points;
    if (RADIUS > 0) {
      reference_norm = (DTYPE) (iterations+1) * (COEFX + COEFY);
    }
    else {
      reference_norm = (DTYPE) 0.0;
    }
    if (ABS(norm[0]-reference_norm) > EPSILON) {
      printf("ERROR: L1 norm = "FSTR", Reference L1 norm = "FSTR"\n",
             norm[0], reference_norm);
      error = 1;
    }
    else {
      printf("Solution validates\n");
#ifdef VERBOSE
      printf("Reference L1 norm = "FSTR", L1 norm = "FSTR"\n", 
             reference_norm, norm[0]);
#endif
    }
  }
  bail_out(error);
 
  if (my_ID == root) {
    /* flops/stencil: 2 flops (fma) for each point in the stencil, 
       plus one flop for the update of the input of the array        */
    flops = (DTYPE) (2*stencil_size+1) * f_active_points;
    avgtime = stencil_time[0]/iterations;
    printf("Rate (MFlops/s): "FSTR"  Avg time (s): %lf\n",
           1.0E-06 * flops/avgtime, avgtime);
  }
 

  prk_shmem_free(top_buf_in);
  prk_shmem_free(right_buf_in);
  free(top_buf_out);
  free(right_buf_out);

  prk_shmem_free(pSync_bcast);
  prk_shmem_free(pSync_reduce);
  prk_shmem_free(pWrk_time);
  prk_shmem_free(pWrk_norm);

  prk_shmem_finalize();

  exit(EXIT_SUCCESS);
}