Example no. 1
SizeT _MPIStream_Write( Stream* stream, void *data, SizeT elem_size, SizeT num_elems )
{
	MPIStream* self = (MPIStream*)stream;
	MPI_Status status;
	int   writeResult;
	
	writeResult = MPI_File_write( *(MPI_File*)(self->_file->fileHandle), data, num_elems * elem_size,
		 			MPI_BYTE, &status );

	if (writeResult != MPI_SUCCESS) {
		char         errorString[2000];
		int          errorStringLength = 0;
		Stream*      errorStream = Journal_Register( Error_Type, MPIFile_Type );
		int          myRank = 0;

		MPI_Comm_rank( MPI_COMM_WORLD, &myRank );
		MPI_Error_string( writeResult, errorString, &errorStringLength);
		Journal_Printf( errorStream, "%3d: %s\n", myRank, errorString );
		File_Close( self->_file );
		MPI_Abort(MPI_COMM_WORLD, writeResult );
	}

	return num_elems;
}
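Note on the pattern above: MPI attaches the MPI_ERRORS_RETURN handler to files by default, so checking the return code of MPI_File_write, as Example 1 does, is meaningful. A self-contained sketch of the same write-and-report pattern without the Stream/Journal wrappers (the helper name and the use of MPI_COMM_WORLD are illustrative assumptions, not part of the original code):

#include <mpi.h>
#include <stdio.h>

/* Sketch: write a byte buffer and, on failure, report the decoded MPI error
 * string before aborting, mirroring _MPIStream_Write() above. */
static void write_or_abort(MPI_File fh, const void *data, int nbytes)
{
    MPI_Status status;
    int rc = MPI_File_write(fh, (void *)data, nbytes, MPI_BYTE, &status);
    if (rc != MPI_SUCCESS) {
        char msg[MPI_MAX_ERROR_STRING];
        int len = 0, rank = 0;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Error_string(rc, msg, &len);
        fprintf(stderr, "%3d: %s\n", rank, msg);
        MPI_Abort(MPI_COMM_WORLD, rc);
    }
}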
Example no. 2
/**
 * \brief Initialization function of measure function
 *        measure_MPI_IO_write_file_once().
 *
 * Only one process is active. It writes once to a file.
 *
 * Remark:<br>
 * With the <tt>O_DIRECT</tt> flag set, cache effects are minimized, because I/O
 * is done directly to/from user space buffers. The operating system's page
 * cache is bypassed. Under Linux 2.6 alignment to 512-byte boundaries is
 * required for buffer and file offset. Thus the following parameters should be
 * set in a SKaMPI input file:
 * - <tt>set_send_buffert_alignment (512)</tt>
 * - <tt>set_recv_buffert_alignment (512)</tt>
 * - <tt>switch_buffer_cycling_off ()</tt><br>
 * 
 * For more information please refer to the <tt>open ()</tt> man pages.
 * 
 * \param[in] size        size of memory buffer, i.e. number of <tt>MPI_BYTE</tt>s
 * \param[in] api         POSIX-API or MPI-API for I/O accesses
 * \param[in] create_flag write into existing file (FALSE) or create it (TRUE)
 * \param[in] directio_flag open file with <tt>O_DIRECT</tt> flag to minimize
 *                          cache effects
 *
 * \return    void
 */
void init_MPI_IO_write_file_once (int size, char *api, int create_flag, int directio_flag) {
  char *send_buffer;

  assert (size > 0);

  io_filename = get_io_filename (IO_FILENAME, 0);

  if (get_measurement_rank () == 0){

    if (create_flag == 0){
      send_buffer = mpi_malloc_chars (get_extent (size, MPI_BYTE));
    
      MPI_File_open (MPI_COMM_SELF, io_filename,
		     MPI_MODE_WRONLY | MPI_MODE_CREATE | MPI_MODE_UNIQUE_OPEN,
		     MPI_INFO_NULL, &io_fh);
      MPI_File_set_view (io_fh, (MPI_Offset)0, 
			 MPI_BYTE, MPI_BYTE,
			 "native", MPI_INFO_NULL);
      MPI_File_write (io_fh, send_buffer, size, MPI_BYTE, MPI_STATUS_IGNORE);
      MPI_File_close (&io_fh);
      mpi_free (send_buffer);
    }

    set_send_buffer_usage (size);
    set_reported_message_size (size);
  }

  MPI_Barrier (get_measurement_comm ());

  /* set synchronization type:
   SYNC_BARRIER if all SKaMPI processes run on one physical processor 
   SYNC_REAL if every SKaMPI process runs on its own physical processor */
  set_synchronization (SYNC_REAL);

  init_synchronization ();
}
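The O_DIRECT remark in the comment above concerns the POSIX I/O path rather than MPI-IO. A minimal sketch of what the 512-byte alignment requirement looks like in plain POSIX code (the file name and sizes are placeholders; nbytes must itself be a multiple of 512):

#define _GNU_SOURCE          /* for O_DIRECT */
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Sketch: O_DIRECT bypasses the page cache, but requires the user buffer,
 * the file offset and the transfer size to be 512-byte aligned on Linux 2.6. */
int write_direct(const char *path, size_t nbytes)
{
    void *buf = NULL;
    int fd, rc = 0;

    if (posix_memalign(&buf, 512, nbytes) != 0)    /* 512-byte aligned buffer */
        return -1;
    memset(buf, 0, nbytes);

    fd = open(path, O_WRONLY | O_CREAT | O_DIRECT, 0644);
    if (fd < 0) { free(buf); return -1; }

    if (write(fd, buf, nbytes) != (ssize_t)nbytes)
        rc = -1;

    close(fd);
    free(buf);
    return rc;
}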
Example no. 3
int main(int argc, char  **argv)
{
	int *buf, i, rank, nints, len;
	char *filename, *tmp;
	MPI_File fh;
	MPI_Status status;

	FILE * fp;
	char * line = NULL;
	size_t slen = 0;
	ssize_t sread;

	MPI_Init(&argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);

	if(rank == 0){
		i = 1;
		while((i < argc) && strcmp("-fname", *argv)){
			i++;
			argv++;
		}
		if(i >= argc){
			fprintf(stderr, "sss\n");
			MPI_Abort(MPI_COMM_WORLD, 1);
		}
		argv++;
		len = strlen(*argv);
		filename = (char *)malloc(len+10);
		strcpy(filename, *argv);
		MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
		MPI_Bcast(filename, len+10, MPI_CHAR, 0, MPI_COMM_WORLD);
	}
	else{
		MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
		filename = (char *) malloc(len+10);
		MPI_Bcast(filename, len+10, MPI_CHAR, 0, MPI_COMM_WORLD);
	}
	buf = (int *)malloc(SIZE);
//	nints = SIZE/sizeof(int);
	fp = fopen("/Users/jinyangzhou/Documents/Github/MPI-IO", "r");
	if(fp == NULL){
		fprintf(stderr, "cannot open input file\n");
		MPI_Abort(MPI_COMM_WORLD, 1);
	}

	while((sread = getline(&line, &slen, fp)) != -1){
//		printf("%d\n", atoi(line));
		for(i = rank*3; i < rank*3+3; i++){
			buf[i] = atoi(line);
		}
	}
/*	for(i = 0; i<nints, i++){
		buf[i] = rank;
	}
*/
	tmp = (char *) malloc(len+10);
	strcpy(tmp, filename);
	sprintf(filename, "%s.%d", tmp, rank);

	MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
	MPI_File_write(fh, buf + rank*3, 3, MPI_INT, &status);
	MPI_File_close(&fh);

	for(i = rank*3; i < rank*3+3; i++)
		buf[i] = buf[i] + 10;
	MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
	MPI_File_write(fh, buf + rank*3, 3, MPI_INT, &status);
	MPI_File_close(&fh);

	for(i = rank*3; i < rank*3+3; i++)
		printf("%d\n", buf[i]);

	free(buf);
	free(filename);
	free(tmp);

	MPI_Finalize();
	return 0;
}
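Example 3 never reads its per-rank output back. A read-back check in the style of Example 5 could be added before MPI_Finalize; a sketch under that assumption (read_back is a hypothetical helper, fname is the already-suffixed "filename.<rank>" used above):

#include <mpi.h>
#include <stdio.h>

/* Sketch: reopen one per-rank output file and print the three integers
 * that the code above wrote into it. */
static void read_back(const char *fname, int rank)
{
    int check[3] = {0, 0, 0}, k;
    MPI_File fh;
    MPI_Status status;

    MPI_File_open(MPI_COMM_SELF, (char *)fname, MPI_MODE_RDONLY, MPI_INFO_NULL, &fh);
    MPI_File_read(fh, check, 3, MPI_INT, &status);
    MPI_File_close(&fh);

    for (k = 0; k < 3; k++)
        printf("rank %d read back check[%d] = %d\n", rank, k, check[k]);
}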
Example no. 4
int
main(int argc, char* argv[]) 
{
   int n, my_rank;
   int array_of_subsizes[NDIMS], array_of_starts[NDIMS], array_of_sizes[NDIMS];
   int size = 4;
   int sqrtn;
   int ln;
   MPI_Datatype filetype, memtype;
   MPI_File fh;
   char hdr[128];
   int header_bytes;
   unsigned char *cur;
   char name[128];
   int resultlen;
   int ret;
   int i, j;

   /* Initialize MPI. */
   MPI_Init(&argc, &argv);
   MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_RETURN);

   /* Learn my rank and the total number of processors. */
   MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
   MPI_Comm_size(MPI_COMM_WORLD, &n);

   /* Speak! */
   MPI_Get_processor_name(name, &resultlen);
   printf("process %d running on %s\n", my_rank, name);

   /* Set up our values. */
   sqrtn = (int)sqrt(n);
   ln = size/sqrtn;
   printf("n = %d, sqrtn = %d, ln = %d storage = %d\n", n, sqrtn, ln, (ln + 2) * (ln + 2));

   /* Allocation storage. */
   if (!(cur = calloc((ln + 2) * (ln + 2), 1)))
      return ERR;

   /* Initialize data. */
   for (i = 1; i < ln + 1; i++)
      for (j = 1; j < ln + 1; j++)
	 cur[i * (ln + 2) + j] = my_rank;

   /* Create a subarray type for the file. */
   array_of_sizes[0] = array_of_sizes[1] = size;
   array_of_subsizes[0] = array_of_subsizes[1] = ln;
   array_of_starts[0] = my_rank/sqrtn * ln;
   array_of_starts[1] = (my_rank % sqrtn) * ln;
   if ((ret = MPI_Type_create_subarray(NDIMS, array_of_sizes, array_of_subsizes, array_of_starts, MPI_ORDER_C, MPI_BYTE, &filetype)))
      MPIERR(ret);
   if ((ret = MPI_Type_commit(&filetype)))
      MPIERR(ret);

   /* Create a subarray type for memory. */
   array_of_sizes[0] = array_of_sizes[1] = ln + 2;
   array_of_subsizes[0] = array_of_subsizes[1] = ln;
   array_of_starts[0] = array_of_starts[1] = 1;
   if ((ret = MPI_Type_create_subarray(NDIMS, array_of_sizes, array_of_subsizes, array_of_starts, MPI_ORDER_C, MPI_BYTE, &memtype)))
      MPIERR(ret);
   if ((ret = MPI_Type_commit(&memtype)))
      MPIERR(ret);

   MPI_File_delete(FILE_NAME, MPI_INFO_NULL);
   if ((ret = MPI_File_open(MPI_COMM_WORLD, FILE_NAME, MPI_MODE_CREATE|MPI_MODE_RDWR, 
        MPI_INFO_NULL, &fh)))
      MPIERR(ret);

   /* Create the PGM header; every rank writes the identical header collectively at the start of the file. */
   sprintf(hdr, "P5\n%d %d\n255\n", size, size);
   header_bytes = strlen(hdr);
   if ((ret = MPI_File_write_all(fh, hdr, header_bytes, MPI_BYTE, MPI_STATUS_IGNORE)))
      MPIERR(ret);
	 
   /* Set the file view to translate our memory data into the file's data layout. */
   MPI_File_set_view(fh, header_bytes, MPI_BYTE, filetype, "native", MPI_INFO_NULL);

   /* Write the output. */
   MPI_File_write(fh, cur, 1, memtype, MPI_STATUS_IGNORE);

   if ((ret = MPI_File_close(&fh)))
      MPIERR(ret);

   MPI_Finalize();
   return 0;
}
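A quick sanity check on the two subarray types created above: both describe exactly ln*ln bytes of data, but their extents differ, because the file type is carved out of the full size x size array while the memory type is carved out of the (ln+2) x (ln+2) halo buffer. A sketch of such a check (describe_type is a hypothetical helper; filetype, memtype and ln come from the code above):

#include <mpi.h>
#include <stdio.h>

/* Sketch: print the size and extent of a committed datatype. For the types
 * above, both sizes should equal ln*ln, while the extents are size*size
 * (file type) and (ln+2)*(ln+2) (memory type). */
static void describe_type(const char *label, MPI_Datatype type)
{
    int tsize;
    MPI_Aint lb, extent;

    MPI_Type_size(type, &tsize);
    MPI_Type_get_extent(type, &lb, &extent);
    printf("%s: size = %d bytes, extent = %ld bytes\n", label, tsize, (long)extent);
}

Calling describe_type("filetype", filetype) and describe_type("memtype", memtype) right after the commits confirms that each rank contributes exactly its ln x ln tile.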
Example no. 5
int main(int argc, char **argv)
{
    int *buf, i, rank, nints, flag;
	int len;
    char *filename, *tmp;
    MPI_File fh;
    MPI_Status status;

    MPI_Init(&argc,&argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

/* process 0 takes the file name as a command-line argument and 
   broadcasts it to other processes */
    if (!rank) {
	i = 1;
	while ((i < argc) && strcmp("-fname", *argv)) {
	    i++;
	    argv++;
	}
	if (i >= argc) {
	    printf("\n*#  Usage: %s -fname filename\n\n", argv[0]);
	    MPI_Abort(MPI_COMM_WORLD, 1);
	}
	argv++;
	len = strlen(*argv);
	filename = (char *) malloc(len+10);
	strcpy(filename, *argv);
	MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Bcast(filename, (int)len+10, MPI_CHAR, 0, MPI_COMM_WORLD);
    }
    else {
	MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
	filename = (char *) malloc(len+10);
	MPI_Bcast(filename, (int)len+10, MPI_CHAR, 0, MPI_COMM_WORLD);
    }
    

    buf = (int *) malloc(SIZE);
    nints = SIZE/sizeof(int);
    for (i=0; i<nints; i++) buf[i] = rank*100000 + i;

    /* each process opens a separate file called filename.'myrank' */
    tmp = (char *) malloc(len+10);
    strcpy(tmp, filename);
    sprintf(filename, "%s.%d", tmp, rank);

    MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR,
		   MPI_INFO_NULL, &fh);
    MPI_File_set_view(fh, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL);
    MPI_File_write(fh, buf, nints, MPI_INT, &status);
    MPI_File_close(&fh);

    /* reopen the file and read the data back */

    for (i=0; i<nints; i++) buf[i] = 0;
    MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, 
                  MPI_INFO_NULL, &fh);
    MPI_File_set_view(fh, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL);
    MPI_File_read(fh, buf, nints, MPI_INT, &status);
    MPI_File_close(&fh);

    /* check if the data read is correct */
    flag = 0;
    for (i=0; i<nints; i++) 
	if (buf[i] != (rank*100000 + i)) {
	    printf("Process %d: error, read %d, should be %d\n", rank, buf[i], rank*100000+i);
	    flag = 1;
	}

    if (!flag) printf("Process %d: data read back is correct\n", rank);

    free(buf);
    free(filename);
    free(tmp);

    MPI_Finalize(); 
    return 0;
}
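The same data could also go into one shared file instead of a file per rank: open with MPI_COMM_WORLD and give each rank a view displaced by its portion. A minimal sketch under those assumptions (write_shared_file and fname are illustrative, not part of the original test):

#include <mpi.h>

/* Sketch: every rank writes `nints` ints into a single shared file at its own
 * byte displacement, instead of opening a separate "filename.<rank>" file. */
static void write_shared_file(const char *fname, int *buf, int nints)
{
    int rank;
    MPI_File fh;
    MPI_Offset disp;

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    disp = (MPI_Offset)rank * nints * (MPI_Offset)sizeof(int);

    MPI_File_open(MPI_COMM_WORLD, (char *)fname,
                  MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
    MPI_File_set_view(fh, disp, MPI_INT, MPI_INT, "native", MPI_INFO_NULL);
    MPI_File_write_all(fh, buf, nints, MPI_INT, MPI_STATUS_IGNORE);
    MPI_File_close(&fh);
}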
Example no. 6
int main(int argc, char *argv[])
{
    int rank, size;
    const int N = atoi(argv[1]);

//    printf("Number of testcase = %d\n", N);

    MPI_Init (&argc, &argv);

    double start_time, end_time;
    MPI_Comm_rank (MPI_COMM_WORLD, &rank);
    MPI_Comm_size (MPI_COMM_WORLD, &size);
 //   printf("My rank is %d \n", rank); 
    
    //start_time = MPI_Wtime();

    MPI_File fin, fout;
    MPI_Status status;
    int *root_arr;
    int max_arr_size = size > N ? size : N;
    int ret = MPI_File_open(MPI_COMM_WORLD, argv[2], 
                MPI_MODE_RDONLY, MPI_INFO_NULL, &fin);
    
    if (rank == ROOT) {
        root_arr = new int[max_arr_size+3];
//        printf("Enter rank 0 statement ... \n");
        MPI_File_read(fin, root_arr, N, MPI_INT, &status);
        
/*        for (int i = 0; i < N; ++i)
             printf("[START] [Rank %d] root_arr[%d] = %d\n", rank, i, root_arr[i]); 
        printf("Out Rank 0 statement ... \n");
*/    } 
    MPI_File_close(&fin);
    
    MPI_Barrier(MPI_COMM_WORLD); // Wait for rank0 to read file 
    
    int rank_num = size > N ? N : size;
    const int LAST = rank_num - 1;
    int num_per_node = N / rank_num;
    int *local_arr;
    int num_per_node_diff = N - num_per_node * rank_num;
    int diff = num_per_node_diff;
    bool has_remain = false;
    bool has_remain_rank = rank_num % 2 ? true : false;
    
    if (num_per_node_diff > 0) {
        // Send remaining elements to size - 1
        has_remain = true;
        if (rank == ROOT) {
            MPI_Send(root_arr + N - diff, diff, MPI_INT, LAST, 0, MPI_COMM_WORLD); 
        } else if (rank == LAST) {
            // Handle special case
            num_per_node += num_per_node_diff;
            local_arr = new int[num_per_node+1];
            MPI_Recv(local_arr + num_per_node - diff, diff, 
                    MPI_INT, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
        }
    } else if(rank == rank_num - 1) {
        local_arr = new int[num_per_node+1];
    }

    MPI_Barrier(MPI_COMM_WORLD); // Wait for rank0 to read file 
    if (rank != rank_num - 1)
        local_arr = new int[num_per_node+1];
	

    // MPI_Scatter (send_buf, send_count, send_type, recv_buf, recv_count, recv_type, root, comm)
	if (rank < LAST)
        MPI_Scatter(root_arr, num_per_node, MPI_INT, local_arr, 
                    num_per_node, MPI_INT, ROOT, MPI_COMM_WORLD);
    else
        MPI_Scatter(root_arr, num_per_node-diff, MPI_INT, local_arr, 
                    num_per_node-diff, MPI_INT, ROOT, MPI_COMM_WORLD);
    
   // printf("[Rank %d] num_per_node_size = %d\n" ,rank, num_per_node); 
    MPI_Barrier(MPI_COMM_WORLD);
/*
    for (int i = 0; i < num_per_node; ++i)
        printf("[BEFORE] [Rank %d] local_arr[%d] = %d\n", rank, i, local_arr[i]); 
*/
if (rank < rank_num) {
    int round = N % 2 ? N+1 : N;
    for (int i = 0; i < round; ++i) {
        // bool need_send = (i & 1)^(num_per_node & 1);
         bool need_send = true;
        for (int j = i & 1; j < num_per_node; j+=2) {
            if (j+1 < num_per_node) {
                if (local_arr[j] > local_arr[j+1]) 
                    swap(local_arr[j], local_arr[j+1]);        
            } else if (j-1 >= 0) {
                if (local_arr[j-1] > local_arr[j]) 
                    swap(local_arr[j-1], local_arr[j]);
            }            
        }
        
        int element;
        bool recv_side;
        if (i & 1) {
            if (rank & 1)   recv_side = true;
            else    recv_side = false;
        } else {
            if (rank & 1)   recv_side = false;
            else    recv_side = true;
        }
        // if (recv_side) printf("i = %d, rank = %d, recv\n", i, rank);
        // if (!recv_side) printf("i = %d, rank = %d, send\n", i, rank);

        if (recv_side) {
            if (rank != ROOT) {
                /* Receive element */
                MPI_Recv(&element, 1, MPI_INT, rank - 1, 0, MPI_COMM_WORLD, &status);  
                MPI_Send(local_arr, 1, MPI_INT, rank - 1, 0, MPI_COMM_WORLD); 
                if (element > local_arr[0])
                    swap(element, local_arr[0]);
            }
        } else {
            /* Send element */
            if (rank != LAST) {
                element = local_arr[num_per_node-1];
                MPI_Send(&element, 1, MPI_INT, rank + 1, 0, MPI_COMM_WORLD); 
                MPI_Recv(&element, 1, MPI_INT, rank + 1, 0, MPI_COMM_WORLD, &status);  
                if (element < local_arr[num_per_node-1])
                    swap(element, local_arr[num_per_node-1]);
            }
        }
    }
}
/* 
    MPI_Barrier(MPI_COMM_WORLD);
    for (int i = 0; i < num_per_node; ++i)
        printf("[AFTER] [Rank %d] local_arr[%d] = %d\n", rank, i, local_arr[i]); 
    
    printf("rank %d is arrived\n", rank);
*/    
    MPI_Barrier(MPI_COMM_WORLD); // Wait for rank0 to read file 

    int *ans;
    if (rank == ROOT) { 
        ans = new int[max_arr_size+3];
    }

    if (has_remain && rank == rank_num - 1) {
        MPI_Gather(local_arr, num_per_node - diff, MPI_INT, 
            ans, num_per_node - diff, MPI_INT, ROOT, MPI_COMM_WORLD);
        
        MPI_Send(local_arr + num_per_node - diff, diff,
                         MPI_INT, ROOT, 0, MPI_COMM_WORLD); 
    }
    else {
        MPI_Gather(local_arr, num_per_node, MPI_INT, ans, num_per_node, 
                                        MPI_INT, ROOT, MPI_COMM_WORLD);
        if (has_remain && rank == ROOT)
            MPI_Recv(ans + N - diff, diff, MPI_INT, LAST, 
                        MPI_ANY_TAG, MPI_COMM_WORLD, &status);  
    }


    MPI_Barrier(MPI_COMM_WORLD);
    MPI_File_open(MPI_COMM_WORLD, argv[3], 
        MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fout);
    
    if (rank == ROOT) {
        MPI_File_write(fout, ans, N, MPI_INT, &status);
        for (int i = 0; i < N; ++i) {
         //   printf("[FINAL] [Rank %d] ans[%d] = %d\n", rank, i, ans[i]);
        }
    }
    MPI_File_close(&fout);
    
   // printf("rank %d is arrived\n", rank);
    MPI_Barrier(MPI_COMM_WORLD);
    
    if (rank != 0) {
        delete []  local_arr;
     //   printf("[FREE] [RANK %d] SUCCESS FREE\n", rank);
    } else {
        delete [] ans;
        delete [] root_arr;
        delete [] local_arr;
    }
    MPI_Finalize();
     
    return 0;
}
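The inner loop of Example 6 is an odd-even transposition sort. Stripped of the MPI boundary exchange, a local version of the same algorithm looks like this (a sketch for clarity, not code from the example):

#include <utility>   // std::swap

// Sketch: serial odd-even transposition sort. After n phases the array is
// sorted; the MPI version above runs the same phases on each rank's block and
// additionally exchanges one boundary element with a neighbour per phase.
void odd_even_sort(int *a, int n)
{
    for (int phase = 0; phase < n; ++phase) {
        // even phases compare pairs (0,1), (2,3), ...; odd phases (1,2), (3,4), ...
        for (int j = phase & 1; j + 1 < n; j += 2) {
            if (a[j] > a[j + 1])
                std::swap(a[j], a[j + 1]);
        }
    }
}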
Example no. 7
int main(int argc, char *argv[]) 
{ 
  int i, j, nerrors=0, total_errors=0; 

  int rank, size;
  int bpos;

  MPI_Datatype darray;
  MPI_Status status;
  MPI_File mpi_fh;

  /* Define array distribution
      A 2x2 block size works with ROMIO, a 3x3 block size breaks it. */
  int distrib[2] = { MPI_DISTRIBUTE_CYCLIC, MPI_DISTRIBUTE_CYCLIC };
  int bsize[2] = { NBLOCK, NBLOCK };
  int gsize[2] = { NSIDE, NSIDE };
  int psize[2] = { NPROC, NPROC };

  double data[NSIDE*NSIDE];
  double *ldata, *pdata;

  int tsize, nelem;

  MPI_File dfile;
 
  MPI_Init(&argc, &argv);

  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  /* Set up type */
  CHECK(MPI_Type_create_darray(size, rank, 2, gsize, distrib,
			 bsize, psize, MPI_ORDER_FORTRAN, MPI_DOUBLE, &darray));
  CHECK(MPI_Type_commit(&darray));
  CHECK(MPI_Type_size(darray, &tsize));
  nelem = tsize / sizeof(double);

  for(i = 0; i < (NSIDE*NSIDE); i++) data[i] = i;

  if (rank == 0) {
    CHECK(MPI_File_open(MPI_COMM_SELF, argv[1],
		MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &dfile));
    CHECK(MPI_File_write(dfile, data, NSIDE*NSIDE, MPI_DOUBLE, &status));
    CHECK(MPI_File_close(&dfile));
  }
  MPI_Barrier(MPI_COMM_WORLD);

  /* Allocate buffer */
  ldata = (double *)malloc(tsize);
  pdata = (double *)malloc(tsize);

  /* Use Pack to pull out array */
  bpos = 0;
  CHECK(MPI_Pack(data, 1, darray, pdata, tsize, &bpos, MPI_COMM_WORLD));

  MPI_Barrier(MPI_COMM_WORLD);

  /* Read in array from file.  */
  CHECK(MPI_File_open(MPI_COMM_WORLD, argv[1], MPI_MODE_RDONLY, MPI_INFO_NULL, &mpi_fh));
  CHECK(MPI_File_set_view(mpi_fh, 0, MPI_DOUBLE, darray, "native", MPI_INFO_NULL));
  CHECK(MPI_File_read_all(mpi_fh, ldata, nelem, MPI_DOUBLE, &status));
  CHECK(MPI_File_close(&mpi_fh));

  for(i = 0; i < size; i++) {
#ifdef VERBOSE
    MPI_Barrier(MPI_COMM_WORLD);
    if(rank == i) {
      printf("=== Rank %i === (%i elements) \nPacked: ", rank, nelem);
      for(j = 0; j < nelem; j++) {
        printf("%4.1f ", pdata[j]);
        fflush(stdout);
      }
      printf("\nRead:   ");
      for(j = 0; j < nelem; j++) {
        printf("%4.1f ", ldata[j]);
        fflush(stdout);
      }
      printf("\n\n");
      fflush(stdout);
    }
#endif
    if(rank == i) {
	for (j=0; j< nelem; j++) {
	    if (pdata[j] != ldata[j]) {
		fprintf(stderr, "rank %d at index %d: packbuf %4.1f filebuf %4.1f\n",
			rank, j, pdata[j], ldata[j]);
		nerrors++;
	    }
	}
    }
  }
  MPI_Allreduce(&nerrors, &total_errors, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
  if (rank == 0 && total_errors == 0)
      printf(" No Errors\n");

  free(ldata);
  free(pdata);
  MPI_Type_free(&darray);
  MPI_Finalize();

  exit(total_errors);

} 
Example no. 8
//***************************************************************************************************************
void ChimeraCheckRDP::makeSVGpic(vector<sim> info) {
	try{
		
		string file = outputDir + querySeq->getName() + ".chimeracheck.svg";
		
		MPI_File outSVG;
		int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;

		//char* FileName = new char[file.length()];
		//memcpy(FileName, file.c_str(), file.length());
		
		char FileName[1024];
		strcpy(FileName, file.c_str());

		MPI_File_open(MPI_COMM_SELF, FileName, outMode, MPI_INFO_NULL, &outSVG);  //comm, filename, mode, info, filepointer
		
		//delete FileName;

		int width = (info.size()*5) + 150;
		
		string outString = "";
		
		outString += "<svg xmlns:svg=\"http://www.w3.org/2000/svg\" xmlns=\"http://www.w3.org/2000/svg\" width=\"100%\" height=\"100%\" viewBox=\"0 0 700 " + toString(width) + "\">\n";
		outString += "<g>\n";
		outString += "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"25\">Plotted IS values for " + querySeq->getName() + "</text>\n";
		
		outString +=  "<line x1=\"75\" y1=\"600\" x2=\"" + toString((info.size()*5) + 75) + "\" y2=\"600\" stroke=\"black\" stroke-width=\"2\"/>\n";  
		outString +=  "<line x1=\"75\" y1=\"600\" x2=\"75\" y2=\"125\" stroke=\"black\" stroke-width=\"2\"/>\n";
		
		outString += "<text fill=\"black\" class=\"seri\" x=\"80\" y=\"620\">" + toString(info[0].midpoint) + "</text>\n";
		outString += "<text fill=\"black\" class=\"seri\" x=\"" + toString((info.size()*5) + 75) + "\" y=\"620\">" + toString(info[info.size()-1].midpoint) + "</text>\n";
		outString += "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"650\">Base Positions</text>\n";
		
		outString += "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"580\">0</text>\n";
		
		outString += "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"350\">IS</text>\n";
		
		
		//find max is score
		float biggest = 0.0;
		for (int i = 0; i < info.size(); i++) {
			if (info[i].score > biggest)  {
				biggest = info[i].score;
			}
		}
		
		outString += "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"135\">" + toString(biggest) + "</text>\n";
		
		float scaler2 = 500 / biggest;
		
		
		outString += "<polyline fill=\"none\" stroke=\"red\" stroke-width=\"2\" points=\"";
		//160,200 180,230 200,210 234,220\"/> "; 
		for (int i = 0; i < info.size(); i++) {
			if(info[i].score < 0) { info[i].score = 0; }
			outString += toString(((i*5) + 75)) + "," + toString((600 - (info[i].score * scaler2))) + " ";
		}
		
		outString += "\"/> ";
		outString += "</g>\n</svg>\n";
		
		MPI_Status status;
		int length = outString.length();
		char* buf2 = new char[length];
		memcpy(buf2, outString.c_str(), length);
				
		MPI_File_write(outSVG, buf2, length, MPI_CHAR, &status);
		delete [] buf2;
		
		MPI_File_close(&outSVG);

	}
	catch(exception& e) {
		m->errorOut(e, "ChimeraCheckRDP", "makeSVGpic");
		exit(1);
	}
}
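The new[]/memcpy/delete sequence before MPI_File_write above is only needed because the pre-MPI-3 C binding takes a non-const buffer. A shorter equivalent that writes the std::string directly (a sketch with the same behaviour; writeString is not part of the original class):

#include <mpi.h>
#include <string>

// Sketch: write a std::string to an already-open MPI file without copying it
// into a temporary heap buffer first. The const_cast is only needed for
// pre-MPI-3 bindings, whose write routines take a non-const pointer.
void writeString(MPI_File fh, const std::string& s)
{
    MPI_Status status;
    MPI_File_write(fh, const_cast<char*>(s.c_str()),
                   static_cast<int>(s.length()), MPI_CHAR, &status);
}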
Example no. 9
int main(int argc, char **argv)
{
    int *buf, i, rank, nints, len;
    char *filename, *tmp;
    int  errs = 0, toterrs, errcode;
    MPI_File fh;
    MPI_Status status;

    MPI_Init(&argc,&argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

/* process 0 takes the file name as a command-line argument and 
   broadcasts it to other processes */
    if (!rank) {
	i = 1;
	while ((i < argc) && strcmp("-fname", *argv)) {
	    i++;
	    argv++;
	}
	if (i >= argc) {
	    fprintf(stderr, "\n*#  Usage: simple -fname filename\n\n");
	    MPI_Abort(MPI_COMM_WORLD, 1);
	}
	argv++;
	len = strlen(*argv);
	filename = (char *) malloc(len+10);
	strcpy(filename, *argv);
	MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Bcast(filename, len+10, MPI_CHAR, 0, MPI_COMM_WORLD);
    }
    else {
	MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
	filename = (char *) malloc(len+10);
	MPI_Bcast(filename, len+10, MPI_CHAR, 0, MPI_COMM_WORLD);
    }
    

    buf = (int *) malloc(SIZE);
    nints = SIZE/sizeof(int);
    for (i=0; i<nints; i++) buf[i] = rank*100000 + i;

    /* each process opens a separate file called filename.'myrank' */
    tmp = (char *) malloc(len+10);
    strcpy(tmp, filename);
    sprintf(filename, "%s.%d", tmp, rank);

    errcode = MPI_File_open(MPI_COMM_SELF, filename, 
		    MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
    if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_open(1)");

    errcode = MPI_File_write(fh, buf, nints, MPI_INT, &status);
    if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_write");

    errcode = MPI_File_close(&fh);
    if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_clode (1)");

    /* reopen the file and read the data back */

    for (i=0; i<nints; i++) buf[i] = 0;
    errcode = MPI_File_open(MPI_COMM_SELF, filename, 
		    MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
    if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_open(2)");

    errcode = MPI_File_read(fh, buf, nints, MPI_INT, &status);
    if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_read");

    errcode = MPI_File_close(&fh);
    if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_close(2)");

    /* check if the data read is correct */
    for (i=0; i<nints; i++) {
	if (buf[i] != (rank*100000 + i)) {
	    errs++;
	    fprintf(stderr, "Process %d: error, read %d, should be %d\n", 
		    rank, buf[i], rank*100000+i);
	}
    }

    MPI_Allreduce( &errs, &toterrs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
    if (rank == 0) {
	if( toterrs > 0) {
	    fprintf( stderr, "Found %d errors\n", toterrs );
	}
	else {
	    fprintf( stdout, " No Errors\n" );
	}
    }

    free(buf);
    free(filename);
    free(tmp);

    MPI_Finalize();
    return 0; 
}
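handle_error() is called above but not shown. A typical definition consistent with this usage (an assumed implementation, not the original helper) is:

#include <mpi.h>
#include <stdio.h>

/* Sketch: decode the MPI error code, print the caller's context string,
 * and abort the job. */
static void handle_error(int errcode, const char *str)
{
    char msg[MPI_MAX_ERROR_STRING];
    int resultlen;

    MPI_Error_string(errcode, msg, &resultlen);
    fprintf(stderr, "%s: %s\n", str, msg);
    MPI_Abort(MPI_COMM_WORLD, 1);
}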
Example no. 10
/* 
 * Save rank_values_table[] to a file
 * Note
 * -. Only root (rank 0) will save rank values to a file
 * -. Assumes rank_values_table[] is the same across all procs
 */
int mpi_write(char *filename,   /* (IN) file name */
              int totalNumUrls, /* (IN) number of total urls */
              double *rank_values_table,        /* (IN) array of rank values. double[total_num_urls] */
              MPI_Comm comm)    /* (IN) MPI communicator */
{

    int divide, rem, len, err;
    int i, j, k, rank, nproc;
    char outFileName[1024], fs_type[32], str[32];
    MPI_File fh,fhall;
    MPI_Status status;
	int *index;
	void quickSort(double arr[], int index[],int left, int right);

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &nproc);

    /* only proc 0 does this, because rank_values_table[] is the same across all procs */
    if (rank == 0)
    {
		index = (int *)malloc(totalNumUrls * sizeof(int));
        
		sprintf(outFileName, "%s.all",filename);
		printf("\nProc:%d is writing rank values of %d urls to file %s\n",
             rank, totalNumUrls,outFileName);
		printf("Proc:%d is writing top 10 page rank values to file %s\n",
             rank,filename);
        err = MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
        if (err != MPI_SUCCESS)
        {
            char errstr[MPI_MAX_ERROR_STRING];
            int errlen;
            MPI_Error_string(err, errstr, &errlen);
            printf("Error at opening file %s (%s)\n", filename, errstr);
            MPI_Finalize();
            exit(1);
        }
        err = MPI_File_open(MPI_COMM_SELF, outFileName, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fhall);
        if (err != MPI_SUCCESS)
        {
            char errstr[MPI_MAX_ERROR_STRING];
            int errlen;
            MPI_Error_string(err, errstr, &errlen);
            printf("Error at opening file %s (%s)\n", outFileName, errstr);
            MPI_Finalize();
            exit(1);
        }

		for (i = 0; i < totalNumUrls; i++)
        {
            index[i] = i;
			memset(str,'\0',32);
			sprintf(str, "%d ", i);
            MPI_File_write(fhall, str, strlen(str), MPI_CHAR, &status);
            sprintf(str, "%f\n", rank_values_table[i]);
            MPI_File_write(fhall, str, strlen(str), MPI_CHAR, &status);
        }
        MPI_File_close(&fhall);
		// Sort the urls
		printf("Using quicksort to sort the top 10 Urls\n");
		quickSort(rank_values_table,index,0,totalNumUrls-1);
        for (i = totalNumUrls-1; i >= totalNumUrls-10; i--)
        {
			memset(str,'\0',32);
			sprintf(str, "%d\t", index[i]);
            MPI_File_write(fh, str, strlen(str), MPI_CHAR, &status);
            sprintf(str, "%f\n", rank_values_table[i]);
            MPI_File_write(fh, str, strlen(str), MPI_CHAR, &status);
        }
		MPI_File_close(&fh);
		free(index);
    }
    return 1;
}
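quickSort() is only declared inside mpi_write() above. A definition matching that signature, which sorts the rank values in ascending order while permuting index[] identically (a hypothetical implementation, not the original), could look like:

/* Sketch: co-sorting quicksort for the declaration used in mpi_write(). */
static void swap_entries(double arr[], int index[], int a, int b)
{
    double tv = arr[a]; arr[a] = arr[b]; arr[b] = tv;
    int    ti = index[a]; index[a] = index[b]; index[b] = ti;
}

void quickSort(double arr[], int index[], int left, int right)
{
    if (left >= right) return;

    double pivot = arr[(left + right) / 2];
    int i = left, j = right;

    while (i <= j) {
        while (arr[i] < pivot) i++;
        while (arr[j] > pivot) j--;
        if (i <= j) { swap_entries(arr, index, i, j); i++; j--; }
    }
    quickSort(arr, index, left, j);
    quickSort(arr, index, i, right);
}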
Example no. 11
int main( int argc, char *argv[] )
{
    int errs = 0;
    int size, rank, i, *buf, count, rc;
    MPI_File fh;
    MPI_Comm comm;
    MPI_Status status;

    MTest_Init( &argc, &argv );

    comm = MPI_COMM_WORLD;
    rc = MPI_File_open( comm, (char*)"test.ord", 
			MPI_MODE_RDWR | MPI_MODE_CREATE |
			MPI_MODE_DELETE_ON_CLOSE, MPI_INFO_NULL, &fh );
    if (rc) {
	MTestPrintErrorMsg( "File_open", rc );
	errs++;
	/* If the open fails, there isn't anything else that we can do */
	goto fn_fail;
    }


    MPI_Comm_size( comm, &size );
    MPI_Comm_rank( comm, &rank );
    buf = (int *)malloc( size * sizeof(int) );
    buf[0] = rank;
    /* Write to file */
    rc = MPI_File_write_ordered( fh, buf, 1, MPI_INT, &status );
    if (rc) {
	MTestPrintErrorMsg( "File_write_ordered", rc );
	errs++;
    }
    else {
	MPI_Get_count( &status, MPI_INT, &count );
	if (count != 1) {
	    errs++;
	    fprintf( stderr, "Wrong count (%d) on write-ordered\n", count );
	}
    }

    /* Set the individual pointer to 0, since we want to use a read_all */
    MPI_File_seek( fh, 0, MPI_SEEK_SET ); 

    /* Read nothing (check status) */
    memset( &status, 0xff, sizeof(MPI_Status) );
    MPI_File_read( fh, buf, 0, MPI_INT, &status );
    MPI_Get_count( &status, MPI_INT, &count );
    if (count != 0) {
	errs++;
	fprintf( stderr, "Count not zero (%d) on read\n", count );
    }

    /* Write nothing (check status) */
    memset( &status, 0xff, sizeof(MPI_Status) );
    MPI_File_write( fh, buf, 0, MPI_INT, &status );
    MPI_Get_count( &status, MPI_INT, &count );
    if (count != 0) {
	errs++;
	fprintf( stderr, "Count not zero (%d) on write\n", count );
    }

    /* Read shared nothing (check status) */
    MPI_File_seek_shared( fh, 0, MPI_SEEK_SET );
    /* Read nothing (check status) */
    memset( &status, 0xff, sizeof(MPI_Status) );
    MPI_File_read_shared( fh, buf, 0, MPI_INT, &status );
    MPI_Get_count( &status, MPI_INT, &count );
    if (count != 0) {
	errs++;
	fprintf( stderr, "Count not zero (%d) on read shared\n", count );
    }
    
    /* Write nothing (check status) */
    memset( &status, 0xff, sizeof(MPI_Status) );
    MPI_File_write_shared( fh, buf, 0, MPI_INT, &status );
    MPI_Get_count( &status, MPI_INT, &count );
    if (count != 0) {
	errs++;
	fprintf( stderr, "Count not zero (%d) on write\n", count );
    }

    MPI_Barrier( comm );

    MPI_File_seek_shared( fh, 0, MPI_SEEK_SET );
    for (i=0; i<size; i++) buf[i] = -1;
    MPI_File_read_ordered( fh, buf, 1, MPI_INT, &status );
    if (buf[0] != rank) {
	errs++;
	fprintf( stderr, "%d: buf = %d\n", rank, buf[0] );
    }

    free( buf );

    MPI_File_close( &fh );

 fn_fail:
    MTest_Finalize( errs );
    MPI_Finalize();
    return 0;
}
Example no. 12
/////// need to fix to work with calcs and sequencedb
int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long long& size, string square){
	try {
		ValidCalculators validCalculator;
		Dist* distCalculator;
		if (m->isTrue(countends) == true) {
			for (int i=0; i<Estimators.size(); i++) {
				if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
					if (Estimators[i] == "nogaps")			{	distCalculator = new ignoreGaps();	}
					else if (Estimators[i] == "eachgap")	{	distCalculator = new eachGapDist();	}
					else if (Estimators[i] == "onegap")		{	distCalculator = new oneGapDist();	}
				}
			}
		}else {
			for (int i=0; i<Estimators.size(); i++) {
				if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
					if (Estimators[i] == "nogaps")		{	distCalculator = new ignoreGaps();					}
					else if (Estimators[i] == "eachgap"){	distCalculator = new eachGapIgnoreTermGapDist();	}
					else if (Estimators[i] == "onegap")	{	distCalculator = new oneGapIgnoreTermGapDist();		}
				}
			}
		}
		
		MPI_Status status;
		
		MPI_File outMPI;
		int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 

		//char* filename = new char[file.length()];
		//memcpy(filename, file.c_str(), file.length());
		
		char filename[1024];
		strcpy(filename, file.c_str());

		MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
		//delete filename;

		int startTime = time(NULL);
		
		string outputString = "";
		size = 0;
		
		if(startLine == 0){	outputString += toString(alignDB.getNumSeqs()) + "\n";	}
		
		for(int i=startLine;i<endLine;i++){
				
			string name = alignDB.get(i).getName();
			if (name.length() < 10) { //pad with spaces to make compatible
				while (name.length() < 10) {  name += " ";  }
			}
			outputString += name;
			
			for(int j=0;j<alignDB.getNumSeqs();j++){
				
				if (m->control_pressed) { delete distCalculator; return 0;  }
				
				distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
				double dist = distCalculator->getDist();
				
				outputString += "\t" + toString(dist);
			}
			
			outputString += "\n"; 

		
			if(i % 100 == 0){
				m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n"); 
			}
			
			
			//write this block of results to the output file
			int length = outputString.length();
			char* buf = new char[length];
			memcpy(buf, outputString.c_str(), length);
			
			MPI_File_write(outMPI, buf, length, MPI_CHAR, &status);
			size += outputString.length();
			outputString = "";
			delete [] buf;
		}
		
		m->mothurOutJustToScreen(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)+"\n");
		
		MPI_File_close(&outMPI);
		delete distCalculator;
		return 1;
	}
	catch(exception& e) {
		m->errorOut(e, "DistanceCommand", "driverMPI");
		exit(1);
	}
}
Example no. 13
int DistanceCommand::execute(){
	try {
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		int startTime = time(NULL);
		
		//save number of new sequence
		numNewFasta = alignDB.getNumSeqs();
		
		//sanity check the oldfasta and column file as well as add oldfasta sequences to alignDB
		if ((oldfastafile != "") && (column != ""))  {	if (!(sanityCheck())) { return 0; }  }
		
		if (m->control_pressed) { return 0; }
		
		int numSeqs = alignDB.getNumSeqs();
		cutoff += 0.005;
		
		if (!alignDB.sameLength()) {  m->mothurOut("[ERROR]: your sequences are not the same length, aborting."); m->mothurOutEndLine(); return 0; }
		
		string outputFile;
        
        map<string, string> variables; 
        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
		if (output == "lt") { //does the user want lower triangle phylip formatted file 
            variables["[outputtag]"] = "phylip";
			outputFile = getOutputFileName("phylip", variables);
			m->mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile);
			
			//output numSeqs to phylip formatted dist file
		}else if (output == "column") { //user wants column format
			outputFile = getOutputFileName("column", variables);
			outputTypes["column"].push_back(outputFile);
			
			//so we don't accidentally overwrite
			if (outputFile == column) { 
				string tempcolumn = column + ".old"; 
				rename(column.c_str(), tempcolumn.c_str());
			}
			
			m->mothurRemove(outputFile);
		}else { //assume square
			variables["[outputtag]"] = "square";
			outputFile = getOutputFileName("phylip", variables);
			m->mothurRemove(outputFile);
			outputTypes["phylip"].push_back(outputFile);
		}
		

#ifdef USE_MPI
		
		int pid, start, end; 
		int tag = 2001;
				
		MPI_Status status; 
		MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
		MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
		
		//each process gets where it should start and stop in the file
		if (output != "square") {
			start = int (sqrt(float(pid)/float(processors)) * numSeqs);
			end = int (sqrt(float(pid+1)/float(processors)) * numSeqs);
		}else{
			start = int ((float(pid)/float(processors)) * numSeqs);
			end = int ((float(pid+1)/float(processors)) * numSeqs);
		}
		
		if (output == "column") {
			MPI_File outMPI;
			int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 

			//char* filename = new char[outputFile.length()];
			//memcpy(filename, outputFile.c_str(), outputFile.length());
			
			char filename[1024];
			strcpy(filename, outputFile.c_str());
			
			MPI_File_open(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, &outMPI);
			//delete filename;

			if (pid == 0) { //you are the root process 
				
				//do your part
				string outputMyPart;
				
				driverMPI(start, end, outMPI, cutoff); 
				
				if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI);   return 0; }
			
				//wait on children
				for(int i = 1; i < processors; i++) { 
					if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);    return 0; }
					
					char buf[5];
					MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
				}
			}else { //you are a child process
				//do your part
				driverMPI(start, end, outMPI, cutoff); 
				
				if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   return 0; }
			
				char buf[5];
				strcpy(buf, "done"); 
				//tell parent you are done.
				MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
			}
			
			MPI_File_close(&outMPI);
			
		}else { //lower triangle format
			if (pid == 0) { //you are the root process 
			
				//do your part
				string outputMyPart;
				unsigned long long mySize;
				
				if (output != "square"){ driverMPI(start, end, outputFile, mySize); }
				else { driverMPI(start, end, outputFile, mySize, output); }
	
				if (m->control_pressed) {  outputTypes.clear();   return 0; }
				
				int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; //
				MPI_File outMPI;
				MPI_File inMPI;

				//char* filename = new char[outputFile.length()];
				//memcpy(filename, outputFile.c_str(), outputFile.length());
				
				char filename[1024];
				strcpy(filename, outputFile.c_str());

				MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
				//delete filename;

				//wait on children
				for(int b = 1; b < processors; b++) { 
					unsigned long long fileSize;
					
					if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   return 0; }
					
					MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); 
					
					string outTemp = outputFile + toString(b) + ".temp";

					char* buf = new char[outTemp.length()+1];
					strcpy(buf, outTemp.c_str());
					
					MPI_File_open(MPI_COMM_SELF, buf, MPI_MODE_DELETE_ON_CLOSE|MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);
					delete [] buf;

					unsigned long long count = 0;
					while (count < fileSize) { 
						char buf2[1];
						MPI_File_read(inMPI, buf2, 1, MPI_CHAR, &status);
						MPI_File_write(outMPI, buf2, 1, MPI_CHAR, &status);
						count += 1;
					}
					
					MPI_File_close(&inMPI); //deleted on close
				}
				
				MPI_File_close(&outMPI);
			}else { //you are a child process
				//do your part
				unsigned long long size;
				if (output != "square"){ driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size); }
				else { driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size, output); }
				
				if (m->control_pressed) {  return 0; }
			
				//tell parent you are done.
				MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD);
			}
		}
        
        MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
        
#else
				
	//#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
		//if you don't need to fork anything
		if(processors == 1){
			if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
			else { driver(0, numSeqs, outputFile, "square");  }
		}else{ //you have multiple processors
			createProcesses(outputFile, numSeqs);
		}
	//#else
		//ifstream inFASTA;
		//if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
		//else { driver(0, numSeqs, outputFile, "square");  }
	//#endif
	
#endif
		if (m->control_pressed) { outputTypes.clear();  m->mothurRemove(outputFile); return 0; }
		
		#ifdef USE_MPI
			MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
					
			if (pid == 0) { //only one process should output to screen
		#endif
		
		//if (output == "square") {  convertMatrix(outputFile); }
		
		ifstream fileHandle;
		fileHandle.open(outputFile.c_str());
		if(fileHandle) {
			m->gobble(fileHandle);
			if (fileHandle.eof()) { m->mothurOut(outputFile + " is blank. This can result if there are no distances below your cutoff.");  m->mothurOutEndLine(); }
		}
		
		//append the old column file to the new one
		if ((oldfastafile != "") && (column != ""))  {
			//we had to rename the column file so we didn't overwrite above, but we want to keep the old name
			if (outputFile == column) { 
				string tempcolumn = column + ".old";
				m->appendFiles(tempcolumn, outputFile);
				m->mothurRemove(tempcolumn);
			}else{
				m->appendFiles(outputFile, column);
				m->mothurRemove(outputFile);
				outputFile = column;
			}
			
			if (outputDir != "") { 
				string newOutputName = outputDir + m->getSimpleName(outputFile);
				rename(outputFile.c_str(), newOutputName.c_str());
				m->mothurRemove(outputFile);
				outputFile = newOutputName;
			}
		}

		
		#ifdef USE_MPI
			}
		#endif
		
		if (m->control_pressed) { outputTypes.clear();  m->mothurRemove(outputFile); return 0; }
		
		//set phylip file as new current phylipfile
		string current = "";
		itTypes = outputTypes.find("phylip");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
		}
		
		//set column file as new current columnfile
		itTypes = outputTypes.find("column");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
		}
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		m->mothurOut(outputFile); m->mothurOutEndLine();
		m->mothurOutEndLine();
		m->mothurOut("It took " + toString(time(NULL) - startTime) + " seconds to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();


		if (m->isTrue(compress)) {
			m->mothurOut("Compressing..."); m->mothurOutEndLine();
			m->mothurOut("(Replacing " + outputFile + " with " + outputFile + ".gz)"); m->mothurOutEndLine();
			system(("gzip -v " + outputFile).c_str());
			outputNames.push_back(outputFile + ".gz");
		}else { outputNames.push_back(outputFile); }

		return 0;
		
	}
	catch(exception& e) {
		m->errorOut(e, "DistanceCommand", "execute");
		exit(1);
	}
}
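In the lower-triangle branch above, the parent copies each child's temp file into the output one character per MPI_File_read/MPI_File_write pair, i.e. two MPI calls per byte. A buffered variant of that copy loop (a sketch; copyFileChunked is not part of the original class, and the surrounding open/close logic would stay unchanged):

#include <mpi.h>

// Sketch: copy fileSize bytes from an open input file to an open output file
// in 64 KB chunks instead of one character at a time.
void copyFileChunked(MPI_File inMPI, MPI_File outMPI, unsigned long long fileSize) {
    const int CHUNK = 65536;
    char* buf = new char[CHUNK];
    unsigned long long copied = 0;
    MPI_Status status;

    while (copied < fileSize) {
        int n = (fileSize - copied) < (unsigned long long)CHUNK ? (int)(fileSize - copied) : CHUNK;
        MPI_File_read(inMPI, buf, n, MPI_CHAR, &status);
        MPI_File_write(outMPI, buf, n, MPI_CHAR, &status);
        copied += n;
    }
    delete [] buf;
}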
Example no. 14
int main(int argc, char **argv)
{
    int buf[1024], amode, flag, mynod, len, i;
    MPI_File fh;
    MPI_Status status;
    MPI_Datatype newtype;
    MPI_Offset disp, offset;
    MPI_Group group;
    MPI_Datatype etype, filetype;
    char datarep[25], *filename;

    MPI_Init(&argc,&argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &mynod);

/* process 0 takes the file name as a command-line argument and 
   broadcasts it to other processes */
    if (!mynod) {
	i = 1;
	while ((i < argc) && strcmp("-fname", *argv)) {
	    i++;
	    argv++;
	}
	if (i >= argc) {
	    printf("\n*#  Usage: misc  <mpiparameter> -- -fname filename\n\n");
	    MPI_Abort(MPI_COMM_WORLD, 1);
	}
	argv++;
	len = strlen(*argv);
	filename = (char *) malloc(len+1);
	strcpy(filename, *argv);
	MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Bcast(filename, len+1, MPI_CHAR, 0, MPI_COMM_WORLD);
    }
    else {
	MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
	filename = (char *) malloc(len+1);
	MPI_Bcast(filename, len+1, MPI_CHAR, 0, MPI_COMM_WORLD);
    }


    MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR,
                  MPI_INFO_NULL, &fh);

    MPI_File_write(fh, buf, 1024, MPI_INT, &status);

    MPI_File_sync(fh);

    MPI_File_get_amode(fh, &amode);
    if (!mynod) printf("testing MPI_File_get_amode\n");
    if (amode != (MPI_MODE_CREATE | MPI_MODE_RDWR))
	printf("amode is %d, should be %d\n\n", amode, MPI_MODE_CREATE |
                      MPI_MODE_RDWR);

    MPI_File_get_atomicity(fh, &flag);
    if (flag) printf("atomicity is %d, should be 0\n", flag);
    if (!mynod) printf("setting atomic mode\n");
    MPI_File_set_atomicity(fh, 1);
    MPI_File_get_atomicity(fh, &flag);
    if (!flag) printf("atomicity is %d, should be 1\n", flag);
    MPI_File_set_atomicity(fh, 0);
    if (!mynod) printf("reverting back to nonatomic mode\n");

    MPI_Type_vector(10, 10, 20, MPI_INT, &newtype);
    MPI_Type_commit(&newtype);

    MPI_File_set_view(fh, 1000, MPI_INT, newtype, "native", MPI_INFO_NULL);
    if (!mynod) printf("testing MPI_File_get_view\n");
    MPI_File_get_view(fh, &disp, &etype, &filetype, datarep);
    if ((disp != 1000) || strcmp(datarep, "native"))
	printf("disp = %I64, datarep = %s, should be 1000, native\n\n", disp, datarep);

    if (!mynod) printf("testing MPI_File_get_byte_offset\n");
    MPI_File_get_byte_offset(fh, 10, &disp);
    if (disp != (1000+20*sizeof(int))) printf("byte offset = %lld, should be %d\n\n", (long long) disp, (int) (1000+20*sizeof(int)));

    MPI_File_get_group(fh, &group);

    if (!mynod) printf("testing MPI_File_set_size\n");
    MPI_File_set_size(fh, 1000+15*sizeof(int));
    MPI_Barrier(MPI_COMM_WORLD);
    MPI_File_sync(fh);
    MPI_File_get_size(fh, &disp);
    if (disp != 1000+15*sizeof(int)) printf("file size = %lld, should be %d\n\n", (long long) disp, (int) (1000+15*sizeof(int)));
 
    if (!mynod) printf("seeking to eof and testing MPI_File_get_position\n");
    MPI_File_seek(fh, 0, MPI_SEEK_END);
    MPI_File_get_position(fh, &disp);
    if (disp != 10) printf("file pointer posn = %lld, should be 10\n\n", (long long) disp);

    if (!mynod) printf("testing MPI_File_get_byte_offset\n");
    MPI_File_get_byte_offset(fh, disp, &offset);
    if (offset != (1000+20*sizeof(int))) printf("byte offset = %lld, should be %d\n\n", (long long) offset, (int) (1000+20*sizeof(int)));
    MPI_Barrier(MPI_COMM_WORLD);

    if (!mynod) printf("testing MPI_File_seek with MPI_SEEK_CUR\n");
    MPI_File_seek(fh, -10, MPI_SEEK_CUR);
    MPI_File_get_position(fh, &disp);
    MPI_File_get_byte_offset(fh, disp, &offset);
    if (offset != 1000)
	printf("file pointer posn in bytes = %I64, should be 1000\n\n", offset);

    if (!mynod) printf("preallocating disk space up to 8192 bytes\n");
    MPI_File_preallocate(fh, 8192);

    if (!mynod) printf("closing the file and deleting it\n");
    MPI_File_close(&fh);
    
    MPI_Barrier(MPI_COMM_WORLD);
    if (!mynod) MPI_File_delete(filename, MPI_INFO_NULL);

    MPI_Type_free(&newtype);
    MPI_Type_free(&filetype);
    MPI_Group_free(&group);
    free(filename);
    MPI_Finalize(); 
    return 0;
}
Example no. 15
void cache_flush_ind_all(int myid,
			 int numprocs,
			 int size,
			 char *filename)
{
    char *buf;
    MPI_File fh;
    double time;
    /* Calculate how much each processor must write */
    int64_t ind_size = ceil((double) size / numprocs);
    int64_t comp = 0;
    char *ind_filename = NULL;
    int ind_filename_size = 0;

    /* We will assume that we are using less than 1,000,000 processors 
     * therefore add 1 for NULL char and 6 for each individual processor 
     * for 7 total */
    ind_filename_size += strlen(filename) + 7;
    
    if ((ind_filename = (char *) malloc(ind_filename_size)) == NULL)
    {
	fprintf(stderr, "cache_flush_ind_all: malloc ind_filename of size"
		"%d failed\n", ind_filename_size);
    }
    sprintf(ind_filename, "%s%d", filename, myid);

    ind_size = ind_size * 1024 * 1024; /* convert ind_size from MBytes to bytes */
    assert(ind_size != 0);

    if ((buf = (char *) malloc(MAX_BUFFER_SIZE * sizeof(char))) == NULL)
    {
	fprintf(stderr, "cache_flush_all: malloc buf of size %d failed\n",
		    MAX_BUFFER_SIZE);
    }
    
    MPI_Barrier(MPI_COMM_WORLD);
    
    MPI_File_open(MPI_COMM_SELF, ind_filename,
		  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
    MPI_File_set_view(fh, 0, MPI_BYTE, MPI_BYTE, 
		      "native", MPI_INFO_NULL);
    MPI_File_seek(fh, 0, MPI_SEEK_SET);

    time = MPI_Wtime();

    while (comp != ind_size)
    {
	if (ind_size - comp > MAX_BUFFER_SIZE)
	{
	    comp += MAX_BUFFER_SIZE;
	    MPI_File_write(fh, buf, MAX_BUFFER_SIZE, 
			   MPI_BYTE, MPI_STATUS_IGNORE);
	}
	else
	{
	    int tmp_bytes = ind_size - comp;
	    comp += ind_size - comp;
	    MPI_File_write(fh, buf, tmp_bytes, 
			   MPI_BYTE, MPI_STATUS_IGNORE);
	}
	
    }

    MPI_File_sync(fh);
    time = MPI_Wtime() - time;
    MPI_File_close(&fh);
    MPI_Barrier(MPI_COMM_WORLD);
#if 0
    MPI_File_delete(ind_filename, MPI_INFO_NULL);
#endif
    if (myid == 0)
    {
	fprintf(stderr, 
		"cache_flush_ind_all: File(s) written of "
		"size %.1f MBytes\n"
		"Time: %f secs Bandwidth: %f MBytes / sec\n\n",
		comp*numprocs/1024.0/1024.0,
		time, comp*numprocs/1024.0/1024.0/time);
    }
    MPI_Barrier(MPI_COMM_WORLD);
    free(ind_filename);
    free(buf);
}
Example no. 16
void cache_flush_all(int myid,
		     int numprocs,
		     int size,
		     char *filename)
{
    char *buf;
    MPI_File fh;
    double time;
    /* Calculate how much each processor must write */
    int64_t ind_size = ceil((double) size / numprocs);
    int64_t comp = 0;

    ind_size = ind_size * 1024 * 1024; /* convert ind_size from MBytes to bytes */
    assert(ind_size != 0);

    if ((buf = (char *) malloc(MAX_BUFFER_SIZE * sizeof(char))) == NULL)
    {
	fprintf(stderr, "cache_flush_all: malloc buf of size %d failed\n",
		    MAX_BUFFER_SIZE);
    }
    
    MPI_Barrier(MPI_COMM_WORLD);
    
    MPI_File_open(MPI_COMM_WORLD, filename,
		  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
    MPI_File_set_view(fh, ind_size * myid, MPI_BYTE, MPI_BYTE, 
		      "native", MPI_INFO_NULL);
    MPI_File_seek(fh, 0, MPI_SEEK_SET);

    time = MPI_Wtime();

    while (comp != ind_size)
    {
	if (ind_size - comp > MAX_BUFFER_SIZE)
	{
	    comp += MAX_BUFFER_SIZE;
	    MPI_File_write(fh, buf, MAX_BUFFER_SIZE, 
			   MPI_BYTE, MPI_STATUS_IGNORE);
	}
	else
	{
	    int tmp_bytes = ind_size - comp;
	    comp += ind_size - comp;
	    MPI_File_write(fh, buf, tmp_bytes,
			   MPI_BYTE, MPI_STATUS_IGNORE);
	}
	
    }

    free(buf);

    MPI_File_sync(fh);
    time = MPI_Wtime() - time;
    MPI_File_close(&fh);
    MPI_Barrier(MPI_COMM_WORLD);
#if 0
    if (myid == 0)
    {
	MPI_File_delete(filename, MPI_INFO_NULL);
	fprintf(stderr, 
		"cache_flush_all: File %s written/deleted of "
		"size %.1f MBytes\n"
		"Time: %f secs Bandwidth: %f MBytes / sec\n\n",
		filename, comp*numprocs/1024.0/1024.0,
		time, comp*numprocs/1024.0/1024.0/time);
    }
    MPI_Barrier(MPI_COMM_WORLD);
#endif
}
Example no. 17
int main(int argc, char *argv[])
{
    int rank, size;
    const int N = atoi(argv[1]);

//    printf("Number of testcase = %d\n", N);

    MPI_Init (&argc, &argv);

    double start_time, end_time;
    MPI_Comm_rank (MPI_COMM_WORLD, &rank);
    MPI_Comm_size (MPI_COMM_WORLD, &size);
 //   printf("My rank is %d \n", rank); 
    
    //start_time = MPI_Wtime();

    MPI_File fin, fout;
    MPI_Status status;
    int *root_arr;
    int max_arr_size = size > N ? size : N;
    int ret = MPI_File_open(MPI_COMM_WORLD, argv[2], 
                MPI_MODE_RDONLY, MPI_INFO_NULL, &fin);
    
    if (rank == ROOT) {
        root_arr = new int[max_arr_size+3];
//        printf("Enter rank 0 statement ... \n");
        MPI_File_read(fin, root_arr, N, MPI_INT, &status);
/*        
        for (int i = 0; i < N; ++i)
             printf("[START] [Rank %d] root_arr[%d] = %d\n", rank, i, root_arr[i]); 
        printf("Out Rank 0 statement ... \n");
*/
    } 
    MPI_File_close(&fin);
    
    MPI_Barrier(MPI_COMM_WORLD); // Wait for rank0 to read file 
    
    int rank_num = size > N ? N : size;
    const int LAST = rank_num - 1;
    int num_per_node = N / rank_num;
    int *local_arr;
    int num_per_node_diff = N - num_per_node * rank_num;
    int diff = num_per_node_diff;
    bool has_remain = false;
    bool has_remain_rank = rank_num % 2 ? true : false;
    
    if (num_per_node_diff > 0) {
        // Send remaining elements to size - 1
        has_remain = true;
        if (rank == ROOT) {
            MPI_Send(root_arr + N - diff, diff, MPI_INT, LAST, 0, MPI_COMM_WORLD); 
        } else if (rank == LAST) {
            // Handle special case
            num_per_node += num_per_node_diff;
            local_arr = new int[num_per_node+1];
            MPI_Recv(local_arr + num_per_node - diff, diff, 
                    MPI_INT, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
        }
    } else if(rank == rank_num - 1) {
        local_arr = new int[num_per_node+1];
    }

    MPI_Barrier(MPI_COMM_WORLD); // Wait for rank0 to read file 
    if (rank != rank_num - 1)
        local_arr = new int[num_per_node+1];
	

    // MPI_Scatter (send_buf, send_count, send_type, recv_buf, recv_count, recv_type, root, comm)
	if (rank < LAST)
        MPI_Scatter(root_arr, num_per_node, MPI_INT, local_arr, 
                    num_per_node, MPI_INT, ROOT, MPI_COMM_WORLD);
    else
        MPI_Scatter(root_arr, num_per_node-diff, MPI_INT, local_arr, 
                    num_per_node-diff, MPI_INT, ROOT, MPI_COMM_WORLD);
    
   // printf("[Rank %d] num_per_node_size = %d\n" ,rank, num_per_node); 
    MPI_Barrier(MPI_COMM_WORLD);
/*
    for (int i = 0; i < num_per_node; ++i)
        printf("[BEFORE] [Rank %d] local_arr[%d] = %d\n", rank, i, local_arr[i]); 
*/
    if (rank < rank_num) {
        std::sort(local_arr, local_arr + num_per_node);
    }
 
    MPI_Barrier(MPI_COMM_WORLD);
/*
    for (int i = 0; i < num_per_node; ++i)
        printf("[AFTER] [Rank %d] local_arr[%d] = %d\n", rank, i, local_arr[i]); 
*/    
//    printf("rank %d is arrived\n", rank);
    
    MPI_Barrier(MPI_COMM_WORLD); // Wait for rank0 to read file 
    
    int *recv_buf, *send_buf;
    int recv_len, send_len, success;
    if (rank_num > 1 && rank < rank_num) {
        if (rank == ROOT) {
            send_len = num_per_node;
            MPI_Send(&send_len, 1, MPI_INT, rank+1, 0, MPI_COMM_WORLD);
            MPI_Recv(&success, 1, MPI_INT, rank+1,
                        MPI_ANY_TAG, MPI_COMM_WORLD, &status);
            MPI_Send(local_arr, send_len, MPI_INT, rank+1, 0, MPI_COMM_WORLD);
        } else {
            MPI_Recv(&recv_len, 1, MPI_INT, rank-1, 
                        MPI_ANY_TAG, MPI_COMM_WORLD, &status);
            
            success = 1;
            MPI_Send(&success, 1, MPI_INT, rank-1, 0, MPI_COMM_WORLD);
            
            send_len = recv_len + num_per_node;
            recv_buf = new int[recv_len];
            send_buf = new int[send_len];
            
  //          printf("RANK %d recv_len = %d SUCCESS\n", rank, recv_len);
            MPI_Recv(recv_buf, recv_len, MPI_INT, 
                        rank-1, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
            
//            printf("RANK %d complete recevice array  SUCCESS\n", rank);
            
            int i = 0, j = 0, cur = 0;
            while (i < recv_len && j < num_per_node) {
                // Do MERGE array 
                if (recv_buf[i] < local_arr[j]) {
                    send_buf[cur++] = recv_buf[i++];
                } else {
                    send_buf[cur++] = local_arr[j++];
                }
            }
            while (i < recv_len)
                send_buf[cur++] = recv_buf[i++];

            while (j < num_per_node)
                send_buf[cur++] = local_arr[j++];
 /*          
            for (int k = 0; k < cur; k++) {
                printf("[RANK %d] send_buf[%d] = %d\n", rank, k, send_buf[k]);

            }
 */           
            if(rank != LAST) { 
                MPI_Send(&send_len, 1, MPI_INT, rank+1, 0, MPI_COMM_WORLD);
 //               printf("RANK %d send_len SUCCESS\n", rank);
                MPI_Recv(&success, 1, MPI_INT, rank+1, 
                        MPI_ANY_TAG, MPI_COMM_WORLD, &status);

                MPI_Send(send_buf, send_len, MPI_INT, rank+1, 0, MPI_COMM_WORLD);
 //               printf("RANK %d complete sending  array  SUCCESS\n", rank);
            }

            if(rank != LAST) 
                delete [] send_buf;
            delete [] recv_buf;
        }
    }

    
 //  printf("rank %d is arrived\n", rank);
    MPI_Barrier(MPI_COMM_WORLD);
    MPI_File_open(MPI_COMM_WORLD, argv[3], 
        MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fout);
    
    if (rank == LAST) {
        if (rank == 0) send_buf = local_arr;
        MPI_File_write(fout, send_buf, N, MPI_INT, &status);
/*        
        for (int i = 0; i < N; ++i) {
            printf("[FINAL] [Rank %d] ans[%d] = %d\n", rank, i, send_buf[i]);
        }
*/
    }
    MPI_File_close(&fout);
    
//    printf("CLOSE rank %d is arrived\n", rank);
    MPI_Barrier(MPI_COMM_WORLD);
    
    if (rank != 0) {
        delete []  local_arr;
     //   printf("[FREE] [RANK %d] SUCCESS FREE\n", rank);
    } else {
        delete [] root_arr;
        delete [] local_arr;
    }
    MPI_Finalize();
     
    return 0;
}
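The two-pointer while-loops in the middle of Example 17 implement a standard merge of two sorted ranges; since each local block is already sorted with std::sort, the same step can be written with std::merge (a sketch, not the original code):

#include <algorithm>

// Sketch: merge the sorted block received from the previous rank with this
// rank's sorted local block into send_buf, as the manual loops above do.
void mergeBlocks(const int* recv_buf, int recv_len,
                 const int* local_arr, int local_len, int* send_buf)
{
    std::merge(recv_buf, recv_buf + recv_len,
               local_arr, local_arr + local_len,
               send_buf);
}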
Example no. 18
int main(int argc, char* argv[]){
	clock_t start, end;
	unsigned int cpu_time_used;
	int rank, numProcesses;
	unsigned int i, j, blockLength;
	unsigned int *compBlockLengthArray;
	unsigned int distinctCharacterCount, combinedHuffmanNodes, frequency[256], inputFileLength, compBlockLength;
	unsigned char *inputFileData, *compressedData, writeBit = 0, bitsFilled = 0, bitSequence[255], bitSequenceLength = 0;
	FILE *inputFile;

	MPI_Init( &argc, &argv);
	MPI_File mpi_inputFile, mpi_compressedFile;
	MPI_Status status;

	// get rank and number of processes value
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &numProcesses);

	// get file size
	if(rank == 0){
		inputFile = fopen(argv[1], "rb");
		fseek(inputFile, 0, SEEK_END);
		inputFileLength = ftell(inputFile);
		fseek(inputFile, 0, SEEK_SET);
		fclose(inputFile);
	}

	//broadcast size of file to all the processes 
	MPI_Bcast(&inputFileLength, 1, MPI_UNSIGNED, 0, MPI_COMM_WORLD);

	// get file chunk size

	blockLength = inputFileLength / numProcesses;

	if(rank == (numProcesses-1)){
		blockLength = inputFileLength - ((numProcesses-1) * blockLength);	
	}
	
	// open file in each process and read data and allocate memory for compressed data
	MPI_File_open(MPI_COMM_WORLD, argv[1], MPI_MODE_RDONLY, MPI_INFO_NULL, &mpi_inputFile);
	// seek using the uniform chunk size; the last rank's blockLength may be larger, so rank * blockLength would overshoot
	MPI_File_seek(mpi_inputFile, rank * (inputFileLength / numProcesses), MPI_SEEK_SET);

	inputFileData = (unsigned char *)malloc(blockLength * sizeof(unsigned char));	
	MPI_File_read(mpi_inputFile, inputFileData, blockLength, MPI_UNSIGNED_CHAR, &status);

	// start clock
	if(rank == 0){
		start = clock();
	}
	
	// find the frequency of each symbols
	for (i = 0; i < 256; i++){
		frequency[i] = 0;
	}
	for (i = 0; i < blockLength; i++){
		frequency[inputFileData[i]]++;
	}
	
	compressedData = (unsigned char *)malloc(blockLength * sizeof(unsigned char));	
	compBlockLengthArray = (unsigned int *)malloc(numProcesses * sizeof(unsigned int));
	
	// initialize nodes of huffman tree
	distinctCharacterCount = 0;
	for (i = 0; i < 256; i++){
		if (frequency[i] > 0){
			huffmanTreeNode[distinctCharacterCount].count = frequency[i];
			huffmanTreeNode[distinctCharacterCount].letter = i;
			huffmanTreeNode[distinctCharacterCount].left = NULL;
			huffmanTreeNode[distinctCharacterCount].right = NULL;
			distinctCharacterCount++;
		}
	}

	// build tree 
	for (i = 0; i < distinctCharacterCount - 1; i++){
		combinedHuffmanNodes = 2 * i;
		sortHuffmanTree(i, distinctCharacterCount, combinedHuffmanNodes);
		buildHuffmanTree(i, distinctCharacterCount, combinedHuffmanNodes);
	}
	
	// build table having the bitSequence sequence and its length
	buildHuffmanDictionary(head_huffmanTreeNode, bitSequence, bitSequenceLength);

	// compress
	compBlockLength = 0;
	for (i = 0; i < blockLength; i++){
		for (j = 0; j < huffmanDictionary[inputFileData[i]].bitSequenceLength; j++){
			if (huffmanDictionary[inputFileData[i]].bitSequence[j] == 0){
				writeBit = writeBit << 1;
				bitsFilled++;
			}
			else{
				writeBit = (writeBit << 1) | 01;
				bitsFilled++;
			}
			if (bitsFilled == 8){
				compressedData[compBlockLength] = writeBit;
				bitsFilled = 0;
				writeBit = 0;
				compBlockLength++;
			}
		}
	}

	if (bitsFilled != 0){
		for (i = 0; (unsigned char)i < 8 - bitsFilled; i++){
			writeBit = writeBit << 1;
		}
		compressedData[compBlockLength] = writeBit;
		compBlockLength++;
	}

	// account for the 1024-byte frequency table (256 unsigned ints) written in front of each compressed block
	compBlockLength = compBlockLength + 1024;
	compBlockLengthArray[rank] = compBlockLength;

	// send the length of each process to process 0
	MPI_Gather(&compBlockLength, 1, MPI_UNSIGNED, compBlockLengthArray, 1, MPI_UNSIGNED, 0, MPI_COMM_WORLD);

	// update the data to reflect the offset
	if(rank == 0){
		compBlockLengthArray[0] = (numProcesses + 2) * 4 + compBlockLengthArray[0];
		for(i = 1; i < numProcesses; i++){
			compBlockLengthArray[i] = compBlockLengthArray[i] + compBlockLengthArray[i - 1];
		}
		for(i = (numProcesses - 1); i > 0; i--){
			compBlockLengthArray[i] = compBlockLengthArray[i - 1];
		}
		compBlockLengthArray[0] = (numProcesses + 2) * 4;
	}
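	/* Illustration (values assumed, not from a real run): with numProcesses = 2 and gathered
	   block lengths {100, 80}, the header is (2 + 2) * 4 = 16 bytes (inputFileLength,
	   numProcesses, and one offset per process), so the loop above turns the array into the
	   exclusive prefix sums {16, 116} - compBlockLengthArray[i] is where block i starts. */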

	// broadcast size of each compressed data block to all the processes 
	MPI_Bcast(compBlockLengthArray, numProcesses, MPI_UNSIGNED, 0, MPI_COMM_WORLD);

	// get time
	if(rank == 0){
		end = clock();
		cpu_time_used = ((end - start)) * 1000 / CLOCKS_PER_SEC;
		printf("Time taken: %d:%d s\n", cpu_time_used / 1000, cpu_time_used % 1000);
	}
	
	// write data to file
	MPI_File_open(MPI_COMM_WORLD, argv[2], MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &mpi_compressedFile);

	if(rank == 0){
		MPI_File_write(mpi_compressedFile, &inputFileLength, 1, MPI_UNSIGNED, MPI_STATUS_IGNORE);
		MPI_File_write(mpi_compressedFile, &numProcesses, 1, MPI_UNSIGNED, MPI_STATUS_IGNORE);
		MPI_File_write(mpi_compressedFile, compBlockLengthArray, numProcesses, MPI_UNSIGNED, MPI_STATUS_IGNORE);
	}
	MPI_File_seek(mpi_compressedFile, compBlockLengthArray[rank], MPI_SEEK_SET);
	MPI_File_write(mpi_compressedFile, frequency, 256, MPI_UNSIGNED, MPI_STATUS_IGNORE);
	MPI_File_write(mpi_compressedFile, compressedData, (compBlockLength - 1024), MPI_UNSIGNED_CHAR, MPI_STATUS_IGNORE);

	// close open files
	MPI_File_close(&mpi_compressedFile); 	
	MPI_File_close(&mpi_inputFile);
	MPI_Barrier(MPI_COMM_WORLD);
	
	free(head_huffmanTreeNode);
	free(current_huffmanTreeNode);
	free(compBlockLengthArray);
	free(inputFileData);
	free(compressedData);
	MPI_Finalize();
}
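For reference, the header written above by rank 0 consists of the uncompressed length, the process count, and the per-block offset table, followed by each rank's 1024-byte frequency table and its packed bits. Below is a minimal, hypothetical reader sketch of that header; the file name and all variable names are assumptions for illustration, not part of the original example.

/* Hypothetical header reader for the layout produced above (illustrative only). */
MPI_File in;
unsigned int originalLength, np;
MPI_File_open(MPI_COMM_SELF, "compressed.bin", MPI_MODE_RDONLY, MPI_INFO_NULL, &in);
MPI_File_read(in, &originalLength, 1, MPI_UNSIGNED, MPI_STATUS_IGNORE); /* uncompressed size */
MPI_File_read(in, &np, 1, MPI_UNSIGNED, MPI_STATUS_IGNORE);             /* number of blocks  */
unsigned int *blockOffset = malloc(np * sizeof(unsigned int));
MPI_File_read(in, blockOffset, np, MPI_UNSIGNED, MPI_STATUS_IGNORE);    /* offset table      */
/* blockOffset[i] points at block i: 256 unsigned frequencies followed by its packed bits */
MPI_File_close(&in);
free(blockOffset);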
Esempio n. 19
0
int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int initFlag;
    MPI_Initialized(&initFlag);
    if (!initFlag) {
        printf("MPI init failed\n");
        return 8;
    }

    MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    int l,mm=5;
    int nx,ny,nz,lt,nedge;
    float frequency;
    float velmax;
    float dt;
    int ncx_shot1,ncy_shot1,ncz_shot;
    int ishot,ncy_shot,ncx_shot;
    float unit;
    int nxshot,nyshot,dxshot,dyshot;
    char infile[80],outfile[80],logfile[80],tmp[80], nodelog[84];
    FILE  *fin, *fout, *flog, *fnode;
    MPI_File mpi_flog, mpi_fout;
    MPI_Status mpi_status;
    struct timeval start,end;
    float all_time;

    float *u, *v, *w, *up, *up1, *up2,
            *vp, *vp1, *vp2, *wp, *wp1, *wp2,
            *us, *us1, *us2, *vs, *vs1, *vs2,
            *ws, *ws1, *ws2, *vpp, *density, *vss;
    float c[5][7];
    float *wave;
    float nshot,t0,tt,c0;
    float dtx,dtz,dtxz,dr1,dr2,dtx4,dtz4,dtxz4;
    char message[100];

    if(argc<4)
    {
        printf("please add 3 parameter: inpurfile, outfile, logfile\n");
        exit(1);
    }

    message[99] = 0;    // Avoid string buffer overrun

    strcpy(infile,argv[1]);
    strcpy(outfile,argv[2]);
    strcpy(logfile,argv[3]);
    strcpy(nodelog,logfile);
    strcat(nodelog, ".node");

    strcpy(tmp,"date ");
    strncat(tmp, ">> ",3);
    strncat(tmp, logfile, strlen(logfile));
    if (proc_rank == 0) {
        flog = fopen(logfile,"w");
        fprintf(flog,"------------start time------------\n");
        fclose(flog);
        system(tmp);
        gettimeofday(&start,NULL);
    }
    fin = fopen(infile,"r");
    if(fin == NULL)
    {
        printf("file %s is  not exist\n",infile);
        exit(2);
    }
    fscanf(fin,"nx=%d\n",&nx);
    fscanf(fin,"ny=%d\n",&ny);
    fscanf(fin,"nz=%d\n",&nz);
    fscanf(fin,"lt=%d\n",&lt);
    fscanf(fin,"nedge=%d\n",&nedge);
    fscanf(fin,"ncx_shot1=%d\n",&ncx_shot1);
    fscanf(fin,"ncy_shot1=%d\n",&ncy_shot1);
    fscanf(fin,"ncz_shot=%d\n",&ncz_shot);
    fscanf(fin,"nxshot=%d\n",&nxshot);
    fscanf(fin,"nyshot=%d\n",&nyshot);
    fscanf(fin,"frequency=%f\n",&frequency);
    fscanf(fin,"velmax=%f\n",&velmax);
    fscanf(fin,"dt=%f\n",&dt);
    fscanf(fin,"unit=%f\n",&unit);
    fscanf(fin,"dxshot=%d\n",&dxshot);
    fscanf(fin,"dyshot=%d\n",&dyshot);
    fclose(fin);
    if (proc_rank == 0) {   // Master
        printf("\n--------workload parameter--------\n");
        printf("nx=%d\n",nx);
        printf("ny=%d\n",ny);
        printf("nz=%d\n",nz);
        printf("lt=%d\n",lt);
        printf("nedge=%d\n",nedge);
        printf("ncx_shot1=%d\n",ncx_shot1);
        printf("ncy_shot1=%d\n",ncy_shot1);
        printf("ncz_shot=%d\n",ncz_shot);
        printf("nxshot=%d\n",nxshot);
        printf("nyshot=%d\n",nyshot);
        printf("frequency=%f\n",frequency);
        printf("velmax=%f\n",velmax);
        printf("dt=%f\n",dt);
        printf("unit=%f\n",unit);
        printf("dxshot=%d\n",dxshot);
        printf("dyshot=%d\n\n",dyshot);

        flog = fopen(logfile,"a");
        fprintf(flog,"\n--------workload parameter--------\n");
        fprintf(flog,"nx=%d\n",nx);
        fprintf(flog,"ny=%d\n",ny);
        fprintf(flog,"nz=%d\n",nz);
        fprintf(flog,"lt=%d\n",lt);
        fprintf(flog,"nedge=%d\n",nedge);
        fprintf(flog,"ncx_shot1=%d\n",ncx_shot1);
        fprintf(flog,"ncy_shot1=%d\n",ncy_shot1);
        fprintf(flog,"ncz_shot=%d\n",ncz_shot);
        fprintf(flog,"nxshot=%d\n",nxshot);
        fprintf(flog,"nyshot=%d\n",nyshot);
        fprintf(flog,"frequency=%f\n",frequency);
        fprintf(flog,"velmax=%f\n",velmax);
        fprintf(flog,"dt=%f\n",dt);
        fprintf(flog,"unit=%f\n",unit);
        fprintf(flog,"dxshot=%d\n",dxshot);
        fprintf(flog,"dyshot=%d\n\n",dyshot);
        fclose(flog);
        fnode = fopen(nodelog, "a");
        fprintf(fnode,"World size: %d\n", world_size);
        fclose(fnode);
    }

#ifdef _WITH_PHI
    // [Afa] It is recommended that data be 64-byte aligned for Intel Xeon Phi.
    // Upon successful completion, posix_memalign() shall return zero
    if (posix_memalign((void **)&u  , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&v  , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&w  , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&up , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&up1, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&up2, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&vp , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&vp1, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&vp2, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&wp , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&wp1, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&wp2, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&us , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&us1, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&us2, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&vs , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&vs1, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&vs2, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&ws , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&ws1, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&ws2, 64, sizeof(float)*nz*ny*nx)) return 2;
#else
    u       = (float*)malloc(sizeof(float)*nz*ny*nx);
    v       = (float*)malloc(sizeof(float)*nz*ny*nx);
    w       = (float*)malloc(sizeof(float)*nz*ny*nx);
    up      = (float*)malloc(sizeof(float)*nz*ny*nx);
    up1     = (float*)malloc(sizeof(float)*nz*ny*nx);
    up2     = (float*)malloc(sizeof(float)*nz*ny*nx);
    vp      = (float*)malloc(sizeof(float)*nz*ny*nx);
    vp1     = (float*)malloc(sizeof(float)*nz*ny*nx);
    vp2     = (float*)malloc(sizeof(float)*nz*ny*nx);
    wp      = (float*)malloc(sizeof(float)*nz*ny*nx);
    wp1     = (float*)malloc(sizeof(float)*nz*ny*nx);
    wp2     = (float*)malloc(sizeof(float)*nz*ny*nx);
    us      = (float*)malloc(sizeof(float)*nz*ny*nx);
    us1     = (float*)malloc(sizeof(float)*nz*ny*nx);
    us2     = (float*)malloc(sizeof(float)*nz*ny*nx);
    vs      = (float*)malloc(sizeof(float)*nz*ny*nx);
    vs1     = (float*)malloc(sizeof(float)*nz*ny*nx);
    vs2     = (float*)malloc(sizeof(float)*nz*ny*nx);
    ws      = (float*)malloc(sizeof(float)*nz*ny*nx);
    ws1     = (float*)malloc(sizeof(float)*nz*ny*nx);
    ws2     = (float*)malloc(sizeof(float)*nz*ny*nx);
#endif
    // [Afa] Those are not offloaded to phi yet
    vpp     = (float*)malloc(sizeof(float)*nz*ny*nx);
    density = (float*)malloc(sizeof(float)*nz*ny*nx);
    vss     = (float*)malloc(sizeof(float)*nz*ny*nx);
    wave = (float*)malloc(sizeof(float)*lt);

    nshot=nxshot*nyshot;
    t0=1.0/frequency;

    // [Afa] Branch optimization
    // TODO: Will compiler optimize the `condition'?
    //       i.e Can I write `for(i=0;i< (nz < 210 ? nz : 210);i++)'?
    int condition = nz < 210 ? nz : 210;
    for(int i=0; i < condition;i++) {
        for(int j=0;j<ny;j++) {
            for(int k=0;k<nx;k++) {
                vpp[i*ny*nx+j*nx+k]=2300.;
                vss[i*ny*nx+j*nx+k]=1232.;
                density[i*ny*nx+j*nx+k]=1.;
            }
        }
    }

    condition = nz < 260 ? nz : 260;
    for(int i=210; i < condition;i++) {
        for(int j=0;j<ny;j++) {
            for(int k=0;k<nx;k++) {
                vpp[i*ny*nx+j*nx+k]=2800.;
                vss[i*ny*nx+j*nx+k]=1509.;
                density[i*ny*nx+j*nx+k]=2.;
            }
        }
    }

    for(int i=260;i<nz;i++) {
        for(int j=0;j<ny;j++) {
            for(int k=0;k<nx;k++)
            {
                vpp[i*ny*nx+j*nx+k]=3500.;
                vss[i*ny*nx+j*nx+k]=1909.;
                density[i*ny*nx+j*nx+k]=2.5;
            }
        }
    }

    for(l=0;l<lt;l++)
    {
        tt=l*dt;
        tt=tt-t0;
        float sp=PIE*frequency*tt;
        float fx=100000.*exp(-sp*sp)*(1.-2.*sp*sp);
        wave[l]=fx;
    }

    // TODO: [Afa] Data produced by code below are static. See table below
    if(mm==5)
    {
        c0=-2.927222164;
        c[0][0]=1.66666665;
        c[1][0]=-0.23809525;
        c[2][0]=0.03968254;
        c[3][0]=-0.004960318;
        c[4][0]=0.0003174603;
    }

    c[0][1]=0.83333;
    c[1][1]=-0.2381;
    c[2][1]=0.0595;
    c[3][1]=-0.0099;
    c[4][1]=0.0008;

    for(int i=0;i<5;i++)
        for(int j=0;j<5;j++)
            c[j][2+i]=c[i][1]*c[j][1];
    /*
     * mm == 5, c =
     * 1.666667    0.833330    0.694439    -0.198416   0.049583    -0.008250   0.000667
     * -0.238095   -0.238100   -0.198416   0.056692    -0.014167   0.002357    -0.000190
     * 0.039683    0.059500    0.049583    -0.014167   0.003540    -0.000589   0.000048
     * -0.004960   -0.009900   -0.008250   0.002357    -0.000589   0.000098    -0.000008
     * 0.000317    0.000800    0.000667    -0.000190   0.000048    -0.000008   0.000001
    */

    /*
     * mm != 5, c =
     * 0.000000    0.833330    0.694439    -0.198416   0.049583    -0.008250   0.000667
     * 0.000000    -0.238100   -0.198416   0.056692    -0.014167   0.002357    -0.000190
     * 0.000000    0.059500    0.049583    -0.014167   0.003540    -0.000589   0.000048
     * 0.000000    -0.009900   -0.008250   0.002357    -0.000589   0.000098    -0.000008
     * 0.000000    0.000800    0.000667    -0.000190   0.000048    -0.000008   0.000001
     */

    dtx=dt/unit;
    dtz=dt/unit;
    dtxz=dtx*dtz;

    dr1=dtx*dtx/2.;
    dr2=dtz*dtz/2.;

    dtx4=dtx*dtx*dtx*dtx;
    dtz4=dtz*dtz*dtz*dtz;
    dtxz4=dtx*dtx*dtz*dtz;

    if (proc_rank == 0) {
        fout = fopen(outfile, "wb");
        fclose(fout);
    }   // [Afa] Truncate file. We need a prettier way

    MPI_Barrier(MPI_COMM_WORLD);
    MPI_File_open(MPI_COMM_WORLD, outfile, MPI_MODE_WRONLY, MPI_INFO_NULL, &mpi_fout);
    MPI_File_open(MPI_COMM_WORLD, nodelog, MPI_MODE_WRONLY, MPI_INFO_NULL, &mpi_flog);
    // [Afa] *About Nodes Number* nshot (i.e nxshot * nyshot) should be multiple of node numbers,
    //       or there will be hungry processes
    int loop_per_proc = ((int)nshot % world_size == 0) ? (nshot / world_size) : (nshot / world_size + 1);
    printf("\x1B[31mDEBUG:\x1b[39;49m World size %d, Loop per Proc %d, nshot %f, I am No. %d\n",
           world_size, loop_per_proc, nshot, proc_rank);
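    // Worked illustration (using the para1.in value mentioned in the comment below): with
    // nshot = 20 and world_size = 8, loop_per_proc = 20/8 + 1 = 3, so rank 0 handles shots
    // 1-3, rank 1 shots 4-6, ..., rank 6 gets 19, 20 and one "HUNGRY" slot, and rank 7 is
    // entirely idle - which is why the hungry branch inside the loop just logs and continues.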

    //    for(ishot=1;ishot<=nshot;ishot++)   // [Afa] nshot is 20 in para1.in, but 200 in para2.in
    for (int loop_index = 0; loop_index < loop_per_proc; ++loop_index)
    {
        ishot = loop_index + proc_rank * loop_per_proc + 1; // [Afa] See commented code 2 lines above to understand this line
        if (ishot <= nshot) { // [Afa] ishot <= nshot
            printf("shot %d, process %d\n",ishot, proc_rank);
            snprintf(message, 29, "shot %6d, process %6d\n", ishot, proc_rank);     // [Afa] Those numbers:
            MPI_File_seek(mpi_flog, 28 * (ishot - 1), MPI_SEEK_SET);                // 28: string without '\0'
            MPI_File_write(mpi_flog, message, 28, MPI_CHAR, &mpi_status);           // 29: with '\0'
        } else {
            printf("shot HUNGRY, process %d\n", proc_rank);
            snprintf(message, 29, "shot HUNGRY, process %6d\n", proc_rank);
            MPI_File_seek(mpi_flog, 28 * (ishot - 1), MPI_SEEK_SET);
            MPI_File_write(mpi_flog, message, 28, MPI_CHAR, &mpi_status);
            continue;
        }
        ncy_shot=ncy_shot1+(ishot/nxshot)*dyshot;
        ncx_shot=ncx_shot1+(ishot%nxshot)*dxshot;

        // [Afa] Matrix is zeroed in every loop
        // i.e. The relation between those matrices in each loop is pretty loose
        // Matrices not zeroed are: vpp, density, vss and wave, and they're not changed (read-only)
        // We only need to partially collect matrix `up'

        // TODO: [Afa] Get a better way to pass those pointers, and mark them as `restrict'
        // And WHY are they using cpp as extension? C++11 doesn't support `restrict'
        zero_matrices(u, w, ws2, up2, vp1, wp1, us, ws, wp, us2, us1, wp2,
                      v, up1, nz, nx, up, ny, ws1, vs, vp2, vs1, vs2, vp);

        for(l=1;l<=lt;l++)
        {
            float xmax=l*dt*velmax;
            int nleft=ncx_shot-xmax/unit-10;
            int nright=ncx_shot+xmax/unit+10;
            int nfront=ncy_shot-xmax/unit-10;
            int nback=ncy_shot+xmax/unit+10;
            int ntop=ncz_shot-xmax/unit-10;
            int nbottom=ncz_shot+xmax/unit+10;
            if(nleft<5) nleft=5;
            if(nright>nx-5) nright=nx-5;
            if(nfront<5) nfront=5;
            if(nback>ny-5) nback=ny-5;
            if(ntop<5) ntop=5;
            if(nbottom>nz-5) nbottom=nz-5;
            ntop = ntop-1;
            nfront = nfront-1;
            nleft = nleft-1;

            // Although up, vp, wp, us, vs, ws are modified below, we're sure there's no race condition.
            // Each loop accesses a UNIQUE element in the array, and the value is not used, no need to worry about the dirty cache
#pragma omp parallel for shared(u) shared(v) shared(w) shared(up1) shared(up2) shared(vp1) shared(vp2) shared(wp1) \
    shared(wp2) shared(us) shared(us1) shared(us2) shared(vs) shared(vs1) shared(vs2) shared(ws) shared(ws1) shared(ws2) \
    shared(vss) shared(vpp) shared(dr1) shared(dr2) shared(dtz) shared(dtx) shared(ncx_shot) shared(ncy_shot) shared(ncz_shot) \
    shared(wave)
            for(int k=ntop;k<nbottom;k++) {
                for(int j=nfront;j<nback;j++) {
                    for(int i=nleft;i<nright;i++)
                    {
                        float vvp2,drd1,drd2,vvs2;
                        float px,sx;
                        if(i==ncx_shot-1&&j==ncy_shot-1&&k==ncz_shot-1)
                        {
                            px=1.;
                            sx=0.;
                        }
                        else
                        {
                            px=0.;
                            sx=0.;
                        }
                        vvp2=vpp[k*ny*nx+j*nx+i]*vpp[k*ny*nx+j*nx+i];
                        drd1=dr1*vvp2;
                        drd2=dr2*vvp2;

                        vvs2=vss[k*ny*nx+j*nx+i]*vss[k*ny*nx+j*nx+i];
                        drd1=dr1*vvs2;
                        drd2=dr2*vvs2;

                        float tempux2=0.0f;
                        float tempuy2=0.0f;
                        float tempuz2=0.0f;
                        float tempvx2=0.0f;
                        float tempvy2=0.0f;
                        float tempvz2=0.0f;
                        float tempwx2=0.0f;
                        float tempwy2=0.0f;
                        float tempwz2=0.0f;
                        float tempuxz=0.0f;
                        float tempuxy=0.0f;
                        float tempvyz=0.0f;
                        float tempvxy=0.0f;
                        float tempwxz=0.0f;
                        float tempwyz=0.0f;

                        // This will make the compiler do the vectorization
                        for(int kk=1;kk<=mm;kk++) {
                            tempux2 += c[kk-1][0]*(u[k*ny*nx+j*nx+(i+kk)]+u[k*ny*nx+j*nx+(i-kk)]);
                            tempuy2 += c[kk-1][0]*(u[k*ny*nx+(j+kk)*nx+i]+u[k*ny*nx+(j-kk)*nx+i]);
                            tempuz2 += c[kk-1][0]*(u[(k+kk)*ny*nx+j*nx+i]+u[(k-kk)*ny*nx+j*nx+i]);
                        }
                        for(int kk=1;kk<=mm;kk++) {
                            tempvx2 += c[kk-1][0]*(v[k*ny*nx+j*nx+(i+kk)]+v[k*ny*nx+j*nx+(i-kk)]);
                            tempvy2 += c[kk-1][0]*(v[k*ny*nx+(j+kk)*nx+i]+v[k*ny*nx+(j-kk)*nx+i]);
                            tempvz2 += c[kk-1][0]*(v[(k+kk)*ny*nx+j*nx+i]+v[(k-kk)*ny*nx+j*nx+i]);
                        }
                        for(int kk=1;kk<=mm;kk++) {
                            tempwx2 += c[kk-1][0]*(w[k*ny*nx+j*nx+(i+kk)]+w[k*ny*nx+j*nx+(i-kk)]);
                            tempwy2 += c[kk-1][0]*(w[k*ny*nx+(j+kk)*nx+i]+w[k*ny*nx+(j-kk)*nx+i]);
                            tempwz2 += c[kk-1][0]*(w[(k+kk)*ny*nx+j*nx+i]+w[(k-kk)*ny*nx+j*nx+i]);
                        }

                         //for(kk=1;kk<=mm;kk++) end

                        tempux2=(tempux2+c0*u[k*ny*nx+j*nx+i])*vvp2*dtx*dtx;
                        // u[k][j][i]
                        tempuy2=(tempuy2+c0*u[k*ny*nx+j*nx+i])*vvs2*dtx*dtx;
                        // u[k][j][i]
                        tempuz2=(tempuz2+c0*u[k*ny*nx+j*nx+i])*vvs2*dtz*dtz;
                        // u[k][j][i]

                        tempvx2=(tempvx2+c0*v[k*ny*nx+j*nx+i])*vvs2*dtx*dtx;
                        tempvy2=(tempvy2+c0*v[k*ny*nx+j*nx+i])*vvp2*dtx*dtx;
                        tempvz2=(tempvz2+c0*v[k*ny*nx+j*nx+i])*vvs2*dtz*dtz;

                        tempwx2=(tempwx2+c0*w[k*ny*nx+j*nx+i])*vvs2*dtx*dtx;
                        tempwy2=(tempwy2+c0*w[k*ny*nx+j*nx+i])*vvs2*dtx*dtx;
                        tempwz2=(tempwz2+c0*w[k*ny*nx+j*nx+i])*vvp2*dtz*dtz;

                        // This loop is auto-vectorized
                        for(int kk=1;kk<=mm;kk++)
                        {
                            for(int kkk=1;kkk<=mm;kkk++)
                            {
                                tempuxz=tempuxz+c[kkk-1][1+kk]*(u[(k+kkk)*ny*nx+j*nx+(i+kk)]
                                        -u[(k-kkk)*ny*nx+j*nx+(i+kk)]
                                        +u[(k-kkk)*ny*nx+j*nx+(i-kk)]
                                        -u[(k+kkk)*ny*nx+j*nx+(i-kk)]);
                                // u[k+kkk][j][i+kk], u[k-kkk][j][i+kk], u[k-kkk][j][i-kk], u[k+kkk][j][i-kk]
                                tempuxy=tempuxy+c[kkk-1][1+kk]*(u[k*ny*nx+(j+kkk)*nx+(i+kk)]
                                        -u[k*ny*nx+(j-kkk)*nx+(i+kk)]
                                        +u[k*ny*nx+(j-kkk)*nx+(i-kk)]
                                        -u[k*ny*nx+(j+kkk)*nx+(i-kk)]);

                                tempvyz=tempvyz+c[kkk-1][1+kk]*(v[(k+kkk)*ny*nx+(j+kk)*nx+i]
                                        -v[(k-kkk)*ny*nx+(j+kk)*nx+i]
                                        +v[(k-kkk)*ny*nx+(j-kk)*nx+i]
                                        -v[(k+kkk)*ny*nx+(j-kk)*nx+i]);
                                tempvxy=tempvxy+c[kkk-1][1+kk]*(v[k*ny*nx+(j+kkk)*nx+(i+kk)]
                                        -v[k*ny*nx+(j-kkk)*nx+(i+kk)]
                                        +v[k*ny*nx+(j-kkk)*nx+(i-kk)]
                                        -v[k*ny*nx+(j+kkk)*nx+(i-kk)]);

                                tempwyz=tempwyz+c[kkk-1][1+kk]*(w[(k+kkk)*ny*nx+(j+kk)*nx+i]
                                        -w[(k-kkk)*ny*nx+(j+kk)*nx+i]
                                        +w[(k-kkk)*ny*nx+(j-kk)*nx+i]
                                        -w[(k+kkk)*ny*nx+(j-kk)*nx+i]);
                                tempwxz=tempwxz+c[kkk-1][1+kk]*(w[(k+kkk)*ny*nx+j*nx+(i+kk)]
                                        -w[(k-kkk)*ny*nx+j*nx+(i+kk)]
                                        +w[(k-kkk)*ny*nx+j*nx+(i-kk)]
                                        -w[(k+kkk)*ny*nx+j*nx+(i-kk)]);
                            } // for(kkk=1;kkk<=mm;kkk++) end
                        } //for(kk=1;kk<=mm;kk++) end

                        // LValues below are only changed here
                        up[k*ny*nx+j*nx+i]=2.*up1[k*ny*nx+j*nx+i]-up2[k*ny*nx+j*nx+i]
                                +tempux2+tempwxz*vvp2*dtz*dtx
                                +tempvxy*vvp2*dtz*dtx;
                        // up1[k][j][j], up2[k][j][i], up[k][j][i]
                        vp[k*ny*nx+j*nx+i]=2.*vp1[k*ny*nx+j*nx+i]-vp2[k*ny*nx+j*nx+i]
                                +tempvy2+tempuxy*vvp2*dtz*dtx
                                +tempwyz*vvp2*dtz*dtx;
                        wp[k*ny*nx+j*nx+i]=2.*wp1[k*ny*nx+j*nx+i]-wp2[k*ny*nx+j*nx+i]
                                +tempwz2+tempuxz*vvp2*dtz*dtx
                                +tempvyz*vvp2*dtz*dtx
                                +px*wave[l-1];
                        us[k*ny*nx+j*nx+i]=2.*us1[k*ny*nx+j*nx+i]-us2[k*ny*nx+j*nx+i]+tempuy2+tempuz2
                                -tempvxy*vvs2*dtz*dtx-tempwxz*vvs2*dtz*dtx;
                        vs[k*ny*nx+j*nx+i]=2.*vs1[k*ny*nx+j*nx+i]-vs2[k*ny*nx+j*nx+i]+tempvx2+tempvz2
                                -tempuxy*vvs2*dtz*dtx-tempwyz*vvs2*dtz*dtx;
                        ws[k*ny*nx+j*nx+i]=2.*ws1[k*ny*nx+j*nx+i]-ws2[k*ny*nx+j*nx+i]+tempwx2+tempwy2
                                -tempuxz*vvs2*dtz*dtx-tempvyz*vvs2*dtz*dtx;
                    }//for(i=nleft;i<nright;i++) end
                }
            }

            // Again, those are UNIQUE access. Safe to share
#pragma omp parallel for shared(up) shared(us) shared(vp) shared(vs) shared(wp) shared(ws) shared(u) shared(v) shared(w) \
    shared(up2) shared(up1) shared(us2) shared(us1) shared(vp2) shared(vp1) shared(wp2) shared(wp1) shared(ws2) shared(ws1)
            for(int k=ntop;k<nbottom;k++)
                for(int j=nfront;j<nback;j++)
                    for(int i=nleft;i<nright;i++)
                    {
                        u[k*ny*nx+j*nx+i]=up[k*ny*nx+j*nx+i]+us[k*ny*nx+j*nx+i];
                        v[k*ny*nx+j*nx+i]=vp[k*ny*nx+j*nx+i]+vs[k*ny*nx+j*nx+i];
                        w[k*ny*nx+j*nx+i]=wp[k*ny*nx+j*nx+i]+ws[k*ny*nx+j*nx+i];

                        up2[k*ny*nx+j*nx+i]=up1[k*ny*nx+j*nx+i];
                        up1[k*ny*nx+j*nx+i]=up[k*ny*nx+j*nx+i];
                        us2[k*ny*nx+j*nx+i]=us1[k*ny*nx+j*nx+i];
                        us1[k*ny*nx+j*nx+i]=us[k*ny*nx+j*nx+i];
                        vp2[k*ny*nx+j*nx+i]=vp1[k*ny*nx+j*nx+i];
                        vp1[k*ny*nx+j*nx+i]=vp[k*ny*nx+j*nx+i];
                        vs2[k*ny*nx+j*nx+i]=vs1[k*ny*nx+j*nx+i];
                        vs1[k*ny*nx+j*nx+i]=vs[k*ny*nx+j*nx+i];
                        wp2[k*ny*nx+j*nx+i]=wp1[k*ny*nx+j*nx+i];
                        wp1[k*ny*nx+j*nx+i]=wp[k*ny*nx+j*nx+i];
                        ws2[k*ny*nx+j*nx+i]=ws1[k*ny*nx+j*nx+i];
                        ws1[k*ny*nx+j*nx+i]=ws[k*ny*nx+j*nx+i];
                    }//for(i=nleft;i<nright;i++) end
        }//for(l=1;l<=lt;l++) end
        // [Afa] Do we need to keep the order of data?
        // [Afa Update] Yes, we do need to KEEP THE ORDER of data
        //        fwrite(up+169*ny*nx,sizeof(float),ny*nx,fout);    // This is the original fwrite

        MPI_File_seek(mpi_fout, (ishot - 1) * ny * nx * sizeof(float), MPI_SEEK_SET);
        MPI_File_write(mpi_fout, up + 169 * ny * nx, ny * nx, MPI_FLOAT, &mpi_status);

    }//for(ishot=1;ishot<=nshot;ishot++) end

    MPI_File_close(&mpi_fout);
    MPI_File_close(&mpi_flog);

    free(u);
    free(v);
    free(w);
    free(up);
    free(up1);
    free(up2);
    free(vp);
    free(vp1);
    free(vp2);
    free(wp);
    free(wp1);
    free(wp2);
    free(us);
    free(us1);
    free(us2);
    free(vs);
    free(vs1);
    free(vs2);
    free(ws);
    free(ws1);
    free(ws2);
    free(vpp);
    free(density);
    free(vss);
    free(wave);

    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Finalize();

    if (proc_rank == 0) {
        gettimeofday(&end,NULL);
        all_time = (end.tv_sec-start.tv_sec)+(float)(end.tv_usec-start.tv_usec)/1000000.0;
        printf("run time:\t%f s\n",all_time);
        flog = fopen(logfile,"a");
        fprintf(flog,"\nrun time:\t%f s\n\n",all_time);
        fclose(flog);
        flog = fopen(logfile,"a");
        fprintf(flog,"------------end time------------\n");
        fclose(flog);
        system(tmp);
    }


    // Why return 1?
    return 0;
}
Esempio n. 20
0
int main(int argc, char *argv[]) {

	int i, n, nlocal;
	int numprocs, dims[2], periods[2], keep_dims[2];
	int myrank, my2drank, mycoords[2];
	MPI_File f; char* filename = "input/16";
	MPI_Comm comm_2d, comm_row, comm_col;
	MPI_Status status;

	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
	MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

	dims[ROW] = dims[COL] = sqrt(numprocs);

	periods[ROW] = periods[COL] = 1;
	MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &comm_2d);

	MPI_Comm_rank(comm_2d, &my2drank);
	MPI_Cart_coords(comm_2d, my2drank, 2, mycoords);

	keep_dims[ROW] = 0;
	keep_dims[COL] = 1;
	MPI_Cart_sub(comm_2d, keep_dims, &comm_row);

	keep_dims[ROW] = 1;
	keep_dims[COL] = 0;
	MPI_Cart_sub(comm_2d, keep_dims, &comm_col);

	if(MPI_File_open(comm_2d, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, &f) != MPI_SUCCESS) {
		fprintf(stderr, "Cannot open file %s\n", filename);
		MPI_Abort(comm_2d, FILE_NOT_FOUND);
		MPI_Finalize();
		return 1;
	}
	MPI_File_seek(f, 0, MPI_SEEK_SET);
	MPI_File_read(f, &n, 1, MPI_INT, &status); nlocal = n/dims[ROW];

	int *a = (int *)malloc(nlocal * nlocal * sizeof(int));
	for(i = 0; i < nlocal; i++) {
		MPI_File_seek(f, ((mycoords[0] * nlocal  + i) * n + mycoords[1] * nlocal + 1) * sizeof(int), MPI_SEEK_SET);
		MPI_File_read(f, &a[i * nlocal], nlocal, MPI_INT, &status);
	}
	MPI_File_close(&f);

 	int j;
	if(my2drank == 3) {
		for(i = 0; i < nlocal; i++) {
			for(j = 0; j < nlocal; j++) {
				printf("%d ", a[i * nlocal +j]);
			}
			printf("\n");
		}
	}

	double start = MPI_Wtime();
	floyd_all_pairs_sp_2d(n, nlocal, a, comm_2d, comm_row, comm_col);
	double stop = MPI_Wtime();
	printf("[%d] Completed in %1.3f seconds\n", my2drank, stop-start);

	MPI_Comm_free(&comm_col);
	MPI_Comm_free(&comm_row);
	if(my2drank == 3) {
		for(i = 0; i < nlocal; i++) {
			for(j = 0; j < nlocal; j++) {
				printf("%d ", a[i * nlocal +j]);
			}
			printf("\n");
		}
	}
	if(MPI_File_open(comm_2d, "output/16", MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &f) != MPI_SUCCESS) {
			printf("Cannot open file %s\n", "out");
			MPI_Abort(comm_2d, FILE_NOT_FOUND);
			MPI_Finalize();
			return 1;
	}
	if(my2drank == 0) {
		MPI_File_seek(f, 0, MPI_SEEK_SET);
		MPI_File_write(f, &n, 1, MPI_INT, &status);
	}
	for(i = 0; i < nlocal; i++) {
		MPI_File_seek(f, ((mycoords[0] * nlocal  + i) * n + mycoords[1] * nlocal + 1) * sizeof(int), MPI_SEEK_SET);
		MPI_File_write(f, &a[i * nlocal], nlocal, MPI_INT, &status);
	}

	MPI_File_close(&f);
	free(a);

	MPI_Comm_free(&comm_2d);
	MPI_Finalize();
	return 0;
}
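The row-by-row seek/read loop in the example above can also be expressed with a single subarray filetype, which lets MPI-IO fetch the whole local block in one collective call. Below is a minimal sketch under the same assumptions (square process grid, n divisible by the grid dimension, the matrix order stored as one leading int); the function name and signature are illustrative, not part of the original program.

static void read_block_subarray(MPI_File f, int n, int nlocal, const int mycoords[2], int *a)
{
    MPI_Datatype block;
    int sizes[2]    = {n, n};
    int subsizes[2] = {nlocal, nlocal};
    int starts[2]   = {mycoords[0] * nlocal, mycoords[1] * nlocal};

    MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_INT, &block);
    MPI_Type_commit(&block);

    /* skip the leading matrix order n, then let the filetype pick out this process's block */
    MPI_File_set_view(f, sizeof(int), MPI_INT, block, "native", MPI_INFO_NULL);
    MPI_File_read_all(f, a, nlocal * nlocal, MPI_INT, MPI_STATUS_IGNORE);
    MPI_Type_free(&block);
}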
Esempio n. 21
0
void step4(inst i, int r, int s)
{
    inst instance = i;
    int rank = r;
    int size = s;

    // Creation of the 2D torus we will then use
    MPI_Comm comm;
    int dim[2] = {instance.p, instance.q};
    int period[2] = {1, 1};
    int reorder = 0;
    int coord[2];
    MPI_Cart_create(MPI_COMM_WORLD, 2, dim, period, reorder, &comm);
    MPI_Cart_coords(comm, rank, 2, coord);


    grid global_grid;

    char type = 0;
    MPI_File input_file;

    // We start by reading the header of the file
    MPI_File_open(comm, instance.input_path, MPI_MODE_RDONLY, MPI_INFO_NULL, &input_file);
    MPI_File_read_all(input_file, &type, 1, MPI_CHAR, MPI_STATUS_IGNORE);

    if(type == 1)
    {
	if (rank == 0) fprintf(stderr, "Error: type 1 files are not supported in step 4\n");
	MPI_Barrier(MPI_COMM_WORLD);
	MPI_Finalize();
	exit(EXIT_FAILURE);
    }
	
    // we needed to swap the next 2 lines
    MPI_File_read_all(input_file, &(global_grid.n), 1, MPI_UINT64_T, MPI_STATUS_IGNORE);
    MPI_File_read_all(input_file, &(global_grid.m), 1, MPI_UINT64_T, MPI_STATUS_IGNORE);

#ifdef DEBUG
    if(rank == 0)
	printf("n, m = %zu %zu\n", global_grid.n, global_grid.m);
#endif


    if(!(global_grid.n % instance.p == 0 && global_grid.m % instance.q == 0))
    {
	if(rank == 0)
	    fprintf(stderr, "Error: please choose the grid parameters so they divide the grid of the cellular automaton. For example %zu %zu, but you need to move from %d procs to %zu\n", instance.p + (global_grid.n % instance.p), instance.q + (global_grid.m % instance.q), size, (instance.p + (global_grid.n % instance.p))*(instance.q + (global_grid.m % instance.q)));
	MPI_Barrier(MPI_COMM_WORLD);
	MPI_Finalize();
	exit(EXIT_FAILURE);
    }

    size_t local_nrows = global_grid.n/instance.p;
    size_t local_ncols = global_grid.m/instance.q;
    
    // Now we create the data structures.
    int blocks[2] = {1, 2};
    MPI_Datatype types[2] = {MPI_BYTE, MPI_DOUBLE};
    MPI_Aint a_size = sizeof(cell2);
    MPI_Aint a_disp[3] = {offsetof(cell2, type), offsetof(cell2, u), offsetof(cell2, s)};

    MPI_Aint p_size = 17;
    MPI_Aint p_disp[3] = {0, 1, 9};
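    // The packed (on-file) cell is 17 bytes: a 1-byte type tag at offset 0 followed by two
    // contiguous doubles at offsets 1 and 9. The third displacement entry is unused because
    // the struct types below are built from a count of 2 blocks ({1 byte, 2 doubles}).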

    MPI_Datatype p_tmp, a_tmp, p_cell, a_cell;

    // Aligned struct, memory representation
    MPI_Type_create_struct(2, blocks, a_disp, types, &a_tmp);
    MPI_Type_create_resized(a_tmp, 0, a_size, &a_cell);
    MPI_Type_commit(&a_cell);
	    
    // Packed struct, file-based representation
    MPI_Type_create_struct(2, blocks, p_disp, types, &p_tmp);
    MPI_Type_create_resized(p_tmp, 0, p_size, &p_cell);
    MPI_Type_commit(&p_cell);

    // Now, we create our matrix
    MPI_Datatype matrix;
    int sizes[2] = {global_grid.n, global_grid.m};
    int subsizes[2] = {local_nrows, local_ncols};
    int starts[2] = {0, 0};
    MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, p_cell, &matrix);
    MPI_Type_commit(&matrix);

    // We extend this matrix
    MPI_Datatype ematrix;
    int e_subsizes[2] = {2 + subsizes[0], 2 + subsizes[1]};
    int e_start[2] = {1, 1};
    MPI_Type_create_subarray(2, e_subsizes, subsizes, e_start, MPI_ORDER_C, a_cell, &ematrix);
    MPI_Type_commit(&ematrix);
	

    // The next 3 types are for the export of the grid
    MPI_Datatype d_type;
    MPI_Type_create_resized(MPI_DOUBLE, 0, sizeof(cell2), &d_type);
    MPI_Type_commit(&d_type);
	

    MPI_Datatype d_matrix;
    MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_DOUBLE, &d_matrix);
    MPI_Type_commit(&d_matrix);

    MPI_Datatype d_rmatrix; // to go from the extended matrix with ghost zones to the other one
    MPI_Type_create_subarray(2, e_subsizes, subsizes, e_start, MPI_ORDER_C, d_type, &d_rmatrix);
    MPI_Type_commit(&d_rmatrix);



    // Set file view for each element
    MPI_Offset grid_start;
    MPI_File_get_position(input_file, &grid_start);

	
    MPI_File_set_view(input_file, grid_start + global_grid.m*local_nrows*p_size*coord[0] + local_ncols*p_size*coord[1], p_cell, matrix, "native", MPI_INFO_NULL);

    // allocate the cell array we will use
    cell2 **cells;
    cells = malloc(2*sizeof(cell2 *));
    double *sensors;
	
    cells[1] = calloc((2+local_nrows)*(2+local_ncols),sizeof(cell2));
    cells[0] = calloc((2+local_nrows)*(2+local_ncols),sizeof(cell2));
    sensors = calloc(local_nrows*local_ncols, sizeof(double));
	
    MPI_File_read_all(input_file, cells[0], 1, ematrix, MPI_STATUS_IGNORE);

    MPI_File_close(&input_file);

#ifdef DEBUG
    for(size_t i = 1; i < 1+local_nrows; i++)
	for(size_t j = 1; j < 1+local_ncols; j++)
	    fprintf(stderr, "%d - %d %f\n", rank, cells[0][i*(2+local_ncols)+j].type, cells[0][i*(2+local_ncols)+j].u);
#endif

    MPI_Datatype l_row; // local row
    MPI_Type_contiguous(local_ncols, d_type, &l_row);
    MPI_Type_commit(&l_row);

    MPI_Datatype l_col; // local column. A bit trickier, we need a type_vector.
    MPI_Type_vector(local_nrows, 1, local_ncols+2, d_type, &l_col);
    MPI_Type_commit(&l_col);

	
    int top, bot, left, right;
    double sqspeed = 0;

    int curr = 0, next = 0;
    char *alldump = malloc(256);

    for(int s = 0; s < instance.iteration; s++)
    {
	// We will update cell[next], and use the data of cell[curr]
	curr = s % 2;
	next = (s+1) % 2;
	    
	// We copy the edges of the grid.
	// We first need the ranks of the neighbours

	MPI_Cart_shift(comm, 0, 1, &top, &bot);
	MPI_Cart_shift(comm, 1, 1, &left, &right);
	    

	// Then we need to update the edges of our local grid
	// Update top and bottom rows
	MPI_Sendrecv(&(cells[curr][1*(local_ncols+2)+1].u),               1, l_row, top, 0,
		     &(cells[curr][(local_ncols+2)*(local_nrows+1)+1].u), 1, l_row, bot, 0,
		     comm, MPI_STATUS_IGNORE);
	
	MPI_Sendrecv(&(cells[curr][(local_ncols+2)*(local_nrows)+1].u),   1, l_row, bot, 0,
		     &(cells[curr][1].u),                                 1, l_row, top, 0,
		     comm, MPI_STATUS_IGNORE);
	
	// Update left and right
	MPI_Sendrecv(&(cells[curr][1*(local_ncols+2)+1].u),             1, l_col, left,  0,
		     &(cells[curr][1*(local_ncols+2)+local_ncols+1].u), 1, l_col, right, 0,
		     comm, MPI_STATUS_IGNORE);

	MPI_Sendrecv(&(cells[curr][1*(local_ncols+2)+local_ncols].u),   1, l_col, right, 0,
		     &(cells[curr][1*(local_ncols+2)].u),               1, l_col, left,  0,
		     comm, MPI_STATUS_IGNORE);
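	// Layout reminder: each cells[] plane is (local_nrows+2) x (local_ncols+2), stored
	// row-major with a one-cell ghost border. The four exchanges above send the first/last
	// interior row to top/bot and the first/last interior column to left/right, receiving
	// into the opposite ghost row/column, so the stencil below can read i-1..i+1 and j-1..j+1.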



	// We compute the update of the grid
	for(size_t i = 1; i < 1+local_nrows; i++)
	{
	    for(size_t j = 1; j < 1+local_ncols; j++)
	    {
		if(instance.step < 2 || cells[next][j+i*(2+local_ncols)].type != 1)
		{
		    // If walls we do not do anything
		    sqspeed = cells[0][j+i*(2+local_ncols)].s * cells[0][j+i*(2+local_ncols)].s;
		    cells[next][j+i*(2+local_ncols)].u = cells[curr][j+i*(2+local_ncols)].u + (cells[curr][j+i*(2+local_ncols)].v * instance.dt);
		    cells[next][j+i*(2+local_ncols)].v = cells[curr][j+i*(2+local_ncols)].v + sqspeed * (cells[curr][j+(i+1)*(2+local_ncols)].u + cells[curr][j+(i-1)*(2+local_ncols)].u + cells[curr][(j+1) + i*(2+local_ncols)].u + cells[curr][(j-1) + i*(2+local_ncols)].u - (4 * cells[curr][j+i*(2+local_ncols)].u)) * instance.dt;

		    if(instance.step == 3 && cells[next][j+i*(2+local_ncols)].type == 2)
		    {
			// Case of sensors
			sensors[(j-1)+(i-1)*local_ncols] += cells[next][j+i*(2+local_ncols)].u * cells[next][j+i*(2+local_ncols)].u;
		    }
		}
		    
	    }
	}

	if(instance.alldump != NULL && s % instance.frequency == 0)
	{
	    MPI_File dump_file;

	    sprintf(alldump, instance.alldump, (s / instance.frequency));
	    MPI_File_open(comm, alldump, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &dump_file);
		
	    MPI_File_set_view(dump_file, global_grid.m*local_nrows*sizeof(double)*coord[0] + local_ncols*sizeof(double)*coord[1], MPI_DOUBLE, d_matrix, "native", MPI_INFO_NULL);
		
	    MPI_File_write_all(dump_file, &(cells[curr][0].u), 1, d_rmatrix, MPI_STATUS_IGNORE);
	    MPI_File_close(&dump_file);


	}
    }

	
    if(instance.lastdump != NULL)
    {
	// well, how do we do this? maybe it works if we do a resize?
	MPI_File last_file;
	MPI_File_open(comm, instance.lastdump, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &last_file);
	MPI_File_set_view(last_file, global_grid.m*local_nrows*sizeof(double)*coord[0] + local_ncols*sizeof(double)*coord[1], MPI_DOUBLE, d_matrix, "native", MPI_INFO_NULL); // for one thing, there is an extra grid_start here; d_type or MPI_DOUBLE?

	MPI_File_write_all(last_file, &(cells[next][0].u), 1, d_rmatrix, MPI_STATUS_IGNORE);
	MPI_File_close(&last_file);
    }

    if(instance.step == 3 && instance.sensors != NULL)
    {
	MPI_File sensor_file;
	MPI_File_open(comm, instance.sensors, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &sensor_file);


	MPI_Datatype string;
	MPI_Type_contiguous(1024, MPI_CHAR, &string);
	MPI_Type_commit(&string);
	
	char text[1024];
	for(size_t i = 1; i < 1+local_nrows; i++)
	{
	    for(size_t j = 1; j < 1+local_ncols; j++)
	    {
		if(instance.step == 3 && cells[next][j+i*(2+local_ncols)].type == 2)
		{
		    memset(text,0,sizeof(text));
		    sprintf(text, "%zu %zu %f\r\n", (i-1)+coord[0]*local_nrows, (j-1)+coord[1]*local_ncols, sensors[(j-1)+(i-1)*local_ncols]);
		    MPI_File_write(sensor_file, text, 1, string, MPI_STATUS_IGNORE);
		}
		    
	    }
	}
	    
	MPI_Type_free(&string);
	MPI_File_close(&sensor_file);
    }
	

    // Some cleaning
    free(cells[0]);
    free(cells[1]);
    free(sensors);
    free(cells);
    free(alldump);
    MPI_Type_free(&a_cell);
    MPI_Type_free(&p_cell);
    MPI_Type_free(&matrix);
    MPI_Type_free(&ematrix);
    MPI_Type_free(&d_type);
    MPI_Type_free(&d_matrix);
    MPI_Type_free(&d_rmatrix);
    MPI_Type_free(&l_row);
    MPI_Type_free(&l_col);
}
Esempio n. 22
0
int main(int argc, char **argv) {
	if(argc < 2) {
		printf("Usage: %s infile\n", argv[0]);
		exit(1);
	}

	MPI_Comm comm = MPI_COMM_WORLD;
	MPI_Info mpi_info = MPI_INFO_NULL;
	MPI_File fh, fw;
	MPI_Offset file_size, frag_size, read_size;
	MPI_Offset offset;
	MPI_Status status;
	int retval;
	double start, end;

	unsigned char *buf, *outbuf, *outProps;
	size_t destlen;
	size_t propsize = 5;

	MPI_Init(&argc, &argv);
	MPI_Comm_rank(comm, &mpi_rank);
	MPI_Comm_size(comm, &mpi_size);

	MPI_Barrier(comm);
	start = MPI_Wtime();
	/*
	 * read
	 */
	MPI_File_open(comm, argv[1], MPI_MODE_RDONLY, mpi_info, &fh);
	MPI_File_get_size(fh, &file_size);
	//printf("file size:%d\n", file_size);

	frag_size = file_size / mpi_size;
	offset = frag_size * mpi_rank;
	/* the last rank picks up the remainder so no tail bytes are lost */
	read_size = (mpi_rank == mpi_size - 1) ? (file_size - offset) : frag_size;
	//printf("rank %d offset %lld\n", mpi_rank, (long long)offset);

	buf = malloc(read_size + 2);
	assert(buf != NULL);
	MPI_File_read_at(fh, offset, buf, read_size, MPI_CHAR, &status);
	MPI_File_close(&fh);

	/*
	 * compress
	 */
	destlen = 1.2 * frag_size + 1024 * 1024;
	outbuf = (unsigned char *)malloc(destlen);
	assert(outbuf != NULL);
	destlen = destlen - DATA_OFFSET -propsize;
	outProps = outbuf + DATA_OFFSET;
	retval = LzmaCompress(outbuf + DATA_OFFSET + propsize, &destlen, buf, read_size, outProps, &propsize, -1, 0, -1, -1, -1, -1, 1);
	if(retval != SZ_OK) {
		error_print(retval);
		free(buf);
		free(outbuf);
		exit(1);
	}

	/*
	 * write
	 */
	char *fwname;
	unsigned long long *len;
	fwname = get_fwname(argv[1]);
	len = (unsigned long long *)outbuf;
	*len = read_size;
	//printf("%s %d\n", fwname, destlen);
	MPI_File_open(MPI_COMM_SELF, fwname, MPI_MODE_WRONLY | MPI_MODE_CREATE, mpi_info, &fw);
	MPI_File_set_size(fw, destlen);
	MPI_File_write(fw, outbuf, destlen + DATA_OFFSET + propsize, MPI_CHAR, &status);
	MPI_File_close(&fw);

	MPI_Barrier(comm);
	end = MPI_Wtime();

	size_t cmprs_len;
	double cmprs_ratio;
	MPI_Reduce(&destlen, &cmprs_len, 1, MPI_UNSIGNED_LONG, MPI_SUM, 0, comm);
	if(0 == mpi_rank) {
		cmprs_ratio = (double)cmprs_len / file_size;
		printf("file size: %lu\n", file_size);
		printf("after compressed: %lu\n", cmprs_len);
		printf("compress ratio: %f\n", cmprs_ratio);
		printf("number of processes: %d\n", mpi_size);
		printf("time used: %fs\n", end - start);
	}
	MPI_Finalize();
	free(fwname);
	free(buf);
	free(outbuf);
	return 0;
}
Esempio n. 23
0
/*
 * There are three stages of execution in this routine.  
 * 
 * 1.   Data in an IO group is gathered together.  An IO group consists of an
 *      integer number of compute node layers and a single IO node, and the IO
 *      node is where the data is gathered.
 * 
 * 2.   The data is transposed to have the desired layout in memory.  Data on
 *      a compute node is stored as [z][x][y].  After gathering to the IO node,
 *      the ordering of compute nodes results in a data layout of 
 *      [l][h][z][y][x] where l iterates over layers and h iterates over rows.
 *      We wish to transpose it to be [z][y][x], both because this is a more
 *      intuitive ordering for data during analysis, and because z is the final
 *      remaining distributed dimension, and a raw combination of the data in
 *      all the IO nodes will now result in a well ordered layout.
 * 
 * 3.   The set of all IO nodes perform a parallel write to disk, resulting in
 *      a single file with an expected ordering.
 * 
 *      Note:  The reason compute nodes store data as [z][x][y] instead of 
 *             [z][y][x] is so that after an FFT operation (and its required
 *             transpose) spectral modes are stored in [kz][ky][kx].  This 
 *             is a simpler ordering to remember, and in the code we are far
 *             more likely to iterate over individual dimensions in spectral
 *             coordinates than spatial ones. IO happens rarely enough that
 *             the cost of the extra transpose required here is probably
 *             negligible, though this should be verified.
 */
void writeSpatial(field * f, char * name)
{
    int i,j,k,l,m;
    debug("Writing spatial data to file %s\n", name);

    int sndcnt = 0;
    PRECISION * rcvbuff = 0;
    PRECISION * sndbuff = 0;
    
    //the extra +1 just gives a little extra room to do an extra loop below.
    //The extra element means nothing, it just makes the code a hair easier
    //to write
    int displs[iosize+1];
    int rcvcounts[iosize];

    debug("consolidating data to IO nodes\n");
    if(compute_node)
    {
        sndcnt = my_x->width * my_z->width * ny;
        trace("Sending %d PRECISIONs\n", sndcnt);
        MPI_Gatherv(f->spatial, sndcnt, MPI_PRECISION, 0, 0, 0, MPI_PRECISION, 0, iocomm);
        debug("Write Spatial completed\n");
        return;
    }
    else if(io_node)
    {
        rcvbuff = (PRECISION *)malloc(nx * ny * nz_layers * sizeof(PRECISION));
        sndbuff = (PRECISION *)malloc(nx * ny * nz_layers * sizeof(PRECISION));
        trace("Total local data will be %d PRECISIONs\n", nx*ny*nz_layers);

        //We need to calculate the starting index that data from each compute
        //processor will begin at in our array.
        //Note:  Since our own IO node is not contributing any data, both our
        //       IO node and the first compute node get to start at a 
        //       displacement of 0.
        displs[0] = 0;
        displs[1] = 0;
        rcvcounts[0] = 0;

        //staggered loop.  We calculate how much data we receive from one
        //processor at the same time we calculate where the data for the
        //next processor will begin storage.
        int * pidspls = displs + 2;
        int * pircvcounts = rcvcounts+1;
        for(i = io_layers[my_io_layer].min; i <= io_layers[my_io_layer].max; i++)
        {
            for(j = 0; j < hdiv; j++)
            {
                *pircvcounts = all_x[j].width * all_z[i].width * ny;
                *pidspls = *(pidspls-1) + *pircvcounts;
                trace("Proc %d should send %d PRECISIONs at displacement %d\n", hdiv * i + j, *pircvcounts, *pidspls);
                pidspls++;
                pircvcounts++;
            }
        }
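        // Example of the staggered bookkeeping above (assumed sizes): with two compute
        // processors each contributing 1000 PRECISIONs, rcvcounts becomes {0, 1000, 1000}
        // (the IO node itself sends nothing) and displs becomes {0, 0, 1000}, so the
        // Gatherv below packs the two blocks back to back in rcvbuff.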
        MPI_Gatherv(0, 0, MPI_PRECISION, rcvbuff, rcvcounts, displs, MPI_PRECISION, 0, iocomm);
    }

    debug("transposing data so it is properly contiguous\n");
    //rcvbuff is [l][h][vz][hx][y]
    //we want [lz][y][x]
    int indexr = 0;
    int indexs = 0;
    for(i = 0; i < io_layers[my_io_layer].width; i++)
    {
        for(j = 0; j < hdiv; j++)
        {
            int vz = all_z[i + io_layers[my_io_layer].min].width;
            int vzmin = all_z[i + io_layers[my_io_layer].min].min;
            int vzstart = all_z[io_layers[my_io_layer].min].min;
            for(k = 0; k < vz; k++)
            {
                int hx = all_x[j].width;
                int hxmin = all_x[j].min;
                for(l = 0; l < hx; l++)
                {
                    for(m = 0; m < ny; m++)
                    {
                        indexs = ((k + vzmin - vzstart)*ny + m)*nx + l + hxmin;
                        sndbuff[indexs] = rcvbuff[indexr];
                        indexr++;
                    }
                }
            }
        }
    }



    debug("Performing parallel file write\n");
    //TODO: revisit MPI_MODE_SEQUENTIAL and MPI_INFO_NULL to make sure these are what we want
    MPI_File fh;
    MPI_File_open(fcomm, name, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &fh);
    debug("MPI File opened successfully\n");
    
    //Calculate displacements for each IO processor into the full file.
    MPI_Offset disp = 0;    /* MPI_Offset so large files don't overflow an int */
    
    //loop over each IO processor before us
    for(i = 0; i < my_io_layer; i++)
    {
        //calculate how many layers those IO processors contribute
        for(j = io_layers[i].min; j <= io_layers[i].max; j++)
        {
            disp += all_z[j].width;
        }
    }
    //Convert layers into actual data size.
    disp *= nx * ny * sizeof(PRECISION);
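    // Illustration with assumed sizes: if the IO processors ahead of us cover 32 z-planes
    // and nx = ny = 64 with 8-byte PRECISION, disp = 32 * 64 * 64 * 8 = 1048576 bytes,
    // i.e. our view starts 1 MiB into the file.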
    
    trace("Our view starts at element %d\n", disp);
    trace("Setting view...\n");
    MPI_File_set_view(fh, disp, MPI_PRECISION, MPI_PRECISION, "native", MPI_INFO_NULL);
    trace("Writing to file...\n");
    MPI_File_write(fh, sndbuff, nx * ny * nz_layers, MPI_PRECISION, MPI_STATUS_IGNORE );
    MPI_File_close(&fh);

    free(sndbuff);
    free(rcvbuff);

    debug("Write Spatial completed\n");

}
Esempio n. 24
0
static int test_indexed_with_zeros(char *filename, int testcase)
{
    int i, rank, np, buflen, num, err, nr_errors=0;
    int  nelms[MAXLEN], buf[MAXLEN], indices[MAXLEN], blocklen[MAXLEN];
    MPI_File fh;
    MPI_Status status;
    MPI_Datatype filetype;
    MPI_Datatype types[MAXLEN];
    MPI_Aint addrs[MAXLEN];

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &np);

    /* set up the number of integers to write in each iteration */
    for (i=0; i<MAXLEN; i++) nelms[i] = 0;
    if (rank == 0) nelms[4]=nelms[5]=nelms[7]=1;
    if (rank == 1) nelms[0]=nelms[1]=nelms[2]=nelms[3]=nelms[6]=nelms[8]=1;

    /* pre-fill the file with integers -999 */
    if (rank == 0) {
        for (i=0; i<MAXLEN; i++) buf[i] = -999;
	err =MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE|MPI_MODE_WRONLY,
		MPI_INFO_NULL, &fh);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_open");
        err = MPI_File_write(fh, buf, MAXLEN, MPI_INT, &status);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_write");
        err = MPI_File_close(&fh);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_close");
    }
    MPI_Barrier(MPI_COMM_WORLD);

    /* define a filetype with spurious leading zeros */
    buflen = num = 0;
    for (i=0; i<MAXLEN; i++) {
        buflen       += nelms[i];
        indices[num]  = i;
        addrs[num] = i*sizeof(int);
        blocklen[num] = nelms[i];
        types[num] = MPI_INT;
        num++;
    }
    switch (testcase) {
	case INDEXED:
	    MPI_Type_indexed(num, blocklen, indices, MPI_INT, &filetype);
	    break;
	case HINDEXED:
	    MPI_Type_hindexed(num, blocklen, addrs, MPI_INT, &filetype);
	    break;
	case STRUCT:
	    MPI_Type_create_struct(num, blocklen, addrs, types, &filetype);
	    break;
	default:
	    fprintf(stderr, "unknown testcase!\n");
	    return(-100);

    }

    MPI_Type_commit(&filetype);

    /* initialize write buffer and write to file*/
    for (i=0; i<MAXLEN; i++) buf[i] = 1;
    err =MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
    if (err != MPI_SUCCESS) handle_error(err, "MPI_File_open");
    err = MPI_File_set_view(fh, 0, MPI_INT, filetype, "native", MPI_INFO_NULL);
    if (err != MPI_SUCCESS) handle_error(err, "MPI_File_set_view");
    err = MPI_File_write_all(fh, buf, buflen, MPI_INT, &status);
    if (err != MPI_SUCCESS) handle_error(err, "MPI_File_write_all");
    MPI_Type_free(&filetype);
    err = MPI_File_close(&fh);
    if (err != MPI_SUCCESS) handle_error(err, "MPI_File_close");

    /* read back and check */
    if (rank == 0) {
        err = MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, &fh);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_open");
        err = MPI_File_read(fh,buf, MAXLEN, MPI_INT, &status);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_read");
        err = MPI_File_close(&fh);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_close");
        for (i=0; i<MAXLEN; i++) {
            if (buf[i] < 0) {
		nr_errors++;
                printf("Error: unexpected value for case %d at buf[%d] == %d\n",
			testcase,i,buf[i]);
	    }
	}
    }
    return nr_errors;
}
Esempio n. 25
0
int main(int argc, char *argv[])
{
    int iarrayOfSizes[2], iarrayOfSubsizes[2], iarrayOfStarts[2], ilocal_size;
    int nproc[2], periods[2], icoord[2];
    int m, n, i, j, wsize, wrank, crank, ndims, lrows, lcols, grow, gcol, err;
    MPI_Datatype filetype;
    MPI_File     fh;
    MPI_Comm     cartcomm;
    MPI_Info     info0, info3;
    double       t, topen, twrite, tclose, wrate = 0.0;
    double       *local_array;
    char         nstripesStr[12], stripeUnitStr[12];
    int          nstripes = -1;
    int          stripeUnit = -1;
    MPI_Offset   headerSize = 0;

    MPI_Init(0,0);

    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);

    /* Get global array size */
    m = n = 128;      /* Set default size */

    /* ioda [ n ] [ m ] [ nstripes ] [ stripeunit ] [ headersize ] */
    if (argc > 0) {
	if (argc > 1) m = atoi(argv[1]);
	if (argc > 2) n = atoi(argv[2]);
	if (argc > 3) nstripes = atoi(argv[3]);
	if (argc > 4) stripeUnit = atoi(argv[4]);
        if (argc > 5) headerSize = atoi(argv[5]);
	if (argc > 6) {
	    if (wrank == 0)
		fprintf(stderr,"Unrecognized argument %s\n", argv[6]);
	    MPI_Abort(MPI_COMM_WORLD,1);
	}
    }
    if (wrank == 0) printf("Matrix is [%d,%d]; file dir = %s\n", m, n, MYSCRATCHDIR );

    /* The default number of stripes = totalsize/1M */
    if (nstripes < 0) {
	nstripes = n * m * sizeof(double) / (1024*1024);
	if (nstripes < 1) nstripes = 1;
    }
    if (wrank == 0) printf("nstripes = %d, stripeUnit = %d, header size = %d\n",
                           nstripes, stripeUnit, (int)headerSize);

    /* Use topology routines to get decomposition and coordinates */
    MPI_Comm_size(MPI_COMM_WORLD, &wsize);
    nproc[0] = 0; nproc[1] = 0;
    ndims = 2;
    MPI_Dims_create(wsize, ndims, nproc);
    periods[0] = 0; periods[1] = 0;
    MPI_Cart_create(MPI_COMM_WORLD, ndims, nproc, periods, 1, &cartcomm);
    MPI_Comm_rank(cartcomm, &crank);
    MPI_Cart_coords(cartcomm, crank, ndims, icoord);

    iarrayOfSizes[0]    = m;
    iarrayOfSizes[1]    = n;
    iarrayOfSubsizes[0] = m/nproc[0];
    iarrayOfSubsizes[1] = n/nproc[1];
    iarrayOfStarts[0]   = icoord[0] * iarrayOfSubsizes[0];
    iarrayOfStarts[1]   = icoord[1] * iarrayOfSubsizes[1];

    /* Initialize my block of the data */
    ilocal_size = iarrayOfSubsizes[0] * iarrayOfSubsizes[1];
    lrows = iarrayOfSubsizes[0];
    lcols = iarrayOfSubsizes[1];
    local_array = (double *)malloc(lrows*lcols*sizeof(double));
    gcol  = iarrayOfStarts[1];
    grow = iarrayOfStarts[0];
    for (i=0; i<lrows; i++) {
	for (j=0; j<lcols; j++) {
	    local_array[j*lrows+i] = (grow+i) + (gcol+j)*m;
	}
    }

    /* Fortran order simply means the data is stored by columns */
    MPI_Type_create_subarray(ndims, iarrayOfSizes, iarrayOfSubsizes,
			     iarrayOfStarts, MPI_ORDER_FORTRAN, MPI_DOUBLE,
			     &filetype);
    MPI_Type_commit(&filetype);

    info0 = MPI_INFO_NULL;
    info3 = MPI_INFO_NULL;
    if (nstripes > 0 || stripeUnit > 0) {
	MPI_Info_create(&info0);
	if (nstripes > 0) {
	    snprintf(nstripesStr, sizeof(nstripesStr), "%d", nstripes);
	    MPI_Info_set(info0, "striping_factor", nstripesStr);
	    MPI_Info_set(info0, "cb_nodes", nstripesStr);
	}
	if (stripeUnit > 0) {
	    snprintf(stripeUnitStr, sizeof(stripeUnitStr), "%d", stripeUnit);
	    MPI_Info_set(info0, "striping_unit", stripeUnitStr);
	}
	MPI_Info_dup(info0, &info3);
	MPI_Info_set(info3, "romio_no_indep_rw", "true");

	/* Other hints to consider:
	   direct_io=true

	   The default cb_buffer_size is 16777216 , but is overridden by the
	   striping unit, which is smaller by default.
	*/
    }

    /* level - 3 */
    MPI_Barrier(MPI_COMM_WORLD);
    t = MPI_Wtime();
    err = MPI_File_open(cartcomm, MYSCRATCHDIR "testfile-3.out",
			MPI_MODE_CREATE | MPI_MODE_RDWR, info3, &fh);
    topen = MPI_Wtime() - t;
    if (err != MPI_SUCCESS) myAbort(err, "open testfile-3.out");

    if (headerSize > 0) {
        /* Simulate writing a header */
        if (wrank == 0) {
	    char *header;
            header = (char *)calloc(1,(size_t)headerSize);
            MPI_File_write(fh, header, headerSize, MPI_BYTE, MPI_STATUS_IGNORE);
            free(header);
        }
        MPI_Barrier(cartcomm);
    }

    MPI_File_set_view(fh, headerSize, MPI_DOUBLE, filetype, "native", MPI_INFO_NULL);

    MPI_Barrier(MPI_COMM_WORLD);
    t = MPI_Wtime();
    err = MPI_File_write_all(fh, local_array, ilocal_size, MPI_DOUBLE,
			     MPI_STATUS_IGNORE);
    twrite = MPI_Wtime() - t;
    if (err != MPI_SUCCESS) myAbort(err, "collective write");

    err = MPI_File_close(&fh);
    tclose = MPI_Wtime() - t;
    /* tclose is the time for the write(s) + the close, in case the
       implementation delays (some of) the writes until the close */
    if (err != MPI_SUCCESS) myAbort(err, "close testfile-3.out");

    MPI_Allreduce(MPI_IN_PLACE, &topen, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    MPI_Allreduce(MPI_IN_PLACE, &twrite, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    MPI_Allreduce(MPI_IN_PLACE, &tclose, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    if (twrite > 0)
	wrate = (double)m * (double)n * sizeof(double)/twrite;
    if (wrank == 0)
	printf("%d\t[%d,%d]\t%d\t%.2e\t%.2e\t%.2e\t%.2e\n", wsize, m, n, nstripes, topen,
	       twrite, tclose, wrate);

    /* level - 0 */
    MPI_Barrier(MPI_COMM_WORLD);
    t = MPI_Wtime();
    err = MPI_File_open(cartcomm, MYSCRATCHDIR "testfile-0.out",
			MPI_MODE_CREATE | MPI_MODE_RDWR, info0, &fh);
    topen = MPI_Wtime() - t;
    if (err != MPI_SUCCESS) myAbort(err, "open testfile-0.out");

    if (headerSize > 0) {
        /* Simulate writing a header */
        if (wrank == 0) {
	    char *header;
            header = (char *)calloc(1,(size_t)headerSize);
            MPI_File_write(fh, header, headerSize, MPI_BYTE, MPI_STATUS_IGNORE);
            free(header);
        }
        MPI_Barrier(cartcomm);
    }

    MPI_Barrier(MPI_COMM_WORLD);
    t = MPI_Wtime();
    gcol = iarrayOfStarts[1];
    grow = iarrayOfStarts[0];
    for (j=0; j<lcols; j++) {
	MPI_Offset offset = headerSize +
	    ((MPI_Offset)(grow) + (MPI_Offset)(gcol+j)*m) * sizeof(double);
	err = MPI_File_write_at(fh, offset, local_array+j*lrows, lrows, MPI_DOUBLE,
				MPI_STATUS_IGNORE);
	if (err != MPI_SUCCESS) myAbort(err, "write at");
    }
    twrite = MPI_Wtime() - t;

    err = MPI_File_close(&fh);
    tclose = MPI_Wtime() - t;
    /* tclose is the time for the write(s) + the close, in case the
       implementation delays (some of) the writes until the close */
    if (err != MPI_SUCCESS) myAbort(err, "close testfile-0");

    MPI_Allreduce(MPI_IN_PLACE, &topen, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    MPI_Allreduce(MPI_IN_PLACE, &twrite, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    MPI_Allreduce(MPI_IN_PLACE, &tclose, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    if (twrite > 0)
	wrate = (double)m * (double)n * sizeof(double)/twrite;
    if (wrank == 0)
	printf("%d\t[%d,%d]\t%d\t%.2e\t%.2e\t%.2e\t%.2e\n", wsize, m, n, nstripes, topen,
	       twrite, tclose, wrate);

    if (info0 != MPI_INFO_NULL) {
	MPI_Info_free(&info0);
	MPI_Info_free(&info3);
    }
    free(local_array);
    MPI_Finalize();
    return 0;
}
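A minimal sketch (not part of the benchmark above) of how one could query back the hints the MPI implementation actually applied to an open file, for example to verify whether striping_factor, striping_unit and cb_nodes took effect; fh stands for a file handle opened as in the level-3 or level-0 cases above.

#include <stdio.h>
#include <mpi.h>

/* Print every (key, value) hint pair currently associated with fh */
static void print_applied_hints(MPI_File fh)
{
    MPI_Info info;
    int      nkeys, i, flag;
    char     key[MPI_MAX_INFO_KEY + 1], value[MPI_MAX_INFO_VAL + 1];

    MPI_File_get_info(fh, &info);      /* returns a copy of the hints in use */
    MPI_Info_get_nkeys(info, &nkeys);
    for (i = 0; i < nkeys; i++) {
        MPI_Info_get_nthkey(info, i, key);
        MPI_Info_get(info, key, MPI_MAX_INFO_VAL, value, &flag);
        if (flag) printf("%s = %s\n", key, value);
    }
    MPI_Info_free(&info);
}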
Esempio n. 26
0
int main (int argc, char **argv)
{
    struct arguments arguments;

    /* Parse our arguments; every option seen by parse_opt will
       be reflected in arguments. */
    argp_parse (&argp, argc, argv, 0, 0, &arguments); 

    int run_type;
    run_type = 0; //default is serial
    if (sscanf (arguments.args[0], "%i", &run_type)!=1) {}

    int iterations;
    iterations = 0; //default is 0 iterations
    if (sscanf (arguments.args[1], "%i", &iterations)!=1) {}

    int count_when;
    count_when = 1000;
    if (sscanf (arguments.args[2], "%i", &count_when)!=1) {}

    char print_list[200]; //used for input list
    if (sscanf (arguments.args[3], "%s", print_list)!=1) {}

    // printf("Print list = %s\n", print_list);

    //Extract animation list from arguments
    char char_array[20][12] = {{0}};   //separated input list
    int animation_list[20][2] = {{0}}; //integer input list: start, range
    char *tok = strtok(print_list, ",");

    //counters
    int i,j,k,x,y,ii,jj;
    ii = 0;
    jj = 0;

    //Loop over tokens parsing our commas
    int tok_len = 0;
    while (tok != NULL)
    {
        //first loop parses out commas
        tok_len = strlen(tok);
        for (jj=0;jj<tok_len;jj++)
        {
            char_array[ii][jj] = tok[jj];
        }

        // printf("Tok = %s\n", char_array[ii]);
        tok = strtok(NULL, ",");
        ii++;
    }

    //looking for a range input, convert to ints
    int stop;
    for (ii=0;ii<20;ii++)
    {
        //convert first number to int
        tok = strtok(char_array[ii], "-");
        if (tok != NULL)
        {
            animation_list[ii][0] = atoi(tok);
            tok = strtok(NULL, ",");
        }
        
        //look for second number, add to range
        if (tok != NULL)
        {
            stop = atoi(tok);
            animation_list[ii][1] = stop - animation_list[ii][0];
        }

        // if (rank == 0)
        // {
        //     printf("Animation_list = %i, %i\n", 
        //         animation_list[ii][0], animation_list[ii][1]);

        // }
    }
    
    
    

    //should an animation be generated?
    //prints a series of .pgm files; the gif has to be assembled by hand
    int animation;
    animation = arguments.animation;

    //verbose?
    int verbose;
    verbose = arguments.verbose;
    // printf("VERBOSE = %i",verbose);
    if (verbose>0 && verbose<=10)
    {
        verbose = 1;
    }

    

    // Initialize the MPI environment
    MPI_Init(NULL, NULL);

    // Get the number of processes
    int world_size;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    // Get the rank of the process
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // Get the name of the processor
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    int name_len;
    MPI_Get_processor_name(processor_name, &name_len);

    //Print run information, exit on bad command line input
    if (rank == 0 && verbose == 1)
    {
        printf("Verbose=%i, RunType=%i, Iterations=%i, CountWhen=%i, Animation=%i\n",
            verbose,run_type,iterations,count_when, animation);
    }
    if (world_size>1 && run_type ==0)
    {
        printf("Runtype and processors count not consistant\n");
        MPI_Finalize();
        exit(0);
    }
    if (world_size==1 && run_type>0)
    {
        printf("Runtype and processors count not consistant\n");
        MPI_Finalize();
        exit(0);
    }
    if (count_when <= 0)
    {
        if (rank == 0)
        {
            printf("Invalid count interval, positive integers only\n");
        }
        MPI_Finalize();
        exit(0);
    }

     //serial
    if (world_size == 1 && run_type == 0)
    {

        ncols=1;
        nrows=1;
    }
    //Blocked
    else if (world_size>1 && run_type == 1)
    {
        ncols = 1;
        nrows = world_size;
        my_col = 0;
        my_row = rank;
    }
    //Checker
    else if (world_size>1 && run_type == 2)
    {
        ncols = (int)sqrt(world_size);
        nrows = (int)sqrt(world_size);

        my_row = rank/nrows;
        my_col = rank-my_row*nrows;

        if (ncols*nrows!=world_size)
        {
            if (rank == 0)
            {
                printf("Number of processors must be square, Exiting\n");
            }
            MPI_Finalize();
            exit(0);
        }
    }

    // if (verbose == 1)
    // {
    //     printf("WR,row,col=%i,%i,%i\n",rank,my_row,my_col);
    // }

    
    //////////////////////READ IN INITIAL PGM////////////////////////////////
    if(!readpgm("life.pgm"))
    {
        // printf("WR=%d,HERE2\n",rank);
        if( rank==0 )
        {
            pprintf( "An error occured while reading the pgm file\n" );
        }
        MPI_Finalize();
        return 1;
    }

    // Count the life forms. Note that we count from [1,1] - [height+1,width+1];
    // we need to ignore the ghost row!
    i = 0;
    for(y=1; y<local_height+1; y++ )
    {
        for(x=1; x<local_width+1; x++ )
        {
            if( field_a[ y * field_width + x ] )
            {
                i++;
            }
        }
    }
    // pprintf( "%i local buggies\n", i );

    int total;
    MPI_Allreduce( &i, &total, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
    if( rank==0  && verbose == 1 )
    {
        pprintf( "%i total buggies\n", total );
    }
    

    
    // printf("WR=%d, Row=%d, Col=%d\n",rank,my_row,my_col);

    //Row and column size per processor
    int rsize, csize; 
    rsize = local_width;
    csize = local_height;


    if (rank == 0 && verbose == 1)
    {
        printf("rsize,csize,NP = %d, %d, %d\n",rsize,csize,world_size);
    }
    
    //Create new derived datatype for writing to files
    MPI_Datatype submatrix;

    int array_of_gsizes[2];
    int array_of_distribs[2];
    int array_of_dargs[2];
    int array_of_psize[2];

    if (run_type == 1)
    {
        if (rank == 0)
        {
            printf("g0,g1 = %i,%i\n", local_height*ncols, local_width);
            printf("p0,p1 = %i,%i\n", nrows, ncols);
        }
        array_of_gsizes[0] = local_height*ncols;
        array_of_gsizes[1] = local_width;
        array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK;
        array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK;
        array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG;
        array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG;
        array_of_psize[0] = nrows;
        array_of_psize[1] = ncols;
        // int order = MPI_ORDER_C;

        //size,rank,ndims,array_gsizes,array_distribs,array_args,array_psizes
        //order,oldtype,*newtype
        MPI_Type_create_darray(world_size, rank, 2, array_of_gsizes, array_of_distribs,
                array_of_dargs, array_of_psize, MPI_ORDER_C, MPI_UNSIGNED_CHAR, &submatrix);
        MPI_Type_commit(&submatrix);
    }
    else if (run_type == 2)
    {
        if (rank == 0)
        {
            printf("g0,g1 = %i,%i\n", local_height*ncols, local_width*nrows);
            printf("p0,p1 = %i,%i\n", nrows, ncols);
        }
        array_of_gsizes[0] = local_height*ncols;
        array_of_gsizes[1] = local_width*nrows;
        array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK;
        array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK;
        array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG;
        array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG;
        array_of_psize[0] = nrows;
        array_of_psize[1] = ncols;
        // int order = MPI_ORDER_C;

        //size,rank,ndims,array_gsizes,array_distribs,array_args,array_psizes
        //order,oldtype,*newtype
        MPI_Type_create_darray(world_size, rank, 2, array_of_gsizes, array_of_distribs,
                array_of_dargs, array_of_psize, MPI_ORDER_C, MPI_UNSIGNED_CHAR, &submatrix);
        MPI_Type_commit(&submatrix);
    }



    MPI_Barrier(MPI_COMM_WORLD);

    //////////////////ALLOCATE ARRAYS, CREATE DATATYPES/////////////////////

    //Create new column derived datatype
    MPI_Datatype column;
    //count, blocklength, stride, oldtype, *newtype
    MPI_Type_hvector(csize, 1, sizeof(unsigned char), MPI_UNSIGNED_CHAR, &column);
    MPI_Type_commit(&column);

    //Create new row derived datatype
    MPI_Datatype row;
    //count, blocklength, stride, oldtype, *newtype
    MPI_Type_hvector(rsize, 1, sizeof(unsigned char), MPI_UNSIGNED_CHAR, &row);
    MPI_Type_commit(&row);

    //allocate arrays and corner storage
    unsigned char *section;
    unsigned char *neighbors;
    //to use
    unsigned char *top;
    unsigned char *bot;
    unsigned char *left;
    unsigned char *right;
    //to send
    unsigned char *ttop;
    unsigned char *tbot;
    unsigned char *tleft;
    unsigned char *tright;
    //MALLOC!!
    section = (unsigned char*)malloc(rsize*csize*sizeof(unsigned char));
    neighbors = (unsigned char*)malloc(rsize*csize*sizeof(unsigned char));
    top = (unsigned char*)malloc(rsize*sizeof(unsigned char));
    bot = (unsigned char*)malloc(rsize*sizeof(unsigned char));
    left = (unsigned char*)malloc(csize*sizeof(unsigned char));
    right = (unsigned char*)malloc(csize*sizeof(unsigned char));
    ttop = (unsigned char*)malloc(rsize*sizeof(unsigned char));
    tbot = (unsigned char*)malloc(rsize*sizeof(unsigned char));
    tleft = (unsigned char*)malloc(csize*sizeof(unsigned char));
    tright = (unsigned char*)malloc(csize*sizeof(unsigned char));

    //corners
    unsigned char topleft,topright,botleft,botright; //used in calculations
    unsigned char ttopleft,ttopright,tbotleft,tbotright; 
    topleft = 255;
    topright = 255;
    botleft = 255;
    botright = 255;

    //used for animation: each process puts its own result into a buffer and
    //then sends it to process 0, which sums them up
    unsigned char* full_matrix;
    unsigned char* full_matrix_buffer;
    if (animation == 1)
    {
        int msize1 = rsize*ncols*csize*nrows;
        full_matrix = (unsigned char*)malloc(msize1*sizeof(unsigned char));
        full_matrix_buffer = (unsigned char*)malloc(msize1*sizeof(unsigned char));
        for (i=0; i<msize1; i++)
        {
            full_matrix[i] = 0;
            full_matrix_buffer[i] = 0;
        }
    }

    
    // printf("Rsize,Lsize,Fsize=%i %i %i,Csize,Lsize,Fsize=%i %i %i\n",rsize,local_width,field_width,csize,local_height,field_height);

    //Serial initialize vars
    int count = 0;
    if (world_size == 1 && run_type == 0)
    {
        for (i=0;i<csize;i++)
        {
            for (j=0;j<rsize;j++)
            {
                section[i*rsize + j] = 255;
                
                if (field_a[(i+1)*(2+rsize) + j + 1])
                {
                    section[i*rsize + j] = 0;
                    count += 1;
                }
                else
                {
                    section[i*rsize + j] = 255;
                }

                top[j] = 255;
                bot[j] = 255;
                ttop[j] = 255;
                tbot[j] = 255;
            }
            right[i] = 255;
            left[i] = 255;
            tright[i] = 255;
            tleft[i] = 255;
        }
        // printf("COUNT 4 = %d\n", count);
    }

    //Blocked/Checkered initializing variables
    else if (world_size > 1 && (run_type == 1 || run_type == 2))
    {
        //initialize
        for (i=0;i<csize;i++)
        {
            for (j=0;j<rsize;j++)
            {
                section[i*rsize + j] = 255;
                
                if (field_a[(i+1)*(2+rsize) + j + 1])
                {
                    section[i*rsize + j] = 0;
                    count += 1;
                }
                else
                {
                    section[i*rsize + j] = 255;
                }

                top[j] = 255;
                bot[j] = 255;
                ttop[j] = 255;
                tbot[j] = 255;
            }
            right[i] = 255;
            left[i] = 255;
            tright[i] = 255;
            tleft[i] = 255;
        }

        // MPI_Allreduce( &count, &total, 1, MPI_UNSIGNED_CHAR, MPI_SUM, MPI_COMM_WORLD );
        // if (rank == 0)
        // {
        //     printf("COUNT 4 = %d\n", total);
        // }
        
    }


    //header/footer for mpio writes
    //header1 holds the binary PGM header "P5\n512 512\n255\n" (magic number,
    //512x512 image, maximum gray value 255), written out byte by byte below;
    //a sketch of building this header at run time follows after this program
    char header1[15];
    header1[0] = 0x50;
    header1[1] = 0x35;
    header1[2] = 0x0a;
    header1[3] = 0x35;
    header1[4] = 0x31;
    header1[5] = 0x32;
    header1[6] = 0x20;
    header1[7] = 0x35;
    header1[8] = 0x31;
    header1[9] = 0x32;
    header1[10] = 0x0a;
    header1[11] = 0x32;
    header1[12] = 0x35;
    header1[13] = 0x35;
    header1[14] = 0x0a;

    char footer;
    footer = 0x0a;

    //make a frame or not?
    int create_frame = 0;

    //send to 
    int send_to;
    int receive_from;
    int info[5];
    info[2] = rank;
    info[3] = rsize;
    info[4] = csize;
    unsigned char info2[4];
    info2[0] = topleft;
    info2[1] = topright;
    info2[2] = botleft;
    info2[3] = botright;

    int current_count;
    int location;

    //Gameplay
    for (k=0;k<iterations;k++)
    {
        //Count buggies
        if (k%count_when==0)
        {
            if (verbose == 1)
            {
                current_count = rsize*csize-count_buggies(rsize,csize,section);
                MPI_Allreduce( &current_count, &total, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
                if (rank == 0)
                {
                    printf("Iteration=%5d,  Count=%6d\n", k,total);
                }
                ////corner debug
                // printf("WR,tl,tr,bl,br = %d %d %d %d %d\n", rank, topleft, topright, botleft, botright);
            }
        }

        
        //Write to file serially for comparison
        //If animation is requested
        if (animation == 1 && run_type == 0)
        {
            //Put smaller matrix part into larger matrix
            for (i=0; i<csize; i++)
            {
                for (j=0; j<rsize; j++)
                {
                    location = (my_row*csize*rsize*ncols + my_col*rsize + 
                                    i*rsize*ncols + j);

                    full_matrix_buffer[location] = section[i*rsize+j];
                }
                // if (rank == 0)
                // {
                //     printf("Location = %d\n", location);
                // }
            }

            //Gather matrix
            MPI_Reduce(full_matrix_buffer, full_matrix, rsize*ncols*csize*nrows, 
                MPI_UNSIGNED_CHAR, MPI_SUM, 0, MPI_COMM_WORLD);

            
            if (rank == 0 && run_type == 0)
            {
                write_matrix_to_pgm(k, rsize*ncols, csize*nrows, full_matrix);
            }
        }
        //mpio write pgm
        else if (animation == 1 && (run_type == 1 || run_type == 2))
        {
            //default is no frame
            create_frame = 0;
            for (ii=0;ii<20;ii++)
            {
                for (jj=0;jj<animation_list[ii][1]+1;jj++)
                {
                    // if (rank == 0)
                    // {
                    //     printf("a,ii,j,k= %i,%i,%i,%i, Frame? = %i\n",
                    //         animation_list[ii][0],ii,jj,k,(animation_list[ii][0]+jj-k)==0);
                    // }
                    if ((animation_list[ii][0] + jj - k) == 0)
                    {

                        create_frame = 1;
                        break;
                    }
                }
            }

            if (create_frame == 1)
            {
               //dynamic filename with leading zeroes for easy conversion to gif
                char buffer[128];
                snprintf(buffer, sizeof(char)*128, "Animation/frame%04d.pgm", k);

                /* open the file, and set the view */
                MPI_File file;
                MPI_File_open(MPI_COMM_WORLD, buffer, 
                              MPI_MODE_CREATE|MPI_MODE_WRONLY,
                              MPI_INFO_NULL, &file);

                MPI_File_set_view(file, 0,  MPI_UNSIGNED_CHAR, MPI_UNSIGNED_CHAR, 
                                       "native", MPI_INFO_NULL);

                //write header
                MPI_File_write(file, &header1, 15, MPI_CHAR, MPI_STATUS_IGNORE);

                //write matrix
                MPI_File_set_view(file, 15,  MPI_UNSIGNED_CHAR, submatrix, 
                                       "native", MPI_INFO_NULL);

                MPI_File_write_all(file, section, rsize*csize, 
                        MPI_UNSIGNED_CHAR, MPI_STATUS_IGNORE);

                //write footer (trailing newline)
                MPI_File_set_view(file, 15+rsize*ncols*csize*nrows,  
                        MPI_UNSIGNED_CHAR, MPI_UNSIGNED_CHAR, 
                        "native", MPI_INFO_NULL);

                MPI_File_write(file, &footer, 1, MPI_CHAR, MPI_STATUS_IGNORE);

                //close the frame file so all data is flushed to disk
                MPI_File_close(&file);
            } 
        }


        // BLOCKED COMMUNICATION //
        if (run_type == 1)
        {
            //update the bottom ghost row: send our top row to rank-1 and
            //receive bot from rank+1; alternate send/receive order by
            //parity to avoid deadlock
            send_to = rank - 1;
            receive_from = rank + 1;

            //figure out what to send
            //top and bottom
            for (i=0;i<rsize;i++)
            {
                ttop[i] = section[i];
                tbot[i] = section[rsize*(csize-1)+i];
            }

            //left n right
            for (i=0;i<csize;i++)
            {
                tleft[i] = section[0 + rsize*i];
                tright[i] = section[rsize-1 + rsize*i];
            }

            //send top, receive bot
            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from >= 0)
                {
                    MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {

                if (receive_from<world_size && receive_from >= 0)
                {
                    MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //update the top ghost row: send our bottom row to rank+1 and
            //receive top from rank-1; alternate send/receive order by
            //parity to avoid deadlock
            send_to = rank + 1;
            receive_from = rank - 1;

            //send bot, receive top
            if (rank%2==0)
            {
                // printf("%d, %d, %d\n", rank, send_to, receive_from);
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
                
                if (receive_from<world_size && receive_from >= 0)
                {
                    MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                // printf("%d, %d, %d\n", rank, send_to, receive_from);
                if (receive_from<world_size && receive_from >= 0)
                {
                    //*data,count,type,from,tag,comm,mpi_status
                    MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }

                if (send_to<world_size && send_to>=0)
                {
                    //*data,count,type,to,tag,comm
                    MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
            }
        }

        // CHECKERED COMMUNICATION //
        else if (run_type == 2)
        {
            //figure out what to send
            //top and bottom
            for (i=0;i<rsize;i++)
            {
                ttop[i] = section[i];
                tbot[i] = section[rsize*(csize-1)+i];
            }

            //left n right
            for (i=0;i<csize;i++)
            {
                tleft[i] = section[0 + rsize*i];
                tright[i] = section[rsize-1 + rsize*i];
            }

            //corners
            ttopleft = tleft[0];
            tbotleft = tleft[csize-1];
            ttopright = tright[0];
            tbotright = tright[csize-1];

            //Send top, receive bot
            send_to = rank - nrows;
            receive_from = rank + nrows;
            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0)
                {
                    MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {

                if (receive_from<world_size && receive_from>=0)
                {
                    MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send bot, receive top
            send_to = rank + nrows;
            receive_from = rank - nrows;
            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0)
                {
                    MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {

                if (receive_from<world_size && receive_from>=0)
                {
                    MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send left, receive right
            send_to = rank - 1;
            receive_from = rank + 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row)
                {
                    MPI_Send(tleft, 1, column, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row)
                {
                    MPI_Recv(right, 1, column, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row)
                {
                    MPI_Recv(right, 1, column, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row)
                {
                    MPI_Send(tleft, 1, column, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send right, receive left
            send_to = rank + 1;
            receive_from = rank - 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row)
                {
                    MPI_Send(tright, 1, column, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row)
                {
                    MPI_Recv(left, 1, column, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row)
                {
                    MPI_Recv(left, 1, column, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row)
                {
                    MPI_Send(tright, 1, column, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send topright, receive botleft
            send_to = rank - ncols + 1;
            receive_from = rank + ncols - 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1)
                {
                    MPI_Send(&ttopright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1)
                {
                    MPI_Recv(&botleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1)
                {
                    MPI_Recv(&botleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1)
                {
                    MPI_Send(&ttopright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send topleft, receive botright
            send_to = rank - ncols - 1;
            receive_from = rank + ncols + 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1)
                {
                    MPI_Send(&ttopleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1)
                {
                    MPI_Recv(&botright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1)
                {
                    MPI_Recv(&botright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1)
                {
                    MPI_Send(&ttopleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send botleft, receive topright
            send_to = rank + ncols - 1;
            receive_from = rank - ncols + 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1)
                {
                    MPI_Send(&tbotleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1)
                {
                    MPI_Recv(&topright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1)
                {
                    MPI_Recv(&topright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1)
                {
                    MPI_Send(&tbotleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send botright, receive topleft
            send_to = rank + ncols + 1;
            receive_from = rank - ncols - 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1)
                {
                    MPI_Send(&tbotright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1)
                {
                    MPI_Recv(&topleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1)
                {
                    MPI_Recv(&topleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1)
                {
                    MPI_Send(&tbotright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
            }


            info2[0] = topleft;
            info2[1] = topright;
            info2[2] = botleft;
            info2[3] = botright;

        }
 
        // if (rank == 1){
        //     print_matrix(rsize, 1, top);
        //     print_matrix(rsize, csize, section);
        //     print_matrix(rsize, 1, bot);
        //     printf("\n");
        // }
        // printf("wr=%d,iteration=%d,maxval=%d, 11\n", rank, k,(csize-1)*rsize-1+rsize);
        


        /////////// CELL UPDATES /////////////////
        //count neighbor
        for (i=0;i<csize;i++)
        {
            for (j=0; j<rsize; j++)
            {
                info[0] = i;
                info[1] = j;
                neighbors[i*rsize+j] = count_neighbors(info, info2, section, 
                                    top, bot, left, right);
                // printf("%i",neighbors[i*rsize+j]);
            }
            // printf("\n");
        }

        //update cells
        current_count = 0;
        for (i=0;i<csize;i++)
        {
            for (j=0; j<rsize; j++)
            {
                //cell currently alive
                if (section[i*rsize+j] == 0)
                {
                    //2 or 3 neighbors lives, else die
                    if (neighbors[i*rsize+j] < 2 || 
                        neighbors[i*rsize+j] > 3)
                    {
                        section[i*rsize+j] = 255;
                    }
                }
                else
                {
                    //Exactly 3 neighbors spawns new life
                    if (neighbors[i*rsize+j] == 3)
                    {
                        section[i*rsize+j] = 0;
                    }
                }
            }
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);
    sleep(1); //sleep() takes whole seconds; 0.5 would truncate to 0
    //free malloc stuff
    if( field_a != NULL ) free( field_a );
    if( field_b != NULL ) free( field_b );
    free(section);
    free(neighbors);
    free(top);
    free(bot);
    free(left);
    free(right);

    MPI_Finalize();
    exit (0);
}    
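The PGM header above is hard-coded as 15 bytes for a 512x512 image. Below is a minimal sketch, assuming the global image is (rsize*ncols) x (csize*nrows) pixels, of building that header at run time; make_pgm_header is a hypothetical helper, and its return value would then replace the literal 15 used in the MPI_File_write and MPI_File_set_view calls above.

#include <stdio.h>

/* Write the binary PGM header "P5\n<width> <height>\n255\n" into hdr and
   return its length in bytes (excluding the terminating null). */
static int make_pgm_header(char *hdr, size_t hdrlen, int width, int height)
{
    return snprintf(hdr, hdrlen, "P5\n%d %d\n255\n", width, height);
}

/* Example use:
       char hdr[64];
       int hdrlen = make_pgm_header(hdr, sizeof(hdr), rsize*ncols, csize*nrows);
*/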
Esempio n. 27
0
int main(int argc, char **argv)
{
    MPI_File fh;
    MPI_Status status;
    MPI_Offset size;
    long long *buf, i;
    char *filename;
    int j, mynod, nprocs, len, flag, err;

    MPI_Init(&argc,&argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &mynod);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    if (nprocs != 1) {
	fprintf(stderr, "Run this program on one process only\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
    }

    i = 1;
    while ((i < argc) && strcmp("-fname", *argv)) {
	i++;
	argv++;
    }
    if (i >= argc) {
	fprintf(stderr, "\n*#  Usage: large -fname filename\n\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
    }
    argv++;
    len = strlen(*argv);
    filename = (char *) malloc(len+1);
    strcpy(filename, *argv);
    fprintf(stderr, "This program creates an 4 Gbyte file. Don't run it if you don't have that much disk space!\n");

    buf = (long long *) malloc(SIZE * sizeof(long long));
    if (!buf) {
	fprintf(stderr, "not enough memory to allocate buffer\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR,
                  MPI_INFO_NULL, &fh);

    for (i=0; i<NTIMES; i++) {
	for (j=0; j<SIZE; j++)
	    buf[j] = i*SIZE + j;
	
	err = MPI_File_write(fh, buf, SIZE, MPI_DOUBLE, &status);
        /* MPI_DOUBLE because not all MPI implementations define
           MPI_LONG_LONG_INT, even though the C compiler supports long long. */
        if (err != MPI_SUCCESS) {
	    fprintf(stderr, "MPI_File_write returned error\n");
	    MPI_Abort(MPI_COMM_WORLD, 1);
	}
    }

    MPI_File_get_size(fh, &size);
    fprintf(stderr, "file size = %lld bytes\n", size);

    MPI_File_seek(fh, 0, MPI_SEEK_SET);

    for (j=0; j<SIZE; j++) buf[j] = -1;

    flag = 0;
    for (i=0; i<NTIMES; i++) {
	err = MPI_File_read(fh, buf, SIZE, MPI_DOUBLE, &status);
        /* MPI_DOUBLE because not all MPI implementations define
           MPI_LONG_LONG_INT, even though the C compiler supports long long. */
        if (err != MPI_SUCCESS) {
	    fprintf(stderr, "MPI_File_write returned error\n");
	    MPI_Abort(MPI_COMM_WORLD, 1);
	}
	for (j=0; j<SIZE; j++) 
	    if (buf[j] != i*SIZE + j) {
		fprintf(stderr, "error: buf %d is %lld, should be %lld \n", j, buf[j], 
                                 i*SIZE + j);
		flag = 1;
	    }
    }

    if (!flag) fprintf(stderr, "Data read back is correct\n");
    MPI_File_close(&fh);

    free(buf);
    free(filename);
    MPI_Finalize(); 
    return 0;
}
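A minimal sketch (not part of the test above) of verifying, before relying on the MPI_DOUBLE-for-long-long substitution used in the read and write calls above, that the two types actually have the same size on the current platform; check_double_matches_long_long is a hypothetical helper.

#include <stdio.h>
#include <mpi.h>

static void check_double_matches_long_long(void)
{
    int dsize;

    MPI_Type_size(MPI_DOUBLE, &dsize);
    if (dsize != (int) sizeof(long long)) {
        fprintf(stderr, "MPI_DOUBLE is %d bytes but long long is %d bytes\n",
                dsize, (int) sizeof(long long));
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
}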
Esempio n. 28
0
int main(int argc, char** argv){
    int irank, nrank;
    MPI_Init (&argc, &argv);
    MPI_Comm_size (MCW, &nrank);
    MPI_Comm_rank (MCW, &irank);

    double t1,t2;
    if(irank==0) t1 = MPI_Wtime();

    int nx, ny;
    int px, py;

    /* boilerplate (copy-and-paste) zone */
    // (1) init dims
    int dims[2] = {0,0};
    MPI_Dims_create(nrank,2,dims);
    ny = (NY-1)/dims[0];
    nx = (NX-1)/dims[1];
    
    // (2) init cart
    int periods[2] = {0,0}; // non-periodic boundaries
    MPI_Comm cart;
    MPI_Cart_create(MCW, 2, dims, periods, 0, &cart);

    int c[2];                   /* Cartesian coordinates of this rank */
    MPI_Cart_coords(cart, irank, 2, c);
    py = c[0]; // dims[] is ordered largest first, so y corresponds to c[0]
    px = c[1];

    double h = 1.0/NX;
    double dt = 0.1*h*h;
    double dth2 = dt/h/h;
    int i,j,k;
    int height = ny+2, width = nx+2;

    double (*u)[width];
    u = (double(*)[width])malloc(height*width*sizeof(double));
    u = (double(*)[width])(&u[1][1]);   /* shift the origin so index -1 addresses the ghost layer */

    double (*un)[width];
    un = (double(*)[width])malloc(height*width*sizeof(double));
    un = (double(*)[width])(&un[1][1]); /* shift the origin so index -1 addresses the ghost layer */

    for (j=-1;j<ny+1;j++)
        for (i=-1;i<nx+1;i++){
            u[j][i] = 0.0;
        }
    // (y=0)
    if (py==0)
        for (i=-1;i<nx+1;i++){
            u[-1][i] = 1.0;
        }

    // (x=0)
    if (px==0)
        for (j=0;j<ny+1;j++){
            u[j][-1] = 0.5;
        }

    MPI_Datatype vedge;
    MPI_Type_vector(ny, 1, nx+2, MPI_DOUBLE, &vedge);
    MPI_Type_commit(&vedge);
    int north, south, east, west;
    MPI_Cart_shift(cart,0,1,&south,&north);
    MPI_Cart_shift(cart,1,1,&west,&east);

    /* loop start */
    for (k=0; k<2000; k++){
        for (j=0; j<ny; j++){
            for (i=0; i<nx; i++)
                un[j][i] = u[j][i] + ( -4*u[j][i] + u[j][i+1] + u[j][i-1] + u[j+1][i] + u[j-1][i] )*dth2;
        }
        for (j=0; j<ny; j++){
            for (i=0; i<nx; i++)
                u[j][i] = un[j][i];
        }

        MPI_Sendrecv(&u[ny-1][0], nx, MPI_DOUBLE, north, 0,
                     &u[-1][0], nx, MPI_DOUBLE, south, 0,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Sendrecv(&u[0][0], nx, MPI_DOUBLE, south, 0,
                     &u[ny][0], nx, MPI_DOUBLE, north, 0,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);

        MPI_Sendrecv(&u[0][nx-1], 1, vedge, east, 0,
                     &u[0][-1], 1, vedge, west, 0,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Sendrecv(&u[0][0], 1, vedge, west, 0,
                     &u[0][nx], 1, vedge, east, 0,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    } // end loop(k)

    /* setview */
    MPI_File udata;
    MPI_File_open(cart, "u.data", MPI_MODE_WRONLY | MPI_MODE_CREATE,
                  MPI_INFO_NULL, &udata);
    MPI_File_set_size(udata,0);

    int size[2] = {NY+1, LW*(NX+1)+1}, subsize[2], start[2];
// ... py, px are this rank's Cartesian coordinates
    subsize[0] = ny;
    subsize[1] = LW*nx;
    start[0] = py*ny+1;
    start[1] = LW*(px*nx+1);

    if (py == 0){ subsize[0]++; start[0]=0; }   /* south edge */
    if (py == dims[0]-1) subsize[0]++;          /* north edge */
    if (px == 0){ subsize[1]+=LW; start[1]=0; } /* west edge */
    if (px == dims[1]-1) subsize[1]+=LW+1;      /* east edge */

    MPI_Datatype ftype;
    MPI_Type_create_subarray(2, size, subsize, start,
                             MPI_ORDER_C, MPI_CHAR, &ftype);
    MPI_Type_commit(&ftype);
    MPI_File_set_view(udata, 0, MPI_CHAR, ftype, "native",
                      MPI_INFO_NULL);

    /* output */
    MPI_Status st;
    char *wbuf = (char*)malloc((LW*(nx+2)+2)*sizeof(char));

    int jstart=0,istart=0, jend=ny, iend=nx;
    if(py==0) jstart = -1;
    if(py==dims[0]-1) jend = ny+1;
    if(px==0) istart = -1;
    if(px==dims[1]-1) iend = nx+1;

    for(j=jstart; j<jend; j++){
        for(i=istart,k=0; i<iend; i++,k+=LW){
            sprintf( wbuf+k, " %.15E %.15E %21.15E\n",
                     (i+1 + px*nx)*h, (j+1 + py*ny)*h, u[j][i] );
        }
        if( px == dims[1]-1 )     // east edge
            sprintf(wbuf+(k++),"\n");
        MPI_File_write(udata,wbuf,k,MPI_CHAR,&st);
    }

    MPI_File_close(&udata);

    if(irank==0){
        t2 = MPI_Wtime();
        printf("%g\n",t2-t1);
    }

    MPI_Finalize ();

    return 0;
}
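The file view above assumes a fixed record width LW for every formatted point. Below is a minimal sketch of deriving that width from the same format string passed to sprintf in the output loop, instead of hard-coding it; record_width is a hypothetical helper, not part of the program above.

#include <stdio.h>

/* Length, in characters, of one formatted record " x y u\n" as produced by
   the sprintf call in the output loop above (the trailing newline included). */
static int record_width(void)
{
    char tmp[128];
    return snprintf(tmp, sizeof(tmp), " %.15E %.15E %21.15E\n", 0.0, 0.0, 0.0);
}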
Esempio n. 29
0
/*----< main() >------------------------------------------------------------*/
int main(int argc, char **argv)
{
    int i, j, err, rank, np, num_io;
    char *buf, *filename;
    int rank_dim[2], array_of_sizes[2];
    int array_of_subsizes[2];
    int count, *blocklengths, global_array_size;
    MPI_Count ftype_size;
    MPI_Aint *displacements;
    MPI_File fh;
    MPI_Datatype ftype;
    MPI_Request *request;
    MPI_Status *statuses;
    MPI_Status status;
    MPI_Offset offset = 0;
    int nr_errors = 0;
#ifdef VERBOSE
    int k;
#endif

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &np);

    if (np != 4) {
        if (!rank)
            printf("Please run with 4 processes. Exiting ...\n\n");
        MPI_Finalize();
        return 1;
    }

    filename = (argc > 1) ? argv[1] : "testfile";

    num_io = 2;

    request = (MPI_Request *) malloc(num_io * sizeof(MPI_Request));
    statuses = (MPI_Status *) malloc(num_io * sizeof(MPI_Status));

    /*-----------------------------------------------------------------------*/
    /* process rank in each dimension */
    rank_dim[0] = rank / 2;
    rank_dim[1] = rank % 2;

    /* global 2D array size */
    array_of_sizes[0] = YLEN * 2;
    array_of_sizes[1] = XLEN * 2;

    global_array_size = array_of_sizes[0] * array_of_sizes[1];

    array_of_subsizes[0] = YLEN / 2;
    array_of_subsizes[1] = XLEN * SUB_XLEN / 5;

    offset = rank_dim[0] * YLEN * array_of_sizes[1] + rank_dim[1] * XLEN;

    /* define data type for file view */
    count = array_of_subsizes[0] * 2;   /* 2 is the no. blocks along X */
    blocklengths = (int *) malloc(count * sizeof(int));
    displacements = (MPI_Aint *) malloc(count * sizeof(MPI_Aint));
    for (i = 0; i < count; i++)
        blocklengths[i] = array_of_subsizes[1] / 2;
    for (i = 0; i < array_of_subsizes[0]; i++)
        for (j = 0; j < 2; j++)
            displacements[i * 2 + j] = offset + i * 2 * array_of_sizes[1]
                + j * XLEN / 2;
    MPI_Type_create_hindexed(count, blocklengths, displacements, MPI_CHAR, &ftype);
    MPI_Type_commit(&ftype);
    MPI_Type_size_x(ftype, &ftype_size);

/* subarray's layout in the global array

   P0's layout                                  P1's layout
   [ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9] | [ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9]
[ 0] 0 1 2     3 4 5                          |                       D E F     G H I
[ 1]                                          |
[ 2] 6 7 8     9 : ;                          |                       J K L     M N O
[ 3]                                          |
[ 4]                                          |
[ 5]                                          |
[ 6]                                          |
[ 7]                                          |
[ 8]                                          |
[ 9]                                          |

   P2's layout                                  P3's layout
   [ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9] | [ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9]
[ 0]                                          |
[ 1]                                          |
[ 2]                                          |
[ 3]                                          |
[ 4]                                          |
[ 5] X Y Z     [ \ ]                          |                       l m n     o p q
[ 6]                                          |
[ 7] ^ _ `     a b c                          |                       r s t     u v w
[ 8]                                          |
[ 9]                                          |
*/

    /* initialize the write buffer */
    buf = (char *) malloc(array_of_subsizes[0] * array_of_subsizes[1]);
    for (i = 0; i < array_of_subsizes[0] * array_of_subsizes[1]; i++)
        buf[i] = '0' + rank * 20 + i % 79;

    /* zero file contents --------------------------------------------------- */
    if (rank == 0) {
        char *wr_buf = (char *) calloc(num_io * global_array_size, 1);
        MPI_File_open(MPI_COMM_SELF, filename,
                      MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
        MPI_File_write(fh, wr_buf, num_io * global_array_size, MPI_CHAR, &status);
        MPI_File_close(&fh);
        free(wr_buf);
    }
    /* open the file -------------------------------------------------------- */
    err = MPI_File_open(MPI_COMM_WORLD, filename,
                        MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
    if (err != MPI_SUCCESS) {
        printf("Error: MPI_File_open() filename %s\n", filename);
        MPI_Abort(MPI_COMM_WORLD, -1);
        exit(1);
    }

    /* MPI nonblocking collective write */
    for (i = 0; i < num_io; i++) {
        offset = i * global_array_size;
        /* set the file view */
        MPI_File_set_view(fh, offset, MPI_BYTE, ftype, "native", MPI_INFO_NULL);
        MPI_File_iwrite_all(fh, buf, ftype_size, MPI_CHAR, &request[i]);
    }
    MPI_Waitall(num_io, request, statuses);
    MPI_File_close(&fh);

    /* read and print file contents ----------------------------------------- */
    if (rank == 0) {
        char *ptr;
        char *rd_buf = (char *) calloc(num_io * global_array_size, 1);
        MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, &fh);
        MPI_File_read(fh, rd_buf, num_io * global_array_size, MPI_CHAR, &status);
        MPI_File_close(&fh);

#ifdef VERBOSE
        printf("-------------------------------------------------------\n");
        printf("   [");
        for (i = 0; i < 2; i++) {
            for (j = 0; j < XLEN; j++)
                printf(" %d", j);
            printf(" ");
        }
        printf("]\n\n");


        ptr = rd_buf;
        for (k = 0; k < num_io; k++) {
            for (i = 0; i < 2 * YLEN; i++) {
                printf("[%2d]", k * 2 * YLEN + i);
                for (j = 0; j < 2 * XLEN; j++) {
                    if (j > 0 && j % XLEN == 0)
                        printf(" ");
                    if (*ptr != 0)
                        printf(" %c", *ptr);
                    else
                        printf("  ");
                    ptr++;
                }
                printf("\n");
            }
            printf("\n");
        }
#endif
        ptr = rd_buf;
        for (i = 0; i < 2 * YLEN * num_io; i++) {
            for (j = 0; j < 2 * XLEN; j++) {
                if (*ptr != compare_buf[i][j]) {
                    fprintf(stderr, "expected %d got %d at [%d][%d]\n",
                            *ptr, compare_buf[i][j], i, j);
                    nr_errors++;
                }
                ptr++;
            }
        }
        free(rd_buf);

        if (nr_errors == 0)
            fprintf(stdout, " No Errors\n");
        else
            fprintf(stderr, "Found %d errors\n", nr_errors);
    }

    free(blocklengths);
    free(displacements);
    free(buf);
    free(request);
    free(statuses);
    MPI_Type_free(&ftype);
    MPI_Finalize();
    return 0;
}
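A minimal sketch (not part of the test above) of inspecting the statuses filled in by MPI_Waitall to see how many elements each nonblocking collective write actually transferred; report_written is a hypothetical helper.

#include <stdio.h>
#include <mpi.h>

static void report_written(MPI_Status *statuses, int num_io)
{
    int i, count;

    for (i = 0; i < num_io; i++) {
        MPI_Get_count(&statuses[i], MPI_CHAR, &count);
        printf("request %d transferred %d MPI_CHAR elements\n", i, count);
    }
}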
Esempio n. 30
0
//------------------------------------------------------------------------------
// Function to output non-magnetic atomic positions to disk
//------------------------------------------------------------------------------
void atoms_non_magnetic(){

      //------------------------------------------------------------
      // Determine non magnetic atoms to be outputted to coord list
      //------------------------------------------------------------

      // array of atom numbers to be outputted
      std::vector<uint64_t> atom_list(0);

      // get output bounds
      const double minB[3] = {atoms_output_min[0] * cs::system_dimensions[0],
                              atoms_output_min[1] * cs::system_dimensions[1],
                              atoms_output_min[2] * cs::system_dimensions[2]};

      const double maxB[3] = {atoms_output_max[0] * cs::system_dimensions[0],
                              atoms_output_max[1] * cs::system_dimensions[1],
                              atoms_output_max[2] * cs::system_dimensions[2]};

      // Determine non magnetic atoms to be outputted to coord list
      for (uint64_t atom = 0; atom < cs::non_magnetic_atoms_array.size(); atom++){

         const double cc[3] = {cs::non_magnetic_atoms_array[atom].x, cs::non_magnetic_atoms_array[atom].y, cs::non_magnetic_atoms_array[atom].z};

         // check atom within output bounds
         if ( (cc[0] >= minB[0]) && (cc[0] <= maxB[0]) ){
            if ( (cc[1] >= minB[1]) && (cc[1] <= maxB[1]) ){
               if ( (cc[2] >= minB[2]) && (cc[2] <= maxB[2]) ){
                  atom_list.push_back(atom); //non-magnetic atoms
               }
            }
         }

      }

      //------------------------------------------------
      // Create temporary buffers for atom information
      //------------------------------------------------
      uint64_t num_local_atoms = atom_list.size();
      uint64_t num_total_atoms = 0; // number of atoms across all processors

      #ifdef MPICF
         // calculate number of atoms to be output on all processors
         MPI_Allreduce(&num_local_atoms, &num_total_atoms, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
      #else
         num_total_atoms = num_local_atoms;
      #endif

      std::vector<int> atom_type_buffer(num_local_atoms);
      for(unsigned int atom = 0; atom < num_local_atoms; atom++) atom_type_buffer[atom] = cs::non_magnetic_atoms_array[ atom_list[atom] ].mat;

      std::vector<int> atom_category_buffer(num_local_atoms);
      for(unsigned int atom = 0; atom < num_local_atoms; atom++) atom_category_buffer[atom] = cs::non_magnetic_atoms_array[ atom_list[atom] ].cat;

      std::vector<double> atom_coord_buffer(3*num_local_atoms);
      for(unsigned int atom = 0; atom < num_local_atoms; atom++){
         const uint64_t atom_id = atom_list[atom]; // get atom array index
         atom_coord_buffer[3*atom + 0] = cs::non_magnetic_atoms_array[atom_id].x;
         atom_coord_buffer[3*atom + 1] = cs::non_magnetic_atoms_array[atom_id].y;
         atom_coord_buffer[3*atom + 2] = cs::non_magnetic_atoms_array[atom_id].z;
      }

      //------------------------------------------
      // Output Meta Data from root process
      //------------------------------------------
      // set number of files
      // const int files = config::internal::num_io_groups; // unused variable

      if(config::internal::mode != legacy && vmpi::my_rank == 0){
         config::internal::write_non_magnetic_meta(num_total_atoms);
      }

      //------------------------------------------
      // Output coordinate data
      //------------------------------------------

      // Determine output filename
      std::stringstream file_sstr;

      // set simple file name for single file output
      if(config::internal::num_io_groups == 1) file_sstr << "non-magnetic-atoms.data";
      // otherwise set indexed files
      else file_sstr << "non-magnetic-atoms-" << std::setfill('0') << std::setw(6) << config::internal::io_group_id << ".data";

      // convert string stream to string
      std::string filename = file_sstr.str();

      // Calculate number of gigabytes to be written to disk
      // (3 doubles for coordinates plus 2 ints for type and category per atom)
      const double data_size = double(num_total_atoms) * 1.0e-9 * (3.0*double(sizeof(double)) + 2.0*double(sizeof(int)));

      // Output informative message of actual data size to be output to disk (in binary mode)
      zlog << zTs() << "Total non-magnetic data filesize: " << 1000.0 * data_size << " MB" << std::endl;

      // Output informative message to log file on root process
      zlog << zTs() << "Outputting non-magnetic atomic coordinate file to disk ";

      // Variable for calculating output bandwidth
      double io_time = 1.0e-12;

      //-----------------------------------------------------
      // Parallel mode output
      //-----------------------------------------------------
      #ifdef MPICF

      // Determine io mode and call appropriate function for data
      switch(config::internal::mode){

         // legacy
         case config::internal::legacy:
            break;

         case config::internal::mpi_io:{
            vutil::vtimer_t timer; // instantiate timer
            MPI_File fh; // MPI file handle
            MPI_Status status; // MPI io status
            // convert filename to character string for output
            char *cfilename = (char*)filename.c_str();
            // Open file on all processors
            MPI_File_open(MPI_COMM_WORLD, cfilename, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &fh);
            // write number of atoms on root process
            if(vmpi::my_rank == 0) MPI_File_write(fh, &num_total_atoms, 1, MPI_UINT64_T, &status);

            // Calculate local byte offsets explicitly: the explicit-offset routines (MPI_File_write_at_all) do not advance a shared file pointer after I/O
            MPI_Offset type_offset     = config::internal::linear_offset + sizeof(uint64_t);
            MPI_Offset category_offset = config::internal::linear_offset + num_total_atoms * sizeof(int) + sizeof(uint64_t);
            MPI_Offset data_offset     = config::internal::buffer_offset + 2 * num_total_atoms * sizeof(int) + sizeof(uint64_t);

            timer.start(); // start timer

            // Write data to disk
            MPI_File_write_at_all(fh, type_offset, &atom_type_buffer[0], atom_type_buffer.size(), MPI_INT, &status);
            MPI_File_write_at_all(fh, category_offset, &atom_category_buffer[0], atom_category_buffer.size(), MPI_INT, &status);
            MPI_File_write_at_all(fh, data_offset, &atom_coord_buffer[0], atom_coord_buffer.size(), MPI_DOUBLE, &status);

            timer.stop(); // Stop timer

            // Calculate elapsed time
            io_time = timer.elapsed_time();

            // Close file
            MPI_File_close(&fh);
            break;
         }

         case config::internal::fpprocess:
            io_time = write_coord_data(filename, atom_coord_buffer, atom_type_buffer, atom_category_buffer);
            break;

         case config::internal::fpnode:{
            // Gather data from all processors in io group
            std::vector<int> collated_atom_type_buffer(0);
            collate_int_data(atom_type_buffer, collated_atom_type_buffer);
            std::vector<int> collated_atom_category_buffer(0);
            collate_int_data(atom_category_buffer, collated_atom_category_buffer);
            std::vector<double> collated_atom_coord_buffer(0);
            collate_double_data(atom_coord_buffer, collated_atom_coord_buffer);
            // output data on master io processes
            if(config::internal::io_group_master) io_time = write_coord_data(filename, collated_atom_coord_buffer, collated_atom_type_buffer, collated_atom_category_buffer);
            // find longest time in all io nodes
            double max_io_time = 0.0;
            // calculate actual bandwidth on root process
            MPI_Reduce(&io_time, &max_io_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
            io_time = max_io_time;
            break;
         }

      }

      #else
         //-----------------------------------------------------
         // Serial mode output (ignores most io directives)
         //-----------------------------------------------------
         // if new output (not legacy) then output non magnetic atoms
         if(config::internal::mode != config::internal::legacy) io_time = write_coord_data(filename, atom_coord_buffer, atom_type_buffer, atom_category_buffer);
      #endif

      // Output bandwidth to log file
      zlog << data_size/io_time << " GB/s in " << io_time << " s" << std::endl;

      return;

   }
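A minimal sketch (an assumption for illustration, not necessarily how offsets such as config::internal::linear_offset and buffer_offset are produced elsewhere in the code base) of how a per-rank byte offset into a shared file can be derived from the local element count with an exclusive prefix sum.

#include <stddef.h>
#include <stdint.h>
#include <mpi.h>

// Byte offset at which this rank's data starts, given its local element
// count and the size of one element.
static MPI_Offset local_byte_offset(uint64_t local_count, size_t elem_size){

   int rank;
   uint64_t before = 0; // total element count on lower-ranked processes

   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Exscan(&local_count, &before, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
   if(rank == 0) before = 0; // MPI_Exscan leaves rank 0's result undefined

   return (MPI_Offset)(before * elem_size);

}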