Example #1
int main(int argc, char **argv)
{
    int *buf, i, mynod, nprocs, len, b[3];
    int errs=0, toterrs;
    MPI_Aint d[3];
    MPI_File fh;
    MPI_Status status;
    char *filename;
    MPI_Datatype typevec, newtype, t[3];
    MPI_Info info;

    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &mynod);

    if (nprocs != 2) {
        fprintf(stderr, "Run this program on two processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

/* process 0 takes the file name as a command-line argument and 
   broadcasts it to other processes (length first, then string) */
    if (!mynod) {
	i = 1;
	while ((i < argc) && strcmp("-fname", *argv)) {
	    i++;
	    argv++;
	}
	if (i >= argc) {
	    fprintf(stderr, "\n*#  Usage: noncontig -fname filename\n\n");
	    MPI_Abort(MPI_COMM_WORLD, 1);
	}
	argv++;
	len = strlen(*argv);
	filename = (char *) malloc(len+1);
	strcpy(filename, *argv);
	MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Bcast(filename, len+1, MPI_CHAR, 0, MPI_COMM_WORLD);
    }
    else {
	MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
	filename = (char *) malloc(len+1);
	MPI_Bcast(filename, len+1, MPI_CHAR, 0, MPI_COMM_WORLD);
    }

    buf = (int *) malloc(SIZE*sizeof(int));

    MPI_Type_vector(SIZE/2, 1, 2, MPI_INT, &typevec);

    /* create a struct type with explicitly set LB and UB; displacements
     * of typevec are such that the types for the two processes won't
     * overlap.
     */
    b[0] = b[1] = b[2] = 1;
    d[0] = 0;
    d[1] = mynod*sizeof(int);
    d[2] = SIZE*sizeof(int);
    t[0] = MPI_LB;
    t[1] = typevec;
    t[2] = MPI_UB;
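    /* typevec picks every other int (SIZE/2 blocks of one int, stride 2);
     * placing it at byte offset mynod*sizeof(int) makes rank 0 cover the
     * even int slots and rank 1 the odd ones, and the explicit LB/UB fix
     * the extent at SIZE ints so the two file views interleave without
     * overlapping. */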

    /* keep the struct, ditch the vector */
    MPI_Type_struct(3, b, d, t, &newtype);
    MPI_Type_commit(&newtype);
    MPI_Type_free(&typevec);

    MPI_Info_create(&info);
    /* I am setting these info values for testing purposes only. It is
       better to use the default values in practice. */
    MPI_Info_set(info, "ind_rd_buffer_size", "1209");
    MPI_Info_set(info, "ind_wr_buffer_size", "1107");

    if (!mynod) {
#if VERBOSE
	fprintf(stderr, "\ntesting noncontiguous in memory, noncontiguous in file using independent I/O\n");
#endif
	MPI_File_delete(filename, MPI_INFO_NULL);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR,
                  info, &fh);

    /* set the file view for each process -- now writes go into the non-
     * overlapping but interleaved region defined by the struct type up above
     */
    MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", info);

    /* fill our buffer with a pattern and write, using our type again */
    for (i=0; i<SIZE; i++) buf[i] = i + mynod*SIZE;
    MPI_File_write(fh, buf, 1, newtype, &status);

    MPI_Barrier(MPI_COMM_WORLD);

    /* fill the entire buffer with -1's.  read back with type.
     * note that the result of this read should be that every other value
     * in the buffer is still -1, as defined by our type.
     */
    for (i=0; i<SIZE; i++) buf[i] = -1;
    MPI_File_read_at(fh, 0, buf, 1, newtype, &status);

    /* check that all the values read are correct and also that we didn't
     * overwrite any of the -1 values that we shouldn't have.
     */
    for (i=0; i<SIZE; i++) {
	if (!mynod) {
	    if ((i%2) && (buf[i] != -1)) {
		errs++;
		fprintf(stderr, "Process %d: buf %d is %d, should be -1\n", 
			mynod, i, buf[i]);
	    }
	    if (!(i%2) && (buf[i] != i)) {
		errs++;
		fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", 
			mynod, i, buf[i], i);
	    }
	}
	else {
	    if ((i%2) && (buf[i] != i + mynod*SIZE)) {
		errs++;
		fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", 
			mynod, i, buf[i], i + mynod*SIZE);
	    }
	    if (!(i%2) && (buf[i] != -1)) {
		errs++;
		fprintf(stderr, "Process %d: buf %d is %d, should be -1\n", 
			mynod, i, buf[i]);
	    }
	}
    }

    MPI_File_close(&fh);

    MPI_Barrier(MPI_COMM_WORLD);

    if (!mynod) {
#if VERBOSE
	fprintf(stderr, "\ntesting noncontiguous in memory, contiguous in file using independent I/O\n");
#endif
	MPI_File_delete(filename, MPI_INFO_NULL);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR,
                  info, &fh);

    /* in this case we write to either the first half or the second half
     * of the file space, so the regions are not interleaved.  this is done
     * by leaving the file view at its default.
     */
    for (i=0; i<SIZE; i++) buf[i] = i + mynod*SIZE;
    MPI_File_write_at(fh, mynod*(SIZE/2)*sizeof(int), buf, 1, newtype, &status);

    MPI_Barrier(MPI_COMM_WORLD);

    /* same as before; fill buffer with -1's and then read; every other
     * value should still be -1 after the read
     */
    for (i=0; i<SIZE; i++) buf[i] = -1;
    MPI_File_read_at(fh, mynod*(SIZE/2)*sizeof(int), buf, 1, newtype, &status);

    /* verify that the buffer looks like it should */
    for (i=0; i<SIZE; i++) {
	if (!mynod) {
	    if ((i%2) && (buf[i] != -1)) {
		errs++;
		fprintf(stderr, "Process %d: buf %d is %d, should be -1\n", 
			mynod, i, buf[i]);
	    }
	    if (!(i%2) && (buf[i] != i)) {
		errs++;
		fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", 
			mynod, i, buf[i], i);
	    }
	}
	else {
	    if ((i%2) && (buf[i] != i + mynod*SIZE)) {
		errs++;
		fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", 
			mynod, i, buf[i], i + mynod*SIZE);
	    }
	    if (!(i%2) && (buf[i] != -1)) {
		errs++;
		fprintf(stderr, "Process %d: buf %d is %d, should be -1\n", 
			mynod, i, buf[i]);
	    }
	}
    }

    MPI_File_close(&fh);

    MPI_Barrier(MPI_COMM_WORLD);

    if (!mynod) {
#if VERBOSE
	fprintf(stderr, "\ntesting contiguous in memory, noncontiguous in file using independent I/O\n");
#endif
	MPI_File_delete(filename, MPI_INFO_NULL);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, 
                  info, &fh);

    /* set the file view so that we have interleaved access again */
    MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", info);

    /* this time write a contiguous buffer */
    for (i=0; i<SIZE; i++) buf[i] = i + mynod*SIZE;
    MPI_File_write(fh, buf, SIZE, MPI_INT, &status);

    MPI_Barrier(MPI_COMM_WORLD);

    /* fill buffer with -1's; this time they will all be overwritten */
    for (i=0; i<SIZE; i++) buf[i] = -1;
    MPI_File_read_at(fh, 0, buf, SIZE, MPI_INT, &status);

    for (i=0; i<SIZE; i++) {
	if (!mynod) {
	    if (buf[i] != i) {
		errs++;
		fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", 
			mynod, i, buf[i], i);
	    }
	}
	else {
	    if (buf[i] != i + mynod*SIZE) {
		errs++;
		fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", 
			mynod, i, buf[i], i + mynod*SIZE);
	    }
	}
    }

    MPI_File_close(&fh);

    MPI_Allreduce( &errs, &toterrs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
    if (mynod == 0) {
	if( toterrs > 0) {
	    fprintf( stderr, "Found %d errors\n", toterrs );
	}
	else {
	    fprintf( stdout, " No Errors\n" );
	}
    }
    MPI_Type_free(&newtype);
    MPI_Info_free(&info);
    free(buf);
    free(filename);
    MPI_Finalize();
    return 0;
}
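Example #2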
int main (int argc, char *argv[])
{
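    /* Strided independent-read benchmark: each rank repeatedly reads a
     * req_size-byte chunk at a stride of proc_num*req_size bytes, sleeps
     * for twice the given per-read time to emulate computation, and rank 0
     * reports the max/min/avg times and the aggregate bandwidth. */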
    
    int proc_num, my_rank, len;
    int i, j;
    double start_time, elapsed_time, all_time;
    double all_time_max, all_time_avg, all_time_min;
    struct timespec ts;
    MPI_Status status;
    MPI_File fh;
    MPI_Datatype contig_type;

    MPI_Init(&argc, &argv);
    
    // get the number of procs and rank in the comm
    MPI_Comm_size(MPI_COMM_WORLD, &proc_num);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    
    if(argc != 5) {
        printf("Wrong number of arguments!\n");
        printf("Usage: %s filename request_size repeat_times read_time\n", argv[0]);
        MPI_Finalize();
        return 0;
    }

    int req_size       = atoi(argv[2]);
    int repeat_time    = atoi(argv[3]);
    double ht_read_time= atof(argv[4]);
    ht_read_time *= 2.0;
    //if(my_rank == 0)
    //    printf("Sleep time: %lf\n",ht_read_time);

    ts.tv_sec = (int)ht_read_time;
    ts.tv_nsec = (ht_read_time - ts.tv_sec) * 1000000000;


    MPI_Offset stride  = proc_num * req_size;
    MPI_Offset tmp_pos = my_rank * req_size;

    char *read_data    = (char*)malloc(req_size);
     
    MPI_Type_contiguous( req_size, MPI_CHAR, &contig_type);
    MPI_Type_commit(&contig_type);


    start_time = MPI_Wtime();
    //MPI_File_open(MPI_COMM_WORLD, argv[1], MPI_MODE_CREATE|MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
    /* the default file error handler is MPI_ERRORS_RETURN, so check the
     * return code instead of comparing the opaque MPI_File handle to NULL */
    if (MPI_File_open(MPI_COMM_WORLD, argv[1], MPI_MODE_RDWR, MPI_INFO_NULL, &fh)
        != MPI_SUCCESS) {
    	printf("File does not exist or could not be opened\n");
    	MPI_Abort(MPI_COMM_WORLD, -1);
    }
       
    for(i = 0; i < repeat_time; i++) {
    
    //    MPI_Barrier(MPI_COMM_WORLD);
        MPI_File_read_at( fh, tmp_pos, read_data, 1, contig_type, &status );
        tmp_pos += stride;
        nanosleep(&ts, NULL);
    }
   

    MPI_File_close(&fh);

    elapsed_time = MPI_Wtime() - start_time;

    MPI_Reduce(&elapsed_time, &all_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    MPI_Reduce(&elapsed_time, &all_time_min, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
    MPI_Reduce(&elapsed_time, &all_time_avg, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);

    all_time_avg /= proc_num;



    MPI_Barrier(MPI_COMM_WORLD);
    
    double data_in_mb = (proc_num*(double)req_size*repeat_time)/(1024.0*1024.0);
    if(my_rank == 0)
        printf("Total time: %lf Min time: %lf Avg time: %lf Total data: %dM Agg Bandwidth: %lf\n", all_time, all_time_min, all_time_avg, (int)data_in_mb, data_in_mb/all_time);
    

//    printf("%d: %lf\n",my_rank, elapsed_time);
    free(read_data);
    MPI_Type_free(&contig_type);
    
    MPI_Finalize();

    return 0;
}
Example #3
File: blurmain.c  Project: nip3o/tddc78
int main (int argc, char ** argv) {
   int taskid, ntasks;

    int xsize, ysize, colmax;
    pixel src[MAX_PIXELS];
    double w[MAX_RAD];

    struct timespec stime, etime;
    struct timespec tstime, tetime;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &taskid);
    MPI_Comm_size(MPI_COMM_WORLD, &ntasks);

    // Create a custom MPI datatype for pixel
    pixel item;
    MPI_Datatype pixel_mpi;
    MPI_Datatype type[3] = { MPI_UNSIGNED_CHAR, MPI_UNSIGNED_CHAR, MPI_UNSIGNED_CHAR };
    int blocklen[] = { 1, 1, 1 };
    MPI_Aint start, disp[3];

    MPI_Address( &item, &start );
    MPI_Address( &item.r, &disp[0] );
    MPI_Address( &item.g, &disp[1] );
    MPI_Address( &item.b, &disp[2] );

    disp[0] -= start;
    disp[1] -= start;
    disp[2] -= start;

    MPI_Type_struct(3, blocklen, disp, type, &pixel_mpi);
    MPI_Type_commit(&pixel_mpi);

    int buffsize, radius, startY, endY;

    /* Take care of the arguments */
    if (argc != 4) {
        fprintf(stderr, "Usage: %s radius infile outfile\n", argv[0]);
        exit(1);
    }
    radius = atoi(argv[1]);
    if((radius > MAX_RAD) || (radius < 1)) {
        fprintf(stderr, "Radius (%d) must be greater than zero and less then %d\n", radius, MAX_RAD);
        exit(1);
    }

    if (taskid == ROOT) {
        /* read file */
        if(read_ppm (argv[2], &xsize, &ysize, &colmax, (char *) src) != 0)
            exit(1);

        if (colmax > 255) {
            fprintf(stderr, "Too large maximum color-component value\n");
            exit(1);
        }

        /* filter */
        printf("Has read the image, generating coefficients\n");
        get_gauss_weights(radius, w);
    }

    // Broadcast the gaussian weight vector
    MPI_Bcast(w, MAX_RAD, MPI_DOUBLE, ROOT, MPI_COMM_WORLD);
    // Broadcast image dimensions
    MPI_Bcast(&xsize, 1, MPI_INT, ROOT, MPI_COMM_WORLD);
    MPI_Bcast(&ysize, 1, MPI_INT, ROOT, MPI_COMM_WORLD);

    // Calculate chunk size
    buffsize = ceil((float)ysize / (float)ntasks) * xsize;
    pixel recvbuff[MAX_PIXELS];

    int sendcnts[ntasks], displs[ntasks], result_write_starts[ntasks], recievecounts[ntasks];
    int i;
    // Generate sendcount and displacement vectors for Scatterv
    for (i = 0; i < ntasks; i++) {
        // Send enough neighbors to make it possible to also calculate
        // blur in the edges of the chunk
        sendcnts[i] = buffsize + 2 * radius * xsize;
        displs[i] = max(0, i * buffsize);
    }

    clock_gettime(CLOCK_REALTIME, &tstime);

    // Send the image in chunks to all nodes
    MPI_Scatterv(src, sendcnts, displs,
                 pixel_mpi, recvbuff, buffsize + 2 * radius * xsize,
                 pixel_mpi, ROOT, MPI_COMM_WORLD);

    clock_gettime(CLOCK_REALTIME, &stime);

    // Run the filter on the received chunk
    blurfilter(xsize, (ysize / ntasks) + 2 * radius, recvbuff, radius, w, taskid);

    clock_gettime(CLOCK_REALTIME, &etime);
    printf("Filtering at %i took: %g secs\n", taskid, (etime.tv_sec  - stime.tv_sec) +
        1e-9*(etime.tv_nsec  - stime.tv_nsec));

    // Generate receive-count and displacement vectors for Gatherv
    for (i = 0; i < ntasks; i++) {
        result_write_starts[i] = i * buffsize + xsize * radius;
        // Only gather as much of each chunk as is really useful data
        recievecounts[i] = buffsize;
    }

    // Start writing from the beginning of the buffer if root
    result_write_starts[0] = 0;

    // Since the root node has no overlap in the beginning, we need to
    // send a little bit more from that node than from the rest.
    recievecounts[0] = buffsize + xsize * radius;

    pixel* result_read_start;
    if(taskid==ROOT) {
        // Root-node has no duplicated data in the beginning
        result_read_start = recvbuff;
    } else {
        // Jump over the duplicated data in the beginning of each chunk
        result_read_start = recvbuff + xsize * radius;
    }

    MPI_Gatherv(result_read_start, recievecounts[taskid], pixel_mpi,
                src, recievecounts, result_write_starts,
                pixel_mpi, ROOT, MPI_COMM_WORLD);

    clock_gettime(CLOCK_REALTIME, &tetime);

    MPI_Finalize();


    /* write result */
    if (taskid == ROOT) {
        printf("Everything took: %g secs\n", (tetime.tv_sec  - tstime.tv_sec) +
           1e-9*(tetime.tv_nsec  - tstime.tv_nsec));


        printf("Writing output file\n");

        if(write_ppm (argv[3], xsize, ysize, (char *)src) != 0)
          exit(1);
    }

    return(0);
}
Example #4
int main(int argc, char **argv) {
    int i, j, rank, nranks, peer, bufsize, errors;
    double *win_buf, *src_buf, *dst_buf;
    MPI_Win buf_win;

    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    bufsize = XDIM * YDIM * sizeof(double);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &win_buf);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &src_buf);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &dst_buf);

    if (rank == 0)
        if (verbose) printf("MPI RMA Strided Put Test:\n");

    for (i = 0; i < XDIM*YDIM; i++) {
        *(win_buf  + i) = 1.0 + rank;
        *(src_buf + i) = 1.0 + rank;
    }

    MPI_Win_create(win_buf, bufsize, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &buf_win);

    peer = (rank+1) % nranks;

    /* Perform ITERATIONS strided put operations */

    for (i = 0; i < ITERATIONS; i++) {
      MPI_Aint idx_loc[SUB_YDIM];
      int idx_rem[SUB_YDIM];
      int blk_len[SUB_YDIM];
      MPI_Datatype src_type, dst_type;
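      /* Each iteration builds a fresh pair of datatypes: src_type is an
       * hindexed type over the absolute addresses of SUB_YDIM rows of
       * src_buf (so the origin buffer is MPI_BOTTOM), while dst_type is an
       * indexed_block type whose byte displacements select the matching
       * rows inside the target window. */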

      if (rank == 0)
        if (verbose) printf(" + iteration %d\n", i);

      for (j = 0; j < SUB_YDIM; j++) {
        MPI_Get_address(&src_buf[j*XDIM], &idx_loc[j]);
        idx_rem[j] = j*XDIM*sizeof(double);
        blk_len[j] = SUB_XDIM*sizeof(double);
      }

      MPI_Type_create_hindexed(SUB_YDIM, blk_len, idx_loc, MPI_BYTE, &src_type);
      MPI_Type_create_indexed_block(SUB_YDIM, SUB_XDIM*sizeof(double), idx_rem, MPI_BYTE, &dst_type);

      MPI_Type_commit(&src_type);
      MPI_Type_commit(&dst_type);

      MPI_Win_lock(MPI_LOCK_EXCLUSIVE, peer, 0, buf_win);
      MPI_Put(MPI_BOTTOM, 1, src_type, peer, 0, 1, dst_type, buf_win);
      MPI_Win_unlock(peer, buf_win);

      MPI_Type_free(&src_type);
      MPI_Type_free(&dst_type);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Verify that the results are correct */

    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, buf_win);
    errors = 0;
    for (i = 0; i < SUB_XDIM; i++) {
      for (j = 0; j < SUB_YDIM; j++) {
        const double actual   = *(win_buf + i + j*XDIM);
        const double expected = (1.0 + ((rank+nranks-1)%nranks));
        if (fabs(actual - expected) > 1e-10) {
          printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
              rank, j, i, expected, actual);
          errors++;
          fflush(stdout);
        }
      }
    }
    for (i = SUB_XDIM; i < XDIM; i++) {
      for (j = 0; j < SUB_YDIM; j++) {
        const double actual   = *(win_buf + i + j*XDIM);
        const double expected = 1.0 + rank;
        if (fabs(actual - expected) > 1e-10) {
          printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
              rank, j, i, expected, actual);
          errors++;
          fflush(stdout);
        }
      }
    }
    for (i = 0; i < XDIM; i++) {
      for (j = SUB_YDIM; j < YDIM; j++) {
        const double actual   = *(win_buf + i + j*XDIM);
        const double expected = 1.0 + rank;
        if (fabs(actual - expected) > 1e-10) {
          printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
              rank, j, i, expected, actual);
          errors++;
          fflush(stdout);
        }
      }
    }
    MPI_Win_unlock(rank, buf_win);

    MPI_Win_free(&buf_win);
    MPI_Free_mem(win_buf);
    MPI_Free_mem(src_buf);
    MPI_Free_mem(dst_buf);

    MPI_Finalize();

    if (errors == 0) {
      if (rank == 0)
        printf(" No Errors\n");
      return 0;
    } else {
      printf("%d: Fail\n", rank);
      return 1;
    }
}
Example #5
void parallel_blas3_product(double *A, double *B, double *C, int m, int k, int n, int id, int np) {
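    /* Distributed BLAS-3 product C = A*B: A (m x k, column-major) is split
     * into blocks of l_k = k/np columns and B (k x n) into blocks of l_k
     * rows; every rank multiplies its pair of blocks with dgemm and the
     * m x n partial results are summed onto rank 0 with MPI_Reduce. */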
    if (k % np != 0) {
        if (id == 0)
            fprintf(stderr, "k is not divisible by np.\n");

        MPI_Abort(MPI_COMM_WORLD, 1);
        
    }
    MPI_Status status;
    int l_k = k / np;
    double *l_A = allocate_double_vector(l_k * m);
    double *l_B = allocate_double_vector(l_k * n);
    MPI_Datatype block_col_t;
    MPI_Datatype block_row_t;
    
    // for blocks in B = k x n
    MPI_Type_vector(
        n,              // count = one block (of l_k rows) per column of B
        l_k,              // blocklen = number of things in each block
        k,              // stride = difference between start of blocks
        MPI_DOUBLE,     // old datatype
        &block_row_t    // new datatype
        );
    MPI_Type_commit(&block_row_t);

    // for column of A= m x k
    MPI_Type_contiguous(
        m * l_k,        // count = number of items
        MPI_DOUBLE,     // old_type = type of items
        &block_col_t    // new_mpi_type = the new datatype
        );
    MPI_Type_commit(&block_col_t);

    if (id == 0) {
        // copy correct elements from A to l_A
        memcpy(l_A, A, sizeof(double) * l_k * m);
        for (int i = 1; i < np; ++i) {
            MPI_Send(&(A[0 + m*(i*l_k)]), 1, block_col_t, i, 0, MPI_COMM_WORLD);
        }
    }
    else {
        MPI_Recv(l_A, (m*l_k), MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
    }


    if (id == 0) {
        // copy numbers from B to l_B
        
        for (int col = 0; col < n; ++col) {
            for (int row = 0; row < l_k; ++row) {
                l_B[row + l_k*col] = B[row + k*col];
            }
        }

        for (int i = 1; i < np; ++i) {
            MPI_Send(&(B[i*l_k]), 1, block_row_t, i, 0, MPI_COMM_WORLD);
        }
    }
    else {
        MPI_Recv(l_B, (l_k*n), MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
    }

    /*
    //debugging only
    for (int i = 0; i < l_k*n; ++i) {
        printf("[%i]: Row l_B[%i]=%f\n", id, i, l_B[i]);
    }

    for (int i = 0; i < l_k*m; ++i) {
        printf("[%i]: Col l_A[%i]=%f\n", id, i, l_A[i]);
    }
    */




    // C only matters on process 0 and should be allocated outside this function
    double *local_C = allocate_double_vector(m*n);
    cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, l_k, 1, l_A, m, l_B, l_k, 0, local_C, m);
    MPI_Reduce(local_C, C, m*n, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    free(local_C);
    free(l_A);
    free(l_B);
}
Example #6
/* test uses a struct type that describes data that is contiguous,
 * but processed in a noncontiguous way.
 */
int struct_negdisp_test(void)
{
    int err, errs = 0;
    int sendbuf[6] = { 1, 2, 3, 4, 5, 6 };
    int recvbuf[6] = { -1, -2, -3, -4, -5, -6 };
    MPI_Datatype mystruct;
    MPI_Request request;
    MPI_Status status;

    MPI_Aint disps[2]     = { 0,       -1*((int) sizeof(int)) };
    int blks[2]           = { 1,       1, };
    MPI_Datatype types[2] = { MPI_INT, MPI_INT };
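    /* The struct holds one int at displacement 0 and one at -sizeof(int),
     * giving an extent of two ints.  Sending two such elements from
     * sendbuf+2 therefore transmits the values 3,2,5,4 in that order, which
     * the plain MPI_INT receive stores into recvbuf[1..4]. */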

    err = MPI_Type_struct(2, blks, disps, types, &mystruct);
    if (err != MPI_SUCCESS) {
	errs++;
	if (verbose) {
	    fprintf(stderr, "MPI_Type_struct returned error\n");
	}
    }

    MPI_Type_commit(&mystruct);

    err = MPI_Irecv(recvbuf+1, 4, MPI_INT, 0, 0, MPI_COMM_SELF, &request);
    if (err != MPI_SUCCESS) {
	errs++;
	if (verbose) {
	    fprintf(stderr, "MPI_Irecv returned error\n");
	}
    }

    err = MPI_Send(sendbuf+2, 2, mystruct, 0, 0, MPI_COMM_SELF);
    if (err != MPI_SUCCESS) {
	errs++;
	if (verbose) {
	    fprintf(stderr, "MPI_Send returned error\n");
	}
    }

    err = MPI_Wait(&request, &status);
    if (err != MPI_SUCCESS) {
	errs++;
	if (verbose) {
	    fprintf(stderr, "MPI_Wait returned error\n");
	}
    }

    /* verify data */
    if (recvbuf[0] != -1) {
	errs++;
	if (verbose) {
	    fprintf(stderr, "recvbuf[0] = %d; should be %d\n", recvbuf[0], -1);
	}
    }
    if (recvbuf[1] != 3) {
	errs++;
	if (verbose) {
	    fprintf(stderr, "recvbuf[1] = %d; should be %d\n", recvbuf[1], 3);
	}
    }
    if (recvbuf[2] != 2) {
	errs++;
	if (verbose) {
	    fprintf(stderr, "recvbuf[2] = %d; should be %d\n", recvbuf[2], 2);
	}
    }
    if (recvbuf[3] != 5) {
	errs++;
	if (verbose) {
	    fprintf(stderr, "recvbuf[3] = %d; should be %d\n", recvbuf[3], 5);
	}
    }
    if (recvbuf[4] != 4) {
	errs++;
	if (verbose) {
	    fprintf(stderr, "recvbuf[4] = %d; should be %d\n", recvbuf[4], 4);
	}
    }
    if (recvbuf[5] != -6) {
	errs++;
	if (verbose) {
	    fprintf(stderr, "recvbuf[5] = %d; should be %d\n", recvbuf[5], -6);
	}
    }

    MPI_Type_free(&mystruct);

    return errs;
}
Example #7
/** Optimized implementation of the ARMCI IOV operation that uses an MPI
  * datatype to achieve a one-sided gather/scatter.  Does not use MPI_BOTTOM.
  */
int ARMCII_Iov_op_datatype_no_bottom(enum ARMCII_Op_e op, void **src, void **dst, int count, int elem_count,
    MPI_Datatype type, int proc) {

    gmr_t *mreg;
    MPI_Datatype  type_loc, type_rem;
    MPI_Aint      disp_loc[count];
    int           disp_rem[count];
    int           block_len[count];
    void         *dst_win_base;
    int           dst_win_size, i, type_size;
    void        **buf_rem, **buf_loc;
    MPI_Aint      base_rem;
    MPI_Aint      base_loc;
    void         *base_loc_ptr;

    switch(op) {
      case ARMCII_OP_ACC:
      case ARMCII_OP_PUT:
        buf_rem = dst;
        buf_loc = src;
        break;
      case ARMCII_OP_GET:
        buf_rem = src;
        buf_loc = dst;
        break;
      default:
        ARMCII_Error("unknown operation (%d)", op);
        return 1;
    }

    MPI_Type_size(type, &type_size);

    mreg = gmr_lookup(buf_rem[0], proc);
    ARMCII_Assert_msg(mreg != NULL, "Invalid remote pointer");

    dst_win_base = mreg->slices[proc].base;
    dst_win_size = mreg->slices[proc].size;

    MPI_Get_address(dst_win_base, &base_rem);

    /* Pick a base address for the start of the origin's datatype */
    base_loc_ptr = buf_loc[0];
    MPI_Get_address(base_loc_ptr, &base_loc);
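    /* Local displacements are byte offsets relative to buf_loc[0] (the
     * chosen origin base), while remote displacements are expressed in
     * units of the element type relative to the start of the registered
     * window. */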

    for (i = 0; i < count; i++) {
      MPI_Aint target_rem, target_loc;
      MPI_Get_address(buf_loc[i], &target_loc);
      MPI_Get_address(buf_rem[i], &target_rem);
      disp_loc[i]  =  target_loc - base_loc;
      disp_rem[i]  = (target_rem - base_rem)/type_size;
      block_len[i] = elem_count;

      ARMCII_Assert_msg((target_rem - base_rem) % type_size == 0, "Transfer size is not a multiple of type size");
      ARMCII_Assert_msg(disp_rem[i] >= 0 && disp_rem[i] < dst_win_size, "Invalid remote pointer");
      ARMCII_Assert_msg(((uint8_t*)buf_rem[i]) + block_len[i] <= ((uint8_t*)dst_win_base) + dst_win_size, "Transfer exceeds buffer length");
    }

    MPI_Type_create_hindexed(count, block_len, disp_loc, type, &type_loc);
    MPI_Type_create_indexed_block(count, elem_count, disp_rem, type, &type_rem);
    //MPI_Type_indexed(count, block_len, disp_rem, type, &type_rem);

    MPI_Type_commit(&type_loc);
    MPI_Type_commit(&type_rem);

    gmr_lock(mreg, proc);

    switch(op) {
      case ARMCII_OP_ACC:
        gmr_accumulate_typed(mreg, base_loc_ptr, 1, type_loc, MPI_BOTTOM, 1, type_rem, proc);
        break;
      case ARMCII_OP_PUT:
        gmr_put_typed(mreg, base_loc_ptr, 1, type_loc, MPI_BOTTOM, 1, type_rem, proc);
        break;
      case ARMCII_OP_GET:
        gmr_get_typed(mreg, MPI_BOTTOM, 1, type_rem, base_loc_ptr, 1, type_loc, proc);
        break;
      default:
        ARMCII_Error("unknown operation (%d)", op);
        return 1;
    }

    gmr_unlock(mreg, proc);

    MPI_Type_free(&type_loc);
    MPI_Type_free(&type_rem);

    return 0;
}    
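Example #8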
static int two_phase_exchage_data(mca_io_ompio_file_t *fh,
				  void *buf,
				  char *write_buf,
				  struct iovec *offset_length,
				  int *send_size,int *start_pos,
				  int *recv_size,
				  OMPI_MPI_OFFSET_TYPE off,
				  OMPI_MPI_OFFSET_TYPE size, int *count,
				  int *partial_recv, int *sent_to_proc,
				  int contig_access_count,
				  OMPI_MPI_OFFSET_TYPE min_st_offset,
				  OMPI_MPI_OFFSET_TYPE fd_size,
				  OMPI_MPI_OFFSET_TYPE *fd_start,
				  OMPI_MPI_OFFSET_TYPE *fd_end,
				  Flatlist_node *flat_buf,
				  mca_io_ompio_access_array_t *others_req,
				  int *send_buf_idx, int *curr_to_proc,
				  int *done_to_proc, int iter,
				  int *buf_idx,MPI_Aint buftype_extent,
				  int striping_unit, int *aggregator_list,
				  int *hole){
  
    int *tmp_len=NULL, sum, *srt_len=NULL, nprocs_recv, nprocs_send,  k,i,j;
    int ret=OMPI_SUCCESS;
    MPI_Request *requests=NULL, *send_req=NULL;
    MPI_Datatype *recv_types=NULL;
    OMPI_MPI_OFFSET_TYPE *srt_off=NULL;
    char **send_buf = NULL; 
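    /* Exchange step of two-phase collective I/O: trade send/receive sizes
     * with an alltoall, build hindexed receive types that deposit incoming
     * data directly into write_buf, pre-read the target file region when
     * the incoming pieces leave a hole, then post the nonblocking receives
     * and sends (packing noncontiguous user buffers into send_buf first). */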
    
    
    ret = fh->f_comm->c_coll.coll_alltoall (recv_size,
					    1,
					    MPI_INT,
					    send_size,
					    1,
					    MPI_INT,
					    fh->f_comm,
					    fh->f_comm->c_coll.coll_alltoall_module);
    
    if ( OMPI_SUCCESS != ret ){
      return ret;
    }

    nprocs_recv = 0;
    for (i=0;i<fh->f_size;i++){
      if (recv_size[i]){
	nprocs_recv++;
      }
    }
    
    
    recv_types = (MPI_Datatype *)
	malloc (( nprocs_recv + 1 ) * sizeof(MPI_Datatype));
    
    if ( NULL == recv_types ){
      return OMPI_ERR_OUT_OF_RESOURCE;
    }

    tmp_len = (int *) malloc(fh->f_size*sizeof(int));
    
    if ( NULL == tmp_len ) {
      return OMPI_ERR_OUT_OF_RESOURCE;
    }

    j = 0;
    for (i=0;i<fh->f_size;i++){
	if (recv_size[i]) {
	    if (partial_recv[i]) {
		k = start_pos[i] + count[i] - 1;
		tmp_len[i] = others_req[i].lens[k];
		others_req[i].lens[k] = partial_recv[i];
	    }
	    MPI_Type_hindexed(count[i], 
			      &(others_req[i].lens[start_pos[i]]),
			      &(others_req[i].mem_ptrs[start_pos[i]]), 
			      MPI_BYTE, recv_types+j);
	    MPI_Type_commit(recv_types+j);
	    j++;
	}
    }

    sum = 0;
    for (i=0;i<fh->f_size;i++) sum += count[i];
    srt_off = (OMPI_MPI_OFFSET_TYPE *) 
      malloc((sum+1)*sizeof(OMPI_MPI_OFFSET_TYPE));
    
    if ( NULL == srt_off ){
      return OMPI_ERR_OUT_OF_RESOURCE;
    }
    
    srt_len = (int *) malloc((sum+1)*sizeof(int));
    
    if ( NULL == srt_len ) {
      return OMPI_ERR_OUT_OF_RESOURCE;
    }


    two_phase_heap_merge(others_req, count, srt_off, srt_len, start_pos, fh->f_size,fh->f_rank,  nprocs_recv, sum);


    for (i=0; i<fh->f_size; i++) 
        if (partial_recv[i]) {
            k = start_pos[i] + count[i] - 1;
            others_req[i].lens[k] = tmp_len[i];
        }
    
    if ( NULL != tmp_len ){
      free(tmp_len); 
    }

    *hole = 0;
    if (off != srt_off[0]){
	*hole = 1;
    }
    else{
	for (i=1;i<sum;i++){
	    if (srt_off[i] <= srt_off[0] + srt_len[0]){
		int new_len = srt_off[i] + srt_len[i] - srt_off[0];
		if(new_len > srt_len[0]) 
		    srt_len[0] = new_len;
	    }
	    else
		break;
	}
	if (i < sum || size != srt_len[0])
	    *hole = 1;
    }


    if ( NULL != srt_off ){
      free(srt_off);
    }
    if ( NULL != srt_len ){
      free(srt_len);
    }

    if (nprocs_recv){
	if (*hole){
	    if (off > 0){
		fh->f_io_array = (mca_io_ompio_io_array_t *)malloc 
		    (sizeof(mca_io_ompio_io_array_t));
		if (NULL == fh->f_io_array) {
		    opal_output(1, "OUT OF MEMORY\n");
		    return OMPI_ERR_OUT_OF_RESOURCE;
		}
		fh->f_io_array[0].offset  =(IOVBASE_TYPE *)(intptr_t)off;
		fh->f_num_of_io_entries = 1;
		fh->f_io_array[0].length = size;
		fh->f_io_array[0].memory_address = write_buf;
		if (fh->f_num_of_io_entries){
		    if (OMPI_SUCCESS != fh->f_fbtl->fbtl_preadv (fh, NULL)) {
			opal_output(1, "READ FAILED\n");
			return OMPI_ERROR;
		    }
		}
		
	    }
	    fh->f_num_of_io_entries = 0;
	    if (NULL != fh->f_io_array) {
		free (fh->f_io_array);
		fh->f_io_array = NULL;
	    }
	}
    }
    
    nprocs_send = 0;
    for (i=0; i <fh->f_size; i++) if (send_size[i]) nprocs_send++;

    #if DEBUG_ON
    printf("%d : nprocs_send : %d\n", fh->f_rank,nprocs_send);
    #endif

    requests = (MPI_Request *) 	
	malloc((nprocs_send+nprocs_recv+1)*sizeof(MPI_Request)); 

    if ( NULL == requests ){
      return OMPI_ERR_OUT_OF_RESOURCE;
    }
    
    j = 0;
    for (i=0; i<fh->f_size; i++) {
	if (recv_size[i]) {
	  ret = MCA_PML_CALL(irecv(MPI_BOTTOM,
				   1,
				   recv_types[j],
				   i,
				   fh->f_rank+i+100*iter,
				   fh->f_comm,
				   requests+j));

	  if ( OMPI_SUCCESS != ret ){
	    return ret;
	  }
	  j++;
	}
    }
    send_req = requests + nprocs_recv;
    
    
    if (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY) {
	j = 0;
	for (i=0; i <fh->f_size; i++) 
	  if (send_size[i]) {
	    ret = MCA_PML_CALL(isend(((char *) buf) + buf_idx[i],
				     send_size[i],
				     MPI_BYTE,
				     i,
				     fh->f_rank+i+100*iter,
				     MCA_PML_BASE_SEND_STANDARD, 
				     fh->f_comm,
				     send_req+j));	

	    if ( OMPI_SUCCESS != ret ){
	      return ret;
	    }
	    
	    j++;
	    buf_idx[i] += send_size[i];
	  }
    }
    else if(nprocs_send && (!(fh->f_flags & OMPIO_CONTIGUOUS_MEMORY))){
      send_buf = (char **) malloc(fh->f_size*sizeof(char*));
      if ( NULL == send_buf ){
	return OMPI_ERR_OUT_OF_RESOURCE;
      }
      for (i=0; i < fh->f_size; i++){
	if (send_size[i]) {
	  send_buf[i] = (char *) malloc(send_size[i]);
	  
	  if ( NULL == send_buf[i] ){
	    return OMPI_ERR_OUT_OF_RESOURCE;
	  }
	}
      }
      
      ret = two_phase_fill_send_buffer(fh, buf,flat_buf, send_buf,
				       offset_length, send_size,
				       send_req,sent_to_proc,
				       contig_access_count, 
				       min_st_offset, fd_size,
				       fd_start, fd_end, send_buf_idx,
				       curr_to_proc, done_to_proc,
				       iter, buftype_extent, striping_unit,
				       aggregator_list);
      
      if ( OMPI_SUCCESS != ret ){
	return ret;
      }
    }

    for (i=0; i<nprocs_recv; i++) MPI_Type_free(recv_types+i);
    free(recv_types);
    ret = ompi_request_wait_all (nprocs_send+nprocs_recv,
				 requests,
				 MPI_STATUSES_IGNORE);
    

    if ( NULL != requests ){
      free(requests);
    }
    
    return ret;
}
Example #9
int main( int argc, char *argv[])
{
	struct a {	int	i;
			char	c;
		} s[10], s1[10];
	int j;
	int errs = 0;
	int rank, size, tsize;
	MPI_Aint text;
	int blens[2];
	MPI_Aint disps[2];
	MPI_Datatype bases[2];
	MPI_Datatype str, con;
	MPI_Status status;

	MTest_Init( &argc, &argv );

	MPI_Comm_rank( MPI_COMM_WORLD, &rank );
	MPI_Comm_size( MPI_COMM_WORLD, &size );

	for( j = 0; j < 10; j ++ ) {
		s[j].i = j + rank;
		s[j].c = j + rank + 'a';
	}

	blens[0] = blens[1] = 1;
	disps[0] = 0; disps[1] = sizeof(int);
	bases[0] = MPI_INT; bases[1] = MPI_CHAR;
	MPI_Type_struct( 2, blens, disps, bases, &str );
	MPI_Type_commit( &str );
	MPI_Type_contiguous( 10, str, &con );
	MPI_Type_commit( &con );
	MPI_Type_size( con, &tsize );
	MPI_Type_extent( con, &text );

	MTestPrintfMsg( 0, "Size of MPI array is %d, extent is %d\n",
			tsize, text );

	/* The following block of code is only for verbose-level output */
        {
	void * p1, *p2;
	p1 = s;
	p2 = &(s[10].i);  /* This statement may fail on some systems */
	MTestPrintfMsg( 0,
		"C array starts at %p and ends at %p for a length of %d\n",
		s, &(s[9].c), (char *)p2-(char *)p1 );
        }

	MPI_Type_extent( str, &text );
	MPI_Type_size( str, &tsize );
	MTestPrintfMsg( 0, "Size of MPI struct is %d, extent is %d\n",
			tsize, (int)text );
	MTestPrintfMsg( 0, "Size of C struct is %d\n", sizeof(struct a) );
	if (text != sizeof(struct a)) {
	    fprintf( stderr,
		     "Extent of struct a (%d) does not match sizeof (%d)\n",
		     (int)text, (int)sizeof(struct a) );
	    errs++;
	}

	MPI_Send( s, 1, con, rank ^ 1, 0, MPI_COMM_WORLD );
	MPI_Recv( s1, 1, con, rank ^ 1, 0, MPI_COMM_WORLD, &status );

	for( j = 0; j < 10; j++ ) {
	    MTestPrintfMsg( 0, "%d Sent: %d %c, Got: %d %c\n", rank,
			    s[j].i, s[j].c, s1[j].i, s1[j].c );
	    if ( s1[j].i != j + status.MPI_SOURCE ) {
		errs++;
		fprintf( stderr, "Got s[%d].i = %d; expected %d\n", j, s1[j].i,
			j + status.MPI_SOURCE );
	    }
	    if ( s1[j].c != 'a' + j + status.MPI_SOURCE ) {
		errs++;
		/* If the character is not a printing character,
		   this can generate a file that diff, for example,
		   believes is a binary file */
		if (isprint( (int)(s1[j].c) )) {
		    fprintf( stderr, "Got s[%d].c = %c; expected %c\n",
			     j, s1[j].c, j + status.MPI_SOURCE + 'a');
		}
		else {
		    fprintf( stderr, "Got s[%d].c = %x; expected %c\n",
			     j, (int)s1[j].c, j + status.MPI_SOURCE + 'a');
		}
	    }
	}

	MPI_Type_free( &str );
	MPI_Type_free( &con );

	MTest_Finalize( errs );
	MPI_Finalize();
	return 0;
}
Example #10
static int test_vard(int ncid, int *varid)
{
    int          rank, nprocs, err, nerrs=0, i, buf[NY+4][NX+4];
    int          array_of_sizes[2], array_of_subsizes[2], array_of_starts[2];
    MPI_Offset   start[2], count[2];
    MPI_Datatype buftype, rec_filetype, fix_filetype;

    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    start[0] = 0; start[1] = NX*rank;
    count[0] = 2; count[1] = NX;

    /* create a buftype with ghost cells on each side */
    array_of_sizes[0]    = count[0]+4;
    array_of_sizes[1]    = count[1]+4;
    array_of_subsizes[0] = count[0];
    array_of_subsizes[1] = count[1];
    array_of_starts[0]   = 2;
    array_of_starts[1]   = 2;
    MPI_Type_create_subarray(2, array_of_sizes, array_of_subsizes,
                             array_of_starts, MPI_ORDER_C,
                             MPI_INT, &buftype);
    MPI_Type_commit(&buftype);

    /* create a file type for the fixed-size variable */
    array_of_sizes[0]    = 2;
    array_of_sizes[1]    = NX*nprocs;
    array_of_subsizes[0] = count[0];
    array_of_subsizes[1] = count[1];
    array_of_starts[0]   = start[0];
    array_of_starts[1]   = start[1];
    MPI_Type_create_subarray(2, array_of_sizes, array_of_subsizes,
                             array_of_starts, MPI_ORDER_C,
                             MPI_INT, &fix_filetype);
    MPI_Type_commit(&fix_filetype);

    /* create a file type for the record variable */
    int *array_of_blocklengths=(int*) malloc(count[0]*sizeof(int));
    MPI_Aint *array_of_displacements=(MPI_Aint*) malloc(count[0]*sizeof(MPI_Aint));
    MPI_Offset recsize;
    err = ncmpi_inq_recsize(ncid, &recsize); CHECK_ERR
    for (i=0; i<count[0]; i++) {
        array_of_blocklengths[i] = count[1];
        array_of_displacements[i] = start[1]*sizeof(int) + recsize * i;
    }
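    /* Each record contributes one block of count[1] ints for this rank,
     * starting start[1]*sizeof(int) bytes into the record; consecutive
     * records are recsize bytes apart in the file. */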
    MPI_Type_create_hindexed(2, array_of_blocklengths, array_of_displacements,
                             MPI_INT, &rec_filetype);
    MPI_Type_commit(&rec_filetype);
    free(array_of_blocklengths);
    free(array_of_displacements);

    TRC(ncmpi_put_vard_all)(ncid, varid[0], rec_filetype, &buf[0][0], 1, buftype); CHECK_ERR
    TRC(ncmpi_rename_var)(ncid, varid[0], "rec_VAR"); CHECK_ERR
    TRC(ncmpi_put_vard_all)(ncid, varid[1], fix_filetype, &buf[0][0], 1, buftype); CHECK_ERR
    TRC(ncmpi_rename_var)(ncid, varid[0], "rec_var"); CHECK_ERR

    TRC(ncmpi_begin_indep_data)(ncid); CHECK_ERR
    TRC(ncmpi_put_vard)(ncid, varid[0], rec_filetype, &buf[0][0], 1, buftype); CHECK_ERR
    TRC(ncmpi_rename_var)(ncid, varid[0], "rec_VAR"); CHECK_ERR
    TRC(ncmpi_put_vard)(ncid, varid[1], fix_filetype, &buf[0][0], 1, buftype); CHECK_ERR
    TRC(ncmpi_rename_var)(ncid, varid[0], "rec_var"); CHECK_ERR
    TRC(ncmpi_end_indep_data)(ncid); CHECK_ERR

    MPI_Type_free(&rec_filetype);
    MPI_Type_free(&fix_filetype);
    MPI_Type_free(&buftype);
    return nerrs;
}
Example #11
int main( int argc, char *argv[] )
{
    int myrank, size;
    int *sendbuf,*recvbuf;
    int i;
    int count;
    int block[3];
    MPI_Aint disp[3];

    MPI_Status *stat;
    MPI_Request *request;
    MPI_Datatype newtype;
    MPI_Datatype dtype[3];

    MPI_Init( 0, 0 );
    MPI_Comm_size( MPI_COMM_WORLD, &size );
    MPI_Comm_rank( MPI_COMM_WORLD, &myrank );

    sendbuf = (int*)malloc(MAXSIZE);
    recvbuf = (int*)malloc(MAXSIZE);
    stat = (MPI_Status*)malloc(sizeof(MPI_Status)*size);
    request = (MPI_Request*)malloc(sizeof(MPI_Request)*size);

    for(i=0;i<MAXSIZE/4;i++){
       sendbuf[i] = 1;
       recvbuf[i] = 0;
    }

    count = 2;

    dtype[0] = MPI_LB;
    dtype[1] = MPI_INT;
    dtype[2] = MPI_UB;

    block[0] = 1;
    block[1] = (MAXSIZE/count)/size/sizeof(int);
    block[2] = 1;

    disp[0] = 0;
    disp[1] = (MAXSIZE/count)/size*myrank;
    disp[2] = MAXSIZE/count;
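    /* newtype covers block[1] ints at byte offset disp[1] inside an extent
     * of MAXSIZE/count bytes (pinned by the MPI_LB/MPI_UB markers), so each
     * rank reads from or deposits into its own slice of every such region;
     * the loops below check which parts of recvbuf were actually written. */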

    MPI_Type_struct( 3, block, disp, dtype, &newtype);
    MPI_Type_commit(&newtype);

    if(myrank == 0){
        MPI_Send( sendbuf, count, newtype, 1, 0, MPI_COMM_WORLD);
    }
    if(myrank == 1){
        MPI_Recv( recvbuf, count, newtype, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }

    if(myrank == 1){
    for(i=0;i<block[1];i++){
        if((0 != recvbuf[i])){
            printf("MYRANK %d failed 1 recvbuf[%d] %d\n",myrank,i,recvbuf[i]);
            MPI_Finalize();
            exit(0);
        }
    }
    for(i = block[1] ; i<block[1]*2;i++){
        if(1 != recvbuf[i]){
            printf("MYRANK %d failed 2 recvbuf[%d] %d\n",myrank,i,recvbuf[i]);
            MPI_Finalize();
            exit(0);
        }
    }
    for(i = block[1]*2 ; i<block[1]*3;i++){
        if(0 != recvbuf[i]){
            printf("MYRANK %d failed 3 recvbuf[%d] %d\n",myrank,i,recvbuf[i]);
            MPI_Finalize();
            exit(0);
        }
    }
    for(i = block[1]*3 ; i<block[1]*4;i++){
        if(1 != recvbuf[i]){
            printf("MYRANK %d failed 4 recvbuf[%d] %d\n",myrank,i,recvbuf[i]);
            MPI_Finalize();
            exit(0);
        }
    }
    }


    MPI_Finalize();

    return 0;
}
Example #12
int main( int argc, char **argv ) {
    num_particles = read_int( argc, argv, "-n", 1000 );
    char *savename = read_string( argc, argv, "-o", NULL );
    MPI_Init( &argc, &argv );
    MPI_Comm_size( MPI_COMM_WORLD, &num_procs );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    fsave = savename && rank == 0 ? fopen( savename, "w" ) : NULL;
    particles = (particle_t*) malloc( num_particles * sizeof(particle_t) );
    MPI_Type_contiguous( 7, MPI_DOUBLE, &PARTICLE );
    MPI_Type_commit( &PARTICLE );
    set_size( num_particles );

    init_vars();
    init_partitions();

    if( rank == 0 ) {
        init_particles( num_particles, particles );
    }
    MPI_Bcast(particles, num_particles, PARTICLE, 0, MPI_COMM_WORLD);
    partition_particles();
    init_grid();

    double simulation_time = read_timer();
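    // Each step: exchange fringe (ghost) particles with the neighboring
    // ranks, rebuild the grid and advance the particles, then migrate any
    // particles that crossed a partition boundary to the owning neighbor.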

    for( int step = 0; step < NSTEPS; step++ ) {
        if (rank == 0) {
            right_fringe();
            MPI_Isend(right_sending_buffer, num_sending_right, PARTICLE, rank+1, 0, MPI_COMM_WORLD, &send_right_req);
            MPI_Irecv(right_receiving_buffer, num_my_particles, PARTICLE, rank+1, MPI_ANY_TAG, MPI_COMM_WORLD, &recv_right_req);
            MPI_Wait(&send_right_req, &send_right_stat);
            MPI_Wait(&recv_right_req, &recv_right_stat);
            MPI_Get_count(&recv_right_stat, PARTICLE, &recvd_right_count);
            num_augmented_particles = num_my_particles + recvd_right_count;
            memcpy(my_particles + num_my_particles, right_receiving_buffer, recvd_right_count * sizeof(particle_t));
        } else if (rank == (num_procs-1)) {
            left_fringe();
            MPI_Isend(left_sending_buffer, num_sending_left, PARTICLE, rank-1, 0, MPI_COMM_WORLD, &send_left_req);
            MPI_Irecv(left_receiving_buffer, num_my_particles, PARTICLE, rank-1, MPI_ANY_TAG, MPI_COMM_WORLD, &recv_left_req);
            MPI_Wait(&send_left_req, &send_left_stat);
            MPI_Wait(&recv_left_req, &recv_left_stat);
            MPI_Get_count(&recv_left_stat, PARTICLE, &recvd_left_count);
            num_augmented_particles = num_my_particles + recvd_left_count;
            memcpy(my_particles + num_my_particles, left_receiving_buffer, recvd_left_count * sizeof(particle_t));
        } else {
            left_fringe();
            right_fringe();
            MPI_Isend(left_sending_buffer, num_sending_left, PARTICLE, rank-1, 0, MPI_COMM_WORLD, &send_left_req);
            MPI_Irecv(left_receiving_buffer, num_my_particles, PARTICLE, rank-1, MPI_ANY_TAG, MPI_COMM_WORLD, &recv_left_req);
            MPI_Isend(right_sending_buffer, num_sending_right, PARTICLE, rank+1, 0, MPI_COMM_WORLD, &send_right_req);
            MPI_Irecv(right_receiving_buffer, num_my_particles, PARTICLE, rank+1, MPI_ANY_TAG, MPI_COMM_WORLD, &recv_right_req);
            MPI_Wait(&send_left_req, &send_left_stat);
            MPI_Wait(&send_right_req, &send_right_stat);
            MPI_Wait(&recv_left_req, &recv_left_stat);
            MPI_Wait(&recv_right_req, &recv_right_stat);
            MPI_Get_count(&recv_left_stat, PARTICLE, &recvd_left_count);
            MPI_Get_count(&recv_right_stat, PARTICLE, &recvd_right_count);
            num_augmented_particles = num_my_particles + recvd_left_count + recvd_right_count;
            memcpy(my_particles + num_my_particles, left_receiving_buffer, recvd_left_count * sizeof(particle_t));
            memcpy(my_particles + num_my_particles + recvd_left_count, right_receiving_buffer, recvd_right_count * sizeof(particle_t));
        }

        populate_grid();
        time_step();

        num_sending_left = 0;
        num_sending_right = 0;
        int num_remaining_particles = 0;
        for ( int i = 0; i < num_my_particles; i++ ) {
            if (rank != 0 && my_particles[i].x <= partition_offsets[rank]) {
                left_sending_buffer[num_sending_left++] = my_particles[i];
            } else if (rank != (num_procs-1) && my_particles[i].x > partition_offsets[rank+1]) {
                right_sending_buffer[num_sending_right++] = my_particles[i];
            } else {
                remaining_particles[num_remaining_particles++] = my_particles[i];
            }
        }
        if (rank == 0) {
            MPI_Isend(right_sending_buffer, num_sending_right, PARTICLE, rank+1, 0, MPI_COMM_WORLD, &send_right_req);
            MPI_Irecv(right_receiving_buffer, num_my_particles, PARTICLE, rank+1, MPI_ANY_TAG, MPI_COMM_WORLD, &recv_right_req);
            MPI_Wait(&send_right_req, &send_right_stat);
            MPI_Wait(&recv_right_req, &recv_right_stat);
            MPI_Get_count(&recv_right_stat, PARTICLE, &recvd_right_count);
            num_augmented_particles = num_remaining_particles + recvd_right_count;
            memcpy(my_particles, remaining_particles, num_remaining_particles * sizeof(particle_t));
            memcpy(my_particles + num_remaining_particles, right_receiving_buffer, recvd_right_count * sizeof(particle_t));
            num_my_particles = num_augmented_particles;
        } else if (rank == (num_procs-1)) {
            MPI_Isend(left_sending_buffer, num_sending_left, PARTICLE, rank-1, 0, MPI_COMM_WORLD, &send_left_req);
            MPI_Irecv(left_receiving_buffer, num_my_particles, PARTICLE, rank-1, MPI_ANY_TAG, MPI_COMM_WORLD, &recv_left_req);
            MPI_Wait(&send_left_req, &send_left_stat);
            MPI_Wait(&recv_left_req, &recv_left_stat);
            MPI_Get_count(&recv_left_stat, PARTICLE, &recvd_left_count);
            num_augmented_particles = num_remaining_particles + recvd_left_count;
            memcpy(my_particles, remaining_particles, num_remaining_particles * sizeof(particle_t));
            memcpy(my_particles + num_remaining_particles, left_receiving_buffer, recvd_left_count * sizeof(particle_t));
            num_my_particles = num_augmented_particles;
        } else {
            MPI_Isend(right_sending_buffer, num_sending_right, PARTICLE, rank+1, 0, MPI_COMM_WORLD, &send_right_req);
            MPI_Irecv(right_receiving_buffer, num_my_particles, PARTICLE, rank+1, MPI_ANY_TAG, MPI_COMM_WORLD, &recv_right_req);
            MPI_Isend(left_sending_buffer, num_sending_left, PARTICLE, rank-1, 0, MPI_COMM_WORLD, &send_left_req);
            MPI_Irecv(left_receiving_buffer, num_my_particles, PARTICLE, rank-1, MPI_ANY_TAG, MPI_COMM_WORLD, &recv_left_req);
            MPI_Wait(&send_right_req, &send_right_stat);
            MPI_Wait(&recv_right_req, &recv_right_stat);
            MPI_Wait(&send_left_req, &send_left_stat);
            MPI_Wait(&recv_left_req, &recv_left_stat);
            MPI_Get_count(&recv_left_stat, PARTICLE, &recvd_left_count);
            MPI_Get_count(&recv_right_stat, PARTICLE, &recvd_right_count);
            num_augmented_particles = num_remaining_particles + recvd_left_count + recvd_right_count;
            memcpy(my_particles, remaining_particles, num_remaining_particles * sizeof(particle_t));
            memcpy(my_particles + num_remaining_particles, left_receiving_buffer, recvd_left_count * sizeof(particle_t));
            memcpy(my_particles + num_remaining_particles + recvd_left_count, right_receiving_buffer, recvd_right_count * sizeof(particle_t));
            num_my_particles = num_augmented_particles;
        }
    }

    simulation_time = read_timer() - simulation_time;
    if( rank == 0 ) {
        printf( "num_particles = %d, num_procs = %d, simulation time = %g s\n", num_particles, num_procs, simulation_time );
    }

    if (savename) {
        if (rank == 0) {
            final_partition_sizes = (int*) malloc( num_procs * sizeof(int) );
        }
        MPI_Gather(&num_my_particles, 1, MPI_INT, final_partition_sizes, 1, MPI_INT, 0, MPI_COMM_WORLD);
        if (rank == 0) {
            final_partition_offsets = (int*) malloc( num_procs * sizeof(int) );
            final_partition_offsets[0] = 0;
            for (int i = 1; i < num_procs; i++) {
                final_partition_offsets[i] = final_partition_offsets[i-1] + final_partition_sizes[i-1];
            }
        }
        MPI_Gatherv(my_particles, num_my_particles, PARTICLE, particles, final_partition_sizes, final_partition_offsets, PARTICLE, 0, MPI_COMM_WORLD);
        if (rank == 0) {
            save( fsave, num_particles, particles );
            free(final_partition_sizes);
            free(final_partition_offsets);
        }
    }

    free( partition_offsets );
    free( partition_sizes );
    free( my_particles );
    free(remaining_particles);
    free( left_sending_buffer );
    free( right_sending_buffer );
    free( left_receiving_buffer );
    free( right_receiving_buffer );
    if( fsave ) fclose( fsave );
    MPI_Finalize();
    return 0;
}
Example #13
File: gather2.c  Project: NexMirror/MPICH
int main(int argc, char **argv)
{
    MPI_Datatype vec;
    double *vecin, *vecout;
    MPI_Comm comm;
    int count, minsize = 2;
    int root, i, n, stride, errs = 0;
    int rank, size;

    MTest_Init(&argc, &argv);

    while (MTestGetIntracommGeneral(&comm, minsize, 1)) {
        if (comm == MPI_COMM_NULL)
            continue;
        /* Determine the sender and receiver */
        MPI_Comm_rank(comm, &rank);
        MPI_Comm_size(comm, &size);

        for (root = 0; root < size; root++) {
            for (count = 1; count < 65000; count = count * 2) {
                n = 12;
                stride = 10;
                vecin = (double *) malloc(n * stride * size * sizeof(double));
                vecout = (double *) malloc(size * n * sizeof(double));

                MPI_Type_vector(n, 1, stride, MPI_DOUBLE, &vec);
                MPI_Type_commit(&vec);

                for (i = 0; i < n * stride; i++)
                    vecin[i] = -2;
                for (i = 0; i < n; i++)
                    vecin[i * stride] = rank * n + i;
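                /* The root contributes its block in place (sendcount and
                 * sendtype are ignored with MPI_IN_PLACE); every other rank
                 * sends a single strided vector holding n doubles. */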

                if (rank == root) {
                    for (i = 0; i < n; i++) {
                        vecout[rank * n + i] = rank * n + i;
                    }
                    MPI_Gather(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL,
                               vecout, n, MPI_DOUBLE, root, comm);
                }
                else {
                    MPI_Gather(vecin, 1, vec, NULL, -1, MPI_DATATYPE_NULL, root, comm);
                }
                if (rank == root) {
                    for (i = 0; i < n * size; i++) {
                        if (vecout[i] != i) {
                            errs++;
                            if (errs < 10) {
                                fprintf(stderr, "vecout[%d]=%d\n", i, (int) vecout[i]);
                            }
                        }
                    }
                }
                MPI_Type_free(&vec);
                free(vecin);
                free(vecout);
            }
        }
        MTestFreeComm(&comm);
    }

    /* do a zero length gather */
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    if (rank == 0) {
        MPI_Gather(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, NULL, 0, MPI_BYTE, 0, MPI_COMM_WORLD);
    }
    else {
        MPI_Gather(NULL, 0, MPI_BYTE, NULL, 0, MPI_BYTE, 0, MPI_COMM_WORLD);
    }

    MTest_Finalize(errs);
    MPI_Finalize();
    return 0;
}
Example #14
int main(int argc, char **argv) {
    int i, j, rank, nranks, peer, bufsize, errors;
    double *win_buf, *loc_buf;
    MPI_Win buf_win;

    int idx_rem[SUB_YDIM];
    int blk_len[SUB_YDIM];
    MPI_Datatype loc_type, rem_type;

    MTest_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    bufsize = XDIM * YDIM * sizeof(double);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &win_buf);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &loc_buf);

    for (i = 0; i < XDIM*YDIM; i++) {
        *(win_buf + i) =  1.0 + rank;
        *(loc_buf + i) = -1.0;
    }

    MPI_Win_create(win_buf, bufsize, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &buf_win);

    peer = (rank+1) % nranks;

    /* Build the datatype */

    for (i = 0; i < SUB_YDIM; i++) {
      idx_rem[i] = i*XDIM;
      blk_len[i] = SUB_XDIM;
    }

    MPI_Type_indexed(SUB_YDIM, blk_len, idx_rem, MPI_DOUBLE, &loc_type);
    MPI_Type_indexed(SUB_YDIM, blk_len, idx_rem, MPI_DOUBLE, &rem_type);

    MPI_Type_commit(&loc_type);
    MPI_Type_commit(&rem_type);

    /* Perform get operation */

    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, peer, 0, buf_win);

    MPI_Get(loc_buf, 1, loc_type, peer, 0, 1, rem_type, buf_win);

    /* Use the datatype only on the remote side (must have SUB_XDIM == XDIM) */
    /* MPI_Get(loc_buf, SUB_XDIM*SUB_YDIM, MPI_DOUBLE, peer, 0, 1, rem_type, buf_win); */

    MPI_Win_unlock(peer, buf_win);

    MPI_Type_free(&loc_type);
    MPI_Type_free(&rem_type);

    MPI_Barrier(MPI_COMM_WORLD);

    /* Verify that the results are correct */

    errors = 0;
    for (i = 0; i < SUB_XDIM; i++) {
      for (j = 0; j < SUB_YDIM; j++) {
        const double actual   = *(loc_buf + i + j*XDIM);
        const double expected = (1.0 + peer);
        if (fabs(actual - expected) > 1.0e-10) {
          SQUELCH( printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
              rank, j, i, expected, actual); );
          errors++;
          fflush(stdout);
        }
      }
Example #15
/* test case from tt#1030 ported to C
 *
 * Thanks to Matthias Lieber for reporting the bug and providing a good test
 * program. */
int struct_struct_test(void)
{
    int err, errs = 0;
    int i, j, dt_size = 0;
    MPI_Request req[2];


#define COUNT (2)
    MPI_Aint displ[COUNT];
    int blens[COUNT];
    MPI_Datatype types[COUNT];
    MPI_Datatype datatype;

    /* A slight difference from the F90 test: F90 arrays are column-major, C
     * arrays are row-major.  So we invert the order of dimensions. */
#define N (2)
#define M (4)
    int array[N][M] =    { {-1, -1, -1, -1}, {-1, -1, -1, -1} };
    int expected[N][M] = { {-1,  1,  2,  5}, {-1,  3,  4,  6} };
    int seq_array[N*M];
    MPI_Aint astart, aend;
    MPI_Aint size_exp = 0;

    /* 1st section selects elements 1 and 2 out of 2nd dimension, complete 1st dim.
     * should receive the values 1, 2, 3, 4 */
    astart = 1;
    aend   = 2;
    err = build_array_section_type(M, astart, aend, &types[0]);
    if (err) {
        errs++;
        if (verbose) fprintf(stderr, "build_array_section_type failed\n");
        return errs;
    }
    blens[0] = N;
    displ[0] = 0;
    size_exp = size_exp + N * (aend-astart+1) * sizeof(int);

    /* 2nd section selects last element of 2nd dimension, complete 1st dim.
     * should receive the values 5, 6 */
    astart = 3;
    aend   = 3;
    err = build_array_section_type(M, astart, aend, &types[1]);
    if (err) {
        errs++;
        if (verbose) fprintf(stderr, "build_array_section_type failed\n");
        return errs;
    }
    blens[1] = N;
    displ[1] = 0;
    size_exp = size_exp + N * (aend-astart+1) * sizeof(int);

    /* create type */
    err = MPI_Type_create_struct(COUNT, blens, displ, types, &datatype);
    check_err(MPI_Type_create_struct);
    err = MPI_Type_commit(&datatype);
    check_err(MPI_Type_commit);

    err = MPI_Type_size(datatype, &dt_size);
    check_err(MPI_Type_size);
    if (dt_size != size_exp) {
        errs++;
        if (verbose) fprintf(stderr, "unexpected type size\n");
    }


    /* send the type to ourselves to make sure that the type describes data correctly */
    for (i = 0; i < (N*M) ; ++i)
        seq_array[i] = i + 1; /* source values 1..(N*M) */
    err = MPI_Isend(&seq_array[0], dt_size/sizeof(int), MPI_INT, 0, 42, MPI_COMM_SELF, &req[0]);
    check_err(MPI_Isend);
    err = MPI_Irecv(&array[0][0], 1, datatype, 0, 42, MPI_COMM_SELF, &req[1]);
    check_err(MPI_Irecv);
    err = MPI_Waitall(2, req, MPI_STATUSES_IGNORE);
    check_err(MPI_Waitall);

    /* check against expected */
    for (i = 0; i < N; ++i) {
        for (j = 0; j < M; ++j) {
            if (array[i][j] != expected[i][j]) {
                errs++;
                if (verbose)
                    fprintf(stderr, "array[%d][%d]=%d, should be %d\n", i, j, array[i][j], expected[i][j]);
            }
        }
    }

    err = MPI_Type_free(&datatype);
    check_err(MPI_Type_free);
    err = MPI_Type_free(&types[0]);
    check_err(MPI_Type_free);
    err = MPI_Type_free(&types[1]);
    check_err(MPI_Type_free);

    return errs;
#undef M
#undef N
#undef COUNT
}
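Example #16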
SEXP matrixApply(SEXP result, SEXP data, SEXP margin, SEXP function,
                 int worldRank, int worldSize) {

  SEXP ans, data_size;
  
  MPI_Datatype row_type, column_type;
  MPI_Status status;
  
  int my_start, my_end, N, function_nlines,
    nvectors, offset;
  int local_check = 0, global_check = 0;
  int dimensions[2];

  if (worldRank == MASTER_PROCESS) { 
    data_size = GET_DIM(data);
    
    dimensions[0] = INTEGER_POINTER(data_size)[0];
    dimensions[1] = INTEGER_POINTER(data_size)[1];

    /* function SEXP object is a vector of strings, each element contains
       a single line of the function definition */

    function_nlines = length(function);   
  }

  MPI_Bcast(dimensions, 2, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&function_nlines, 1, MPI_INT, 0, MPI_COMM_WORLD);

  /* margin provides the subscripts which the function will be
     applied over.  "1" indicates rows, "2" indicates columns,
     "c(1,2)" indicates rows and columns */

  if(worldRank != MASTER_PROCESS)
    PROTECT(margin = allocVector(INTSXP, 1));

  MPI_Bcast(INTEGER(margin), 1, MPI_INT, 0, MPI_COMM_WORLD);

  /* Matrix dimensions in R are interpreted differently than in C.
     We refer to R's row/column ordering, so rows are not contiguous
     in memory */

  if (INTEGER(margin)[0] == 1) {
    N = dimensions[0];

    /* define a vector type to handle R row exchange
       (count, blocklength, stride) */
    MPI_Type_vector (dimensions[1], 1, dimensions[0], MPI_DOUBLE, &row_type);
    MPI_Type_commit (&row_type);

  } else if (INTEGER(margin)[0] == 2) {
    N = dimensions[1];

    /* define contiguous type to handle R columns exchange */
    MPI_Type_contiguous(dimensions[0], MPI_DOUBLE, &column_type);
    MPI_Type_commit(&column_type);
    
  } else if (INTEGER(margin)[0] == 3) {
    // TODO
    DEBUG("Margin number 3 not yet implemented\n");
    return R_NilValue;
  } else {
    DEBUG("Don't know how to deal with margin number %d\n",
          INTEGER(margin)[0]);
    return R_NilValue;
  }
  
  if(worldRank != MASTER_PROCESS) {
  
    /* Allocate memory for SEXP objects on worker nodes.
       The alloc* functions do their own error checking and
       signal an error if the allocation fails. */
    loopDistribute(worldRank, worldSize, N, &my_start, &my_end);
    
    if (INTEGER(margin)[0] == 1)
      PROTECT(data = allocMatrix(REALSXP, my_end-my_start, dimensions[1]));
    if (INTEGER(margin)[0] == 2)
      PROTECT(data = allocMatrix(REALSXP, dimensions[0], my_end-my_start));
              
    PROTECT(function = allocVector(STRSXP, function_nlines));
  }

  if ( (data == NULL) ||  (function == NULL) ) {
      local_check = 1;
  } else {
      local_check = 0;
  }

  /* Check whether memory was successfully allocated on all worker nodes */
  MPI_Allreduce(&local_check, &global_check, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

  /*  No need to free memory if allocation fails on one of the workers
      R_alloc will release it after .Call returns to R */
  if ( global_check != 0 ) {
    /* Remove all references from the stack, I'm not sure if this is necessary */
    if(worldRank != MASTER_PROCESS)
      UNPROTECT(3);

    return ScalarInteger(-1);
  }

  /* Distribute data between processes */

  for (int worker_id=1; worker_id<worldSize; worker_id++) {

    if (worldRank == MASTER_PROCESS) {

      /* Calculate expected message length for each worker */
      loopDistribute(worker_id, worldSize, N, &my_start, &my_end);
      nvectors = my_end - my_start;

      /* If we are applying over rows (as defined in R), we need to use the MPI
         vector type, sending each row as a separate message */
      if (INTEGER(margin)[0] == 1) {
        for(int k=0; k<nvectors; k++) {
          offset = my_start+k;
          MPI_Send(&REAL(data)[offset], 1, row_type, worker_id, 0, MPI_COMM_WORLD);
        }
      }

      /* R-defined columns are contiguous in memory, so a single message built from
         contiguous column_type elements is sent */
      else if (INTEGER(margin)[0] == 2) {
        offset = my_start*dimensions[0];
        MPI_Send(&REAL(data)[offset], nvectors, column_type, worker_id, 0, MPI_COMM_WORLD);
      }
    }
    else if (worldRank == worker_id) {

      nvectors = my_end - my_start;

      if (INTEGER(margin)[0] == 1) {
        
        for(int k=0; k<nvectors; k++) {
          offset = k*dimensions[1];
          MPI_Recv(&REAL(data)[offset], dimensions[1], MPI_DOUBLE, MASTER_PROCESS, 0, MPI_COMM_WORLD, &status);
        }
      }
      else if (INTEGER(margin)[0] == 2) {
        MPI_Recv(REAL(data), nvectors, column_type, MASTER_PROCESS, 0, MPI_COMM_WORLD, &status);
      }
    }
  }

  /* Redo loop distribution for the Master process */
  if (worldRank == MASTER_PROCESS) {
      loopDistribute(worldRank, worldSize, N, &my_start, &my_end);
  }
    
  /* Bcast function name or definition, cover case when definition is split into
     several lines and stored as a SEXP string vector */
   bcastRFunction(function, function_nlines, worldRank);
  
  /* Response container, Vector of SEXPs, margin determines vector length */
  PROTECT(ans = allocVector(VECSXP, N));

  do_matrixApply(ans, data, margin, function, my_start, my_end, dimensions, worldRank);

  gatherData(result, ans, N, my_start, my_end, worldRank);
  
  if(worldRank != MASTER_PROCESS) {
    UNPROTECT(4);
  } else {
    UNPROTECT(1);
  }

  return result;

}
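The row/column type choice above follows from R's column-major storage: a column of an nrow x ncol double matrix is nrow contiguous doubles, while a row is ncol doubles with a stride of nrow between them. A minimal sketch of just the two type constructions (nrow and ncol stand in for dimensions[0] and dimensions[1]):

#include <mpi.h>

/* Sketch: datatypes for one row / one column of a column-major nrow x ncol
 * matrix of doubles, mirroring row_type/column_type above. */
void make_r_matrix_types(int nrow, int ncol,
                         MPI_Datatype *row_type, MPI_Datatype *column_type)
{
    /* row i occupies elements i, i+nrow, i+2*nrow, ...: ncol blocks of 1,
     * stride nrow */
    MPI_Type_vector(ncol, 1, nrow, MPI_DOUBLE, row_type);
    MPI_Type_commit(row_type);

    /* column j is nrow contiguous doubles starting at element j*nrow */
    MPI_Type_contiguous(nrow, MPI_DOUBLE, column_type);
    MPI_Type_commit(column_type);
}

With these types, &REAL(data)[i] addresses row i and &REAL(data)[j*nrow] addresses column j, which matches the offsets used in the send loop above.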
Example #17
/* regression for tt#1030, checks for bad offset math in the
 * blockindexed and indexed dataloop flattening code */
int flatten_test(void)
{
    int err, errs = 0;
#define ARR_SIZE (9)
    /* real indices              0  1  2  3  4  5  6  7  8
     * indices w/ &array[3]     -3 -2 -1  0  1  2  3  4  5 */
    int array[ARR_SIZE]      = {-1,-1,-1,-1,-1,-1,-1,-1,-1};
    int expected[ARR_SIZE]   = {-1, 0, 1,-1, 2,-1, 3,-1, 4};
    MPI_Datatype idx_type = MPI_DATATYPE_NULL;
    MPI_Datatype blkidx_type = MPI_DATATYPE_NULL;
    MPI_Datatype combo = MPI_DATATYPE_NULL;
#define COUNT (2)
    int displ[COUNT];
    MPI_Aint adispl[COUNT];
    int blens[COUNT];
    MPI_Datatype types[COUNT];

    /* indexed type layout:
     * XX_X
     * 2101  <-- pos (left of 0 is neg)
     *
     * different blens to prevent optimization into a blockindexed
     */
    blens[0] = 2;
    displ[0] = -2; /* elements, puts byte after block end at 0 */
    blens[1] = 1;
    displ[1] = 1; /*elements*/

    err = MPI_Type_indexed(COUNT, blens, displ, MPI_INT, &idx_type);
    check_err(MPI_Type_indexed);
    err = MPI_Type_commit(&idx_type);
    check_err(MPI_Type_commit);

    /* indexed type layout:
     * _X_X
     * 2101  <-- pos (left of 0 is neg)
     */
    displ[0] = -1;
    displ[1] = 1;
    err = MPI_Type_create_indexed_block(COUNT, 1, displ, MPI_INT, &blkidx_type);
    check_err(MPI_Type_create_indexed_block);
    err = MPI_Type_commit(&blkidx_type);
    check_err(MPI_Type_commit);

    /* struct type layout:
     * II_I_B_B  (I=idx_type, B=blkidx_type)
     * 21012345  <-- pos (left of 0 is neg)
     */
    blens[0]  = 1;
    adispl[0] = 0; /*bytes*/
    types[0]  = idx_type;

    blens[1]  = 1;
    adispl[1] = 4 * sizeof(int); /* bytes */
    types[1]  = blkidx_type;

    /* must be a struct in order to trigger flattening code */
    err = MPI_Type_create_struct(COUNT, blens, adispl, types, &combo);
    check_err(MPI_Type_create_struct);
    err = MPI_Type_commit(&combo);
    check_err(MPI_Type_commit);

    /* pack/unpack with &array[3] */
    errs += pack_and_check_expected(combo, "combo", 3, ARR_SIZE, array, expected);

    MPI_Type_free(&combo);
    MPI_Type_free(&idx_type);
    MPI_Type_free(&blkidx_type);

    return errs;
#undef COUNT
}
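pack_and_check_expected() is defined elsewhere in the suite; the mechanism it exercises is an MPI_Pack / MPI_Unpack round trip through the datatype under test. A minimal, self-contained sketch of that mechanism (the indexed type and buffers here are illustrative):

#include <mpi.h>
#include <stdlib.h>

/* Sketch: pack a strided selection of `in` and unpack it contiguously. */
static int pack_unpack_demo(void)
{
    int in[6] = { 10, 11, 12, 13, 14, 15 };
    int out[3] = { -1, -1, -1 };
    int blens[3] = { 1, 1, 1 }, displ[3] = { 0, 2, 4 };   /* picks in[0], in[2], in[4] */
    int packsize = 0, pos = 0, errs = 0;
    char *packbuf;
    MPI_Datatype idx;

    MPI_Type_indexed(3, blens, displ, MPI_INT, &idx);
    MPI_Type_commit(&idx);

    MPI_Pack_size(1, idx, MPI_COMM_SELF, &packsize);
    packbuf = malloc(packsize);

    MPI_Pack(in, 1, idx, packbuf, packsize, &pos, MPI_COMM_SELF);

    pos = 0;
    MPI_Unpack(packbuf, packsize, &pos, out, 3, MPI_INT, MPI_COMM_SELF);

    errs += (out[0] != 10) + (out[1] != 12) + (out[2] != 14);

    free(packbuf);
    MPI_Type_free(&idx);
    return errs;
}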
Example #18
File: main.c  Project: grapefroot/mipt
int main(int argc, char **argv) {
    size_t dimensions;
    size_t i, j;
    scanf("%zu", &dimensions);
    struct complex *matrix = calloc(dimensions * dimensions, sizeof(struct complex));

    struct complex temp;
    for (i = 0; i < dimensions; ++i) {
        for (j = 0; j < dimensions; ++j) {
            scanf("%lf", &temp.re);
            scanf("%lf", &temp.im);
            temp.x = (int) i;
            temp.y = (int) j;
            matrix[i * dimensions + j] = temp;
        }
    }

    int counter, size;
    double begin, end;
    begin = omp_get_wtime();
    MPI_Init(&argc, &argv);
    MPI_Datatype complex_t;
    MPI_Datatype type[4] = {MPI_DOUBLE, MPI_DOUBLE, MPI_INT, MPI_INT};
    int blocklen[4] = {1, 1, 1, 1};
    MPI_Aint disp[4];
    /* displacements must be filled in before building the struct type;
       struct complex has members re, im, x, y (offsetof requires <stddef.h>) */
    disp[0] = offsetof(struct complex, re);
    disp[1] = offsetof(struct complex, im);
    disp[2] = offsetof(struct complex, x);
    disp[3] = offsetof(struct complex, y);
    MPI_Type_create_struct(4, blocklen, disp, type, &complex_t);
    MPI_Type_commit(&complex_t);
    MPI_Comm_rank(MPI_COMM_WORLD, &counter);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    printf("%d %d", counter, size);
    struct complex thread_min = matrix[0];
    thread_min.x = counter;
    thread_min.y = 0;
    struct complex thread_max = matrix[0];
    thread_max.x = counter;
    thread_max.y = 0;
    for (i = (size_t) counter; i < dimensions; i += size) {
        for (j = 0; j < dimensions; ++j) {
            if (length(matrix[i * dimensions + j]) < length(thread_min)) {
                thread_min = matrix[i * dimensions + j];
            }
            if (length(matrix[i * dimensions + j]) > length(thread_max)) {
                thread_max = matrix[i * dimensions + j];
            }
        }
    }
    if (counter != 0) {
        MPI_Send(&thread_min, 1, complex_t, 0, 0, MPI_COMM_WORLD);
        MPI_Send(&thread_max, 1, complex_t, 0, 0, MPI_COMM_WORLD);
    }
    if (counter == 0) {
        struct complex min = thread_min;
        struct complex max = thread_max;
        for (i = 1; i < (size_t) size; ++i) {
            /* collect the per-rank extrema from worker i */
            MPI_Recv(&thread_min, 1, complex_t, (int) i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            MPI_Recv(&thread_max, 1, complex_t, (int) i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            printf("%.2f+i*%.2f", thread_min.re, thread_min.im);
            printf("%.2f+i*%.2f", thread_max.re, thread_max.im);
            if (length(thread_min) < length(min)) {
                min = thread_min;
            }
            if (length(thread_max) > length(max)) {
                max = thread_max;
            }
        }
        printf("max complex number %.2f+i*%.2f position x:%d y:%d \n", max.re, max.im,
               max.x, max.y);

        printf("min complex number %.2f+i*%.2f position x:%d, y:%d \n", min.re, min.im,
               min.x,
               min.y);
    }
    MPI_Finalize();
    end = omp_get_wtime();
    printf("execution time: %f\n", end - begin);
    free(matrix);
    return 0;
}
Example #19
int main(int argc, char **argv)
{
    int rank, size;
    int n, energy, niters, px, py;

    int north, south, west, east;
    int bx, by, offx, offy;

    /* three heat sources */
    const int nsources = 3;
    int sources[nsources][2];
    int locnsources;             /* number of sources in my area */
    int locsources[nsources][2]; /* sources local to my rank */

    double t1, t2;

    int iter, i;

    double *aold, *anew, *tmp;

    double heat, rheat;

    int final_flag;

    /* initialize MPI environment */
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* argument checking and setting */
    setup(rank, size, argc, argv,
          &n, &energy, &niters, &px, &py, &final_flag);

    if (final_flag == 1) {
        MPI_Finalize();
        exit(0);
    }

    /* Create a communicator with a topology */
    MPI_Comm cart_comm;
    int dims[2] = {0,0}, periods[2] = {0,0}, coords[2];

    MPI_Dims_create(size, 2, dims);
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 0, &cart_comm);
    MPI_Cart_coords(cart_comm, rank, 2, coords);

    /* determine my four neighbors */
    MPI_Cart_shift(cart_comm, 0, 1, &west, &east);
    MPI_Cart_shift(cart_comm, 1, 1, &north, &south);

    /* decompose the domain */
    bx = n / px;    /* block size in x */
    by = n / py;    /* block size in y */
    offx = coords[0] * bx; /* offset in x */
    offy = coords[1] * by; /* offset in y */

    /* printf("%i (%i,%i) - w: %i, e: %i, n: %i, s: %i\n", rank, ry,rx,west,east,north,south); */

    /* allocate working arrays & communication buffers */
    MPI_Alloc_mem((bx+2)*(by+2)*sizeof(double), MPI_INFO_NULL, &aold); /* 1-wide halo zones! */
    MPI_Alloc_mem((bx+2)*(by+2)*sizeof(double), MPI_INFO_NULL, &anew); /* 1-wide halo zones! */

    /* initialize three heat sources */
    init_sources(bx, by, offx, offy, n,
                 nsources, sources, &locnsources, locsources);

    /* create north-south datatype */
    MPI_Datatype north_south_type;
    MPI_Type_contiguous(bx, MPI_DOUBLE, &north_south_type);
    MPI_Type_commit(&north_south_type);

    /* create east-west type */
    MPI_Datatype east_west_type;
    MPI_Type_vector(by,1,bx+2,MPI_DOUBLE, &east_west_type);
    MPI_Type_commit(&east_west_type);

    t1 = MPI_Wtime(); /* take time */

    for (iter = 0; iter < niters; ++iter) {

        /* refresh heat sources */
        for (i = 0; i < locnsources; ++i) {
            aold[ind(locsources[i][0],locsources[i][1])] += energy; /* heat source */
        }

        /* exchange data with neighbors */
        MPI_Request reqs[8];
        MPI_Isend(&aold[ind(1,1)] /* north */, 1, north_south_type, north, 9, cart_comm, &reqs[0]);
        MPI_Isend(&aold[ind(1,by)] /* south */, 1, north_south_type, south, 9, cart_comm, &reqs[1]);
        MPI_Isend(&aold[ind(bx,1)] /* east */, 1, east_west_type, east, 9, cart_comm, &reqs[2]);
        MPI_Isend(&aold[ind(1,1)] /* west */, 1, east_west_type, west, 9, cart_comm, &reqs[3]);
        MPI_Irecv(&aold[ind(1,0)] /* north */, 1, north_south_type, north, 9, cart_comm, &reqs[4]);
        MPI_Irecv(&aold[ind(1,by+1)] /* south */, 1, north_south_type, south, 9, cart_comm, &reqs[5]);
        MPI_Irecv(&aold[ind(bx+1,1)] /* east */, 1, east_west_type, east, 9, cart_comm, &reqs[6]);
        MPI_Irecv(&aold[ind(0,1)] /* west */, 1, east_west_type, west, 9, cart_comm, &reqs[7]);
        MPI_Waitall(8, reqs, MPI_STATUSES_IGNORE);

        /* update grid points */
        update_grid(bx, by, aold, anew, &heat);

        /* swap working arrays */
        tmp = anew; anew = aold; aold = tmp;

        /* optional - print image */
        if (iter == niters-1)
            printarr_par(iter, anew, n, px, py, coords[0], coords[1],
                         bx, by, offx, offy, MPI_COMM_WORLD);
    }

    t2 = MPI_Wtime();

    /* free working arrays and communication buffers */
    MPI_Free_mem(aold);
    MPI_Free_mem(anew);

    MPI_Type_free(&east_west_type);
    MPI_Type_free(&north_south_type);

    /* get final heat in the system */
    MPI_Allreduce(&heat, &rheat, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    if (!rank) printf("[%i] last heat: %f time: %f\n", rank, rheat, t2-t1);

    MPI_Finalize();
}
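The two exchange types above depend on the halo array being stored row by row with a padded row length of bx+2; assuming ind(x,y) expands to x + y*(bx+2) (the macro itself is defined elsewhere), a boundary row is bx contiguous doubles and a boundary column is by doubles spaced bx+2 elements apart. A small sketch that builds the same types and checks their sizes under that assumption:

#include <assert.h>
#include <mpi.h>

/* Sketch: halo-exchange types for a row-major (bx+2) x (by+2) array with
 * 1-wide ghost zones, assuming ind(x,y) == x + y*(bx+2). */
void make_halo_types(int bx, int by,
                     MPI_Datatype *ns_type, MPI_Datatype *ew_type)
{
    int sz;

    MPI_Type_contiguous(bx, MPI_DOUBLE, ns_type);         /* one interior row    */
    MPI_Type_vector(by, 1, bx + 2, MPI_DOUBLE, ew_type);  /* one interior column */
    MPI_Type_commit(ns_type);
    MPI_Type_commit(ew_type);

    /* sanity check: each type carries exactly one boundary's worth of doubles */
    MPI_Type_size(*ns_type, &sz);
    assert(sz == bx * (int) sizeof(double));
    MPI_Type_size(*ew_type, &sz);
    assert(sz == by * (int) sizeof(double));
}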
Example #20
/* subarray_4d_fortran_test1()
 *
 * Returns the number of errors encountered.
 */
int subarray_4d_fortran_test1(void)
{
    MPI_Datatype subarray;
    int array[] = {
        -1111, -1112, -1113, -1114, -1115, -1116,
        -1121, -1122, -1123, -1124, -1125, -1126,
        -1131, -1132, -1133, -1134, -1135, -1136,
        -1211, -1212, -1213, -1214, -1215, -1216,
        -1221, -1222, -1223, -1224, -1225, -1226,
        -1231, -1232, -1233, -1234, -1235, -1236,
        -2111, -2112, -2113, -2114, 1, -2116,
        -2121, -2122, -2123, -2124, 2, -2126,
        -2131, -2132, -2133, -2134, 3, -2136,
        -2211, -2212, -2213, -2214, 4, -2216,
        -2221, -2222, -2223, -2224, 5, -2226,
        -2231, -2232, -2233, -2234, 6, -2236
    };

    int array_size[4] = { 6, 3, 2, 2 };
    int array_subsize[4] = { 1, 3, 2, 1 };
    int array_start[4] = { 4, 0, 0, 1 };

    int i, err, errs = 0, sizeoftype;

    /* set up type */
    err = MPI_Type_create_subarray(4,   /* dims */
                                   array_size,
                                   array_subsize,
                                   array_start, MPI_ORDER_FORTRAN, MPI_INT, &subarray);
    if (err != MPI_SUCCESS) {
        errs++;
        if (verbose) {
            fprintf(stderr,
                    "error in MPI_Type_create_subarray call; aborting after %d errors\n", errs);
        }
        return errs;
    }

    MPI_Type_commit(&subarray);
    MPI_Type_size(subarray, &sizeoftype);
    if (sizeoftype != 6 * sizeof(int)) {
        errs++;
        if (verbose)
            fprintf(stderr, "size of type = %d; should be %d\n",
                    sizeoftype, (int) (6 * sizeof(int)));
        return errs;
    }

    err = pack_and_unpack((char *) array, 1, subarray, 72 * sizeof(int));

    for (i = 0; i < 72; i++) {
        int goodval;
        switch (i) {
            case 40:
                goodval = 1;
                break;
            case 46:
                goodval = 2;
                break;
            case 52:
                goodval = 3;
                break;
            case 58:
                goodval = 4;
                break;
            case 64:
                goodval = 5;
                break;
            case 70:
                goodval = 6;
                break;
            default:
                goodval = 0;
                break;
        }
        if (array[i] != goodval) {
            errs++;
            if (verbose)
                fprintf(stderr, "array[%d] = %d; should be %d\n", i, array[i], goodval);
        }
    }

    MPI_Type_free(&subarray);
    return errs;
}
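The case labels 40, 46, 52, 58, 64, 70 in the switch above follow from Fortran ordering, where the first dimension varies fastest: offset = i0 + size0*(i1 + size1*(i2 + size2*i3)). A short standalone loop that reproduces those offsets from the size/subsize/start arrays:

#include <stdio.h>

/* Sketch: reproduce the linear offsets selected by the 4-D Fortran-order
 * subarray above. */
static void print_selected_offsets(void)
{
    int size[4]    = { 6, 3, 2, 2 };
    int subsize[4] = { 1, 3, 2, 1 };
    int start[4]   = { 4, 0, 0, 1 };
    int i0, i1, i2, i3;

    for (i3 = start[3]; i3 < start[3] + subsize[3]; i3++)
        for (i2 = start[2]; i2 < start[2] + subsize[2]; i2++)
            for (i1 = start[1]; i1 < start[1] + subsize[1]; i1++)
                for (i0 = start[0]; i0 < start[0] + subsize[0]; i0++)
                    printf("%d\n",
                           i0 + size[0] * (i1 + size[1] * (i2 + size[2] * i3)));
    /* prints 40, 46, 52, 58, 64, 70 -- the cases checked in the switch above */
}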
Example #21
int main(int argc, char ** argv){
  int my_id, root, ierr, num_procs;
  MPI_Status status;

  ierr = MPI_Init(&argc, &argv); // Create processes
  ierr = MPI_Comm_rank(MPI_COMM_WORLD, &my_id);
  ierr = MPI_Comm_size(MPI_COMM_WORLD, &num_procs);

  /*Make MPI data type for Vars*/
  const int nitems=5;
  int blocklengths[5] = {1, 1, 1, 1, 1};
  MPI_Datatype types[5] = { MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE};
  MPI_Datatype mpi_Vars;
  MPI_Aint offsets[5];

  offsets[0] = offsetof(Vars, mass);
  offsets[1] = offsetof(Vars, xvelocity);
  offsets[2] = offsetof(Vars, yvelocity);
  offsets[3] = offsetof(Vars, energy);
  offsets[4] = offsetof(Vars, press);

  MPI_Type_create_struct(nitems, blocklengths, offsets, types, &mpi_Vars);
  MPI_Type_commit(&mpi_Vars);
  /*start the program*/

  
  int N, type; N = num_procs*100;
  type = 1;
  int zones_to_do = N/num_procs;
  double dt; int count = 0;char str[80];

  FILE *fid, *finit;

  double dx = 1./(double)N;
  double t, T; t = 0.; T = .2;
  int num = 30;
  Vars * U = malloc((N+4)*(N+4)*sizeof(Vars)); init_sys(N+4, N+4, U, dx, dx, 1);
  if(my_id == 0){
    /*I am root*/
    
    finit = fopen("2Dinit.dat","w");
    Write_Cons(N+4, N+4, U, dx, dx, finit);
    fclose(finit);
    int count = 0;
    
  }
  while(t<T){
    //printf("before\n");
    dt = advance_system(N+4, N+4, U, dx, dx, my_id, zones_to_do, num_procs, mpi_Vars);
    t+=dt;    
    //break; 
    //printf("what time is it = %f\n", dt);
    /*Broadcast U*/
    ierr = MPI_Bcast(U, (N+4)*(N+4), mpi_Vars, 0, MPI_COMM_WORLD);
    /*
    if(my_id == 0){ 
      if( count % 1 == 0){
	sprintf(str, "T_%d.dat", count);
	fid = fopen(str, "w");
	Write_Cons(N+4, N+4, U, dx, dx, fid);
	fclose(fid);
	//printf("T=%f\n", t);
      }
      count += 1;
      }*/
  }
  if(my_id == 0){
    /*I am Root*/
    printf("%d\n", count);
    fid = fopen("22data.dat","w");
    Write_Cons(N+4, N+4, U, dx, dx, fid);
    fclose(fid);
  }
  free(U);
  MPI_Finalize();
}
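Because Vars holds five doubles, offsetof displacements alone are enough here; for structs that mix member types, padding can make the committed type's extent differ from sizeof(struct), and arrays of the struct then need a resized type. A hedged sketch of that technique with a hypothetical struct:

#include <stddef.h>
#include <mpi.h>

/* Hypothetical struct with trailing padding: sizeof is typically 16,
 * while the last member ends at byte 12. */
struct sample { double value; int tag; };

void make_sample_type(MPI_Datatype *newtype)
{
    int blens[2] = { 1, 1 };
    MPI_Aint displs[2] = { offsetof(struct sample, value),
                           offsetof(struct sample, tag) };
    MPI_Datatype types[2] = { MPI_DOUBLE, MPI_INT };
    MPI_Datatype tmp;

    MPI_Type_create_struct(2, blens, displs, types, &tmp);

    /* force the extent to sizeof(struct sample) so that count > 1 steps over
     * whole structs, padding included */
    MPI_Type_create_resized(tmp, 0, sizeof(struct sample), newtype);
    MPI_Type_commit(newtype);
    MPI_Type_free(&tmp);
}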
Example #22
/* subarray_2d_fortran_test1()
 *
 * Returns the number of errors encountered.
 */
int subarray_2d_fortran_test1(void)
{
    MPI_Datatype subarray;
    int array[12] = { -1, -2, -3, -4, 1, 2,
        -5, -6, -7, -8, -9, -10
    };
    int array_size[2] = { 6, 2 };
    int array_subsize[2] = { 2, 1 };
    int array_start[2] = { 4, 0 };

    int i, err, errs = 0, sizeoftype;

    /* set up type */
    err = MPI_Type_create_subarray(2,   /* dims */
                                   array_size,
                                   array_subsize,
                                   array_start, MPI_ORDER_FORTRAN, MPI_INT, &subarray);
    if (err != MPI_SUCCESS) {
        errs++;
        if (verbose) {
            fprintf(stderr,
                    "error in MPI_Type_create_subarray call; aborting after %d errors\n", errs);
        }
        return errs;
    }

    MPI_Type_commit(&subarray);
    MPI_Type_size(subarray, &sizeoftype);
    if (sizeoftype != 2 * sizeof(int)) {
        errs++;
        if (verbose)
            fprintf(stderr, "size of type = %d; should be %d\n",
                    sizeoftype, (int) (2 * sizeof(int)));
        return errs;
    }

    err = pack_and_unpack((char *) array, 1, subarray, 12 * sizeof(int));

    for (i = 0; i < 12; i++) {
        int goodval;
        switch (i) {
            case 4:
                goodval = 1;
                break;
            case 5:
                goodval = 2;
                break;
            default:
                goodval = 0;
                break;
        }
        if (array[i] != goodval) {
            errs++;
            if (verbose)
                fprintf(stderr, "array[%d] = %d; should be %d\n", i, array[i], goodval);
        }
    }

    MPI_Type_free(&subarray);
    return errs;
}
Example #23
void MPI_New_Datatype()
{
  /* Create new MPI datatype: ElemPack type definition in struct.h
     so structures of ElemPack and NeighborPack can be sent and received */
/*  MPI_Datatype ELEMTYPE;
  MPI_Datatype NEIGHTYPE;
  MPI_Datatype REFINED_INFO;
  MPI_Datatype ENRICHED_INFO;
  MPI_Datatype NSOLTYPE;
  MPI_Datatype LB_VERT_TYPE;*/

  int           blockcounts[3]={58, 25*KEYLENGTH, 102};
  MPI_Datatype  types[3];
  MPI_Aint      displs[3];
  int d;
  ElemPack* elem=new ElemPack;

  MPI_Address(&(elem->myprocess), &displs[0]);
  MPI_Address(&(elem->key[0]), &displs[1]);
  MPI_Address(&(elem->elevation), &displs[2]);  

  types[0]=MPI_INT;
  types[1]=MPI_UNSIGNED;
  types[2]=MPI_DOUBLE;

  for(d=2; d>=0; d--)
    displs[d]-=displs[0];

  MPI_Type_struct(3, blockcounts, displs, types, &ELEMTYPE);
  MPI_Type_commit(&ELEMTYPE);


  //create the 2nd new d_type

  int           blockcounts2[2]={2, 2*KEYLENGTH};
  MPI_Datatype  types2[2];
  MPI_Aint      displs2[2];

  NeighborPack* neigh=new NeighborPack;



  MPI_Address(&(neigh->target_proc), &displs2[0]);
  MPI_Address(&(neigh->elkey), &displs2[1]);

  types2[0]=MPI_INT;
  types2[1]=MPI_UNSIGNED;

  for(d=1; d>=0; d--)
    displs2[d]-=displs2[0];

  MPI_Type_struct(2, blockcounts2, displs2, types2, &NEIGHTYPE);
  MPI_Type_commit(&NEIGHTYPE);



  //create the 3rd new d_type

  int           blockcounts3[2]={1, 4*KEYLENGTH};
  MPI_Datatype  types3[2]={MPI_INT, MPI_UNSIGNED};
  MPI_Aint      displs3[2]={0,0};
  
  refined_neighbor_pack* fine=new refined_neighbor_pack;
  
  MPI_Address(&(fine->orig_gen), &displs3[0]);
  MPI_Address(&(fine->target_element), &displs3[1]);
  
  for(d=1; d>=0; d--)
    displs3[d]-=displs3[0];


  MPI_Type_struct(2, blockcounts3, displs3, types3, &REFINED_INFO);
  MPI_Type_commit(&REFINED_INFO);


  //create the 4th new d_type
  // for getting the neighbor solution in the new error estimator, when the neighbor is in diff subdomain
  
  int           blockcounts5[3]={6,KEYLENGTH,260};
  MPI_Datatype  types5[3];
  MPI_Aint      displs5[3];

  Neigh_Sol_Pack* neigh_sol = new Neigh_Sol_Pack;

  MPI_Address(&(neigh_sol->nside), &displs5[0]);
  MPI_Address((neigh_sol->key), &displs5[1]);
  MPI_Address((neigh_sol->solu), &displs5[2]);

  types5[0] = MPI_INT;
  types5[1] = MPI_UNSIGNED;
  types5[2] = MPI_DOUBLE;

  for(d=2; d>=0; d--)
    displs5[d]-=displs5[0]; 

  MPI_Type_struct(3, blockcounts5, displs5, types5, &NSOLTYPE);
  MPI_Type_commit(&NSOLTYPE);

  //delete neigh_sol;  //added by acbauer 4/3/02 -- may be a bug?????

  int blockcounts6[3] = {3,KEYLENGTH+1,1};
  MPI_Datatype types6[3] = {MPI_INT, MPI_UNSIGNED, MPI_FLOAT};
  MPI_Aint displs6[3];

  BSFC_VERTEX* sfc_vert_ptr = new BSFC_VERTEX;

  MPI_Address(&(sfc_vert_ptr->destination_proc), &displs6[0]);
  MPI_Address(&(sfc_vert_ptr->sfc_key[0]), &displs6[1]);
  MPI_Address(&(sfc_vert_ptr->lb_weight), &displs6[2]);

  for(d=2; d>=0; d--)
    displs6[d]-=displs6[0]; 

  MPI_Type_struct(3, blockcounts6, displs6, types6, &LB_VERT_TYPE);
  MPI_Type_commit(&LB_VERT_TYPE);
  

  //New data types are created at this point
    
}
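MPI_Address and MPI_Type_struct, used throughout the routine above, were deprecated in MPI-2 and removed in MPI-3; MPI_Get_address and MPI_Type_create_struct are the replacements. A sketch of the first (ELEMTYPE) construction written with those calls; ElemPack, KEYLENGTH, and the global ELEMTYPE are assumed to exist as in the original:

#include <mpi.h>

/* Sketch: the ELEMTYPE construction with the non-deprecated calls. */
void make_elem_type()
{
    int blockcounts[3] = { 58, 25 * KEYLENGTH, 102 };
    MPI_Datatype types[3] = { MPI_INT, MPI_UNSIGNED, MPI_DOUBLE };
    MPI_Aint displs[3], base;
    ElemPack *elem = new ElemPack;

    MPI_Get_address(&(elem->myprocess), &displs[0]);
    MPI_Get_address(&(elem->key[0]),    &displs[1]);
    MPI_Get_address(&(elem->elevation), &displs[2]);

    base = displs[0];
    for (int d = 0; d < 3; d++)
        displs[d] = MPI_Aint_diff(displs[d], base);  /* MPI-3.1; plain subtraction also works */

    MPI_Type_create_struct(3, blockcounts, displs, types, &ELEMTYPE);
    MPI_Type_commit(&ELEMTYPE);
    delete elem;   /* only needed as a template for the displacements */
}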
Example #24
/* subarray_1d_c_test1()
 *
 * Returns the number of errors encountered.
 */
int subarray_1d_c_test1(void)
{
    MPI_Datatype subarray;
    int array[9] = { -1, 1, 2, 3, -2, -3, -4, -5, -6 };
    int array_size[] = { 9 };
    int array_subsize[] = { 3 };
    int array_start[] = { 1 };

    int i, err, errs = 0, sizeoftype;

    /* set up type */
    err = MPI_Type_create_subarray(1,   /* dims */
                                   array_size,
                                   array_subsize, array_start, MPI_ORDER_C, MPI_INT, &subarray);
    if (err != MPI_SUCCESS) {
        errs++;
        if (verbose) {
            fprintf(stderr,
                    "error in MPI_Type_create_subarray call; aborting after %d errors\n", errs);
        }
        return errs;
    }

    MPI_Type_commit(&subarray);
    MPI_Type_size(subarray, &sizeoftype);
    if (sizeoftype != 3 * sizeof(int)) {
        errs++;
        if (verbose)
            fprintf(stderr, "size of type = %d; should be %d\n",
                    sizeoftype, (int) (3 * sizeof(int)));
        return errs;
    }

    err = pack_and_unpack((char *) array, 1, subarray, 9 * sizeof(int));

    for (i = 0; i < 9; i++) {
        int goodval;
        switch (i) {
            case 1:
                goodval = 1;
                break;
            case 2:
                goodval = 2;
                break;
            case 3:
                goodval = 3;
                break;
            default:
                goodval = 0;    /* pack_and_unpack() zeros before unpacking */
                break;
        }
        if (array[i] != goodval) {
            errs++;
            if (verbose)
                fprintf(stderr, "array[%d] = %d; should be %d\n", i, array[i], goodval);
        }
    }

    MPI_Type_free(&subarray);
    return errs;
}
Example #25
int main( int argc, char *argv[] )
{
    int      errs = 0;
    MPI_Win  win;
    int  *rmabuffer=0, *getbuf=0;
    MPI_Aint bufsize=0, getbufsize=0;
    int      master, partner, next, wrank, wsize, i;
    int      ntest = LAST_TEST;
    int *srcbuf;

    MTest_Init( &argc, &argv );

    /* Determine who is responsible for each part of the test */
    MPI_Comm_rank( MPI_COMM_WORLD, &wrank );
    MPI_Comm_size( MPI_COMM_WORLD, &wsize );
    if (wsize < 3) {
	fprintf( stderr, "This test requires at least 3 processes\n" );
	MPI_Abort( MPI_COMM_WORLD, 1 );
    }

    master  = 0;
    partner = 1;
    next = wrank + 1;
    if (next == partner) next++;
    if (next >= wsize) {
	next = 0;
	if (next == partner) next++;
    }

    /* Determine the last test to run (by default, run them all) */
    for (i=1; i<argc; i++) {
	if (strcmp( "-ntest", argv[i] ) == 0) { 
	    i++;
	    if (i < argc) {
		ntest = atoi( argv[i] );
	    }
	    else {
		fprintf( stderr, "Missing value for -ntest\n" );
		MPI_Abort( MPI_COMM_WORLD, 1 );
	    }
	}
    }

    MPI_Type_vector( veccount, 1, stride, MPI_INT, &vectype );
    MPI_Type_commit( &vectype );

    /* Create the RMA window */
    bufsize = 0;
    if (wrank == master) {
	bufsize = RMA_SIZE;
	MPI_Alloc_mem( bufsize*sizeof(int), MPI_INFO_NULL, &rmabuffer );
    }
    else if (wrank == partner) {
	getbufsize = RMA_SIZE;
	getbuf = (int *)malloc( getbufsize*sizeof(int) );
	if (!getbuf) {
	    fprintf( stderr, "Unable to allocated %d bytes for getbuf\n", 
		    (int)getbufsize );
	    MPI_Abort( MPI_COMM_WORLD, 1 );
	}
    }
    srcbuf = malloc(RMA_SIZE*sizeof(*srcbuf));
    assert(srcbuf);

    MPI_Win_create( rmabuffer, bufsize, sizeof(int), MPI_INFO_NULL,
		    MPI_COMM_WORLD, &win );
    
    /* Run a sequence of tests */
    for (i=0; i<=ntest; i++) {
	if (wrank == master) {
	    MTestPrintfMsg( 0, "Test %d\n", i );
	    /* Because this lock is local, it must return only when the
	     lock is acquired */
	    MPI_Win_lock( MPI_LOCK_EXCLUSIVE, 0, master, win );
	    RMATestInit( i, rmabuffer, bufsize );
	    MPI_Send( MPI_BOTTOM, 0, MPI_INT, partner, i, MPI_COMM_WORLD );
	    MPI_Send( MPI_BOTTOM, 0, MPI_INT, next, i, MPI_COMM_WORLD );
	    MPI_Recv( MPI_BOTTOM, 0, MPI_INT, MPI_ANY_SOURCE, i, 
		      MPI_COMM_WORLD, MPI_STATUS_IGNORE );
	    MPI_Win_unlock( master, win );
	    MPI_Recv( MPI_BOTTOM, 0, MPI_INT, partner, i, MPI_COMM_WORLD, 
		      MPI_STATUS_IGNORE );
	    errs += RMACheck( i, rmabuffer, bufsize );
	}
	else if (wrank == partner) {
	    MPI_Recv( MPI_BOTTOM, 0, MPI_INT, master, i, MPI_COMM_WORLD,
		      MPI_STATUS_IGNORE );
	    MPI_Win_lock( MPI_LOCK_EXCLUSIVE, 0, master, win );
	    RMATest( i, win, master, srcbuf, RMA_SIZE, getbuf, getbufsize );
	    MPI_Win_unlock( master, win );
	    errs += RMACheckGet( i, win, getbuf, getbufsize );
	    MPI_Send( MPI_BOTTOM, 0, MPI_INT, master, i, MPI_COMM_WORLD );
	}
	else {
	    MPI_Recv( MPI_BOTTOM, 0, MPI_INT, MPI_ANY_SOURCE, i, 
		      MPI_COMM_WORLD, MPI_STATUS_IGNORE );
	    MPI_Send( MPI_BOTTOM, 0, MPI_INT, next, i, MPI_COMM_WORLD );
	}
    }

    if (rmabuffer) {
	MPI_Free_mem( rmabuffer );
    }
    if (getbuf) {
	free( getbuf );
    }
    MPI_Win_free( &win );
    MPI_Type_free( &vectype );

    MTest_Finalize( errs );
    MPI_Finalize();
    return MTestReturnValue( errs );
}
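RMATest(), RMACheck(), and the other helpers are defined elsewhere in the test; the passive-target pattern they exercise on the window is lock / put (or get) / unlock. A minimal illustrative sketch of that pattern (the function name and arguments are placeholders):

#include <mpi.h>

/* Sketch: write `count` ints from `src` into rank `master`'s window at
 * displacement `disp`, under an exclusive passive-target lock. */
void put_to_master(MPI_Win win, int master, int *src, int count, MPI_Aint disp)
{
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, master, 0, win);
    MPI_Put(src, count, MPI_INT, master, disp, count, MPI_INT, win);
    MPI_Win_unlock(master, win);
}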
Example #26
/**
 * accumulates pieces of the spinor field on the nodes whose coordinate is 0 in the
 * dimensions selected by `which`; the collected data is returned via
 * field_collection and membuff
 */
void spinor_fft_reduce_2d(spinor *localSpinorField,int *collectionRank,spinor*** field_collection,spinor **membuff){
  /* this implementation is intended for four dimensional parallelisation */
#if (defined  PARALLELXYZT  && defined MPI && defined HAVE_FFTW)

  int sendRecvCoord[4];
  int i;
  int dims[]={g_nproc_t,g_nproc_x,g_nproc_y,g_nproc_z};


  /* logfile variables */
  char *logFilePrefix="Process";
  char logFileName[512];
  FILE *logFile;
  const int MSG_LOCALDATA = 457;
  MPI_Status ierr;
  MPI_Datatype mpi_local_spinor;
  const int which[]={0,1};


  (*field_collection)=NULL;
  (*membuff)=NULL;

/*   int result; */
  sprintf(logFileName,"./%s_%02d.log",logFilePrefix,g_cart_id);
  logFile=fopen(logFileName,"a");


  MPI_Type_contiguous(VOLUME, field_point, &mpi_local_spinor);
  MPI_Type_commit(&mpi_local_spinor);


  for(i=0;i<4;i++)
    sendRecvCoord[i]=g_proc_coords[i];

  if( g_proc_coords[which[0]] == 0 && g_proc_coords[which[1]] == 0 ){

      /* i am one of the nodes where data is accumulated */
      spinor **accu_field;
      spinor **fft_field;
      spinor *memory_buffer_accu_field;
      spinor *memory_buffer_fft_field;
      int REDUCTIONVOLUME=1;
      int recvRank;
      MPI_Request *requests;
      MPI_Status *status;
      int request_count=0;
      int num_requests;
      fftw_plan local_2d_fft_forward;

      *collectionRank=TRUE;

      /* calculate the number of reduced 2d volume accumulated in this node */
      
      /* number of spinor fields in local units */
      REDUCTIONVOLUME*=dims[which[0]]*dims[which[1]];

      /* number of receive messages */
      num_requests=REDUCTIONVOLUME-1;

      /* reserve space for receive messages */
      requests=(MPI_Request*)malloc(sizeof(MPI_Request)*num_requests);
      status=(MPI_Status*)malloc(sizeof(MPI_Status)*num_requests);

      fprintf(logFile,"reduction volume = %d\n",REDUCTIONVOLUME);

      /* allocate space for spinor field collection */
      allocate_spinor_field_array(&accu_field,&memory_buffer_accu_field,VOLUME,REDUCTIONVOLUME);
      allocate_spinor_field_array(&fft_field,&memory_buffer_fft_field,VOLUME,REDUCTIONVOLUME);


      /* receive from certain nodes pieces of the spinor field */
      for(sendRecvCoord[which[0]] = 0 ; sendRecvCoord[which[0]]< dims[which[0]] ; sendRecvCoord[which[0]]++){
	for(sendRecvCoord[which[1]] = 0 ; sendRecvCoord[which[1]]< dims[which[1]] ; sendRecvCoord[which[1]]++){
	  if( sendRecvCoord[which[0]] != 0 || sendRecvCoord[which[1]]  != 0){

	    MPI_Cart_rank(g_cart_grid,sendRecvCoord,&recvRank);

	    MPI_Irecv(accu_field[sendRecvCoord[which[0]]*dims[which[1]]+sendRecvCoord[which[1]] ] /* buffer */,
		     1, /* how many */
		     mpi_local_spinor, /* mpi data type */
		     recvRank, /* from whom i get it */
		     MSG_LOCALDATA, /* msg id */
		     g_cart_grid, /* communicator , status */
		     requests+request_count);
	    ++request_count;

	  }
	}
      }


      /* wait until all request finished */
      MPI_Waitall(num_requests, requests, status);

      assign(accu_field[0],localSpinorField,VOLUME);

      /* transpose in xp-t space */
      spinor_fft_transpose_xp_t(fft_field[0],accu_field[0],dims[0],dims[1],TRUE,1.);

      /* create fftw plan */
      local_2d_fft_forward=spinor_fftw_plan2d(fft_field[0],accu_field[0],T*dims[0],LX*dims[1],LY*LZ,1,FFTW_ESTIMATE);
      fftw_execute(local_2d_fft_forward);
      fftw_destroy_plan(local_2d_fft_forward);

/*       assign(accu_field[0],fft_field[0],VOLUME*REDUCTIONVOLUME); */


      free_spinor_field_array(&memory_buffer_fft_field); memory_buffer_fft_field=NULL;

/*       free_spinor_field_array(&memory_buffer_accu_field); memory_buffer_accu_field=NULL; */
      (*field_collection)=accu_field;
      (*membuff)=memory_buffer_accu_field;
      free(requests); requests = NULL;
      free(status); status=NULL;

    } else {
      int sendRank;
      MPI_Request request;
      MPI_Status status;

      *collectionRank=FALSE;

      /* coordinates of the "root" */
      sendRecvCoord[which[0]]=0;
      sendRecvCoord[which[1]]=0;

      MPI_Cart_rank(g_cart_grid,sendRecvCoord,&sendRank); 

      MPI_Isend(localSpinorField,1,mpi_local_spinor,sendRank,MSG_LOCALDATA,g_cart_grid,&request);

      MPI_Wait(&request,&status);

    }


    MPI_Type_free(&mpi_local_spinor);

    fclose(logFile);

#else
    if(g_proc_id==0)
      fprintf(stderr,"Error: Please choose FOUR dimensional parallelization!!!\n");

#endif
}
Example #27
int main(int argc, char *argv[]) {

  int my_id, nprocs;
  int mpi_dims[4]; 
  int period[4] = {0, 0, 0, 0};
  int coords[4];

  int dimsf[4] = {nbands, gpts, gpts, gpts};
  int count[4];
  int offset[4];
  int ndims = 4;

  double t0, t1;
#ifdef PAPI
  PAPI_dmem_info_t dmem;
  double mem1, mem2, mem1_max, mem2_max, mem1_ave, mem2_ave;
  int papi_err;
#endif

  double *my_data;

  MPI_Comm cart_comm;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  assert(argc == 5);
  for (int i=1; i < argc; i++)
    mpi_dims[i-1] = atoi(argv[i]);

  assert(mpi_dims[0] * mpi_dims[1] * mpi_dims[2] * mpi_dims[3] == nprocs);
  MPI_Cart_create(MPI_COMM_WORLD, 4, mpi_dims, period, 0, &cart_comm);
  MPI_Comm_rank(cart_comm, &my_id);

  MPI_Cart_coords(cart_comm, my_id, 4, coords);

  assert(nbands % mpi_dims[0] == 0);
  for (int i=1; i < 4; i++)
    assert(gpts % mpi_dims[i] == 0);

  int total_size = nbands*gpts*gpts*gpts;
  count[0] = nbands / mpi_dims[0];
  offset[0] = coords[0] * count[0];
  int data_size = count[0];
  for (int i=1; i < 4; i++)
    {
      count[i] = gpts/mpi_dims[i];
      offset[i] = coords[i] * count[i];
      data_size *= count[i];
    }

  my_data = (double *) malloc(data_size * sizeof(double));
  for (int i=0; i < data_size; i++)
    my_data[i] = my_id;

  MPI_Info info;
  MPI_File fp;
  MPI_Datatype filetype;
  // MPI_Info_set(info, "cb_nodes", "64");

  MPI_Barrier(MPI_COMM_WORLD);
#ifdef PAPI
  papi_err = PAPI_get_dmem_info(&dmem);
  if (papi_err != PAPI_OK)
    printf("PAPI_ERR\n");
  mem1 = (double)dmem.size / 1024.0;
  MPI_Reduce(&mem1, &mem1_max, 1, MPI_DOUBLE, MPI_MAX, 0, cart_comm);
  MPI_Reduce(&mem1, &mem1_ave, 1, MPI_DOUBLE, MPI_SUM, 0, cart_comm);
  mem1_ave /= nprocs;
#endif
  t0 = MPI_Wtime();
  MPI_File_open(MPI_COMM_WORLD, "test.dat",
                  MPI_MODE_CREATE|MPI_MODE_WRONLY,
                  MPI_INFO_NULL, &fp);

  MPI_Type_create_subarray(ndims, dimsf, count, offset, MPI_ORDER_C, 
			   MPI_DOUBLE, &filetype);
  MPI_Type_commit(&filetype);
  MPI_File_set_view(fp, 0, MPI_DOUBLE, filetype, "native", MPI_INFO_NULL);

  MPI_File_write_all(fp, my_data, data_size, MPI_DOUBLE, MPI_STATUS_IGNORE);

  MPI_Type_free(&filetype);
  MPI_File_close(&fp);
  MPI_Barrier(MPI_COMM_WORLD);
  t1 = MPI_Wtime();
#ifdef PAPI
  papi_err = PAPI_get_dmem_info(&dmem);
  if (papi_err != PAPI_OK)
    printf("PAPI_ERR\n");
  mem2 = (double)dmem.size/ 1024.0;
  MPI_Reduce(&mem2, &mem2_max, 1, MPI_DOUBLE, MPI_MAX, 0, cart_comm);
  MPI_Reduce(&mem2, &mem2_ave, 1, MPI_DOUBLE, MPI_SUM, 0, cart_comm);
  mem2_ave /= nprocs;
#endif
  if (my_id == 0)
    {
      printf("IO time %f (%f) MB %f s\n", 
             total_size * 8/(1024.0*1024.0), 
             data_size * 8/(1024.0*1024.0), t1-t0);
#ifdef PAPI
      printf("Memory usage max (ave): %f (%f) %f (%f) \n", 
              mem1_max, mem1_ave, mem2_max, mem2_ave);
#endif
    }
      
  MPI_Finalize();
}
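A natural companion to the collective write above is reading the file back with the same subarray filetype; only the access mode and the I/O call change. A hedged sketch, assuming the same dimsf/count/offset decomposition as above:

#include <mpi.h>

/* Sketch: collective read-back of "test.dat" through the same subarray view. */
void read_back(int ndims, int dimsf[], int count[], int offset[],
               double *my_data, int data_size)
{
    MPI_File fp;
    MPI_Datatype filetype;

    MPI_Type_create_subarray(ndims, dimsf, count, offset, MPI_ORDER_C,
                             MPI_DOUBLE, &filetype);
    MPI_Type_commit(&filetype);

    MPI_File_open(MPI_COMM_WORLD, "test.dat", MPI_MODE_RDONLY,
                  MPI_INFO_NULL, &fp);
    MPI_File_set_view(fp, 0, MPI_DOUBLE, filetype, "native", MPI_INFO_NULL);
    MPI_File_read_all(fp, my_data, data_size, MPI_DOUBLE, MPI_STATUS_IGNORE);
    MPI_File_close(&fp);

    MPI_Type_free(&filetype);
}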
Example #28
/* test uses a vector type that describes data that is contiguous,
 * but processed in a noncontiguous way.  this is effectively the
 * same type as in the struct_negdisp_test above.
 */
int vector_negstride_test(void)
{
    int err, errs = 0;
    int sendbuf[6] = { 1, 2, 3, 4, 5, 6 };
    int recvbuf[6] = { -1, -2, -3, -4, -5, -6 };
    MPI_Datatype myvector;
    MPI_Request request;
    MPI_Status status;

    err = MPI_Type_vector(2, 1, -1, MPI_INT, &myvector);
    if (err != MPI_SUCCESS) {
	errs++;
	if (verbose) {
	    fprintf(stderr, "MPI_Type_vector returned error\n");
	}
    }

    MPI_Type_commit(&myvector);

    err = MPI_Irecv(recvbuf+1, 4, MPI_INT, 0, 0, MPI_COMM_SELF, &request);
    if (err != MPI_SUCCESS) {
	errs++;
	if (verbose) {
	    fprintf(stderr, "MPI_Irecv returned error\n");
	}
    }

    err = MPI_Send(sendbuf+2, 2, myvector, 0, 0, MPI_COMM_SELF);
    if (err != MPI_SUCCESS) {
	errs++;
	if (verbose) {
	    fprintf(stderr, "MPI_Send returned error\n");
	}
    }

    err = MPI_Wait(&request, &status);
    if (err != MPI_SUCCESS) {
	errs++;
	if (verbose) {
	    fprintf(stderr, "MPI_Wait returned error\n");
	}
    }

    /* verify data */
    if (recvbuf[0] != -1) {
	errs++;
	if (verbose) {
	    fprintf(stderr, "recvbuf[0] = %d; should be %d\n", recvbuf[0], -1);
	}
    }
    if (recvbuf[1] != 3) {
	errs++;
	if (verbose) {
	    fprintf(stderr, "recvbuf[1] = %d; should be %d\n", recvbuf[1], 3);
	}
    }
    if (recvbuf[2] != 2) {
	errs++;
	if (verbose) {
	    fprintf(stderr, "recvbuf[2] = %d; should be %d\n", recvbuf[2], 2);
	}
    }
    if (recvbuf[3] != 5) {
	errs++;
	if (verbose) {
	    fprintf(stderr, "recvbuf[3] = %d; should be %d\n", recvbuf[3], 5);
	}
    }
    if (recvbuf[4] != 4) {
	errs++;
	if (verbose) {
	    fprintf(stderr, "recvbuf[4] = %d; should be %d\n", recvbuf[4], 4);
	}
    }
    if (recvbuf[5] != -6) {
	errs++;
	if (verbose) {
	    fprintf(stderr, "recvbuf[5] = %d; should be %d\n", recvbuf[5], -6);
	}
    }

    MPI_Type_free(&myvector);

    return errs;
}
Example #29
/* ADIOI_PVFS2_Open:
 *  one process opens (or creates) the file, then broadcasts the result to the
 *  remaining processors. 
 *
 *  ADIO_Open used to perform an optimization when MPI_MODE_CREATE (and before
 * that, MPI_MODE_EXCL) was set.  Because PVFS2 handles file lookup and
 * creation more scalably than other file systems, ADIO_Open now skips any
 * special handling when CREATE is set.  */
void ADIOI_PVFS2_Open(ADIO_File fd, int *error_code)
{
    int rank, ret;
    PVFS_fs_id cur_fs;
    static char myname[] = "ADIOI_PVFS2_OPEN";
    char pvfs_path[PVFS_NAME_MAX] = {0};

    ADIOI_PVFS2_fs *pvfs2_fs;

    /* since one process is doing the open, that means one process is also
     * doing the error checking.  define a struct for both the object reference
     * and the error code to broadcast to all the processors */

    open_status o_status = {0, {0, 0}};
    MPI_Datatype open_status_type;
    MPI_Datatype types[2] = {MPI_INT, MPI_BYTE};
    int lens[2] = {1, sizeof(PVFS_object_ref)};
    MPI_Aint offsets[2];
    
    pvfs2_fs = (ADIOI_PVFS2_fs *) ADIOI_Malloc(sizeof(ADIOI_PVFS2_fs));

    /* --BEGIN ERROR HANDLING-- */
    if (pvfs2_fs == NULL) {
	*error_code = MPIO_Err_create_code(MPI_SUCCESS,
					   MPIR_ERR_RECOVERABLE,
					   myname, __LINE__,
					   MPI_ERR_UNKNOWN,
					   "Error allocating memory", 0);
	return;
    }
    /* --END ERROR HANDLING-- */

    MPI_Comm_rank(fd->comm, &rank);

    ADIOI_PVFS2_Init(error_code);
    if (*error_code != MPI_SUCCESS)
    {
	/* ADIOI_PVFS2_INIT handles creating error codes on its own */
	return;
    }

    /* currently everyone gets their own credentials */
    ADIOI_PVFS2_makecredentials(&(pvfs2_fs->credentials));

    /* one process resolves name and will later bcast to others */
    if (rank == fd->hints->ranklist[0] && fd->fs_ptr == NULL) {
	/* given the filename, figure out which pvfs filesystem it is on */
	ret = PVFS_util_resolve(fd->filename, &cur_fs, 
		pvfs_path, PVFS_NAME_MAX);
	if (ret < 0 ) {
	    PVFS_perror("PVFS_util_resolve", ret);
	    /* TODO: pick a good error for this */
	    o_status.error = -1;
	} else  {
	    fake_an_open(cur_fs, pvfs_path,
                         fd->access_mode, fd->hints->striping_factor,
                         fd->hints->striping_unit,
                         pvfs2_fs, &o_status);
	}

	/* store credentials and object reference in fd */
	pvfs2_fs->object_ref = o_status.object_ref;
	fd->fs_ptr = pvfs2_fs;
    }

    /* broadcast status and (possibly valid) object reference */
    MPI_Address(&o_status.error, &offsets[0]);
    MPI_Address(&o_status.object_ref, &offsets[1]);

    MPI_Type_struct(2, lens, offsets, types, &open_status_type);
    MPI_Type_commit(&open_status_type);

    /* Assertion: if we hit this Bcast, then all processes collectively
     *            called this open.
     *
     * That's because deferred open never happens with PVFS2.
     */
    MPI_Bcast(MPI_BOTTOM, 1, open_status_type, fd->hints->ranklist[0],
	      fd->comm);
    MPI_Type_free(&open_status_type);

    /* --BEGIN ERROR HANDLING-- */
    if (o_status.error != 0)
    { 
	ADIOI_Free(pvfs2_fs);
	*error_code = MPIO_Err_create_code(MPI_SUCCESS,
					   MPIR_ERR_RECOVERABLE,
					   myname, __LINE__,
					   ADIOI_PVFS2_error_convert(o_status.error),
					   "Unknown error", 0);
	/* TODO: FIX STRING */
	return;
    }
    /* --END ERROR HANDLING-- */

    pvfs2_fs->object_ref = o_status.object_ref;
    fd->fs_ptr = pvfs2_fs;

    *error_code = MPI_SUCCESS;
    return;
}
Example #30
int main(int argc, char *argv[])
{
    /* Variable declarations */
    int a[100][100], b[100][100];
    MPI_Datatype row, xpose;
    MPI_Aint sizeofint;
	
    int /* err, */ errs = 0;
    int bufsize, position = 0;
    void *buffer;
  
    int i, j;
  
    /* Initialize a to some known values. */
    for(i = 0; i < 100; i++) {
	for(j = 0; j < 100; j++) {
	    a[i][j] = i*1000+j;
	    b[i][j] = -1;
	}
    }
  
    /* Initialize MPI */
    MPI_Init(&argc, &argv);
    parse_args(argc, argv);

    MPI_Type_extent(MPI_INT, &sizeofint);
	
    /* Create datatypes. */
    MPI_Type_vector(100, 1, 100, MPI_INT, &row);
    MPI_Type_hvector(100, 1, sizeofint, row, &xpose);
    MPI_Type_commit(&xpose);
	
    /* Pack it. */
    MPI_Pack_size(1, xpose, MPI_COMM_WORLD, &bufsize);
    buffer = (char *) malloc((unsigned) bufsize);

    /* To improve reporting of problems about operations, we
       change the error handler to errors return */
    MPI_Comm_set_errhandler( MPI_COMM_WORLD, MPI_ERRORS_RETURN );

    /* err = */ MPI_Pack(a,
		   1,
		   xpose,
		   buffer,
		   bufsize,
		   &position,
		   MPI_COMM_WORLD);
	
    /* Unpack the buffer into b. */
    position = 0;
    /* err = */ MPI_Unpack(buffer,
		     bufsize,
		     &position,
		     b,
		     100*100,
		     MPI_INT,
		     MPI_COMM_WORLD);

    for (i = 0; i < 100; i++) {
	for (j = 0; j < 100; j++) {
	    if(b[i][j] != a[j][i]) {
		errs++;
		if (verbose) fprintf(stderr, "b[%d][%d] = %d, should be %d\n",
				     i, j, b[i][j], a[j][i]);
	    }
	}
    }

    MPI_Type_free(&xpose);
    MPI_Type_free(&row);
    
    /* print message and exit */
    if (errs) {
	fprintf(stderr, "Found %d errors\n", errs);
    }
    else {
	printf(" No Errors\n");
    }
    MPI_Finalize();
    free(buffer);
    return 0;
}
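MPI_Type_extent and MPI_Type_hvector used above are deprecated (and removed in MPI-3); the same transpose type can be built with MPI_Type_get_extent and MPI_Type_create_hvector. A sketch of that construction:

#include <mpi.h>

/* Sketch: transpose type for a 100x100 row-major int array, built with the
 * current (non-deprecated) calls. */
void make_xpose_type(MPI_Datatype *xpose)
{
    MPI_Datatype row;
    MPI_Aint lb, extent;

    MPI_Type_get_extent(MPI_INT, &lb, &extent);

    /* one column of the array: 100 ints with a stride of 100 ints */
    MPI_Type_vector(100, 1, 100, MPI_INT, &row);
    /* 100 such columns, each shifted by one int, visit the array in
     * transposed (column-major) order */
    MPI_Type_create_hvector(100, 1, extent, row, xpose);
    MPI_Type_commit(xpose);
    MPI_Type_free(&row);
}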