int main(int argc, char* argv[])
{
    MPI_Init(&argc,&argv);
    MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &MPI_COMM_NODE);

    int n = (argc>1) ? atoi(argv[1]) : 1000;

    int wrank, wsize;
    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
    MPI_Comm_size(MPI_COMM_WORLD, &wsize);

    int nrank, nsize;
    MPI_Comm_rank(MPI_COMM_WORLD, &nrank);
    MPI_Comm_size(MPI_COMM_WORLD, &nsize);

    char * buf1 = NULL;
    char * buf2 = NULL;
    MPI_Alloc_mem(n, MPI_INFO_NULL, &buf1);
    MPI_Alloc_mem(n, MPI_INFO_NULL, &buf2);

    memset(buf1, nrank==0 ? 'Z' : 'A', n);
    memset(buf2, nrank==0 ? 'Z' : 'A', n);

    double t0, t1, dt;
    for (int r=0; r<20; r++) {
        MPI_Barrier(MPI_COMM_WORLD);
        t0 = MPI_Wtime();
        MPI_Bcast(buf1, n, MPI_CHAR, 0, MPI_COMM_NODE);
        t1 = MPI_Wtime();
        dt = t1-t0;
        printf("%d: MPI_Bcast: %lf seconds, %lf MB/s \n", wrank, dt, n*(1.e-6/dt));
        fflush(stdout);

        MPI_Barrier(MPI_COMM_WORLD);
        t0 = MPI_Wtime();
        SMP_Bcast(buf2, n, MPI_CHAR, 0, MPI_COMM_NODE);
        t1 = MPI_Wtime();
        dt = t1-t0;
        printf("%d: SMP_Bcast: %lf seconds, %lf MB/s \n", wrank, dt, n*(1.e-6/dt));
        fflush(stdout);

        if (r==0) {
            char * tmp = malloc(n);
            memset(tmp, 'Z', n);
            int err1 = memcmp(tmp, buf1, n);
            int err2 = memcmp(tmp, buf2, n);
            if (err1>0 || err2>0) {
                printf("%d: errors: MPI (%d), SMP (%d) \n", wrank, err1, err2);
            }
        }
    }

    MPI_Free_mem(buf1);
    MPI_Free_mem(buf2);

    MPI_Comm_free(&MPI_COMM_NODE);

    MPI_Finalize();
    return 0;
}
示例#2
0
int main(int argc, char * argv[])
{
    const MPI_Count test_int_max = BigMPI_Get_max_int();

    MPI_Init(&argc, &argv);

    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (size<1) {
        printf("Use 1 or more processes. \n");
        MPI_Finalize();
        return 1;
    }

    int l = (argc > 1) ? atoi(argv[1]) : 2;
    int m = (argc > 2) ? atoi(argv[2]) : 17777;
    MPI_Count n = l * test_int_max + m;

    char * buf_send = NULL;
    char * buf_recv = NULL;

    MPI_Alloc_mem((MPI_Aint)n * size, MPI_INFO_NULL, &buf_send);
    assert(buf_send!=NULL);
    MPI_Alloc_mem((MPI_Aint)n,        MPI_INFO_NULL, &buf_recv);
    assert(buf_recv!=NULL);

    if (rank==0) {
        for (int i = 0; i < size; ++i) {
            for (MPI_Count j = 0; j < n; ++j) {
                buf_send[i*n+j] = (unsigned char)i;
            }
        }
    }
    memset(buf_recv, -1, (size_t)n);

    /* collective communication */
    MPIX_Scatter_x(buf_send, n, MPI_CHAR,
                   buf_recv, n, MPI_CHAR,
                   0 /* root */, MPI_COMM_WORLD);

    size_t errors = verify_buffer(buf_recv, n, rank);

    MPI_Free_mem(buf_send);
    MPI_Free_mem(buf_recv);

    if (rank==0 && errors==0) {
        printf("SUCCESS\n");
    }

    MPI_Finalize();

    return 0;
}
示例#3
0
int main (int argc, char *argv[])
{
        struct pe_vars v;
	long * msg_buffer;
	/*
	 * Initialize
	 */
	init_mpi(&v);
	check_usage(argc, argv, v.npes, v.me);
	print_header(v.me);

	if (v.me == 0) printf("Total processes = %d\n",v.npes);
	/*
	 * Allocate Memory
	 */
	msg_buffer = allocate_memory(v.me, &(v.win) );
	memset(msg_buffer, 0, MAX_MSG_SZ * ITERS_LARGE * sizeof(long));
	/*
	 * Time Put Message Rate
	 */
	benchmark(msg_buffer, v.me, v.pairs, v.nxtpe, v.win);
	/*
	 * Finalize
	 */
	MPI_Win_unlock_all(v.win);
	MPI_Win_free(&v.win); 	
	MPI_Free_mem(msg_buffer);

	MPI_Finalize();

	return EXIT_SUCCESS;
}
示例#4
0
int main (int argc,char *argv[]) {
  int       i;
  double    w[NEL];
  MPI_Aint  win_size,warr_size;
  MPI_Win  *win;

  win_size=sizeof(MPI_Win);
  warr_size=sizeof(MPI_DOUBLE)*NEL;
  
      
  MPI_Init (&argc, &argv);
  
  for(i=0;i<NTIMES;i++) {
      MPI_Alloc_mem(win_size,MPI_INFO_NULL,&win);

      MPI_Win_create(w,warr_size,sizeof(double),MPI_INFO_NULL,MPI_COMM_WORLD,win);
      MPI_Win_free(win);

      MPI_Free_mem(win);
  }

  MPI_Finalize();

  return 0;

}
示例#5
0
void IMB_del_r_buf(struct comm_info* c_info )
/*


                      Deletes recv buffer component of c_info



In/out variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information



*/
{
    /* July 2002 V2.2.1 change: use MPI_Free_mem */
    if ( c_info->r_alloc> 0)
    {
#if (defined EXT || defined MPIIO || defined RMA)
        MPI_Free_mem( c_info->r_buffer );
#else
        IMB_v_free( (void**)&c_info->r_buffer );
#endif

        c_info-> r_alloc = 0;
        c_info->r_buffer = NULL;

    }
}
示例#6
0
文件: mtest.c 项目: mpoquet/simgrid
/* Free the storage associated with a window object */
void MTestFreeWin(MPI_Win * win)
{
    void *addr;
    int flag, merr;

    merr = MPI_Win_get_attr(*win, MPI_WIN_BASE, &addr, &flag);
    if (merr)
        MTestPrintError(merr);
    if (!flag) {
        MTestError("Could not get WIN_BASE from window");
    }
    if (addr) {
        void *val;
        merr = MPI_Win_get_attr(*win, mem_keyval, &val, &flag);
        if (merr)
            MTestPrintError(merr);
        if (flag) {
            if (val == (void *) 1) {
                free(addr);
            }
            else if (val == (void *) 2) {
                merr = MPI_Free_mem(addr);
                if (merr)
                    MTestPrintError(merr);
            }
            /* if val == (void *)0, then static data that must not be freed */
        }
    }
    merr = MPI_Win_free(win);
    if (merr)
        MTestPrintError(merr);
}
示例#7
0
int main(int argc, char *argv[])
{
    int errs = 0, err;
    int j, count;
    char *ap;

    MTest_Init(&argc, &argv);

    MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
    for (count = 1; count < 128000; count *= 2) {

        err = MPI_Alloc_mem(count, MPI_INFO_NULL, &ap);
        if (err) {
            int errclass;
            /* An error of  MPI_ERR_NO_MEM is allowed */
            MPI_Error_class(err, &errclass);
            if (errclass != MPI_ERR_NO_MEM) {
                errs++;
                MTestPrintError(err);
            }

        } else {
            /* Access all of this memory */
            for (j = 0; j < count; j++) {
                ap[j] = (char) (j & 0x7f);
            }
            MPI_Free_mem(ap);
        }
    }

    MTest_Finalize(errs);
    return MTestReturnValue(errs);
}
void SweptDiscretization2D::updateRemoteConstants(unsigned char *buffer)
{
	void *sendingBuffer = NULL;
	FILE *inFile = NULL;

	if(pg.rank == 0)
	{
		int bufferSize = this->remoteConstantsCount * n * n * pg.mpiSize * sizeof(double);
		MPI_Alloc_mem(bufferSize, MPI_INFO_NULL, &sendingBuffer);
		
		for(int r=0;r<pg.mpiSize;r++)
		{
			double *processing = (double*)sendingBuffer + (this->remoteConstantsCount * n * n * r);
			int jIndex = (r % (pg.xNodes*pg.yNodes)) / pg.xNodes;
			int iIndex = r % pg.xNodes;
			for(int j=0;j<n;j++)
			{
				for(int i=0;i<n;i++)
				{
					int iGlobal = n*iIndex + (i);
					int jGlobal = n*jIndex + (j);
					int index   = this->ijToConstantIndex(i,j);
					int globalIndex = this->remoteConstantsCount * (iGlobal + jGlobal * n * pg.xNodes);
					for(int k=0;k<this->remoteConstantsCount;k++)
					{
						processing[index + k] = ((double*)buffer)[k + globalIndex];
					}					
				}
			}		
		}
	}
	
	MPI_Win_fence(MPI_MODE_NOPRECEDE, this->constantsWindow);
	if(pg.rank == 0)
	{
		for(int r=0;r<pg.mpiSize;r++)
		{
			MPI_Put((unsigned char*)sendingBuffer + (r * remoteConstantsCount * n * n * sizeof(double)), remoteConstantsCount * n * n * sizeof(double), MPI_BYTE, r, 0, remoteConstantsCount * n * n * sizeof(double), MPI_BYTE, constantsWindow);			
		}
	}
	MPI_Win_fence((MPI_MODE_NOSTORE | MPI_MODE_NOSUCCEED), this->constantsWindow);

	if(pg.rank == 0)
	{
		MPI_Free_mem(sendingBuffer);					
	}
	for(int i=1;i<n+1;i++)
	{
		for(int j=1;j<n+1;j++)
		{
			for(int k=0;k<this->remoteConstantsCount;k++)
			{
				int windowIndex    = this->ijToConstantIndex(i-1,j-1);
				int foundationIndex = this->ijToIndex(i,j);
				this->foundation[foundationIndex + k] = this->remoteConstants[windowIndex + k];
			}
		}
	}
}
示例#9
0
文件: wrapper.c 项目: rscohn2/MLSL
int MPI_Free_mem(void* baseptr)
{
    if (max_ep > 0)
    {
        EPLIB_free(baseptr);
        return MPI_SUCCESS;
    }
    return MPI_Free_mem(baseptr);
}
int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);

  int my_rank; // Number of the node
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

  int node_count; // Total number of nodes
  MPI_Comm_size(MPI_COMM_WORLD, &node_count);
  
  // The root must load the input data to distribute to the other nodes
  if(my_rank == 0) {
    // In our case it generates a random array as input data
    srand(time(NULL));
    for(int item = 0; item < items; ++item)
      array[item] = rand();
  }
  
  int items_per_rank = items / node_count;
  int remainder_items = items % node_count;
  int* my_work;
  MPI_Alloc_mem(items_per_rank * sizeof(int), MPI_INFO_NULL, &my_work);
 
  // MPI_Scatter is a collective operation which distributes an equal-sized part of the given array to each node.
  MPI_Scatter(&array[remainder_items] /* send buffer */, items_per_rank /* send count per node */, MPI_INT /* send type */,
	      my_work /* receive buffer on each node */, items_per_rank /* receive count */ , MPI_INT /* receive type */, 
	      0 /* send buffer is stored on this rank */, MPI_COMM_WORLD /* communication channel */);
 
  // This is the actual working-loop
  long sub_sum = 0;
  for(int i=0; i < items_per_rank; i++)
    sub_sum += my_work[i];

  if(my_rank == 0) { // Scatter cannot deal with a division remainder so we manually deal with it
    while(remainder_items > 0)
      sub_sum += array[--remainder_items];
  }

  MPI_Free_mem(my_work);

  // MPI_Reduce with op-code MPI_SUM is a collective operation which sums up the input sub_sum of each node
  // into single a resulting output sum on the master.
  MPI_Reduce(&sub_sum /* input to sum up */, &sum /* output */, 1 /* input count */, MPI_LONG /* input type */,
	     MPI_SUM /* operation */, 0 /* output is stored on this rank */, MPI_COMM_WORLD /* communication channel */);
 
  if(my_rank == 0) {
    // The result of the computation now is available on rank 0.
    // We compare it with the sequential reference implementation to test our parallel implementation.
    if(sum == sum__sequential_reference_implementation())
      fprintf(stderr, "Test OK.\n");
    else
      fprintf(stderr, "Test FAILED!\n");
  }

  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Finalize();
  return EXIT_SUCCESS;
}
示例#11
0
void SweptDiscretization2D::allGatherAllOutputToFile(string filename)
{

	void *buffer = NULL;
	FILE *output;
	if(pg.rank == 0)
	{
		MPI_Alloc_mem(foundationSize * pg.mpiSize * sizeof(double), MPI_INFO_NULL, &buffer);		
		output = fopen(filename.c_str(),"wb");
	}

	MPI_Win_fence((MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE), foundationWindow);
	if(pg.rank == 0)
	{
		for(int r=0;r<pg.mpiSize;r++)
		{
			MPI_Get((char*)buffer + (r * foundationSize * sizeof(double)), foundationSize * sizeof(double), MPI_BYTE, r, 0, foundationSize * sizeof(double), MPI_BYTE, foundationWindow);			
		}
	}
	MPI_Win_fence(MPI_MODE_NOSUCCEED, foundationWindow);
	
	if(pg.rank == 0)
	{
		int w = (n * pg.xNodes);
		int h = (n * pg.yNodes);
		int resultArraySize = w * h;
		if(resultArray == NULL)
			resultArray = (double*) malloc(resultArraySize * sizeof(double) * outputLength);
		
		for(int r=0;r<pg.mpiSize;r++)
		{
			double *processing = (double*)buffer + (foundationSize * r);			
			int jIndex = (r % (pg.xNodes*pg.yNodes)) / pg.xNodes;
			int iIndex = r % pg.xNodes;
			for(int j=1;j<n+1;j++)
			{
				for(int i=1;i<n+1;i++)
				{
					int iGlobal = n*iIndex + (i-1);
					int jGlobal = n*jIndex + (j-1);
					int index   = this->ijToIndex(i,j);
					for(int point=0;point<outputLength;point++)
					{
						double val  = processing[index + constants + point];
						int resultIndex = (iGlobal + jGlobal * n * pg.xNodes) * outputLength + point;
						resultArray[resultIndex] = val;
					}					
				}
			}		
		}
		fwrite((const void*)resultArray,sizeof(double),resultArraySize,output);
		fclose(output);
		MPI_Free_mem(buffer);
				
	}
	MPI_Barrier(MPI_COMM_WORLD);
}
示例#12
0
static int _ZMPI_Alltoall_int_proclists_put(int alloc_mem, int nphases, int *sendbuf, int nsprocs, int *sprocs, int *recvbuf, int nrprocs, int *rprocs, MPI_Comm comm)
{
  int i, p, size, rank, *rcounts_put;

  MPI_Win win;


  MPI_Comm_size(comm, &size);
  MPI_Comm_rank(comm, &rank);

  if (alloc_mem) MPI_Alloc_mem(size * sizeof(int), MPI_INFO_NULL, &rcounts_put);
  else rcounts_put = recvbuf;

  if (nrprocs >= 0)
    for (i = 0; i < nrprocs; ++i) rcounts_put[rprocs[i]] = DEFAULT_INT;
  else
    for (i = 0; i < size; ++i) rcounts_put[i] = DEFAULT_INT;

  MPI_Win_create(rcounts_put, size * sizeof(int), sizeof(int), MPI_INFO_NULL, comm, &win);
  MPI_Win_fence(MPI_MODE_NOSTORE|MPI_MODE_NOPRECEDE, win);

  for (p = 0; p < nphases; ++p)
  {
/*    printf("%d: phase = %d of %d\n", rank, p, nphases);*/
  
    if (rank % nphases == p)
    {
      if (nsprocs >= 0)
      {
        for (i = 0; i < nsprocs; ++i)
          if (sendbuf[sprocs[i]] != DEFAULT_INT) MPI_Put(&sendbuf[sprocs[i]], 1, MPI_INT, sprocs[i], rank, 1, MPI_INT, win);

      } else
      {
        for (i = 0; i < size; ++i)
          if (sendbuf[i] != DEFAULT_INT) MPI_Put(&sendbuf[i], 1, MPI_INT, i, rank, 1, MPI_INT, win);
      }
    }

    if (p < nphases - 1) MPI_Win_fence(0, win);
  }

  MPI_Win_fence(MPI_MODE_NOPUT|MPI_MODE_NOSUCCEED, win);
  MPI_Win_free(&win);

  if (alloc_mem)
  {
    if (nrprocs >= 0)
      for (i = 0; i < nrprocs; ++i) recvbuf[rprocs[i]] = rcounts_put[rprocs[i]];
    else
      for (i = 0; i < size; ++i) recvbuf[i] = rcounts_put[i];

    MPI_Free_mem(rcounts_put);    
  }

  return MPI_SUCCESS;
}
示例#13
0
文件: mem.c 项目: RWTH-OS/MP-MPICH
void mpp_free (void *buf)
{
#if HAVE_MPI_ALLOC_MEM
    if (use_mpi_alloc) 
	MPI_Free_mem (buf);
    else 
#endif	
	free (buf);
    return;
}
示例#14
0
/**
  * Wrappers for MPI_Free_mem for computers which may not have them (MPI-1 computers).
  */
static void socket_freeMem(socket_t * s)
{
	assert(s->buffer);
#ifndef MC_NO_MPI_ALLOC_MEM
	MPI_Free_mem(s->buffer);
#else
	free(s->buffer);
#endif
	s->buffer = 0;
}
示例#15
0
void mpiofstream::flush()
{
  MPI_Status status;
  char* buf;
  MPI_Alloc_mem(ss.str().length()+1, MPI_INFO_NULL, &buf);
  strcpy(buf, ss.str().c_str());
  MPI_File_write_shared(fh, buf, ss.str().length(), MPI_CHAR, &status);
  MPI_Free_mem(buf);
  ss.str("");
}
示例#16
0
/** One-sided accumulate operation.
  *
  * @param[in] datatype ARMCI data type for the accumulate operation (see armci.h)
  * @param[in] scale    Pointer for a scalar of type datatype that will be used to
  *                     scale values in the source buffer
  * @param[in] src      Source address (remote)
  * @param[in] dst      Destination address (local)
  * @param[in] bytes    Number of bytes to transfer
  * @param[in] proc     Process id to target
  * @return             0 on success, non-zero on failure
  */
int PARMCI_Acc(int datatype, void *scale, void *src, void *dst, int bytes, int proc) {
  void  *src_buf;
  int    count, type_size, scaled;
  MPI_Datatype type;
  gmr_t *src_mreg, *dst_mreg;

  /* If NOGUARD is set, assume the buffer is not shared */
  if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD)
    src_mreg = gmr_lookup(src, ARMCI_GROUP_WORLD.rank);
  else
    src_mreg = NULL;

  dst_mreg = gmr_lookup(dst, proc);

  ARMCII_Assert_msg(dst_mreg != NULL, "Invalid remote pointer");

  /* Prepare the input data: Apply scaling if needed and acquire the DLA lock if
   * needed.  We hold the DLA lock if (src_buf == src && src_mreg != NULL). */

  scaled = ARMCII_Buf_acc_is_scaled(datatype, scale);

  if (scaled) {
      MPI_Alloc_mem(bytes, MPI_INFO_NULL, &src_buf);
      ARMCII_Assert(src_buf != NULL);
      ARMCII_Buf_acc_scale(src, src_buf, bytes, datatype, scale);
  } else {
    src_buf = src;
  }

  /* Check if we need to copy: user requested it or same mem region */
  if (   (src_buf == src) /* buf_prepare didn't make a copy */
      && (ARMCII_GLOBAL_STATE.shr_buf_method == ARMCII_SHR_BUF_COPY || src_mreg == dst_mreg) )
  {
    MPI_Alloc_mem(bytes, MPI_INFO_NULL, &src_buf);
    ARMCII_Assert(src_buf != NULL);
    ARMCI_Copy(src, src_buf, bytes);
  }

  ARMCII_Acc_type_translate(datatype, &type, &type_size);
  count = bytes/type_size;

  ARMCII_Assert_msg(bytes % type_size == 0, 
      "Transfer size is not a multiple of the datatype size");

  /* TODO: Support a local accumulate operation more efficiently */

  gmr_accumulate(dst_mreg, src_buf, dst, count, type, proc);
  gmr_flush(dst_mreg, proc, 1); /* flush_local */

  if (src_buf != src)
    MPI_Free_mem(src_buf);

  return 0;
}
示例#17
0
/** Finish a set of prepared buffers.  Will perform communication and copies as
  * needed to ensure results are in the original buffers.  Temporary space will be
  * freed.
  *
  * @param[in]  orig_bufs Original set of buffers.
  * @param[out] new_bufs  Set of private buffers.
  * @param[in]  count     Number of entries in the buffer list.
  * @param[in]  size      The size of the buffers (all are of the same size).
  */
void ARMCII_Buf_finish_acc_vec(void **orig_bufs, void **new_bufs, int count, int size) {
  int i;

  for (i = 0; i < count; i++) {
    if (orig_bufs[i] != new_bufs[i]) {
      MPI_Free_mem(new_bufs[i]);
    }
  }

  free(new_bufs);
}
示例#18
0
/** Destroy/free a mutex group.  Collective.
  * 
  * @param[in] hdl Group to destroy
  */
int ARMCIX_Destroy_mutexes_hdl(armcix_mutex_hdl_t hdl) {
  MPI_Win_free(&hdl->window);
  
  if (hdl->base) 
    MPI_Free_mem(hdl->base);

  MPI_Comm_free(&hdl->comm);

  free(hdl);
  
  return 0;
}
示例#19
0
void test_put(void)
{
    int me, nproc;
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    MPI_Win dst_win;
    double *dst_buf;
    double src_buf[MAXELEMS];
    int i, j;

    MPI_Alloc_mem(sizeof(double) * nproc * MAXELEMS, MPI_INFO_NULL, &dst_buf);
    MPI_Win_create(dst_buf, sizeof(double) * nproc * MAXELEMS, 1, MPI_INFO_NULL, MPI_COMM_WORLD,
                   &dst_win);

    for (i = 0; i < MAXELEMS; i++)
        src_buf[i] = me + 1.0;

    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, me, 0, dst_win);

    for (i = 0; i < nproc * MAXELEMS; i++)
        dst_buf[i] = 0.0;

    MPI_Win_unlock(me, dst_win);

    MPI_Barrier(MPI_COMM_WORLD);

    for (i = 0; i < nproc; i++) {
        int target = i;

        for (j = 0; j < COUNT; j++) {
            if (verbose)
                printf("%2d -> %2d [%2d]\n", me, target, j);
            MPI_Win_lock(MPI_LOCK_EXCLUSIVE, target, 0, dst_win);
            MPI_Put(&src_buf[j], sizeof(double), MPI_BYTE, target,
                    (me * MAXELEMS + j) * sizeof(double), sizeof(double), MPI_BYTE, dst_win);
            MPI_Win_unlock(target, dst_win);
        }

        for (j = 0; j < COUNT; j++) {
            if (verbose)
                printf("%2d <- %2d [%2d]\n", me, target, j);
            MPI_Win_lock(MPI_LOCK_EXCLUSIVE, target, 0, dst_win);
            MPI_Get(&src_buf[j], sizeof(double), MPI_BYTE, target,
                    (me * MAXELEMS + j) * sizeof(double), sizeof(double), MPI_BYTE, dst_win);
            MPI_Win_unlock(target, dst_win);
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Win_free(&dst_win);
    MPI_Free_mem(dst_buf);
}
示例#20
0
/** One-sided put operation.
  *
  * @param[in] src    Source address (remote)
  * @param[in] dst    Destination address (local)
  * @param[in] size   Number of bytes to transfer
  * @param[in] target Process id to target
  * @return           0 on success, non-zero on failure
  */
int ARMCI_Put(void *src, void *dst, int size, int target) {
  gmr_t *src_mreg, *dst_mreg;

  src_mreg = gmr_lookup(src, ARMCI_GROUP_WORLD.rank);
  dst_mreg = gmr_lookup(dst, target);

  ARMCII_Assert_msg(dst_mreg != NULL, "Invalid remote pointer");

  /* Local operation */
  if (target == ARMCI_GROUP_WORLD.rank) {
    if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) {
      gmr_dla_lock(dst_mreg);
      if (src_mreg) gmr_dla_lock(src_mreg);
    }

    ARMCI_Copy(src, dst, size);
    
    if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) {
      gmr_dla_unlock(dst_mreg);
      if (src_mreg) gmr_dla_unlock(src_mreg);
    }
  }

  /* Origin buffer is private */
  else if (src_mreg == NULL || ARMCII_GLOBAL_STATE.shr_buf_method == ARMCII_SHR_BUF_NOGUARD) {
    gmr_lock(dst_mreg, target);
    gmr_put(dst_mreg, src, dst, size, target);
    gmr_unlock(dst_mreg, target);
  }

  /* COPY: Either origin and target buffers are in the same window and we can't
   * lock the same window twice (MPI semantics) or the user has requested
   * always-copy mode. */
  else {
    void *src_buf;

    MPI_Alloc_mem(size, MPI_INFO_NULL, &src_buf);
    ARMCII_Assert(src_buf != NULL);

    gmr_dla_lock(src_mreg);
    ARMCI_Copy(src, src_buf, size);
    gmr_dla_unlock(src_mreg);

    gmr_lock(dst_mreg, target);
    gmr_put(dst_mreg, src_buf, dst, size, target);
    gmr_unlock(dst_mreg, target);

    MPI_Free_mem(src_buf);
  }

  return 0;
}
示例#21
0
/** Finish a set of prepared buffers.  Will perform communication and copies as
  * needed to ensure results are in the original buffers.  Temporary space will be
  * freed.
  *
  * @param[in]  orig_bufs Original set of buffers.
  * @param[out] new_bufs  Set of private buffers.
  * @param[in]  count     Number of entries in the buffer list.
  * @param[in]  size      The size of the buffers (all are of the same size).
  */
void ARMCII_Buf_finish_read_vec(void **orig_bufs, void **new_bufs, int count, int size) {
  if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) {
    int i;

    for (i = 0; i < count; i++) {
      if (orig_bufs[i] != new_bufs[i]) {
        MPI_Free_mem(new_bufs[i]);
      }
    }

    free(new_bufs);
  }
}
示例#22
0
int main( int argc, char *argv[] )
{
    int errs = 0;
    int rank, size, i;
    MPI_Comm      comm;
    MPI_Win       win;
    int           *winbuf, count;

    MTest_Init( &argc, &argv );

    comm = MPI_COMM_WORLD;

    MPI_Comm_rank( comm, &rank );
    MPI_Comm_size( comm, &size );

    /* Allocate and initialize buf */
    count  = 1000;

    MPI_Alloc_mem( count*sizeof(int), MPI_INFO_NULL, &winbuf );

    MPI_Win_create( winbuf, count * sizeof(int), sizeof(int), MPI_INFO_NULL, 
		    comm, &win );

    /* Clear winbuf */
    memset( winbuf, 0, count*sizeof(int) );

    /* Note that for i == rank, this is a useful operation - it allows 
       the programmer to use direct loads and stores, rather than 
       put/get/accumulate, to access the local memory window. */
    for (i=0; i<size; i++) {
	MPI_Win_lock( MPI_LOCK_EXCLUSIVE, i, 0, win );
	MPI_Win_unlock( i, win );
    }

    for (i=0; i<size; i++) {
	MPI_Win_lock( MPI_LOCK_SHARED, i, 0, win );
	MPI_Win_unlock( i, win );
    }

    MPI_Win_free( &win );
    MPI_Free_mem( winbuf );

    /* If this test completes, no error has been found */
    /* A more complete test may ensure that local locks in fact block
       remote, exclusive locks */
    MTest_Finalize( errs );

    MPI_Finalize();
    return 0;
}
示例#23
0
MTEST_THREAD_RETURN_TYPE run_test(void *arg)
{
    int i;
    double *local_b;

    MPI_Alloc_mem(COUNT * sizeof(double), MPI_INFO_NULL, &local_b);

    for (i = 0; i < LOOPS; i++) {
        MPI_Get(local_b, COUNT, MPI_DOUBLE, 0, 0, COUNT, MPI_DOUBLE, win);
        MPI_Win_flush_all(win);
    }

    MPI_Free_mem(local_b);

    return (MTEST_THREAD_RETURN_TYPE) NULL;
}
示例#24
0
/** One-sided get operation.
  *
  * @param[in] src    Source address (remote)
  * @param[in] dst    Destination address (local)
  * @param[in] size   Number of bytes to transfer
  * @param[in] target Process id to target
  * @return           0 on success, non-zero on failure
  */
int PARMCI_Get(void *src, void *dst, int size, int target) {
  gmr_t *src_mreg, *dst_mreg;

  src_mreg = gmr_lookup(src, target);

  /* If NOGUARD is set, assume the buffer is not shared */
  if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD)
    dst_mreg = gmr_lookup(dst, ARMCI_GROUP_WORLD.rank);
  else
    dst_mreg = NULL;

  ARMCII_Assert_msg(src_mreg != NULL, "Invalid remote pointer");

  /* Local operation */
  if (target == ARMCI_GROUP_WORLD.rank && dst_mreg == NULL) {
    ARMCI_Copy(src, dst, size);
  }

  /* Origin buffer is private */
  else if (dst_mreg == NULL) {
    gmr_get(src_mreg, src, dst, size, target);
    gmr_flush(src_mreg, target, 0); /* it's a round trip so w.r.t. flush, local=remote */
  }

  /* COPY: Either origin and target buffers are in the same window and we can't
   * lock the same window twice (MPI semantics) or the user has requested
   * always-copy mode. */
  else {
    void *dst_buf;

    MPI_Alloc_mem(size, MPI_INFO_NULL, &dst_buf);
    ARMCII_Assert(dst_buf != NULL);

    gmr_get(src_mreg, src, dst_buf, size, target);
    gmr_flush(src_mreg, target, 0); /* it's a round trip so w.r.t. flush, local=remote */

    ARMCI_Copy(dst_buf, dst, size);

    MPI_Free_mem(dst_buf);
  }

  return 0;
}
示例#25
0
/** Destroy a group of ARMCI mutexes.  Collective.
  *
  * @param[in] hdl Handle to the group that should be destroyed.
  * @return        Zero on success, non-zero otherwise.
  */
int ARMCIX_Destroy_mutexes_hdl(armcix_mutex_hdl_t hdl) {
  int i;

  for (i = 0; i < hdl->max_count; i++) {
    MPI_Win_free(&hdl->windows[i]);
  }
    
  if (hdl->bases != NULL) {
    for (i = 0; i < hdl->my_count; i++)
      MPI_Free_mem(hdl->bases[i]);

    free(hdl->bases);
  }

  ARMCI_Group_free(&hdl->grp);
  free(hdl->windows);
  free(hdl);

  return 0;
}
示例#26
0
/** Finish a set of prepared buffers.  Will perform communication and copies as
  * needed to ensure results are in the original buffers.  Temporary space will be
  * freed.
  *
  * @param[in]  orig_bufs Original set of buffers.
  * @param[out] new_bufs  Set of private buffers.
  * @param[in]  count     Number of entries in the buffer list.
  * @param[in]  size      The size of the buffers (all are of the same size).
  */
void ARMCII_Buf_finish_write_vec(void **orig_bufs, void **new_bufs, int count, int size) {
  if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) {
    int i;

    for (i = 0; i < count; i++) {
      if (orig_bufs[i] != new_bufs[i]) {
        gmr_t *mreg = gmr_lookup(orig_bufs[i], ARMCI_GROUP_WORLD.rank);
        ARMCII_Assert(mreg != NULL);

        gmr_dla_lock(mreg);
        ARMCI_Copy(new_bufs[i], orig_bufs[i], size);
        // gmr_put(mreg, new_bufs[i], orig_bufs[i], size, ARMCI_GROUP_WORLD.rank);
        gmr_dla_unlock(mreg);

        MPI_Free_mem(new_bufs[i]);
      }
    }

    free(new_bufs);
  }
}
示例#27
0
/** Free a group of MPI mutexes.  Collective on communicator used at the
  * time of creation.
  *
  * @param[in] hdl Handle to the group that will be freed
  * @return        MPI status
  */
int MPIX_Mutex_free(MPIX_Mutex * hdl_ptr)
{
    MPIX_Mutex hdl = *hdl_ptr;
    int i;

    for (i = 0; i < hdl->max_count; i++) {
        MPI_Win_free(&hdl->windows[i]);
    }

    if (hdl->bases != NULL) {
        for (i = 0; i < hdl->my_count; i++)
            MPI_Free_mem(hdl->bases[i]);

        free(hdl->bases);
    }

    MPI_Comm_free(&hdl->comm);
    free(hdl);
    hdl_ptr = NULL;

    return MPI_SUCCESS;
}
示例#28
0
int main(int argc, char *argv[])
{
    int rank, nproc;
    int i;
    MPI_Win win;
    int *tar_buf = NULL;
    int *orig_buf = NULL;
    MPI_Datatype derived_dtp;
    int errors = 0;

    MPI_Init(&argc, &argv);

    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (nproc < 3) {
        fprintf(stderr, "Run this program with at least 3 processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Alloc_mem(sizeof(int) * DATA_SIZE, MPI_INFO_NULL, &orig_buf);
    MPI_Alloc_mem(sizeof(int) * DATA_SIZE, MPI_INFO_NULL, &tar_buf);

    for (i = 0; i < DATA_SIZE; i++) {
        orig_buf[i] = 1;
        tar_buf[i] = 0;
    }

    MPI_Type_vector(COUNT, BLOCKLENGTH - 1, STRIDE, MPI_INT, &derived_dtp);
    MPI_Type_commit(&derived_dtp);

    MPI_Win_create(tar_buf, sizeof(int) * DATA_SIZE, sizeof(int),
                   MPI_INFO_NULL, MPI_COMM_WORLD, &win);

    /***** test between rank 0 and rank 1 *****/

    if (rank == 1) {
        MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);

        for (i = 0; i < OPS_NUM; i++) {
            MPI_Accumulate(orig_buf, 1, derived_dtp,
                           0, 0, DATA_SIZE - COUNT, MPI_INT, MPI_SUM, win);
            MPI_Win_flush_local(0, win);
        }

        MPI_Win_unlock(0, win);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* check results */
    if (rank == 0) {
        for (i = 0; i < DATA_SIZE - COUNT; i++) {
            if (tar_buf[i] != OPS_NUM) {
                printf("tar_buf[%d] = %d, expected %d\n", i, tar_buf[i], OPS_NUM);
                errors++;
            }
        }
    }

    for (i = 0; i < DATA_SIZE; i++) {
        tar_buf[i] = 0;
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /***** test between rank 0 and rank 2 *****/

    if (rank == 2) {
        MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);

        for (i = 0; i < OPS_NUM; i++) {
            MPI_Accumulate(orig_buf, 1, derived_dtp,
                           0, 0, DATA_SIZE - COUNT, MPI_INT, MPI_SUM, win);
            MPI_Win_flush_local(0, win);
        }

        MPI_Win_unlock(0, win);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* check results */
    if (rank == 0) {
        for (i = 0; i < DATA_SIZE - COUNT; i++) {
            if (tar_buf[i] != OPS_NUM) {
                printf("tar_buf[%d] = %d, expected %d\n", i, tar_buf[i], OPS_NUM);
                errors++;
            }
        }

        if (errors == 0)
            printf(" No Errors\n");
    }

    MPI_Win_free(&win);

    MPI_Type_free(&derived_dtp);

    MPI_Free_mem(orig_buf);
    MPI_Free_mem(tar_buf);

    MPI_Finalize();

    return 0;
}
示例#29
0
文件: lu.c 项目: dmlb2000/nwchem-cml
main(int argc, char *argv[])
{
    int i, j;
    int ch;
    extern char *optarg;
    int edge;
    int size;
    int nloop=5;
    double **ptr_loc;

    MP_INIT(arc,argv);
    MP_PROCS(&nproc);
    MP_MYID(&me);

    while ((ch = getopt(argc, argv, "n:b:p:h")) != -1) {
        switch(ch) {
        case 'n':
            n = atoi(optarg);
            break;
        case 'b':
            block_size = atoi(optarg);
            break;
        case 'p':
            nproc = atoi(optarg);
            break;
        case 'h': {
            printf("Usage: LU, or \n");
            printf("       LU -nMATRIXSIZE -bBLOCKSIZE -pNPROC\n");
            MP_BARRIER();
            MP_FINALIZE();
            exit(0);
        }
        }
    }

    if(me == 0) {
        printf("\n Blocked Dense LU Factorization\n");
        printf("     %d by %d Matrix\n", n, n);
        printf("     %d Processors\n", nproc);
        printf("     %d by %d Element Blocks\n", block_size, block_size);
        printf("\n");
    }

    num_rows = (int) sqrt((double) nproc);
    for (;;) {
        num_cols = nproc/num_rows;
        if (num_rows*num_cols == nproc)
            break;
        num_rows--;
    }

    nblocks = n/block_size;
    if (block_size * nblocks != n) {
        nblocks++;
    }

    edge = n%block_size;
    if (edge == 0) {
        edge = block_size;
    }

#ifdef DEBUG
    if(me == 0)
        for (i=0; i<nblocks; i++) {
            for (j=0; j<nblocks; j++)
                printf("%d ", block_owner(i, j));
            printf("\n");
        }
    MP_BARRIER();
    MP_FINALIZE();
    exit(0);
#endif

    for (i=0; i<nblocks; i++) {
        for (j=0; j<nblocks; j++) {
            if(block_owner(i,j) == me) {
                if ((i == nblocks-1) && (j == nblocks-1)) {
                    size = edge*edge;
                }
                else if ((i == nblocks-1) || (j == nblocks-1)) {
                    size = edge*block_size;
                }
                else {
                    size = block_size*block_size;
                }
                proc_bytes += size*sizeof(double);
            }
        }
    }

    ptr = (void **)malloc(nproc * sizeof(void *));
#ifdef MPI2_ONESIDED
    MPI_Alloc_mem(proc_bytes, MPI_INFO_NULL, &ptr[me]);
    MPI_Win_create((void*)ptr[me], proc_bytes, 1, MPI_INFO_NULL,
                   MPI_COMM_WORLD, &win);
    for(i=0; i<nproc; i++) ptr[i] = (double *)ptr[me];
    MPI_Barrier(MPI_COMM_WORLD);

#else
    /* initialize ARMCI */
    ARMCI_Init();
    ARMCI_Malloc(ptr, proc_bytes);
#endif

    a = (double **)malloc(nblocks*nblocks*sizeof(double *));
    if (a == NULL) {
        fprintf(stderr, "Could not malloc memory for a\n");
        exit(-1);
    }
    ptr_loc = (double **)malloc(nproc*sizeof(double *));
    for(i=0; i<nproc; i++) ptr_loc[i] = (double *)ptr[i];
    for(i=0; i<nblocks; i ++) {
        for(j=0; j<nblocks; j++) {
            a[i+j*nblocks] = ptr_loc[block_owner(i, j)];
            if ((i == nblocks-1) && (j == nblocks-1)) {
                size = edge*edge;
            } else if ((i == nblocks-1) || (j == nblocks-1)) {
                size = edge*block_size;
            } else {
                size = block_size*block_size;
            }
            ptr_loc[block_owner(i, j)] += size;
        }
    }

    /* initialize the array */
    init_array();

    /* barrier to ensure all initialization is done */
    MP_BARRIER();

    /* to remove cold-start misses, all processors touch their own data */
    touch_array(block_size, me);
    MP_BARRIER();

    if(doprint) {
        if(me == 0) {
            printf("Matrix before LU decomposition\n");
            print_array(me);
        }
        MP_BARRIER();
    }

    lu(n, block_size, me); /* cold start */

    /* Starting the timer */

    MP_BARRIER();
    if(me == 0) start_timer();
    for(i=0; i<nloop; i++) lu(n, block_size, me);
    MP_BARRIER();

    /* Timer Stops here */
    if(me == 0)
        printf("\nRunning time = %lf milliseconds.\n\n",  elapsed_time()/nloop);
    printf("%d: (ngets=%d) Communication (get) time = %e milliseconds\n", me, get_cntr, comm_time*1000/nloop);

    if(doprint) {
        if(me == 0) {
            printf("after LU\n");
            print_array(me);
        }
        MP_BARRIER();
    }

    /* done */
#ifdef MPI2_ONESIDED
    MPI_Win_free(&win);
    MPI_Free_mem(ptr[me]);
#else
    ARMCI_Free(ptr[me]);
    ARMCI_Finalize();
#endif
    MP_FINALIZE();
}
示例#30
0
int main(int argc, char *argv[])
{
    MPI_Win win;
    int errors = 0;
    int rank, nproc, i;
    double *orig_buf;
    double *tar_buf;
    MPI_Datatype vector_dtp;

    MPI_Init(&argc, &argv);

    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    MPI_Alloc_mem(sizeof(double) * DATA_SIZE, MPI_INFO_NULL, &orig_buf);
    MPI_Alloc_mem(sizeof(double) * DATA_SIZE, MPI_INFO_NULL, &tar_buf);

    for (i = 0; i < DATA_SIZE; i++) {
        orig_buf[i] = 1.0;
        tar_buf[i]  = 0.5;
    }

    MPI_Type_vector(5 /* count */ , 3 /* blocklength */ , 5 /* stride */ , MPI_DOUBLE, &vector_dtp);
    MPI_Type_commit(&vector_dtp);

    MPI_Win_create(tar_buf, sizeof(double) * DATA_SIZE, sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win);

    if (rank == 0) {
        MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win);
        MPI_Accumulate(orig_buf, 1, vector_dtp, 1, 0, 1, vector_dtp, MPI_SUM, win);
        MPI_Win_unlock(1, win);
    }

    MPI_Win_fence(0, win);

    if (rank == 1) {
        for (i = 0; i < DATA_SIZE; i++) {
            if (i % 5 < 3) {
                if (tar_buf[i] != 1.5) {
                    printf("tar_buf[i] = %f (expected 1.5)\n", tar_buf[i]);
                    errors++;
                }
            }
            else {
                if (tar_buf[i] != 0.5) {
                    printf("tar_buf[i] = %f (expected 0.5)\n", tar_buf[i]);
                    errors++;
                }
            }
        }
    }

    MPI_Type_free(&vector_dtp);

    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 0) {
        MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win);
        MPI_Accumulate(orig_buf, DATA_SIZE, MPI_DOUBLE, 1, 0, DATA_SIZE, MPI_DOUBLE, MPI_SUM, win);
        MPI_Win_unlock(1, win);
    }

    MPI_Win_fence(0, win);

  if (rank == 1) {
        for (i = 0; i < DATA_SIZE; i++) {
            if (i % 5 < 3) {
                if (tar_buf[i] != 2.5) {
                    printf("tar_buf[i] = %f (expected 2.5)\n", tar_buf[i]);
                    errors++;
                }
            }
            else {
                if (tar_buf[i] != 1.5) {
                    printf("tar_buf[i] = %f (expected 1.5)\n", tar_buf[i]);
                    errors++;
                }
            }
        }
    }

    MPI_Win_free(&win);

    MPI_Free_mem(orig_buf);
    MPI_Free_mem(tar_buf);

    if (rank == 1) {
        if (errors == 0)
            printf(" No Errors\n");
    }

    MPI_Finalize();
    return 0;
}