Example #1
/* Logging wrapper: print the caller's arguments for every MPI_Address call,
 * then forward to the real implementation through PMPI_Address. */
int MPI_Address(void *location, MPI_Aint *address)
{

    int rank;
    
    PMPI_Comm_rank(MPI_COMM_WORLD, &rank);    

    fprintf(stderr, "MPI_ADDRESS[%d]: location %0" PRIxPTR " address %0" PRIxPTR "\n",
            rank, (uintptr_t)location, (uintptr_t)address);
    fflush(stderr);
    
    return PMPI_Address(location, address);
}
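Example #1 relies on the MPI profiling interface: the MPI library exports every routine a second time under a PMPI_ prefix, so a tool can define its own MPI_Address, do its bookkeeping, and still reach the real implementation through PMPI_Address. A minimal sketch of the same interposition pattern applied to a different routine (the send_time accumulator and the choice of MPI_Send are illustrative, not taken from this page):

#include <mpi.h>

static double send_time = 0.0;   /* illustrative accumulator for time spent in MPI_Send */

/* Intercept MPI_Send, time the call, then forward to the real implementation. */
int MPI_Send(const void *buf, int count, MPI_Datatype datatype,
             int dest, int tag, MPI_Comm comm)
{
    double t0 = MPI_Wtime();
    int rc = PMPI_Send(buf, count, datatype, dest, tag, comm);
    send_time += MPI_Wtime() - t0;
    return rc;
}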
Example #2
int MPI_Address(void *location, MPI_Aint * address)
{
  return PMPI_Address(location, address);
}
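For reference, this is the calling pattern the wrappers above intercept: MPI_Address returns the absolute address of a variable as an MPI_Aint, typically to fill the displacement array of a struct datatype (Example #3 below does exactly this through the PMPI entry points). A minimal sketch follows, with an illustrative record_t struct that is not part of the examples on this page; note that MPI_Address and MPI_Type_struct were deprecated in MPI-2 and removed in MPI-3 in favor of MPI_Get_address and MPI_Type_create_struct:

#include <mpi.h>

typedef struct {
    double t;     /* illustrative fields */
    int    rank;
} record_t;

/* Build and commit an MPI struct datatype describing record_t. */
static MPI_Datatype make_record_type(void)
{
    record_t     r;
    int          blockcounts[2] = { 1, 1 };
    MPI_Datatype types[2]       = { MPI_DOUBLE, MPI_INT };
    MPI_Aint     displs[2];
    MPI_Datatype newtype;

    MPI_Address(&r.t,    &displs[0]);
    MPI_Address(&r.rank, &displs[1]);

    /* make the displacements relative to the first member */
    displs[1] -= displs[0];
    displs[0]  = 0;

    MPI_Type_struct(2, blockcounts, displs, types, &newtype);
    MPI_Type_commit(&newtype);
    return newtype;
}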
Example #3
/*
 * mpiPi_collect_basics() - all tasks send their basic info to the
 * collectorRank.
 */
void
mpiPi_collect_basics ()
{
  int i = 0;
  double app_time = mpiPi.cumulativeTime;
  int cnt;
  mpiPi_task_info_t mti;
  int blockcounts[4] = { 1, 1, 1, MPIPI_HOSTNAME_LEN_MAX };
  MPI_Datatype types[4] = { MPI_DOUBLE, MPI_DOUBLE, MPI_INT, MPI_CHAR };
  MPI_Aint displs[4];
  MPI_Datatype mti_type;
  MPI_Request *recv_req_arr;

  mpiPi_msg_debug ("Collect Basics\n");

  cnt = 0;

  PMPI_Address (&mti.mpi_time, &displs[cnt++]);
  PMPI_Address (&mti.app_time, &displs[cnt++]);
  PMPI_Address (&mti.rank, &displs[cnt++]);
  PMPI_Address (&mti.hostname, &displs[cnt++]);

  for (i = (cnt - 1); i >= 0; i--)
    {
      displs[i] -= displs[0];
    }
  PMPI_Type_struct (cnt, blockcounts, displs, types, &mti_type);
  PMPI_Type_commit (&mti_type);

  if (mpiPi.rank == mpiPi.collectorRank)
    {
      /* In the case where multiple reports are generated per run,
         only allocate memory for global_task_info once */
      if (mpiPi.global_task_info == NULL)
	{
	  mpiPi.global_task_info =
	    (mpiPi_task_info_t *) calloc (mpiPi.size,
					  sizeof (mpiPi_task_info_t));
	  if (mpiPi.global_task_info == NULL)
	    mpiPi_abort ("Failed to allocate memory for global_task_info");

	  mpiPi_msg_debug
	    ("MEMORY : Allocated for global_task_info :          %13ld\n",
	     mpiPi.size * sizeof (mpiPi_task_info_t));
	}

      bzero (mpiPi.global_task_info, mpiPi.size * sizeof (mpiPi_task_info_t));

      recv_req_arr =
	(MPI_Request *) malloc (sizeof (MPI_Request) * mpiPi.size);
      for (i = 0; i < mpiPi.size; i++)
	{
	  mpiPi_task_info_t *p = &mpiPi.global_task_info[i];
	  if (i != mpiPi.collectorRank)
	    {
	      PMPI_Irecv (p, 1, mti_type, i, mpiPi.tag,
			  mpiPi.comm, &(recv_req_arr[i]));
	    }
	  else
	    {
	      strcpy (p->hostname, mpiPi.hostname);
	      p->app_time = app_time;
	      p->rank = mpiPi.rank;
	      recv_req_arr[i] = MPI_REQUEST_NULL;
	    }
	}
      PMPI_Waitall (mpiPi.size, recv_req_arr, MPI_STATUSES_IGNORE);
      free (recv_req_arr);
      /* task MPI time is calculated from callsites data 
         in mpiPi_insert_callsite_records.
       */
      for (i = 0; i < mpiPi.size; i++)
	mpiPi.global_task_info[i].mpi_time = 0.0;
    }
  else
    {
      strcpy (mti.hostname, mpiPi.hostname);
      mti.app_time = app_time;
      mti.rank = mpiPi.rank;
      PMPI_Send (&mti, 1, mti_type, mpiPi.collectorRank,
		 mpiPi.tag, mpiPi.comm);
    }

  PMPI_Type_free (&mti_type);

  return;
}
Example #4
static int two_phase_read_and_exch(mca_io_ompio_file_t *fh,
				   void *buf,
				   MPI_Datatype datatype,
				   mca_io_ompio_access_array_t *others_req,
				   struct iovec *offset_len,
				   int contig_access_count,
				   OMPI_MPI_OFFSET_TYPE min_st_offset,
				   OMPI_MPI_OFFSET_TYPE fd_size,
				   OMPI_MPI_OFFSET_TYPE *fd_start,
				   OMPI_MPI_OFFSET_TYPE *fd_end,
				   Flatlist_node *flat_buf,
				   size_t *buf_idx, int striping_unit,
				   int two_phase_num_io_procs,
				   int *aggregator_list){


    int ret=OMPI_SUCCESS, i = 0, j = 0, ntimes = 0, max_ntimes = 0;
    int m = 0;
    int *curr_offlen_ptr=NULL, *count=NULL, *send_size=NULL, *recv_size=NULL;
    int *partial_send=NULL, *start_pos=NULL, req_len=0, flag=0;
    int *recd_from_proc=NULL;
    MPI_Aint buftype_extent=0;
    size_t byte_size = 0;
    OMPI_MPI_OFFSET_TYPE st_loc=-1, end_loc=-1, off=0, done=0, for_next_iter=0;
    OMPI_MPI_OFFSET_TYPE size=0, req_off=0, real_size=0, real_off=0, len=0;
    OMPI_MPI_OFFSET_TYPE for_curr_iter=0;
    char *read_buf=NULL, *tmp_buf=NULL;
    MPI_Datatype byte = MPI_BYTE;
    int two_phase_cycle_buffer_size=0;

    opal_datatype_type_size(&byte->super,
			    &byte_size);

    for (i = 0; i < fh->f_size; i++){
	if (others_req[i].count) {
	    st_loc = others_req[i].offsets[0];
	    end_loc = others_req[i].offsets[0];
	    break;
	}
    }

    for (i=0;i<fh->f_size;i++){
	for(j=0;j< others_req[i].count; j++){
	    st_loc =
		OMPIO_MIN(st_loc, others_req[i].offsets[j]);
	    end_loc =
		OMPIO_MAX(end_loc, (others_req[i].offsets[j] +
				    others_req[i].lens[j] - 1));
	}
    }

    fh->f_get_bytes_per_agg ( &two_phase_cycle_buffer_size);
    ntimes = (int)((end_loc - st_loc + two_phase_cycle_buffer_size)/
		   two_phase_cycle_buffer_size);

    if ((st_loc == -1) && (end_loc == -1)){
	ntimes = 0;
    }

    fh->f_comm->c_coll.coll_allreduce (&ntimes,
				       &max_ntimes,
				       1,
				       MPI_INT,
				       MPI_MAX,
				       fh->f_comm,
				       fh->f_comm->c_coll.coll_allreduce_module);

    if (ntimes){
	read_buf = (char *) calloc (two_phase_cycle_buffer_size, sizeof(char));
	if ( NULL == read_buf ){
	    ret =  OMPI_ERR_OUT_OF_RESOURCE;
	    goto exit;
	}
    }

    curr_offlen_ptr = (int *)calloc (fh->f_size,
				     sizeof(int));
    if (NULL == curr_offlen_ptr){
	ret = OMPI_ERR_OUT_OF_RESOURCE;
	goto exit;
    }

    count = (int *)calloc (fh->f_size,
			   sizeof(int));
    if (NULL == count){
	ret = OMPI_ERR_OUT_OF_RESOURCE;
	goto exit;
    }

    partial_send = (int *)calloc(fh->f_size, sizeof(int));
    if ( NULL == partial_send ){
	ret = OMPI_ERR_OUT_OF_RESOURCE;
	goto exit;
    }

    send_size = (int *)malloc(fh->f_size * sizeof(int));
    if (NULL == send_size){
	ret = OMPI_ERR_OUT_OF_RESOURCE;
	goto exit;
    }

    recv_size = (int *)malloc(fh->f_size * sizeof(int));
    if (NULL == recv_size){
	ret = OMPI_ERR_OUT_OF_RESOURCE;
	goto exit;
    }

    recd_from_proc = (int *)calloc(fh->f_size,sizeof(int));
    if (NULL == recd_from_proc){
	ret = OMPI_ERR_OUT_OF_RESOURCE;
	goto exit;
    }

    start_pos = (int *) calloc(fh->f_size, sizeof(int));
    if ( NULL == start_pos ){
        ret = OMPI_ERR_OUT_OF_RESOURCE;
        goto exit;
    }

    done = 0;
    off = st_loc;
    for_curr_iter = for_next_iter = 0;

    ompi_datatype_type_extent(datatype, &buftype_extent);

    for (m=0; m<ntimes; m++) {

	size = OMPIO_MIN((unsigned)two_phase_cycle_buffer_size, end_loc-st_loc+1-done);
	real_off = off - for_curr_iter;
	real_size = size + for_curr_iter;

	for (i=0; i<fh->f_size; i++) count[i] = send_size[i] = 0;
	for_next_iter = 0;

	for (i=0; i<fh->f_size; i++) {
	    if (others_req[i].count) {
		start_pos[i] = curr_offlen_ptr[i];
		for (j=curr_offlen_ptr[i]; j<others_req[i].count;
		     j++) {
		    if (partial_send[i]) {
			/* this request may have been partially
			   satisfied in the previous iteration. */
			req_off = others_req[i].offsets[j] +
			    partial_send[i];
			req_len = others_req[i].lens[j] -
			    partial_send[i];
			partial_send[i] = 0;
			/* modify the off-len pair to reflect this change */
			others_req[i].offsets[j] = req_off;
			others_req[i].lens[j] = req_len;
		    }
		    else {
			req_off = others_req[i].offsets[j];
			req_len = others_req[i].lens[j];
		    }
		    if (req_off < real_off + real_size) {
			count[i]++;
			PMPI_Address(read_buf+req_off-real_off,
				     &(others_req[i].mem_ptrs[j]));

			send_size[i] += (int)(OMPIO_MIN(real_off + real_size - req_off,
							(OMPI_MPI_OFFSET_TYPE)req_len));

			if (real_off+real_size-req_off < (OMPI_MPI_OFFSET_TYPE)req_len) {
			    partial_send[i] = (int) (real_off + real_size - req_off);
			    if ((j+1 < others_req[i].count) &&
				(others_req[i].offsets[j+1] <
				 real_off+real_size)) {
				/* this is the case illustrated in the
				   figure above. */
				for_next_iter = OMPIO_MAX(for_next_iter,
							  real_off + real_size - others_req[i].offsets[j+1]);
				/* max because it must cover requests
				   from different processes */
			    }
			    break;
			}
		    }
		    else break;
		}
		curr_offlen_ptr[i] = j;
	    }
	}
	flag = 0;
	for (i=0; i<fh->f_size; i++)
	    if (count[i]) flag = 1;

	if (flag) {

#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
	    start_read_time = MPI_Wtime();
#endif

	    len = size * byte_size;
	    fh->f_io_array = (mca_io_ompio_io_array_t *)calloc
		(1,sizeof(mca_io_ompio_io_array_t));
	    if (NULL == fh->f_io_array) {
		opal_output(1, "OUT OF MEMORY\n");
                ret = OMPI_ERR_OUT_OF_RESOURCE;
                goto exit;
	    }
	    fh->f_io_array[0].offset = (IOVBASE_TYPE *)(intptr_t)off;
	    fh->f_io_array[0].length = len;
	    fh->f_io_array[0].memory_address =
		read_buf+for_curr_iter;
	    fh->f_num_of_io_entries = 1;

	    if (fh->f_num_of_io_entries){
		if ( 0 > fh->f_fbtl->fbtl_preadv (fh)) {
		    opal_output(1, "READ FAILED\n");
                    ret = OMPI_ERROR;
                    goto exit;
		}
	    }

#if 0
	    int ii;
	    printf("%d: len/4 : %lld\n",
		   fh->f_rank,
		   len/4);
	    for (ii = 0; ii < len/4 ;ii++){
		printf("%d: read_buf[%d]: %ld\n",
		       fh->f_rank,
		       ii,
		       (int *)read_buf[ii]);
	    }
#endif
	    fh->f_num_of_io_entries = 0;
	    if (NULL != fh->f_io_array) {
		free (fh->f_io_array);
		fh->f_io_array = NULL;
	    }

#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
	    end_read_time = MPI_Wtime();
	    read_time += (end_read_time - start_read_time);
#endif


	}

	for_curr_iter = for_next_iter;

	for (i=0; i< fh->f_size; i++){
	    recv_size[i]  = 0;
	}
	two_phase_exchange_data(fh, buf, offset_len,
				send_size, start_pos, recv_size, count,
				partial_send, recd_from_proc,
				contig_access_count,
				min_st_offset, fd_size, fd_start, fd_end,
				flat_buf, others_req, m, buf_idx,
				buftype_extent, striping_unit, two_phase_num_io_procs,
				aggregator_list);

	if (for_next_iter){
	    tmp_buf = (char *) calloc (for_next_iter, sizeof(char));
	    memcpy(tmp_buf,
		   read_buf+real_size-for_next_iter,
		   for_next_iter);
	    free(read_buf);
	    read_buf = (char *)malloc(for_next_iter+two_phase_cycle_buffer_size);
	    memcpy(read_buf, tmp_buf, for_next_iter);
	    free(tmp_buf);
	}

	off += size;
	done += size;
    }

    for (i=0; i<fh->f_size; i++) count[i] = send_size[i] = 0;
    for (m=ntimes; m<max_ntimes; m++)
	two_phase_exchange_data(fh, buf, offset_len, send_size,
				start_pos, recv_size, count,
				partial_send, recd_from_proc,
				contig_access_count,
				min_st_offset, fd_size, fd_start, fd_end,
				flat_buf, others_req, m, buf_idx,
				buftype_extent, striping_unit, two_phase_num_io_procs,
				aggregator_list);

exit:
    free (read_buf);
    free (curr_offlen_ptr);
    free (count);
    free (partial_send);
    free (send_size);
    free (recv_size);
    free (recd_from_proc);
    free (start_pos);

    return ret;

}