int
mca_fcoll_two_phase_file_write_all (mca_io_ompio_file_t *fh,
                                    void *buf,
                                    int count,
                                    struct ompi_datatype_t *datatype,
                                    ompi_status_public_t *status)
{

    

  int i, j,interleave_count=0, striping_unit=0;
  uint32_t iov_count=0,ti;
  struct iovec *decoded_iov=NULL, *temp_iov=NULL;
  size_t max_data = 0;
  long long_max_data = 0, long_total_bytes = 0;
  int domain_size=0, *count_my_req_per_proc=NULL, count_my_req_procs;
  int count_other_req_procs,  ret=OMPI_SUCCESS;
  size_t *buf_indices=NULL;
  int local_count = 0, local_size=0,*aggregator_list = NULL;
  struct iovec *iov = NULL;
  
  OMPI_MPI_OFFSET_TYPE start_offset, end_offset, fd_size;
  OMPI_MPI_OFFSET_TYPE *start_offsets=NULL, *end_offsets=NULL;
  OMPI_MPI_OFFSET_TYPE *fd_start=NULL, *fd_end=NULL, min_st_offset;
  Flatlist_node *flat_buf=NULL;
  mca_io_ompio_access_array_t *my_req=NULL, *others_req=NULL;
  MPI_Aint send_buf_addr;
#if TIME_BREAKDOWN
  print_entry nentry;
#endif
  
  
  if (opal_datatype_is_contiguous_memory_layout(&datatype->super,1)) {
    fh->f_flags = fh->f_flags |  OMPIO_CONTIGUOUS_MEMORY;
  }
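  /* Non-contiguous user buffer: decode the datatype into an iovec list and
     rebase each entry on buf, so that the exchange phase can address the
     send buffer purely by displacement. */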
  
  
  if (! (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) {
    
    ret =   ompi_io_ompio_decode_datatype (fh,
					   datatype,
					   count,
					   buf,
					   &max_data,
					   &temp_iov,
					   &iov_count);
    if (OMPI_SUCCESS != ret ){
      goto exit;
    }

    send_buf_addr = (OPAL_PTRDIFF_TYPE)buf;
    decoded_iov = (struct iovec *)malloc
      (iov_count * sizeof(struct iovec));
    if (NULL == decoded_iov) {
      ret = OMPI_ERR_OUT_OF_RESOURCE;
      goto exit;
    }

    for (ti = 0; ti < iov_count; ti ++){
      decoded_iov[ti].iov_base = (IOVBASE_TYPE *)(
	(OPAL_PTRDIFF_TYPE)temp_iov[ti].iov_base - 
	send_buf_addr);
      decoded_iov[ti].iov_len = 
	temp_iov[ti].iov_len ;
      #if DEBUG_ON
      printf("d_offset[%d]: %ld, d_len[%d]: %ld\n",
	     ti, (OPAL_PTRDIFF_TYPE)decoded_iov[ti].iov_base,
	     ti, decoded_iov[ti].iov_len);
      #endif
    }
    
  }
  else{
    max_data = count * datatype->super.size;
  }
    
  if ( MPI_STATUS_IGNORE != status ) {
    status->_ucount = max_data;
  }
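  /* Decide how many aggregator processes perform the actual file I/O.  If the
     MCA parameter is left at -1, derive the count from the group size set up
     by ompi_io_ompio_set_aggregator_props, and never use more aggregators
     than there are processes in the communicator. */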

  
  if(-1 == mca_fcoll_two_phase_num_io_procs){
    ret = ompi_io_ompio_set_aggregator_props (fh, 
					      mca_fcoll_two_phase_num_io_procs,
					      max_data);
    if ( OMPI_SUCCESS != ret){
      goto exit;
    }
    
    mca_fcoll_two_phase_num_io_procs = 
      ceil((float)fh->f_size/fh->f_procs_per_group);
    
  }
  
  if (mca_fcoll_two_phase_num_io_procs > fh->f_size){
    mca_fcoll_two_phase_num_io_procs = fh->f_size;
  }
  
#if DEBUG_ON
  printf("Number of aggregators : %ld\n", mca_fcoll_two_phase_num_io_procs);
#endif

  aggregator_list = (int *) malloc (mca_fcoll_two_phase_num_io_procs *
				    sizeof(int));
  
  if ( NULL == aggregator_list ) {
    ret = OMPI_ERR_OUT_OF_RESOURCE;
    goto exit;
  }
  
  for (i =0; i< mca_fcoll_two_phase_num_io_procs; i++){
    aggregator_list[i] = i;
  }
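  /* Build this process's view of the file for the current request: a list of
     (file offset, length) pairs covering max_data bytes, with the number of
     entries returned in local_count. */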
  
  
  ret = ompi_io_ompio_generate_current_file_view (fh, 
						  max_data, 
						  &iov, 
						  &local_count);
  
  
  if ( OMPI_SUCCESS != ret ){
    goto exit;
  }
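  /* Sum the per-process byte counts so every rank knows the total amount of
     data moved by this collective (only used for the DEBUG output below). */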
  
  
  /* reduce long values, as the read path below does; the buffers are not doubles */
  long_max_data = (long) max_data;
  ret = fh->f_comm->c_coll.coll_allreduce (&long_max_data,
					   &long_total_bytes,
					   1,
					   MPI_LONG,
					   MPI_SUM,
					   fh->f_comm,
					   fh->f_comm->c_coll.coll_allreduce_module);

  if ( OMPI_SUCCESS != ret ) {
    goto exit;
  }
  
  
    
  if (!(fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) {
    
    /* This data structure translates between OMPIO and ROMIO; it is a bit
       hacky, but it lets us re-use ROMIO's code for handling the
       non-contiguous file type. */
    flat_buf = (Flatlist_node *)malloc(sizeof(Flatlist_node));
    if ( NULL == flat_buf ){
      ret = OMPI_ERR_OUT_OF_RESOURCE;
      goto exit;
    }
    
    flat_buf->type = datatype;
    flat_buf->next = NULL;
    flat_buf->count = 0;
       
    
    local_size = iov_count/count;
    
    flat_buf->indices =
      (OMPI_MPI_OFFSET_TYPE *)malloc(local_size *
				     sizeof(OMPI_MPI_OFFSET_TYPE));
    if ( NULL == flat_buf->indices ){
      ret = OMPI_ERR_OUT_OF_RESOURCE;
      goto exit;
    }

    flat_buf->blocklens =
      (OMPI_MPI_OFFSET_TYPE *)malloc(local_size *
				     sizeof(OMPI_MPI_OFFSET_TYPE));
    if ( NULL == flat_buf->blocklens ){
      ret = OMPI_ERR_OUT_OF_RESOURCE;
      goto exit;
    }
    
    flat_buf->count = local_size;
    i=0;j=0;
    while(j < local_size){
      flat_buf->indices[j] = (OMPI_MPI_OFFSET_TYPE)(intptr_t)decoded_iov[i].iov_base;
      flat_buf->blocklens[j] = decoded_iov[i].iov_len;
      if(i < (int)iov_count)
	i+=1;
      j+=1;
    }
    
#if DEBUG_ON
    printf("flat_buf_count : %d\n", flat_buf->count);
    for(i=0;i<flat_buf->count;i++){
      printf("%d: blocklen[%d] : %lld, indices[%d]: %lld \n",
	     fh->f_rank, i, flat_buf->blocklens[i], i ,flat_buf->indices[i]);
	 
    }
#endif
  }
  
#if DEBUG_ON
    printf("%d: fcoll:two_phase:write_all->total_bytes:%ld, local_count: %d\n",
	   fh->f_rank,total_bytes, local_count);
    for (i=0 ; i<local_count ; i++) {
      printf("%d: fcoll:two_phase:write_all:OFFSET:%ld,LENGTH:%ld\n",
	     fh->f_rank,
	     (size_t)iov[i].iov_base,
	     (size_t)iov[i].iov_len);
    }
    
    
#endif
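    /* First and last file byte touched by this process; gather every rank's
       range so that interleaving can be detected and the file domains can be
       partitioned. */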
    
    start_offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t)iov[0].iov_base;
    if ( 0 < local_count ) {
      end_offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t)iov[local_count-1].iov_base +
	(OMPI_MPI_OFFSET_TYPE)iov[local_count-1].iov_len - 1;
    }
    else {
      end_offset = 0;
    }
    
#if DEBUG_ON
    printf("%d: fcoll:two_phase:write_all:START OFFSET:%ld,END OFFSET:%ld\n",
	   fh->f_rank,
	   (size_t)start_offset,
	   (size_t)end_offset);
    
#endif

    start_offsets = (OMPI_MPI_OFFSET_TYPE *)malloc
	(fh->f_size*sizeof(OMPI_MPI_OFFSET_TYPE));
 
    if ( NULL == start_offsets ){
      ret = OMPI_ERR_OUT_OF_RESOURCE;
      goto exit; 
    }

    end_offsets = (OMPI_MPI_OFFSET_TYPE *)malloc
	(fh->f_size*sizeof(OMPI_MPI_OFFSET_TYPE));
    
    if ( NULL == end_offsets ){
      ret =  OMPI_ERR_OUT_OF_RESOURCE;
      goto exit;
    }
    
    
    ret = fh->f_comm->c_coll.coll_allgather(&start_offset,
					    1,
					    OMPI_OFFSET_DATATYPE,
					    start_offsets,
					    1,
					    OMPI_OFFSET_DATATYPE,
					    fh->f_comm,
					    fh->f_comm->c_coll.coll_allgather_module);

    if ( OMPI_SUCCESS != ret ){
      goto exit;
    }


    ret = fh->f_comm->c_coll.coll_allgather(&end_offset,
					    1,
					    OMPI_OFFSET_DATATYPE,
					    end_offsets,
					    1,
					    OMPI_OFFSET_DATATYPE,
					    fh->f_comm,
					    fh->f_comm->c_coll.coll_allgather_module);


    if ( OMPI_SUCCESS != ret ){
      goto exit;
    }
				      
#if DEBUG_ON
    for (i=0;i<fh->f_size;i++){
	printf("%d: fcoll:two_phase:write_all:start[%d]:%ld,end[%d]:%ld\n",
	       fh->f_rank,i,
	       (size_t)start_offsets[i],i,
	       (size_t)end_offsets[i]);
    }
#endif
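    /* Count how many ranks start before their predecessor has finished,
       i.e. how interleaved the aggregate access pattern is. */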



    for (i=1; i<fh->f_size; i++){
      if ((start_offsets[i] < end_offsets[i-1]) && 
	  (start_offsets[i] <= end_offsets[i])){
	interleave_count++;
      }
    }

#if DEBUG_ON
    printf("%d: fcoll:two_phase:write_all:interleave_count:%d\n",
	   fh->f_rank,interleave_count);
#endif 
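    /* Split the aggregate access range into contiguous file domains, one per
       aggregator; fd_start[i]/fd_end[i] delimit the region that aggregator i
       is responsible for. */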
    
    
    ret = mca_fcoll_two_phase_domain_partition(fh,
					       start_offsets,
					       end_offsets,
					       &min_st_offset,
					       &fd_start,
					       &fd_end,
					       domain_size, 
					       &fd_size,
					       striping_unit,
					       mca_fcoll_two_phase_num_io_procs);
    if ( OMPI_SUCCESS != ret ){
      goto exit;
    }
    
    
#if  DEBUG_ON
	for (i=0;i<mca_fcoll_two_phase_num_io_procs;i++){
	  printf("fd_start[%d] : %lld, fd_end[%d] : %lld, local_count: %d\n",
		   i, fd_start[i], i, fd_end[i], local_count);
	}
#endif
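	/* Work out which pieces of this process's accesses fall into each
	   aggregator's file domain, and where in the local buffer they live
	   (buf_indices). */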
	
	
	ret = mca_fcoll_two_phase_calc_my_requests (fh,
						    iov,
						    local_count,
						    min_st_offset,
						    fd_start,
						    fd_end,
						    fd_size,
						    &count_my_req_procs,
						    &count_my_req_per_proc,
						    &my_req,
						    &buf_indices,
						    striping_unit,
						    mca_fcoll_two_phase_num_io_procs,
						    aggregator_list);
	if ( OMPI_SUCCESS != ret ){
	  goto exit;
	}
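	/* Build the inverse mapping: each aggregator learns which processes
	   will send it data for its file domain, and how much. */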
	
	

	ret = mca_fcoll_two_phase_calc_others_requests(fh,
						       count_my_req_procs,
						       count_my_req_per_proc,
						       my_req,
						       &count_other_req_procs,
						       &others_req);
	if (OMPI_SUCCESS != ret ){
	  goto exit;
	}
	
	
#if DEBUG_ON
	printf("count_other_req_procs : %d\n", count_other_req_procs);
#endif

#if TIME_BREAKDOWN
	  start_exch = MPI_Wtime();
#endif
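	/* Exchange phase + write phase: ship the data to the responsible
	   aggregators, which then write their file domains to disk. */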
	
	ret = two_phase_exch_and_write(fh,
				       buf,
				       datatype,
				       others_req,
				       iov,
				       local_count,
				       min_st_offset,
				       fd_size,
				       fd_start,
				       fd_end,
				       flat_buf,
				       buf_indices,
				       striping_unit,
				       aggregator_list);

	if (OMPI_SUCCESS != ret){
	  goto exit;
	}
	

#if TIME_BREAKDOWN
	end_exch = MPI_Wtime();
	exch_write += (end_exch - start_exch);
	
	nentry.time[0] = write_time;
	nentry.time[1] = comm_time;
	nentry.time[2] = exch_write;
	if (is_aggregator(fh->f_rank,
			  mca_fcoll_two_phase_num_io_procs,
			  aggregator_list)){
	  nentry.aggregator = 1;
	}
	else{
	  nentry.aggregator = 0;
	}
	nentry.nprocs_for_coll = mca_fcoll_two_phase_num_io_procs;
	if (!ompi_io_ompio_full_print_queue(WRITE_PRINT_QUEUE)){
	  ompi_io_ompio_register_print_entry(WRITE_PRINT_QUEUE,
					     nentry);
	}
#endif

 exit : 
	if (flat_buf != NULL) {

	  if (flat_buf->blocklens != NULL) {
	    free (flat_buf->blocklens);
	  }
	  
	  if (flat_buf->indices != NULL) {
	    free (flat_buf->indices);
	  }
	  free (flat_buf);

	}



	if (start_offsets != NULL) {
	  free(start_offsets);
	}
	
	if (end_offsets != NULL){
	  free(end_offsets);
	}
	if (aggregator_list != NULL){
	  free(aggregator_list);
	}

	/* mirror the cleanup in the read path: release the remaining scratch
	   buffers (free(NULL) is a no-op) */
	free (decoded_iov);
	free (fd_start);
	free (fd_end);
	free (buf_indices);
	free (count_my_req_per_proc);
	free (my_req);
	free (others_req);

	return ret;
}
int
mca_fcoll_two_phase_file_read_all (mca_io_ompio_file_t *fh,
				   void *buf,
				   int count,
				   struct ompi_datatype_t *datatype,
				   ompi_status_public_t *status)
{

    int ret = OMPI_SUCCESS, i = 0, j = 0, interleave_count = 0, striping_unit = 0;
    MPI_Aint recv_buf_addr = 0;
    uint32_t iov_count = 0, ti = 0;
    struct iovec *decoded_iov = NULL, *temp_iov = NULL, *iov = NULL;
    size_t max_data = 0;
    long long_max_data = 0, long_total_bytes = 0;
    int domain_size=0, *count_my_req_per_proc=NULL, count_my_req_procs = 0;
    int count_other_req_procs;
    size_t *buf_indices=NULL;
    int *aggregator_list = NULL, local_count = 0, local_size = 0;
    int two_phase_num_io_procs=1;
    OMPI_MPI_OFFSET_TYPE start_offset = 0, end_offset = 0, fd_size = 0;
    OMPI_MPI_OFFSET_TYPE *start_offsets=NULL, *end_offsets=NULL;
    OMPI_MPI_OFFSET_TYPE *fd_start=NULL, *fd_end=NULL, min_st_offset = 0;
    Flatlist_node *flat_buf=NULL;
    mca_io_ompio_access_array_t *my_req=NULL, *others_req=NULL;
#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
    mca_common_ompio_print_entry nentry;
#endif
//    if (opal_datatype_is_predefined(&datatype->super)) {
//	fh->f_flags = fh->f_flags |  OMPIO_CONTIGUOUS_MEMORY;
//    }
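    /* As in the write path: for a non-contiguous user buffer, decode the
       datatype into an iovec list rebased on buf. */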

    if (! (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) {
	ret =   fh->f_decode_datatype ((struct mca_io_ompio_file_t *)fh,
				       datatype,
				       count,
				       buf,
				       &max_data,
				       &temp_iov,
				       &iov_count);
	if (OMPI_SUCCESS != ret ){
	    goto exit;
	}

	recv_buf_addr = (size_t)(buf);
	decoded_iov  = (struct iovec *) calloc
	    (iov_count, sizeof(struct iovec));
	if (NULL == decoded_iov) {
	    ret = OMPI_ERR_OUT_OF_RESOURCE;
	    goto exit;
	}

	for (ti = 0; ti < iov_count; ti++){

	    decoded_iov[ti].iov_base = (IOVBASE_TYPE *)
		((OPAL_PTRDIFF_TYPE)temp_iov[ti].iov_base - recv_buf_addr);
	    decoded_iov[ti].iov_len = temp_iov[ti].iov_len;
#if DEBUG
	    printf("d_offset[%d]: %ld, d_len[%d]: %ld\n",
		   ti, (OPAL_PTRDIFF_TYPE)decoded_iov[ti].iov_base,
		   ti, decoded_iov[ti].iov_len);
#endif
	}

    }
    else{
	max_data = count * datatype->super.size;
    }

    if ( MPI_STATUS_IGNORE != status ) {
	status->_ucount = max_data;
    }

    fh->f_get_num_aggregators (&two_phase_num_io_procs);
    if (-1 == two_phase_num_io_procs ){
	ret = fh->f_set_aggregator_props ((struct mca_io_ompio_file_t *)fh,
					  two_phase_num_io_procs,
					  max_data);
	if (OMPI_SUCCESS != ret){
            goto exit;
	}

	two_phase_num_io_procs = fh->f_final_num_aggrs;

    }

    if (two_phase_num_io_procs > fh->f_size){
	two_phase_num_io_procs = fh->f_size;
    }

    aggregator_list = (int *) calloc (two_phase_num_io_procs, sizeof(int));
    if (NULL == aggregator_list){
	ret = OMPI_ERR_OUT_OF_RESOURCE;
	goto exit;
    }
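    /* Unlike the write path, spread the aggregators evenly across the
       communicator instead of taking the first ranks. */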

    for (i=0; i< two_phase_num_io_procs; i++){
	aggregator_list[i] = i * fh->f_size / two_phase_num_io_procs;
    }

    ret = fh->f_generate_current_file_view ((struct mca_io_ompio_file_t *)fh,
					    max_data,
					    &iov,
					    &local_count);

    if (OMPI_SUCCESS != ret){
	goto exit;
    }

    long_max_data = (long) max_data;
    ret = fh->f_comm->c_coll.coll_allreduce (&long_max_data,
					     &long_total_bytes,
					     1,
					     MPI_LONG,
					     MPI_SUM,
					     fh->f_comm,
					     fh->f_comm->c_coll.coll_allreduce_module);

    if ( OMPI_SUCCESS != ret ) {
	goto exit;
    }

    if (!(fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) {

	/* This data structure translates between OMPIO and ROMIO; it is a bit
	   hacky, but it lets us re-use ROMIO's code for handling the
	   non-contiguous file type.  The flattened datatype that OMPIO keeps
	   in decoded_iov is translated into flat_buf here. */

	flat_buf = (Flatlist_node *)calloc(1, sizeof(Flatlist_node));
	if ( NULL == flat_buf ){
	    ret = OMPI_ERR_OUT_OF_RESOURCE;
	    goto exit;
	}

	flat_buf->type = datatype;
	flat_buf->next = NULL;
	flat_buf->count = 0;
	flat_buf->indices = NULL;
	flat_buf->blocklens = NULL;

	if ( 0 < count ) {
	    local_size = OMPIO_MAX(1,iov_count/count);
	}
	else {
	    local_size = 0;
	}


	if ( 0 < local_size ) {
	    flat_buf->indices =
		(OMPI_MPI_OFFSET_TYPE *)calloc(local_size,
					       sizeof(OMPI_MPI_OFFSET_TYPE));
	    if (NULL == flat_buf->indices){
		ret = OMPI_ERR_OUT_OF_RESOURCE;
		goto exit;
	    }

	    flat_buf->blocklens =
		(OMPI_MPI_OFFSET_TYPE *)calloc(local_size,
					       sizeof(OMPI_MPI_OFFSET_TYPE));
	    if ( NULL == flat_buf->blocklens ){
		ret = OMPI_ERR_OUT_OF_RESOURCE;
		goto exit;
	    }
	}
	flat_buf->count = local_size;
        for (j = 0 ; j < local_size ; ++j) {
	    flat_buf->indices[j] = (OMPI_MPI_OFFSET_TYPE)(intptr_t)decoded_iov[j].iov_base;
	    flat_buf->blocklens[j] = decoded_iov[j].iov_len;
	}

#if DEBUG
	printf("flat_buf count: %d\n",
	       flat_buf->count);
	for(i=0;i<flat_buf->count;i++){
	    printf("%d: blocklen[%d] : %lld, indices[%d]: %lld\n",
		   fh->f_rank, i, flat_buf->blocklens[i], i ,flat_buf->indices[i]);
	}
#endif
    }

#if DEBUG
    printf("%d: total_bytes:%ld, local_count: %d\n",
	   fh->f_rank, long_total_bytes, local_count);
    for (i=0 ; i<local_count ; i++) {
	printf("%d: fcoll:two_phase:read_all:OFFSET:%ld,LENGTH:%ld\n",
	       fh->f_rank,
	       (size_t)iov[i].iov_base,
	       (size_t)iov[i].iov_len);
    }
#endif

    start_offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t)iov[0].iov_base;
    if ( 0 < local_count ) {
	end_offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t)iov[local_count-1].iov_base +
	    (OMPI_MPI_OFFSET_TYPE)(intptr_t)iov[local_count-1].iov_len - 1;
    }
    else {
	end_offset = 0;
    }
#if DEBUG
    printf("%d: START OFFSET:%ld, END OFFSET:%ld\n",
	   fh->f_rank,
	   (size_t)start_offset,
	   (size_t)end_offset);
#endif

    start_offsets = (OMPI_MPI_OFFSET_TYPE *)calloc
	(fh->f_size, sizeof(OMPI_MPI_OFFSET_TYPE));

    if ( NULL == start_offsets ){
	ret = OMPI_ERR_OUT_OF_RESOURCE;
	goto exit;
    }

    end_offsets = (OMPI_MPI_OFFSET_TYPE *)calloc
	(fh->f_size, sizeof(OMPI_MPI_OFFSET_TYPE));

    if (NULL == end_offsets){
	ret = OMPI_ERR_OUT_OF_RESOURCE;
	goto exit;
    }

    ret = fh->f_comm->c_coll.coll_allgather(&start_offset,
					    1,
					    OMPI_OFFSET_DATATYPE,
					    start_offsets,
					    1,
					    OMPI_OFFSET_DATATYPE,
					    fh->f_comm,
					    fh->f_comm->c_coll.coll_allgather_module);

    if ( OMPI_SUCCESS != ret ){
	goto exit;
    }

    ret = fh->f_comm->c_coll.coll_allgather(&end_offset,
					    1,
					    OMPI_OFFSET_DATATYPE,
					    end_offsets,
					    1,
					    OMPI_OFFSET_DATATYPE,
					    fh->f_comm,
					    fh->f_comm->c_coll.coll_allgather_module);


    if ( OMPI_SUCCESS != ret ){
	goto exit;
    }

#if DEBUG
    for (i=0;i<fh->f_size;i++){
	printf("%d: start[%d]:%ld,end[%d]:%ld\n",
	       fh->f_rank,i,
	       (size_t)start_offsets[i],i,
	       (size_t)end_offsets[i]);
    }
#endif

    for (i=1; i<fh->f_size; i++){
	if ((start_offsets[i] < end_offsets[i-1]) &&
	    (start_offsets[i] <= end_offsets[i])){
	    interleave_count++;
	}
    }

#if DEBUG
    printf("%d: interleave_count:%d\n",
	   fh->f_rank,interleave_count);
#endif

    ret = mca_fcoll_two_phase_domain_partition(fh,
					       start_offsets,
					       end_offsets,
					       &min_st_offset,
					       &fd_start,
					       &fd_end,
					       domain_size,
					       &fd_size,
					       striping_unit,
					       two_phase_num_io_procs);
    if (OMPI_SUCCESS != ret){
	goto exit;
    }

#if DEBUG
    for (i=0;i<two_phase_num_io_procs;i++){
	printf("fd_start[%d] : %lld, fd_end[%d] : %lld, local_count: %d\n",
	       i, fd_start[i], i, fd_end[i], local_count);
    }
#endif

    ret = mca_fcoll_two_phase_calc_my_requests (fh,
						iov,
						local_count,
						min_st_offset,
						fd_start,
						fd_end,
						fd_size,
						&count_my_req_procs,
						&count_my_req_per_proc,
						&my_req,
						&buf_indices,
						striping_unit,
						two_phase_num_io_procs,
						aggregator_list);
    if ( OMPI_SUCCESS != ret ){
	goto exit;
    }

    ret = mca_fcoll_two_phase_calc_others_requests(fh,
						   count_my_req_procs,
						   count_my_req_per_proc,
						   my_req,
						   &count_other_req_procs,
						   &others_req);
    if (OMPI_SUCCESS != ret ){
	goto exit;
    }

#if DEBUG
    printf("%d count_other_req_procs : %d\n",
	   fh->f_rank,
	   count_other_req_procs);
#endif

#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
    start_rexch = MPI_Wtime();
#endif
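    /* Two-phase read: the aggregators read their file domains and scatter the
       data back to the processes that requested it, i.e. the write path's
       exchange in reverse. */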


    ret = two_phase_read_and_exch(fh,
				  buf,
				  datatype,
				  others_req,
				  iov,
				  local_count,
				  min_st_offset,
				  fd_size,
				  fd_start,
				  fd_end,
				  flat_buf,
				  buf_indices,
				  striping_unit,
				  two_phase_num_io_procs,
				  aggregator_list);


    if (OMPI_SUCCESS != ret){
	goto exit;
    }
#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
    end_rexch = MPI_Wtime();
    read_exch += (end_rexch - start_rexch);
    nentry.time[0] = read_time;
    nentry.time[1] = rcomm_time;
    nentry.time[2] = read_exch;
    if (isread_aggregator(fh->f_rank,
			  two_phase_num_io_procs,
			  aggregator_list)){
	nentry.aggregator = 1;
    }
    else{
	nentry.aggregator = 0;
    }
    nentry.nprocs_for_coll = two_phase_num_io_procs;


    if (!mca_common_ompio_full_print_queue(fh->f_coll_read_time)){
	mca_common_ompio_register_print_entry(fh->f_coll_read_time,
                                              nentry);
    }
#endif


exit:
    if (flat_buf != NULL){
	if (flat_buf->blocklens != NULL){
	    free (flat_buf->blocklens);
	}
	if (flat_buf->indices != NULL){
	    free (flat_buf->indices);
	}
        free (flat_buf);
    }

    free (start_offsets);
    free (end_offsets);
    free (aggregator_list);
    free (fd_start);
    free (decoded_iov);
    free (buf_indices);
    free (count_my_req_per_proc);
    free (my_req);
    free (others_req);
    free (fd_end);

    return ret;
}