/*------------------------------------------------------------------------- * Function: H5D_mpio_spaces_xfer * * Purpose: Use MPI-IO to transfer data efficiently * directly between app buffer and file. * * Return: non-negative on success, negative on failure. * * Programmer: rky 980813 * * Notes: * For collective data transfer only since this would eventually call * H5FD_mpio_setup to do setup to eveually call MPI_File_set_view in * H5FD_mpio_read or H5FD_mpio_write. MPI_File_set_view is a collective * call. Letting independent data transfer use this route would result in * hanging. * * The preconditions for calling this routine are located in the * H5S_mpio_opt_possible() routine, which determines whether this routine * can be called for a given dataset transfer. * * Modifications: * rky 980918 * Added must_convert parameter to let caller know we can't optimize * the xfer. * * Albert Cheng, 001123 * Include the MPI_type freeing as part of cleanup code. * * QAK - 2002/04/02 * Removed the must_convert parameter and move preconditions to * H5S_mpio_opt_possible() routine * * QAK - 2002/06/17 * Removed 'disp' parameter from H5FD_mpio_setup routine and use the * address of the dataset in MPI_File_set_view() calls, as necessary. * * QAK - 2002/06/18 * Removed 'dc_plist' parameter, since it was not used. Also, switch to * getting the 'extra_offset' setting for each selection. * *------------------------------------------------------------------------- */ static herr_t H5D_mpio_spaces_xfer(H5D_io_info_t *io_info, size_t elmt_size, const H5S_t *file_space, const H5S_t *mem_space, void *_buf /*out*/, hbool_t do_write ) { haddr_t addr; /* Address of dataset (or selection) within file */ size_t mpi_buf_count, mpi_file_count; /* Number of "objects" to transfer */ hsize_t mpi_buf_offset, mpi_file_offset; /* Offset within dataset where selection (ie. MPI type) begins */ MPI_Datatype mpi_buf_type, mpi_file_type; /* MPI types for buffer (memory) and file */ hbool_t mbt_is_derived=0, /* Whether the buffer (memory) type is derived and needs to be free'd */ mft_is_derived=0; /* Whether the file type is derived and needs to be free'd */ hbool_t plist_is_setup=0; /* Whether the dxpl has been customized */ uint8_t *buf=(uint8_t *)_buf; /* Alias for pointer arithmetic */ int mpi_code; /* MPI return code */ herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_NOAPI_NOINIT(H5D_mpio_spaces_xfer); /* Check args */ assert (io_info); assert (io_info->dset); assert (file_space); assert (mem_space); assert (buf); assert (IS_H5FD_MPIO(io_info->dset->ent.file)); /* Make certain we have the correct type of property list */ assert(TRUE==H5P_isa_class(io_info->dxpl_id,H5P_DATASET_XFER)); /* create the MPI buffer type */ if (H5S_mpio_space_type( mem_space, elmt_size, /* out: */ &mpi_buf_type, &mpi_buf_count, &mpi_buf_offset, &mbt_is_derived )<0) HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't create MPI buf type"); /* create the MPI file type */ if ( H5S_mpio_space_type( file_space, elmt_size, /* out: */ &mpi_file_type, &mpi_file_count, &mpi_file_offset, &mft_is_derived )<0) HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't create MPI file type"); /* Get the base address of the contiguous dataset or the chunk */ if(io_info->dset->shared->layout.type == H5D_CONTIGUOUS) addr = H5D_contig_get_addr(io_info->dset) + mpi_file_offset; else { haddr_t chunk_addr; /* for collective chunk IO */ assert(io_info->dset->shared->layout.type == H5D_CHUNKED); chunk_addr=H5D_istore_get_addr(io_info,NULL); addr = H5F_BASE_ADDR(io_info->dset->ent.file) + chunk_addr + mpi_file_offset; } /* * Pass buf type, file type to the file driver. Request an MPI type * transfer (instead of an elementary byteblock transfer). */ if(H5FD_mpi_setup_collective(io_info->dxpl_id, mpi_buf_type, mpi_file_type)<0) HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI-I/O properties"); plist_is_setup=1; /* Adjust the buffer pointer to the beginning of the selection */ buf+=mpi_buf_offset; /* transfer the data */ if (do_write) { if (H5F_block_write(io_info->dset->ent.file, H5FD_MEM_DRAW, addr, mpi_buf_count, io_info->dxpl_id, buf) <0) HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL,"MPI write failed"); } else { if (H5F_block_read (io_info->dset->ent.file, H5FD_MEM_DRAW, addr, mpi_buf_count, io_info->dxpl_id, buf) <0) HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL,"MPI read failed"); } done: /* Reset the dxpl settings */ if(plist_is_setup) { if(H5FD_mpi_teardown_collective(io_info->dxpl_id)<0) HDONE_ERROR(H5E_DATASPACE, H5E_CANTFREE, FAIL, "unable to reset dxpl values"); } /* end if */ /* free the MPI buf and file types */ if (mbt_is_derived) { if (MPI_SUCCESS != (mpi_code= MPI_Type_free( &mpi_buf_type ))) HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code); } if (mft_is_derived) { if (MPI_SUCCESS != (mpi_code= MPI_Type_free( &mpi_file_type ))) HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code); } FUNC_LEAVE_NOAPI(ret_value); } /* end H5D_mpio_spaces_xfer() */
/*------------------------------------------------------------------------- * Function: H5B2_cache_hdr_flush * * Purpose: Flushes a dirty B-tree header to disk. * * Return: Non-negative on success/Negative on failure * * Programmer: Quincey Koziol * [email protected] * Feb 1 2005 * Changes: JRM -- 8/21/06 * Added the flags_ptr parameter. This parameter exists to * allow the flush routine to report to the cache if the * entry is resized or renamed as a result of the flush. * *flags_ptr is set to H5C_CALLBACK__NO_FLAGS_SET on entry. * *------------------------------------------------------------------------- */ static herr_t H5B2_cache_hdr_flush(H5F_t *f, hid_t dxpl_id, hbool_t destroy, haddr_t addr, H5B2_t *bt2, unsigned UNUSED * flags_ptr) { H5WB_t *wb = NULL; /* Wrapped buffer for header data */ uint8_t hdr_buf[H5B2_HDR_BUF_SIZE]; /* Buffer for header */ herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_NOAPI(H5B2_cache_hdr_flush, FAIL) /* check arguments */ HDassert(f); HDassert(H5F_addr_defined(addr)); HDassert(bt2); if (bt2->cache_info.is_dirty) { H5B2_shared_t *shared; /* Shared B-tree information */ uint8_t *hdr; /* Pointer to header buffer */ uint8_t *p; /* Pointer into raw data buffer */ size_t size; /* Header size on disk */ uint32_t metadata_chksum; /* Computed metadata checksum value */ /* Get the pointer to the shared B-tree info */ shared = (H5B2_shared_t *)H5RC_GET_OBJ(bt2->shared); HDassert(shared); /* Wrap the local buffer for serialized header info */ if(NULL == (wb = H5WB_wrap(hdr_buf, sizeof(hdr_buf)))) HGOTO_ERROR(H5E_BTREE, H5E_CANTINIT, FAIL, "can't wrap buffer") /* Compute the size of the serialized B-tree header on disk */ size = H5B2_HEADER_SIZE(f); /* Get a pointer to a buffer that's large enough for header */ if(NULL == (hdr = (uint8_t *)H5WB_actual(wb, size))) HGOTO_ERROR(H5E_BTREE, H5E_NOSPACE, FAIL, "can't get actual buffer") /* Get temporary pointer to serialized header */ p = hdr; /* Magic number */ HDmemcpy(p, H5B2_HDR_MAGIC, (size_t)H5_SIZEOF_MAGIC); p += H5_SIZEOF_MAGIC; /* Version # */ *p++ = H5B2_HDR_VERSION; /* B-tree type */ *p++ = shared->type->id; /* Node size (in bytes) */ UINT32ENCODE(p, shared->node_size); /* Raw key size (in bytes) */ UINT16ENCODE(p, shared->rrec_size); /* Depth of tree */ UINT16ENCODE(p, shared->depth); /* Split & merge %s */ H5_CHECK_OVERFLOW(shared->split_percent, /* From: */ unsigned, /* To: */ uint8_t); *p++ = (uint8_t)shared->split_percent; H5_CHECK_OVERFLOW(shared->merge_percent, /* From: */ unsigned, /* To: */ uint8_t); *p++ = (uint8_t)shared->merge_percent; /* Root node pointer */ H5F_addr_encode(f, &p, bt2->root.addr); UINT16ENCODE(p, bt2->root.node_nrec); H5F_ENCODE_LENGTH(f, p, bt2->root.all_nrec); /* Compute metadata checksum */ metadata_chksum = H5_checksum_metadata(hdr, (size - H5B2_SIZEOF_CHKSUM), 0); /* Metadata checksum */ UINT32ENCODE(p, metadata_chksum); /* Write the B-tree header. */ HDassert((size_t)(p - hdr) == size); if(H5F_block_write(f, H5FD_MEM_BTREE, addr, size, dxpl_id, hdr) < 0) HGOTO_ERROR(H5E_BTREE, H5E_CANTFLUSH, FAIL, "unable to save B-tree header to disk") bt2->cache_info.is_dirty = FALSE; } /* end if */