// Writes a string-typed array sample into iGroup as a dataset named iName.
//
// The strings of the sample are compacted into one rank-1 buffer of CharT
// (via CompactStrings) and that buffer is what gets stored; the logical
// dimensions of the sample are therefore always written separately, as
// "<iName>.dims" on iGroup.
//
// NOTE(review): StringT and CharT are not declared inside this block;
// presumably they are parameters of an enclosing template header -- confirm.
//
// iMap              - map of already-written samples, looked up by iKey.
// iGroup            - HDF5 location that receives the dataset and the dims.
// iName             - name of the dataset to create.
// iSamp             - the sample to write; must have rank > 0.
// iKey              - key identifying the sample contents; also written to
//                     the dataset as "key".
// iCompressionLevel - negative disables compression, values above 9 are
//                     clamped to 9. Ignored when the sample has no points.
//
// Returns the id registered in iMap for this key. If the key was already
// present, the existing dataset is reused through CopyWrittenArray and no
// new data is written.
//
// Fails through ABCA_ASSERT when the sample is degenerate or when an HDF5
// dataspace/dataset cannot be created or written.
WrittenArraySampleIDPtr
WriteStringArrayT( WrittenArraySampleMap &iMap,
                   hid_t iGroup,
                   const std::string &iName,
                   const AbcA::ArraySample &iSamp,
                   const AbcA::ArraySample::Key &iKey,
                   int iCompressionLevel )
{
    // Because strings are packed together, always write out the dimensions.
    Dimensions dims = iSamp.getDimensions();
    ABCA_ASSERT( dims.rank() > 0,
                 "String type can not have a rank-0 array sample" );
    std::string dimsName = iName + ".dims";
    WriteDimensions( iGroup, dimsName, dims );

    // See whether or not we've already stored this.
    WrittenArraySampleIDPtr writeID = iMap.find( iKey );
    if ( writeID )
    {
        CopyWrittenArray( iGroup, iName, writeID );
        return writeID;
    }

    // Okay, need to actually store it.
    // It will be a dataset with an internal attribute for storing
    // the hash id.
    bool hasData = dims.numPoints() > 0;

    hid_t dspaceId = -1;
    Dimensions wdims; // Used to store the "fake" (packed, rank-1) dimensions.
    std::vector<CharT> charBuffer;

    if ( hasData )
    {
        // Get the dimensions, validate sample size.
        size_t extent = iSamp.getDataType().getExtent();
        size_t numStrings = dims.numPoints() * extent;

        ABCA_ASSERT( dims.rank() > 0 && numStrings > 0,
                     "Degenerate array sample in WriteStringArrayT" );

        // Get the data out of the array sample.
        const StringT *strings =
            reinterpret_cast<const StringT *>( iSamp.getData() );
        ABCA_ASSERT( strings, "Degenerate strings in WriteStringArrayT" );

        // Compact the strings in the string array.
        CompactStrings( strings, numStrings, charBuffer );

        // Create the dataspace, sized by the number of packed characters.
        size_t len = charBuffer.size();
        assert( len >= numStrings );
        wdims = Dimensions( len );
        HDimensions hdims( wdims );
        dspaceId = H5Screate_simple( hdims.rank(), hdims.rootPtr(), NULL );
    }
    else
    {
        // No points: store a null dataspace.
        dspaceId = H5Screate( H5S_NULL );
    }

    ABCA_ASSERT( dspaceId >= 0,
                 "WriteStringArrayT() Failed in dataspace constructor" );
    DspaceCloser dspaceCloser( dspaceId );

    hid_t dsetId = -1;
    if ( iCompressionLevel >= 0 && hasData )
    {
        // Make a compression plist matching the packed dimensions.
        hid_t zipPlist = DsetGzipCreatePlist( wdims,
            iCompressionLevel > 9 ? 9 : iCompressionLevel );
        PlistCloser plistCloser( zipPlist );

        // Make the dataset.
        dsetId = H5Dcreate2( iGroup, iName.c_str(), GetFileDtype<CharT>(),
                             dspaceId, H5P_DEFAULT, zipPlist, H5P_DEFAULT );
    }
    else
    {
        dsetId = H5Dcreate2( iGroup, iName.c_str(), GetFileDtype<CharT>(),
                             dspaceId, H5P_DEFAULT, H5P_DEFAULT,
                             H5P_DEFAULT );
    }
    DsetCloser dsetCloser( dsetId );

    ABCA_ASSERT( dsetId >= 0,
                 "WriteStringArrayT() Failed in dataset constructor" );

    // Write the data. The status is checked so that a failed write is
    // reported instead of being recorded in iMap as a stored sample.
    if ( hasData )
    {
        herr_t status = H5Dwrite( dsetId, GetNativeDtype<CharT>(), H5S_ALL,
                                  H5S_ALL, H5P_DEFAULT,
                                  &charBuffer.front() );
        ABCA_ASSERT( status >= 0,
                     "WriteStringArrayT() H5Dwrite failed for dataset: "
                     << iName );
    }

    // Write the key.
    WriteKey( dsetId, "key", iKey );

    writeID.reset( new WrittenArraySampleID( iKey, dsetId ) );
    iMap.store( writeID );

    // Return the reference.
    return writeID;
}
// Reads a string-typed array sample from the dataset iName under iParent.
//
// The dataset holds the characters of all strings packed into one buffer;
// the logical dimensions of the sample are stored separately. They are
// looked up as the attribute "<iName>.dims" on iParent (where the writer in
// this file puts them) and, when that attribute is absent, as "dims" on the
// dataset itself (the location this reader originally used).
//
// NOTE(review): StringT and CharT are not declared inside this block;
// presumably they are parameters of an enclosing template header -- confirm.
//
// iCache    - optional sample cache; consulted and updated only when the
//             dataset carries a "key".
// iParent   - HDF5 location containing the dataset.
// iName     - name of the dataset.
// iDataType - expected data type of the returned sample; extent must be > 0.
//
// Returns the cached sample when the key is found in iCache, otherwise a
// newly allocated sample (which is also offered to the cache).
//
// Fails through ABCA_ASSERT / ABCA_THROW when the dataset cannot be opened,
// has an unexpected datatype or dataspace, or cannot be read.
static AbcA::ArraySamplePtr
ReadStringArrayT( AbcA::ReadArraySampleCachePtr iCache,
                  hid_t iParent,
                  const std::string &iName,
                  const AbcA::DataType &iDataType )
{
    assert( iDataType.getExtent() > 0 );

    // Open the data set.
    hid_t dsetId = H5Dopen( iParent, iName.c_str(), H5P_DEFAULT );
    ABCA_ASSERT( dsetId >= 0, "Cannot open dataset: " << iName );
    DsetCloser dsetCloser( dsetId );

    // Read the digest, if there is one.
    AbcA::ArraySample::Key key;
    bool foundDigest = ReadKey( dsetId, "key", key );

    // If we found a digest and there's a cache, see
    // if we're in there, and return it if so.
    if ( foundDigest && iCache )
    {
        AbcA::ReadArraySampleID found = iCache->find( key );
        if ( found )
        {
            AbcA::ArraySamplePtr ret = found.getSample();
            assert( ret );
            if ( ret->getDataType() != iDataType )
            {
                ABCA_THROW( "ERROR: Read data type for dset: " << iName
                            << ": " << ret->getDataType()
                            << " does not match expected data type: "
                            << iDataType );
            }

            // Got it!
            return ret;
        }
    }

    // Okay, we haven't found it in a cache.

    // Check the stored datatype against the native character type.
    {
        hid_t dsetFtype = H5Dget_type( dsetId );
        ABCA_ASSERT( dsetFtype >= 0,
                     "Could not get datatype for dataSet: " << iName );
        DtypeCloser dtypeCloser( dsetFtype );

        hid_t nativeDtype = GetNativeDtype<CharT>();

        // CJH: sizes are deliberately not compared. They can differ,
        // because wchar_t is sometimes 16-bit, but we always store 32 bit.
        ABCA_ASSERT( H5Tget_class( dsetFtype ) ==
                     H5Tget_class( nativeDtype ) &&
                     H5Tget_sign( dsetFtype ) ==
                     H5Tget_sign( nativeDtype ),
                     "Invalid datatype for stringT" );
    }

    // String array datatypes require a "dimensions" to be stored
    // externally, since the strings themselves are stored in a compacted
    // array of rank 1. Prefer the per-property attribute on the parent,
    // fall back to the "dims" attribute on the dataset.
    Dimensions realDims;
    std::string dimsName = iName + ".dims";
    if ( H5Aexists( iParent, dimsName.c_str() ) > 0 )
    {
        ReadDimensions( iParent, dimsName, realDims );
    }
    else
    {
        ReadDimensions( dsetId, "dims", realDims );
    }
    ABCA_ASSERT( realDims.rank() > 0,
                 "Degenerate rank in Dataset read" );

    // Read the data space.
    hid_t dspaceId = H5Dget_space( dsetId );
    ABCA_ASSERT( dspaceId >= 0, "Could not get dataspace for dataSet: "
                 << iName );
    DspaceCloser dspaceCloser( dspaceId );

    AbcA::ArraySamplePtr ret;

    H5S_class_t dspaceClass = H5Sget_simple_extent_type( dspaceId );
    if ( dspaceClass == H5S_SIMPLE )
    {
        ABCA_ASSERT( realDims.numPoints() > 0,
                     "Degenerate dims in Dataset read" );
        size_t totalNumStrings =
            realDims.numPoints() * iDataType.getExtent();

        // Get the dimensions of the packed character data. Its rank is
        // independent of the logical rank in realDims (the writer packs
        // into rank 1), so only a valid, positive rank is required here.
        Dimensions dims;
        int rank = H5Sget_simple_extent_ndims( dspaceId );
        ABCA_ASSERT( rank > 0,
                     "H5Sget_simple_extent_ndims() failed." );

        HDimensions hdims;
        hdims.setRank( rank );
        rank = H5Sget_simple_extent_dims( dspaceId, hdims.rootPtr(), NULL );
        ABCA_ASSERT( rank == hdims.rank(),
                     "H5Sget_simple_extent_dims() "
                     "found inconsistent ranks."
                     << std::endl
                     << "Expecting rank: " << hdims.rank()
                     << " instead was: " << rank );

        dims = hdims;
        ABCA_ASSERT( dims.numPoints() > 0,
                     "Degenerate dims in Dataset read" );

        // Create temporary char storage buffer, zero filled, with one
        // extra trailing element beyond what the dataset holds.
        size_t totalNumChars = dims.numPoints() + 1;
        std::vector<CharT> charStorage( totalNumChars, ( CharT )0 );

        // Read into it.
        herr_t status = H5Dread( dsetId, GetNativeDtype<CharT>(),
                                 H5S_ALL, H5S_ALL, H5P_DEFAULT,
                                 ( void * )&charStorage.front() );
        ABCA_ASSERT( status >= 0,
                     "Could not read string array from data set. Weird." );

        // Make an appropriately dimensionalized (and manageable)
        // array of strings using the ArraySamples.
        ret = AbcA::AllocateArraySample( iDataType, realDims );
        StringT *strings = reinterpret_cast<StringT*>(
            const_cast<void*>( ret->getData() ) );
        assert( strings != NULL );

        // Go through the one dimensional char array extracting each string.
        ExtractStrings<StringT,CharT>( strings,
                                       ( const CharT * )&charStorage.front(),
                                       totalNumChars,
                                       totalNumStrings );
    }
    else if ( dspaceClass == H5S_NULL )
    {
        // Num points should be zero here.
        ABCA_ASSERT( realDims.numPoints() == 0,
                     "Expecting zero points in dimensions" );

        ret = AbcA::AllocateArraySample( iDataType, realDims );
    }
    else
    {
        ABCA_THROW( "Unexpected scalar dataspace encountered." );
    }

    // Store if there is a cache.
    if ( foundDigest && iCache )
    {
        AbcA::ReadArraySampleID stored = iCache->store( key, ret );
        if ( stored )
        {
            return stored.getSample();
        }
    }

    // Otherwise, just leave! The ArraySamplePtr returned by
    // AllocateArraySample manages its own lifetime.
    return ret;
}
//-***************************************************************************** WrittenArraySampleIDPtr WriteArray( WrittenArraySampleMap &iMap, hid_t iGroup, const std::string &iName, const AbcA::ArraySample &iSamp, const AbcA::ArraySample::Key &iKey, hid_t iFileType, hid_t iNativeType, int iCompressionLevel ) { // Dispatch to string writing utils. const AbcA::DataType &dataType = iSamp.getDataType(); if ( dataType.getPod() == kStringPOD ) { return WriteStringArray( iMap, iGroup, iName, iSamp, iKey, iCompressionLevel ); } else if ( dataType.getPod() == kWstringPOD ) { return WriteWstringArray( iMap, iGroup, iName, iSamp, iKey, iCompressionLevel ); } // write the dimensions as necessary Dimensions dims = iSamp.getDimensions(); size_t rank = dims.rank(); ABCA_ASSERT( rank > 0, "Cannot have a rank-0 array sample" ); // rank 1 is the most common case, and we can easily infer it's size // from the dataspace for non-strings, so don't bother writing it out if (rank > 1) { std::string dimsName = iName + ".dims"; WriteDimensions( iGroup, dimsName, dims ); } // See whether or not we've already stored this. WrittenArraySampleIDPtr writeID = iMap.find( iKey ); if ( writeID ) { CopyWrittenArray( iGroup, iName, writeID ); return writeID; } // Okay, need to actually store it. // It will be a dataset with an internal attribute for storing // the hash id. bool hasData = dims.numPoints() > 0; hid_t dspaceId = -1; if ( hasData ) { hsize_t hdim = dims.numPoints() * dataType.getExtent(); dspaceId = H5Screate_simple( 1, &hdim, NULL ); } else { dspaceId = H5Screate( H5S_NULL ); } ABCA_ASSERT( dspaceId >= 0, "WriteArray() Failed in dataspace construction" ); DspaceCloser dspaceCloser( dspaceId ); hid_t dsetId = -1; if ( iCompressionLevel >= 0 && hasData ) { // Make a compression plist hid_t zipPlist = DsetGzipCreatePlist( dims, iCompressionLevel > 9 ? 9 : iCompressionLevel ); PlistCloser plistCloser( zipPlist ); // Make the dataset. 
dsetId = H5Dcreate2( iGroup, iName.c_str(), iFileType, dspaceId, H5P_DEFAULT, zipPlist, H5P_DEFAULT ); } else { dsetId = H5Dcreate2( iGroup, iName.c_str(), iFileType, dspaceId, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT ); } DsetCloser dsetCloser(dsetId); ABCA_ASSERT( dsetId >= 0, "WriteArray() Failed in dataset constructor" ); // Write the data. if ( hasData ) { H5Dwrite( dsetId, iNativeType, H5S_ALL, H5S_ALL, H5P_DEFAULT, iSamp.getData() ); } // Write the array sample key. WriteKey( dsetId, "key", iKey ); writeID.reset( new WrittenArraySampleID( iKey, dsetId ) ); iMap.store( writeID ); // Return the reference. return writeID; }
//-***************************************************************************** AbcA::ArraySamplePtr ReadArray( AbcA::ReadArraySampleCachePtr iCache, hid_t iParent, const std::string &iName, const AbcA::DataType &iDataType, hid_t iFileType, hid_t iNativeType ) { // Dispatch string stuff. if ( iDataType.getPod() == kStringPOD ) { return ReadStringArray( iCache, iParent, iName, iDataType ); } else if ( iDataType.getPod() == kWstringPOD ) { return ReadWstringArray( iCache, iParent, iName, iDataType ); } assert( iDataType.getPod() != kStringPOD && iDataType.getPod() != kWstringPOD ); // Open the data set. hid_t dsetId = H5Dopen( iParent, iName.c_str(), H5P_DEFAULT ); ABCA_ASSERT( dsetId >= 0, "Cannot open dataset: " << iName ); DsetCloser dsetCloser( dsetId ); // Read the data space. hid_t dspaceId = H5Dget_space( dsetId ); ABCA_ASSERT( dspaceId >= 0, "Could not get dataspace for dataSet: " << iName ); DspaceCloser dspaceCloser( dspaceId ); AbcA::ArraySample::Key key; bool foundDigest = false; // if we are caching, get the key and see if it is being used if ( iCache ) { key.origPOD = iDataType.getPod(); key.readPOD = key.origPOD; key.numBytes = Util::PODNumBytes( key.readPOD ) * H5Sget_simple_extent_npoints( dspaceId ); foundDigest = ReadKey( dsetId, "key", key ); AbcA::ReadArraySampleID found = iCache->find( key ); if ( found ) { AbcA::ArraySamplePtr ret = found.getSample(); assert( ret ); if ( ret->getDataType().getPod() != iDataType.getPod() ) { ABCA_THROW( "ERROR: Read data type for dset: " << iName << ": " << ret->getDataType() << " does not match expected data type: " << iDataType ); } // Got it! return ret; } } // Okay, we haven't found it in a cache. // Read the data type. 
hid_t dtypeId = H5Dget_type( dsetId ); ABCA_ASSERT( dtypeId >= 0, "Could not get datatype for dataSet: " << iName ); DtypeCloser dtypeCloser( dtypeId ); ABCA_ASSERT( EquivalentDatatypes( iFileType, dtypeId ), "File DataType clash for array dataset: " << iName ); AbcA::ArraySamplePtr ret; H5S_class_t dspaceClass = H5Sget_simple_extent_type( dspaceId ); if ( dspaceClass == H5S_SIMPLE ) { // Get the dimensions int rank = H5Sget_simple_extent_ndims( dspaceId ); ABCA_ASSERT( rank == 1, "H5Sget_simple_extent_ndims() must be 1." ); hsize_t hdim = 0; rank = H5Sget_simple_extent_dims( dspaceId, &hdim, NULL ); Dimensions dims; std::string dimName = iName + ".dims"; if ( H5Aexists( iParent, dimName.c_str() ) ) { ReadDimensions( iParent, dimName, dims ); } else { dims.setRank(1); dims[0] = hdim / iDataType.getExtent(); } ABCA_ASSERT( dims.numPoints() > 0, "Degenerate dims in Dataset read" ); // Create a buffer into which we shall read. ret = AbcA::AllocateArraySample( iDataType, dims ); assert( ret->getData() ); // And... read into it. herr_t status = H5Dread( dsetId, iNativeType, H5S_ALL, H5S_ALL, H5P_DEFAULT, const_cast<void*>( ret->getData() ) ); ABCA_ASSERT( status >= 0, "H5Dread() failed." ); } else if ( dspaceClass == H5S_NULL ) { Dimensions dims; std::string dimName = iName + ".dims"; if ( H5Aexists( iParent, dimName.c_str() ) ) { ReadDimensions( iParent, dimName, dims ); ABCA_ASSERT( dims.rank() > 0, "Degenerate rank in Dataset read" ); // Num points should be zero here. ABCA_ASSERT( dims.numPoints() == 0, "Expecting zero points in dimensions" ); } else { dims.setRank(1); dims[0] = 0; } ret = AbcA::AllocateArraySample( iDataType, dims ); } else { ABCA_THROW( "Unexpected scalar dataspace encountered." ); } // Store if there is a cache. if ( foundDigest && iCache ) { AbcA::ReadArraySampleID stored = iCache->store( key, ret ); if ( stored ) { return stored.getSample(); } } // Otherwise, just leave! 
ArraySamplePtr returned by AllocateArraySample // already has fancy-dan deleter built in. // I REALLY LOVE SMART PTRS. return ret; }