void HDF5Genome::setGenomeBottomDimensions( const vector<Sequence::UpdateInfo>& bottomDimensions) { hal_size_t numBottomSegments = 0; for (vector<Sequence::UpdateInfo>::const_iterator i = bottomDimensions.begin(); i != bottomDimensions.end(); ++i) { numBottomSegments += i->_numSegments; } H5::Exception::dontPrint(); try { DataSet d = _group.openDataSet(bottomArrayName); _group.unlink(bottomArrayName); } catch (H5::Exception){} hal_size_t numChildren = _alignment->getChildNames(_name).size(); // scale down the chunk size in order to keep chunks proportional to // the size of a bottom segment with two children. hsize_t chunk; _dcprops.getChunk(1, &chunk); double scale = numChildren < 10 ? 1. : 10. / numChildren; chunk *= scale; DSetCreatPropList botDC; botDC.copy(_dcprops); botDC.setChunk(1, &chunk); _bottomArray.create(&_group, bottomArrayName, HDF5BottomSegment::dataType(numChildren), numBottomSegments + 1, &botDC, _numChunksInArrayBuffer); _numChildrenInBottomArray = numChildren; _childCache.clear(); }
void HDF5Genome::setDimensions( const vector<Sequence::Info>& sequenceDimensions, bool storeDNAArrays) { _totalSequenceLength = 0; hal_size_t totalSeq = sequenceDimensions.size(); hal_size_t maxName = 0; // Copy segment dimensions to use the external interface vector<Sequence::UpdateInfo> topDimensions; topDimensions.reserve(sequenceDimensions.size()); vector<Sequence::UpdateInfo> bottomDimensions; bottomDimensions.reserve(sequenceDimensions.size()); // Compute summary info from the list of sequence Dimensions for (vector<Sequence::Info>::const_iterator i = sequenceDimensions.begin(); i != sequenceDimensions.end(); ++i) { _totalSequenceLength += i->_length; maxName = max(static_cast<hal_size_t>(i->_name.length()), maxName); topDimensions.push_back( Sequence::UpdateInfo(i->_name, i->_numTopSegments)); bottomDimensions.push_back( Sequence::UpdateInfo(i->_name, i->_numBottomSegments)); } // Unlink the DNA and segment arrays if they exist (using // exceptions is the only way I know how right now). Note that // the file needs to be refactored to take advantage of the new // space. H5::Exception::dontPrint(); try { DataSet d = _group.openDataSet(dnaArrayName); _group.unlink(dnaArrayName); } catch (H5::Exception){} try { DataSet d = _group.openDataSet(sequenceIdxArrayName); _group.unlink(sequenceIdxArrayName); } catch (H5::Exception){} try { DataSet d = _group.openDataSet(sequenceNameArrayName); _group.unlink(sequenceNameArrayName); } catch (H5::Exception){} if (_totalSequenceLength > 0 && storeDNAArrays == true) { hal_size_t arrayLength = _totalSequenceLength / 2; if (_totalSequenceLength % 2) { ++arrayLength; _rup->set(rupGroupName, "1"); } else { _rup->set(rupGroupName, "0"); } hsize_t chunk; _dcprops.getChunk(1, &chunk); // enalarge chunk size because dna bases are so much smaller // than segments. (about 30x). we default to 10x enlargement // since the seem to compress about 3x worse. chunk *= dnaChunkScale; DSetCreatPropList dnaDC; dnaDC.copy(_dcprops); dnaDC.setChunk(1, &chunk); _dnaArray.create(&_group, dnaArrayName, HDF5DNA::dataType(), arrayLength, &dnaDC, _numChunksInArrayBuffer); } if (totalSeq > 0) { _sequenceIdxArray.create(&_group, sequenceIdxArrayName, HDF5Sequence::idxDataType(), totalSeq + 1, &_dcprops, _numChunksInArrayBuffer); _sequenceNameArray.create(&_group, sequenceNameArrayName, HDF5Sequence::nameDataType(maxName + 1), totalSeq, &_dcprops, _numChunksInArrayBuffer); writeSequences(sequenceDimensions); } // Do the same as above for the segments. setGenomeTopDimensions(topDimensions); setGenomeBottomDimensions(bottomDimensions); _parentCache = NULL; _childCache.clear(); }