Esempio n. 1
0
void HDF5Genome::setGenomeBottomDimensions(
  const vector<Sequence::UpdateInfo>& bottomDimensions)
{
  hal_size_t numBottomSegments = 0;
  for (vector<Sequence::UpdateInfo>::const_iterator i
          = bottomDimensions.begin(); i != bottomDimensions.end(); 
       ++i)
  {
    numBottomSegments += i->_numSegments;
  }
  H5::Exception::dontPrint();
  try
  {
    DataSet d = _group.openDataSet(bottomArrayName);
    _group.unlink(bottomArrayName);
  }
  catch (H5::Exception){}
  hal_size_t numChildren = _alignment->getChildNames(_name).size();
 
  // scale down the chunk size in order to keep chunks proportional to
  // the size of a bottom segment with two children.
  hsize_t chunk;
  _dcprops.getChunk(1, &chunk);  
  double scale = numChildren < 10 ? 1. : 10. / numChildren;
  chunk *= scale;
  DSetCreatPropList botDC;
  botDC.copy(_dcprops);
  botDC.setChunk(1, &chunk);

  _bottomArray.create(&_group, bottomArrayName, 
                      HDF5BottomSegment::dataType(numChildren), 
                      numBottomSegments + 1, &botDC, _numChunksInArrayBuffer);
  _numChildrenInBottomArray = numChildren;
  _childCache.clear();
}
Esempio n. 2
0
void HDF5Genome::setDimensions(
  const vector<Sequence::Info>& sequenceDimensions,
  bool storeDNAArrays)
{
  _totalSequenceLength = 0;
  hal_size_t totalSeq = sequenceDimensions.size();
  hal_size_t maxName = 0;
  
  // Copy segment dimensions to use the external interface
  vector<Sequence::UpdateInfo> topDimensions;
  topDimensions.reserve(sequenceDimensions.size());
  vector<Sequence::UpdateInfo> bottomDimensions;
  bottomDimensions.reserve(sequenceDimensions.size());

  // Compute summary info from the list of sequence Dimensions
  for (vector<Sequence::Info>::const_iterator i = sequenceDimensions.begin();
       i != sequenceDimensions.end(); 
       ++i)
  {
    _totalSequenceLength += i->_length;
    maxName = max(static_cast<hal_size_t>(i->_name.length()), maxName);
    topDimensions.push_back(
      Sequence::UpdateInfo(i->_name, i->_numTopSegments));
    bottomDimensions.push_back(
      Sequence::UpdateInfo(i->_name, i->_numBottomSegments));
  }

  // Unlink the DNA and segment arrays if they exist (using 
  // exceptions is the only way I know how right now).  Note that
  // the file needs to be refactored to take advantage of the new
  // space. 
  H5::Exception::dontPrint();
  try
  {
    DataSet d = _group.openDataSet(dnaArrayName);
    _group.unlink(dnaArrayName);
  }
  catch (H5::Exception){}
  try
  {
    DataSet d = _group.openDataSet(sequenceIdxArrayName);
    _group.unlink(sequenceIdxArrayName);
  }
  catch (H5::Exception){}
  try
  {
    DataSet d = _group.openDataSet(sequenceNameArrayName);
    _group.unlink(sequenceNameArrayName);
  }
  catch (H5::Exception){}

  if (_totalSequenceLength > 0 && storeDNAArrays == true)
  {
    hal_size_t arrayLength = _totalSequenceLength / 2;
    if (_totalSequenceLength % 2)
    {
      ++arrayLength;
      _rup->set(rupGroupName, "1");
    }
    else
    {
      _rup->set(rupGroupName, "0");
    }
    hsize_t chunk;
    _dcprops.getChunk(1, &chunk);
    // enalarge chunk size because dna bases are so much smaller
    // than segments.  (about 30x). we default to 10x enlargement
    // since the seem to compress about 3x worse.  
    chunk *= dnaChunkScale;
    DSetCreatPropList dnaDC;
    dnaDC.copy(_dcprops);
    dnaDC.setChunk(1, &chunk);
    _dnaArray.create(&_group, dnaArrayName, HDF5DNA::dataType(), 
                     arrayLength, &dnaDC, _numChunksInArrayBuffer);
  }
  if (totalSeq > 0)
  {
    _sequenceIdxArray.create(&_group, sequenceIdxArrayName, 
                             HDF5Sequence::idxDataType(), 
                             totalSeq + 1, &_dcprops, _numChunksInArrayBuffer);

    _sequenceNameArray.create(&_group, sequenceNameArrayName, 
                              HDF5Sequence::nameDataType(maxName + 1), 
                              totalSeq, &_dcprops, _numChunksInArrayBuffer);

    writeSequences(sequenceDimensions);    
  }
  
  // Do the same as above for the segments. 
  setGenomeTopDimensions(topDimensions);
  setGenomeBottomDimensions(bottomDimensions);

  _parentCache = NULL;
  _childCache.clear();
}