// Randomize one more chunk if needed after the chunk cursor has been incremented. void SequenceRandomizer::RandomizeNextChunkIfNeeded() { if (m_currentChunkCursor < m_randomizedWindowEnd) { assert(m_currentChunkCursor >= m_chunkWindowBegin); return; } assert(m_randomizedWindowEnd == m_currentChunkCursor); if (m_randomizedWindowEnd == m_randomizedChunks.size()) { return; } // Chunk not yet randomized. // of the sample position we have to randomized (current + sampleCount). // We will randomize up to this chunk as the final position of windows end is guaranteed to have been determined // when all sequences up to that chunk have been randomized size_t nextRandomizationCursor = m_randomizedChunks[m_randomizedWindowEnd].m_randomizationWindow.m_end; while (nextRandomizationCursor < m_randomizedChunks.size() && m_randomizedChunks[nextRandomizationCursor].m_randomizationWindow.m_begin <= m_randomizedWindowEnd) { nextRandomizationCursor++; } // Determine the end chunk that we need to load into memory. ChunkIdType nextChunkWindowEnd = m_randomizedChunks[nextRandomizationCursor - 1].m_randomizationWindow.m_end; // Lets page in everything from m_currentRangeEndChunkIndex to endChunkIdx for (ChunkIdType i = m_chunkWindowEnd; i < nextChunkWindowEnd; ++i) { AddRandomizedSequencesForChunk(i); } size_t firstSequencePositionToRandomize = m_randomizationCursor == 0 ? 0 : m_randomizedChunks[m_randomizationCursor - 1].SequenceEndPosition(); size_t endSequencePosToRandomize = m_randomizedChunks[nextRandomizationCursor - 1].SequenceEndPosition(); for (size_t t = firstSequencePositionToRandomize; t < endSequencePosToRandomize; ++t) { // Get valid randomization range, expressed in chunks // TODO: This can be done more efficiently, we know the range of chunks already. const ChunkIdType currentChunkIdx = GetChunkIndexForSequencePosition(t); size_t chunkWindowBegin = m_randomizedChunks[currentChunkIdx].m_randomizationWindow.m_begin; size_t chunkWindowEnd = m_randomizedChunks[currentChunkIdx].m_randomizationWindow.m_end; // Get valid randomization range, expressed in sequence positions. size_t posBegin = m_randomizedChunks[chunkWindowBegin].m_sequencePositionStart; size_t posEnd = m_randomizedChunks[chunkWindowEnd - 1].SequenceEndPosition(); ChunkIdType tChunkIndex = GetChunkIndexForSequencePosition(t); auto& tSequence = GetRandomizedSequenceDescriptionByPosition(tChunkIndex, t); for (;;) { // Pick a sequence position from [posBegin, posEnd) const size_t j = RandMT(posBegin, posEnd, m_rng); // Pick up j sequence. ChunkIdType jChunkIndex = GetChunkIndexForSequencePosition(j); auto& jSequence = GetRandomizedSequenceDescriptionByPosition(jChunkIndex, j); // Try again if the sequence currently at j cannot be placed at position i. if (!IsValidForPosition(tChunkIndex, jSequence)) continue; // Try again if the sequence currently at i cannot be placed at position j. if (!IsValidForPosition(jChunkIndex, tSequence)) continue; // Swap and break out. std::swap(tSequence, jSequence); break; } } // Verify that we got it right for (size_t t = firstSequencePositionToRandomize; t < endSequencePosToRandomize; ++t) { // TODO assert only ChunkIdType tChunkIndex = GetChunkIndexForSequencePosition(t); if (!IsValidForPosition(tChunkIndex, GetRandomizedSequenceDescriptionByPosition(tChunkIndex, t))) { LogicError("SequenceRandomizer::RandomizeNextSequenceDescriptions: randomization logic mangled!"); } } // Let's recalculate number of samples in the randomized chunks for efficient indexing in seek. size_t sampleCount = 0; size_t randomizedChunk = m_randomizedWindowEnd - m_chunkWindowBegin; for (size_t index = 0; index < m_sequenceWindow[randomizedChunk].size(); index++) { sampleCount += m_sequenceWindow[randomizedChunk][index].m_numberOfSamples; } // Save the sample information. ChunkInfo info; info.numberOfSamples = sampleCount; info.start = m_randomizedChunkInfo.empty() ? 0 : m_randomizedChunkInfo.back().start + m_randomizedChunkInfo.back().numberOfSamples; m_randomizedChunkInfo.push_back(info); // Update the cursors. m_randomizedWindowEnd++; m_randomizationCursor = nextRandomizationCursor; m_chunkWindowEnd = nextChunkWindowEnd; if (m_verbosity) fprintf(stderr, "SequenceRandomizer::RandomizeNextChunkIfNeeded(): " "chunk window [%" PRIu64 "..%u), cursor %" PRIu64 ", " "randomized window [%" PRIu64 "..%" PRIu64 "), randomization cursor %" PRIu64 "\n", m_chunkWindowBegin, m_chunkWindowEnd, m_currentChunkCursor, m_chunkWindowBegin, m_randomizedWindowEnd, m_randomizationCursor); }
void SequenceRandomizer::RandomizeNextSequenceDescriptions(size_t sampleCount) { assert(m_currentSamplePosition <= m_nextSamplePositionNotYetRandomized); if (m_nextSamplePositionNotYetRandomized == m_randomizedChunks.back().SampleEndPosition()) { return; } if (m_currentSamplePosition + sampleCount < m_nextSamplePositionNotYetRandomized) { return; } if (m_nextSequencePositionNotYetRandomized == m_randomizedChunks.back().SequenceEndPosition()) { assert(false); return; } assert(m_nextSamplePositionNotYetRandomized >= m_randomizedChunks[0].m_samplePositionStart); size_t firstSamplePositionToRandomize = m_nextSamplePositionNotYetRandomized; size_t firstSequencePositionToRandomize = m_nextSequencePositionNotYetRandomized; // Find the smallest chunk index whose windows begin exceeds the chunk index // of the sample position we have to randomized (current + sampleCount). // We will randomize up to this chunk as the final position of windows end is guaranteed to have been determined // when all sequences up to that chunk have been randomized size_t lastSamplePositionChunkIdx = GetChunkIndexOf(m_currentSamplePosition + sampleCount - 1); size_t endChunkIdxToRandomize = lastSamplePositionChunkIdx; while (endChunkIdxToRandomize < m_randomizedChunks.size() && m_randomizedChunks[endChunkIdxToRandomize].m_randomizationWindow.m_begin <= lastSamplePositionChunkIdx) { endChunkIdxToRandomize++; } size_t endFramePosToRandomize = m_randomizedChunks[endChunkIdxToRandomize - 1].SampleEndPosition(); size_t endSequencePosToRandomize = m_randomizedChunks[endChunkIdxToRandomize - 1].SequenceEndPosition(); // Determine the range of chunks that need to be in m_sequenceWindows for us // to perform the necessary randomization size_t startChunkIdx = std::min(GetChunkIndexOf(m_currentSamplePosition), m_randomizedChunks[GetChunkIndexOf(firstSamplePositionToRandomize)].m_randomizationWindow.m_begin); size_t endChunkIdx = m_randomizedChunks[GetChunkIndexOf(endFramePosToRandomize - 1)].m_randomizationWindow.m_end; // Lets drop everything that is outside the new range [startChunkIdx, endChunkIdx) for (size_t i = m_currentRangeBeginChunkIndex; i < startChunkIdx; ++i) { m_sequenceWindow.pop_front(); m_chunkWindow.pop_front(); m_currentRangeBeginChunkIndex++; } // Lets page in everything from m_currentRangeEndChunkIndex to endChunkIdx for (size_t i = m_currentRangeEndChunkIndex; i < endChunkIdx; ++i) { AddRandomizedSequencesForChunk(i); } for (size_t t = firstSequencePositionToRandomize; t < endSequencePosToRandomize; ++t) { // Get valid randomization range, expressed in chunks const size_t currentChunkIdx = GetChunkIndexForSequencePosition(t); size_t chunkWindowBegin = m_randomizedChunks[currentChunkIdx].m_randomizationWindow.m_begin; size_t chunkWindowEnd = m_randomizedChunks[currentChunkIdx].m_randomizationWindow.m_end; // Get valid randomization range, expressed in sequence positions. size_t posBegin = m_randomizedChunks[chunkWindowBegin].m_sequencePositionStart; size_t posEnd = m_randomizedChunks[chunkWindowEnd - 1].SequenceEndPosition(); for (;;) { // Pick a sequence position from [posBegin, posEnd) const size_t j = rand(posBegin, posEnd); // Try again if the sequence currently at j cannot be placed at position i. if (!IsValidForPosition(t, GetRandomizedSequenceDescriptionBySequenceId(j))) continue; // Try again if the sequence currently at i cannot be placed at position j. if (!IsValidForPosition(j, GetRandomizedSequenceDescriptionBySequenceId(t))) continue; // Swap and break out. std::swap(GetRandomizedSequenceDescriptionBySequenceId(t), GetRandomizedSequenceDescriptionBySequenceId(j)); // TODO old swap was perhaps more efficient break; } } // Verify that we got it right for (size_t t = firstSequencePositionToRandomize; t < endSequencePosToRandomize; ++t) { // TODO assert only if (!IsValidForPosition(t, GetRandomizedSequenceDescriptionBySequenceId(t))) { LogicError("SequenceRandomizer::RandomizeNextSequenceDescriptions: randomization logic mangled!"); } } m_nextSamplePositionNotYetRandomized = endFramePosToRandomize; m_nextSequencePositionNotYetRandomized = endSequencePosToRandomize; }