// Scans the binary MLF input and adds one IndexedSequence per utterance to `index`.
//
// Layout consumed by this function, in read order:
//   - 3 bytes compared against MLF_BIN_LABEL,
//   - a `short` format version, forwarded to readUtteranceLabel,
//   - then, per utterance: the label (parsed by readUtteranceLabel), a `uint` frame count,
//     a `ushort` sample count and `sampleCount * 2` `ushort` values (senone/count pairs),
//     which are skipped rather than read.
//
// Raises RuntimeError when the input is empty, no corpus descriptor was set, the file label
// does not match, or the file ends in the middle of a header/utterance record.
/*virtual*/ void MLFBinaryIndexBuilder::Populate(shared_ptr<Index>& index) /*override*/
{
    m_input.CheckIsOpenOrDie();

    index->Reserve(filesize(m_input.File()));

    BufferedFileReader reader(m_bufferSize, m_input);

    if (reader.Empty())
        RuntimeError("Input file is empty");

    if (!m_corpus)
        RuntimeError("MLFBinaryIndexBuilder: corpus descriptor was not specified.");

    // Scratch buffer shared by all fixed-size reads below and by readUtteranceLabel.
    vector<char> buffer(MAX_UTTERANCE_LABEL_LENGTH);

    // Validate file label.
    // NOTE(review): assumes TryReadBinarySegment reports success as a bool-convertible value.
    const size_t labelLength = 3;
    if (!reader.TryReadBinarySegment(labelLength, buffer.data()))
        RuntimeError("MLFBinaryIndexBuilder: MLF binary file is malformed.");

    const std::string mlfLabel(buffer.data(), labelLength);
    if (mlfLabel != MLF_BIN_LABEL)
        RuntimeError("MLFBinaryIndexBuilder: MLF binary file is malformed.");

    // Read the MLF format version. A short read must not fall through: the buffer would
    // still hold the label bytes and they would be misinterpreted as the version.
    if (!reader.TryReadBinarySegment(sizeof(short), buffer.data()))
        RuntimeError("MLFBinaryIndexBuilder: MLF binary file is truncated (format version is missing).");
    const short modelVersion = *reinterpret_cast<const short*>(buffer.data());

    // Iterate over the binary MLF, one utterance per iteration.
    // The loop ends when readUtteranceLabel returns false.
    string uttrKey;
    while (readUtteranceLabel(modelVersion, reader, buffer, uttrKey))
    {
        auto uttrId = m_corpus->KeyToId(uttrKey);

        if (!reader.TryReadBinarySegment(sizeof(uint), buffer.data()))
            RuntimeError("MLFBinaryIndexBuilder: MLF binary file is truncated (utterance frame count is missing).");
        const uint uttrFrameCount = *reinterpret_cast<const uint*>(buffer.data());

        // The indexed sequence starts at the sample-count field, so record the offset
        // before consuming that field.
        const auto sequenceStartOffset = reader.GetFileOffset();

        // Read the number of senone/count pairs of this utterance.
        if (!reader.TryReadBinarySegment(sizeof(ushort), buffer.data()))
            RuntimeError("MLFBinaryIndexBuilder: MLF binary file is truncated (utterance sample count is missing).");
        const ushort uttrSamplesCount = *reinterpret_cast<const ushort*>(buffer.data());

        // Payload: senone/count pairs, i.e. two ushort values per sample.
        const size_t uttrPayloadSize = uttrSamplesCount * 2 * sizeof(ushort);

        // Sequence size covers the sample-count field plus the payload.
        const size_t uttrSize = sizeof(ushort) + uttrPayloadSize;

        IndexedSequence sequence;
        sequence.SetKey(uttrId)
            .SetNumberOfSamples(uttrFrameCount)
            .SetOffset(sequenceStartOffset)
            .SetSize(uttrSize);
        index->AddSequence(sequence);

        // Skip the payload; only its location and size are indexed here.
        reader.SetFileOffset(reader.GetFileOffset() + uttrPayloadSize);
    }
}